# Find cheap stocks


In [1]:
import pandas as pd
import calendar
import datetime as dt
import pandas_datareader.data as web
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import style
import yfinance as yf
import bs4 as bs
import pickle
import requests
import sys
import os
from tqdm import tqdm # progress

# Notebook-wide matplotlib defaults applied to every figure drawn below.
style.use('ggplot')
mpl.rcParams['figure.figsize'] = (16.0, 9.0)  # (width, height)
mpl.rcParams['font.size'] = 12
mpl.rcParams['legend.fontsize'] = 'large'
mpl.rcParams['figure.titlesize'] = 'medium'

def prev_weekday(adate):
    """Return ``adate`` moved back to the nearest weekday (Monday-Friday).

    A value that already falls on a weekday is returned unchanged; a Saturday
    or Sunday is stepped back one day at a time until it lands on Friday.
    """
    one_day = dt.timedelta(days=1)
    # weekday() numbers Monday..Sunday as 0..6, so 5 and 6 are the weekend.
    while adate.weekday() >= 5:
        adate = adate - one_day
    return adate

# The three reference dates whose prices are compared throughout the notebook.
date_start = prev_weekday(dt.datetime(2015, 1, 6))
date2 = dt.datetime(2020, 2, 19)
# NOTE(review): datetime.now() keeps the time of day, so date_end is a full
# timestamp rather than a midnight date.
date_end = prev_weekday(dt.datetime.now())

print('Lasted weekday:', date_end)
      
def save_sp500_tickers():
    """Scrape the S&P 500 constituent symbols from Wikipedia.

    Despite its name the function does not write anything to disk; it only
    returns the list and prints how many symbols were found.

    Returns:
        list[str]: ticker symbols taken from the first cell of every table
        row, stripped of surrounding whitespace and with ``.`` replaced by
        ``-`` (presumably the spelling Yahoo Finance expects, e.g. ``BRK-B``).

    Raises:
        requests.RequestException: if the page cannot be fetched, the request
            times out, or the server answers with an HTTP error status.
        RuntimeError: if the constituents table is not present in the page.
    """
    resp = requests.get(
        'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies',
        timeout=30,  # never hang the notebook on a stalled connection
    )
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    resp.raise_for_status()

    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    if table is None:
        raise RuntimeError('S&P 500 constituents table not found on the Wikipedia page')

    tickers = []
    # The first <tr> is the header row.
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        if not cells:
            # Rows made only of <th> cells carry no ticker.
            continue
        tickers.append(cells[0].text.strip().replace('.', '-'))

    print('Total number: ', len(tickers))
    return tickers
            
# Full S&P 500 symbol list. The trailing [:] is a whole-list slice; presumably
# it was left in so a sub-range can be substituted while testing.
tickers = save_sp500_tickers()[:]
# Filled by a later cell with the most recent close price of each symbol.
today_price = []

  from pandas.util.testing import assert_frame_equal


Lasted weekday: 2020-06-08 19:21:55.069693
Total number:  505


In [12]:
def compile_data(tickers = tickers, avg_period = 7):
    """Build a one-row-per-ticker table of prices at three reference dates.

    For every symbol, daily data is downloaded with ``yf.download`` for a
    window of 7 days either side of the notebook-level ``date_start``,
    ``date2`` and ``date_end``. One 'Adj Close' value is kept per window,
    the three values are labelled by year-month, and the latest 'Volume'
    plus the sector/industry from ``yf.Ticker(ticker).info`` are attached.

    Args:
        tickers: sequence of ticker symbols; defaults to the notebook-level
            ``tickers`` list as it was when this function was defined.
        avg_period: window length (in rows) of the trailing rolling mean
            applied to 'Adj Close' before a row is picked. A falsy value
            skips the smoothing and uses the last row of every window.

    Returns:
        tuple: ``(main_df, anomaly)``. ``main_df`` is indexed by 'Ticker';
        ``anomaly`` lists the symbols skipped because data was missing or
        contained nulls.
    """
    dDay = 7
    anomaly = []
    rows = []  # one single-row frame per accepted ticker, concatenated once
    shiftDay = dt.timedelta(days=dDay)

    for count, ticker in enumerate(tqdm(tickers[:])):
        print(count, ': ', ticker)

        # Download stock data around the 3 reference dates
        data1 = yf.download(ticker, start=date_start-shiftDay, end=date_start+shiftDay)
        data2 = yf.download(ticker, start=date2-shiftDay, end=date2+shiftDay)
        data3 = yf.download(ticker, start=date_end-shiftDay, end=date_end+shiftDay)

        # If data was not available, record the ticker and move on
        if data1.empty or data2.empty or data3.empty:
            anomaly.append(ticker)
            continue

        if avg_period:
            for frame in (data1, data2, data3):
                frame['Adj Close'] = frame['Adj Close'].rolling(window=avg_period, min_periods=0).mean()
                frame.dropna(inplace=True)

            # dropna can remove every row; indexing an empty frame would raise.
            if data1.empty or data2.empty or data3.empty:
                anomaly.append(ticker)
                continue

            # Each window is sampled at its own middle row. Reusing data1's
            # middle index for data2 raised IndexError when data2 was shorter.
            mid1 = len(data1.index) // 2
            mid2 = len(data2.index) // 2
            data = pd.concat([data1.iloc[[mid1], :], data2.iloc[[mid2], :], data3.iloc[[-1], :]])
        else:
            data = pd.concat([data1.tail(1), data2.tail(1), data3.tail(1)])

        # get the adjusted close prices and the volume on the last day
        volume = pd.DataFrame(data3['Volume'].tail(1))
        data = pd.DataFrame(data['Adj Close'])

        # Reshape volume into a 1x1 frame: index = ticker, column = 'Volume'
        volume.reset_index(drop=True, inplace=True)
        volume.rename(columns={'Volume': ticker}, inplace=True)
        volume['Volume'] = 'Volume'
        volume.set_index(["Volume"], inplace=True)
        volume = volume.T
        data.rename(columns={'Adj Close': ticker}, inplace=True)

        # One row (the ticker) with one column per sampled date
        df = data.T

        # Check null values
        if len(df.columns) < 3 or df.isnull().values.any():
            anomaly.append(ticker)
            continue

        # Label the three date columns by year-month so that tickers sampled
        # on slightly different days still share the same columns.
        df.rename(
            columns={col: col.strftime("%Y-%m") for col in df.columns[:3]},
            inplace=True,
            errors="raise",
        )

        # add volume and sector columns
        df = df.join(volume, how='outer')

        try:
            stockInfo = yf.Ticker(ticker).info
            df['Sector'] = [stockInfo['sector']]
            df['Industry'] = [stockInfo['industry']]
        except Exception:
            # Best effort: sector data is optional. ``Exception`` (not a bare
            # except) keeps Ctrl-C able to stop the long-running loop.
            df['Sector'] = df['Industry'] = None

        rows.append(df)

    # A single concat replaces the per-ticker DataFrame.append, which was
    # removed in pandas 2.0 and copied the whole table on every iteration.
    main_df = pd.concat(rows) if rows else pd.DataFrame()
    main_df.index.name = 'Ticker'
    return main_df, anomaly

In [None]:
# Scan the S&P 500 list and report which symbols had to be skipped.
df, anomaly = compile_data()
print('Number of anomalous tickers:', len(anomaly))
if anomaly:
    print('Anomalous tickers:', anomaly)

print(df.tail())

In [10]:
# Price ratios between the three snapshot columns, addressed by position:
# column 0 = oldest price, column 1 = middle price, column 2 = latest price.
df['Ratio1'] = df.iloc[:, 1].div(df.iloc[:, 0])
df['Ratio2'] = df.iloc[:, 2].div(df.iloc[:, 1])
df['Ratio12'] = df['Ratio1'].div(df['Ratio2'])

# Smallest Ratio2 first; ties are broken by the largest Ratio1.
df_sort = df.sort_values(['Ratio2', 'Ratio1'], ascending=[True, False])

print(df_sort.head())

# The CSV is rewritten on every run.
df_sort.to_csv('sp500_bargain.csv')

          2015-01    2020-02  2020-06       Volume              Sector  \
Ticker                                                                   
NCLH    46.392000  52.762000   18.936  109330900.0   Consumer Cyclical   
OXY     60.675123  40.803529   16.070  118124800.0              Energy   
COTY    17.661850  11.507415    4.650   32658500.0  Consumer Defensive   
CCL     38.579615  42.725340   18.182  113177600.0                None   
UAL     66.002000  80.479999   34.906  135203900.0                None   

                             Industry    Ratio1    Ratio2   Ratio12  
Ticker                                                               
NCLH                  Travel Services  1.137308  0.358895  3.168919  
OXY                     Oil & Gas E&P  0.672492  0.393838  1.707532  
COTY    Household & Personal Products  0.651541  0.404087  1.612376  
CCL                              None  1.107459  0.425555  2.602385  
UAL                              None  1.219357  0.433723  2.

## List of All Tickers
### Method 1

https://pypi.org/project/get-all-tickers/

### Method 2

A more complete alternative:

https://pypi.org/project/Yahoo-ticker-downloader/

https://github.com/Benny-/Yahoo-ticker-symbol-downloader

>`YahooTickerDownloader.py`


In [2]:
from get_all_tickers import get_tickers as gt

# Fetch the symbol list provided by the get_all_tickers package.
list_of_tickers = gt.get_tickers()
# # or if you want to save them to a CSV file
# gt.save_tickers()

print(len(list_of_tickers))

6372


In [3]:
# Directory that compile_data_all reads one '<ticker>.csv' price file from.
data_path = '../data/all_stock'
# Recompute the latest-weekday timestamp for this part of the notebook.
date_end = prev_weekday(dt.datetime.now())

def compile_data_all(tickers = tickers, avg_period = 7, get_sector_info = True):
    """Build a per-ticker bargain table from local CSV history plus fresh quotes.

    For each symbol the long-term history is read from
    ``data_path/<ticker>.csv`` and the most recent week is fetched with
    ``web.DataReader(ticker, 'yahoo', ...)``. Three 'Adj Close' values are
    sampled (start of the local history on or after ``date_start`` minus
    7 days, around ``date2``, and the latest quote) and turned into two
    ratios; the oldest price column itself is dropped from the result.

    Args:
        tickers: sequence of ticker symbols; defaults to the notebook-level
            ``tickers`` list as it was when this function was defined.
            Surrounding whitespace and the characters ``~`` and ``$`` are
            removed from every symbol.
        avg_period: window length (in rows) of the trailing rolling mean
            applied to 'Adj Close' of the two historical windows. A falsy
            value skips smoothing and uses the last row of every window.
        get_sector_info: when true, 'Sector' and 'Industry' columns are
            added from ``yf.Ticker(ticker).info`` (``None`` on failure).

    Returns:
        tuple: ``(main_df, anomaly)``. ``main_df`` is indexed by 'Ticker'
        and holds the ``date2`` price (labelled by year-month), the latest
        price (labelled with ``date_end``), 'Volume', the optional sector
        columns, 'Ratio1 in 5 yr' and 'Ratio2 since Feb'. ``anomaly`` lists
        the symbols skipped because a file or quote was missing or the data
        contained nulls.
    """
    dDay = 7
    anomaly = []
    rows = []  # one single-row frame per accepted ticker, concatenated once
    shiftDay = dt.timedelta(days=dDay)

    for ticker in tqdm(tickers[:]):
        ticker = ticker.strip().replace('~', '').replace('$', '')

        csv_path = data_path+'/{}.csv'.format(ticker)
        if not os.path.exists(csv_path):
            print(ticker, ' does not exist! Skip...')
            anomaly.append(ticker)
            continue
        df = pd.read_csv(csv_path)

        try:
            data3 = web.DataReader(ticker, 'yahoo', date_end-shiftDay, date_end)
            # ``columns=`` instead of the positional axis: pandas 2.0 made the
            # axis keyword-only, and the resulting TypeError used to be
            # swallowed here, turning every ticker into an anomaly.
            data3.drop(columns=['Open', 'High', 'Low', 'Close'], inplace=True)
        except Exception:
            # ``Exception`` rather than a bare except so Ctrl-C still stops
            # the multi-hour loop.
            print(ticker, 'No data fetched for', ticker, 'using YahooDailyReader')
            anomaly.append(ticker)
            continue

        df.set_index("Date", inplace=True)
        df.index = pd.to_datetime(df.index, format='%Y-%m-%d')

        # Keep only history from one window before date_start onwards.
        df = df.loc[(date_start-shiftDay):, :].copy()

        # data1: first 2*dDay+1 rows of the remaining history (for symbols
        # whose history starts later this is simply their earliest rows);
        # data2: rows within dDay days either side of date2.
        data1 = df.iloc[:(2*dDay+1), :].copy()
        data2 = df.loc[(date2-shiftDay):(date2+shiftDay), :].copy()

        # If data was not available, record the ticker and move on
        if data1.empty or data2.empty or data3.empty:
            anomaly.append(ticker)
            continue

        if avg_period:
            data1['Adj Close'] = data1['Adj Close'].rolling(window=avg_period, min_periods=0).mean()
            data2['Adj Close'] = data2['Adj Close'].rolling(window=avg_period, min_periods=0).mean()
            data1.dropna(inplace=True)
            data2.dropna(inplace=True)
            data3.dropna(inplace=True)

            # dropna can remove every row; indexing an empty frame would raise.
            if data1.empty or data2.empty or data3.empty:
                anomaly.append(ticker)
                continue

            mid = (len(data2.index)-1) // 2
            # Same rows as before whenever they exist; clamped to the last
            # row so that short windows no longer raise IndexError.
            row1 = min(mid, len(data1.index)-1)
            row2 = min(mid+1, len(data2.index)-1)
            data = pd.concat([data1.iloc[[row1], :], data2.iloc[[row2], :], data3.iloc[[-1], :]])
        else:
            data = pd.concat([data1.tail(1), data2.tail(1), data3.tail(1)])

        # get the adjusted close prices and the volume on the last day
        volume = pd.DataFrame(data3['Volume'].tail(1))
        data = pd.DataFrame(data['Adj Close'])

        # Reshape volume into a 1x1 frame: index = ticker, column = 'Volume'
        volume.reset_index(drop=True, inplace=True)
        volume.rename(columns={'Volume': ticker}, inplace=True)
        volume['Volume'] = 'Volume'
        volume.set_index(["Volume"], inplace=True)
        volume = volume.T
        data.rename(columns={'Adj Close': ticker}, inplace=True)

        # One row (the ticker) with one column per sampled date
        df = data.T

        # Check null values
        if len(df.columns) < 3 or df.isnull().values.any():
            anomaly.append(ticker)
            continue

        # Column 0 keeps its real date (needed for the year count below);
        # column 1 is labelled by year-month and column 2 with date_end so
        # that all tickers share the same column labels.
        df.rename(columns={df.columns[1]:df.columns[1].strftime("%Y-%m")}, inplace=True, errors="raise")
        df.rename(columns={df.columns[2]:date_end}, inplace=True, errors="raise")

        # add volume and sector columns
        df = df.join(volume, how='outer')

        if get_sector_info:
            try:
                stockInfo = yf.Ticker(ticker).info
                df['Sector'] = [stockInfo['sector']]
                df['Industry'] = [stockInfo['industry']]
            except Exception:
                # Best effort: sector data is optional.
                df['Sector'] = df['Industry'] = None

        # Add ratios. Ratio1 is rescaled to a 5-year horizon from the actual
        # span between the first sampled date and date2.
        first_date = df.columns[0]
        years = (date2 - first_date).days / 365
        if years < 1e-2:
            # (Almost) no history before date2: a huge span drives the
            # exponent towards 0 and therefore the ratio towards 1.
            years = 1e3
        df['Ratio1 in 5 yr'] = (df.iloc[:, 1] / df.iloc[:, 0]).pow(5 / years)
        df['Ratio2 since Feb'] = df.iloc[:, 2] / df.iloc[:, 1]

        # The oldest price column differs per ticker, so it is not kept.
        df.drop(columns=[first_date], inplace=True)

        rows.append(df)

    # A single concat replaces the per-ticker DataFrame.append, which was
    # removed in pandas 2.0 and copied the whole table on every iteration.
    main_df = pd.concat(rows) if rows else pd.DataFrame()
    main_df.index.name = 'Ticker'
    return main_df, anomaly

# Trial run on five symbols before processing the complete ticker list.
df_all, anomaly_all = compile_data_all(['GMBL', 'ADXN', 'ADT', 'ALTG', 'BPYPN'])
print(df_all)

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:13<00:00,  2.80s/it]

          2020-02  2020-06-08 19:22:06.121972   Volume       Sector  \
Ticker                                                                
GMBL     5.825000                        6.52   811271         None   
ADXN    11.300000                        7.28        0                
ADT      7.007494                        8.53  3049096  Industrials   
ALTG    10.281800                        8.06   135064         None   
BPYPN   24.706290                       20.17    76548  Real Estate   

                              Industry  Ratio1 in 5 yr  Ratio2 since Feb  
Ticker                                                                    
GMBL                              None        0.019205          1.119313  
ADXN                                          1.000000          0.644248  
ADT     Security & Protection Services        0.376004          1.217268  
ALTG                              None        1.652230          0.783909  
BPYPN             Real Estate Services        1.0000




In [None]:
# Full-market scan. Sector lookups are switched off for this run.
sector_info = False

df_all, anomaly_all = compile_data_all(list_of_tickers, get_sector_info=sector_info)
print('Number of anomalous tickers:', len(anomaly_all))
if anomaly_all:
    print('Anomalous tickers:', anomaly_all)

print(df_all.head())

df_all['Ratio1/Ratio2'] = df_all['Ratio1 in 5 yr'].div(df_all['Ratio2 since Feb'])

# Smallest 'Ratio2 since Feb' first, ties broken by the largest
# 'Ratio1 in 5 yr'; grouped by sector when sector data was collected.
df_all_sort = (
    df_all.sort_values(by=['Sector', 'Ratio2 since Feb', 'Ratio1 in 5 yr'], ascending=[True, True, False])
    if sector_info
    else df_all.sort_values(by=['Ratio2 since Feb', 'Ratio1 in 5 yr'], ascending=[True, False])
)

print(df_all_sort.head())

# The output file name records whether sector columns are included.
df_all_sort.to_csv('./all_bargain_sectorInfo.csv' if sector_info else './all_bargain.csv')

  1%|▋                                                                             | 56/6372 [02:33<4:53:58,  2.79s/it]

In [None]:
# Add sector/industry columns to the CSV written by the full-market scan.
df_all2 = pd.read_csv('./all_bargain.csv')

# Pre-sized so that symbols whose lookup fails simply keep None.
sector = [None] * len(df_all2.index)
industry = [None] * len(df_all2.index)

for i, ticker in enumerate(tqdm(list(df_all2['Ticker']))):

    try:
        stockInfo = yf.Ticker(ticker).info
        sector[i] = stockInfo['sector']
        industry[i] = stockInfo['industry']
    except Exception:
        # Best effort: leave None for this symbol. ``Exception`` rather than
        # a bare except so Ctrl-C can still interrupt the long loop.
        pass

print(sector, industry)

df_all2['Sector'] = sector
df_all2['Industry'] = industry

df_all_sort2 = df_all2.sort_values(by=['Sector', 'Ratio2 since Feb', 'Ratio1 in 5 yr'], ascending=[True, True, False])

print(df_all_sort2.head())

# NOTE(review): 'Ticker' is an ordinary column here, so the integer row index
# is written as an extra unnamed first column; confirm whether that is wanted.
df_all_sort2.to_csv('./all_bargain_sectorInfo.csv')

In [None]:
# Batch variant: download all tickers in one yf.download call per date.
# tickers = ['AAPL', 'XOM']
# NOTE(review): despite the name this is a 3-day look-back, not a week.
prevWeek = dt.timedelta(days=3)

# Download stock data at 3 periods
data1 = yf.download(tickers, start=date_start-prevWeek, end=date_start)
data2 = yf.download(tickers, start=date2-prevWeek, end=date2)
data3 = yf.download(tickers, start=date_end-prevWeek, end=date_end)

# get the adjusted close price and volume on the last day
# Column labels are indexed as name[0] / x[1] below, i.e. they are treated as
# two-part labels; presumably (field, ticker) - verify against yf.download.
# The 'Adj Close' mask is computed from data1 and reused for data2 and data3,
# which assumes all three frames have the same column layout.
adjClose_col = [name[0] in ['Adj Close'] for name in data1.columns]
volume_col = [name[0] in ['Volume'] for name in data3.columns]
# volume must be extracted before data3 is overwritten two lines further down.
volume = data3[data3.columns[volume_col]].tail(1)
data1 = data1[data1.columns[adjClose_col]].tail(1)
data2 = data2[data2.columns[adjClose_col]].tail(1)
data3 = data3[data3.columns[adjClose_col]].tail(1)

# process volume: keep the second label part as column name, then turn the
# single row into a column named 'Volume'
volume.columns = [x[1] for x in volume.columns]
volume.reset_index(drop=True, inplace=True)
volume['Volume'] = 'Volume'
volume.set_index(["Volume"], inplace=True)
volume = volume.T

# combine price and volume and transpose
df = pd.concat([data1, data2, data3]).T
df.index = [x[1] for x in df.index]
df = df.join(volume, how='outer')
df.index.name = 'Ticker'
print(df, df.head())

In [None]:
# DataFrame.info() prints its report itself and returns None, so passing it
# to print() emitted a stray "None" and showed the report out of order.
print(df.head())
df.info()
print(df.describe())

In [None]:
def compile_data(tickers):
    """Collect the last 'Adj Close' before each reference date, per ticker.

    Simplified variant without smoothing, volume or sector data. Defining it
    rebinds the name ``compile_data`` and so replaces the two-argument
    version defined earlier in this notebook.

    For every symbol the 7 days before the notebook-level ``date_start``,
    ``date2`` and ``date_end`` are downloaded with ``yf.download`` and the
    last available 'Adj Close' of each download is kept.

    Args:
        tickers: iterable of ticker symbols.

    Returns:
        pandas.DataFrame: one row per ticker (index named 'Ticker') with one
        column per sampled date; empty if ``tickers`` is empty.
    """
    prevWeek = dt.timedelta(days=7)
    rows = []  # one single-row frame per ticker, concatenated once

    for ticker in tickers:
        closes = []
        # All three downloads use the same call; the first one used to pass
        # group_by='ticker' while the other two did not.
        for anchor in (date_start, date2, date_end):
            data = yf.download(ticker, start=anchor-prevWeek, end=anchor)
            closes.append(data['Adj Close'].tail(1))

        # Series of up to three prices named after the ticker -> one row.
        prices = pd.concat(closes).rename(ticker)
        rows.append(prices.to_frame().T)

    # A single concat replaces DataFrame.append, which was removed in
    # pandas 2.0 and copied the whole table on every iteration.
    main_df = pd.concat(rows) if rows else pd.DataFrame()
    main_df.index.name = 'Ticker'
    return main_df
    
# Run the simplified variant on the S&P 500 list; the returned frame is not
# assigned to a name.
compile_data(tickers)

In [None]:
# Trial on two symbols: append each one's most recent close to today_price.
# Note that this rebinds the notebook-level name df on every iteration.
for symbol in ['AAPL', 'XOM']:
    symbol = symbol.replace('.', '-')
    company = yf.Ticker(symbol)
    df = company.history(period="7d")
    df.dropna(inplace=True)
    # Last row that survived dropna -> latest complete record.
    today_price.append(df.iloc[-1, df.columns.get_loc('Close')])

In [None]:
# Latest close for every symbol in final_df, then the dividend as a
# percentage of that price.
# NOTE(review): final_df is not defined anywhere in the visible notebook.
today_price = []

for symbol in list(final_df['symbol']):
    symbol = symbol.replace('.', '-')
    company = yf.Ticker(symbol)
    df2 = company.history(period="7d")
    df2.dropna(inplace=True)
    # Last row that survived dropna -> latest complete record.
    today_price.append(df2.iloc[-1, df2.columns.get_loc('Close')])

# today_price is in the same order as final_df['symbol'].
final_df['Today_Price'] = today_price
final_df['Dividend_Perc'] = final_df['dividend_Rate'] / final_df['Today_Price'] * 100.0

In [None]:
# Output all stocks that dividend > threshold
threshold = 0.8
print(final_df[final_df['Dividend_Perc'] >= threshold])
# final_df[final_df['Dividend_Perc'] >= threshold].to_csv('stock_dividend.csv')
final_df[final_df['Dividend_Perc'] >= threshold].to_csv('stock_dividend_{}.csv'.format(day_today))


plt.plot(final_df['Dividend_Perc'], color='blue')
plt.plot(final_df.index, [1]*len(final_df.index), color='green', alpha=0.25)
plt.ylim([0, 5])

In [None]:
# Latest adjusted close per symbol via pandas-datareader, using a one-week
# look-back from now. The former first line,
# ``start = dt.datetime(year, month, 1)``, was dead code: it was overwritten
# two lines later and referenced ``year``/``month``, which are not defined
# anywhere in the visible notebook.
end = dt.datetime.now()
start = end - dt.timedelta(days=7)
today_price = []

for symbol in list(final_df['symbol']):
    symbol = symbol.replace('.', '-')
    df2 = web.DataReader(symbol, 'yahoo', start, end)
    # Make sure the frame is indexed by its 'Date' column.
    df2.reset_index(inplace=True)
    df2.set_index("Date", inplace=True)
    # Last row -> most recent adjusted close in the window.
    today_price.append(df2.iloc[-1, df2.columns.get_loc('Adj Close')])

# today_price is in the same order as final_df['symbol'].
final_df['Today_Price'] = today_price

print(final_df.tail())

In [None]:
# Dividend rate relative to the latest price (a plain ratio, not scaled to percent).
plt.plot(final_df['dividend_Rate'] / final_df['Today_Price'] )