In [35]:
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import pandas_datareader.data as web
import urllib as u
import urllib.request
from bs4 import BeautifulSoup as bs
import datetime as dt
import numpy as np
import os
import pickle
import requests
import yfinance as yf

In [36]:
def get_price2book( symbol ):
    try:
        url = r'http://finviz.com/quote.ashx?t={}'\
        				.format(symbol.lower())
        html = u.request.urlopen(url).read()
        soup = bs(html, 'lxml')
        # Change the text below to get a diff metric
        pb =  soup.find(text = r'P/B')
        pb_ = pb.find_next(class_='snapshot-td2').text
        roe =  soup.find(text = r'ROE')
        roe_ = roe.find_next(class_='snapshot-td2').text
        mc =  soup.find(text = r'Market Cap')
        mc_ = mc.find_next(class_='snapshot-td2').text
        de =  soup.find(text = r'Debt/Eq')
        de_ = de.find_next(class_='snapshot-td2').text
        dp =  soup.find(text = r'Dividend %')
        dp_ = dp.find_next(class_='snapshot-td2').text
        #print( '{} price to book = {} '.format(symbol, pb_) )
        #print( '{} roe = {} '.format(symbol, roe_) )
        return (pb_,roe_,mc_,de_,dp_)
    except Exception as e:
        print(e)

In [26]:
style.use('ggplot')


def save_sp500_tickers():
    resp = requests.get('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = bs(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    tickers = []
    for row in table.findAll('tr')[1:]:
        ticker = row.findAll('td')[0].text
        tickers.append(ticker)
    with open("sp500tickers.pickle", "wb") as f:
        pickle.dump(tickers, f)
    return tickers


save_sp500_tickers()

['MMM\n',
 'ABT\n',
 'ABBV\n',
 'ABMD\n',
 'ACN\n',
 'ATVI\n',
 'ADBE\n',
 'AMD\n',
 'AAP\n',
 'AES\n',
 'AFL\n',
 'A\n',
 'APD\n',
 'AKAM\n',
 'ALK\n',
 'ALB\n',
 'ARE\n',
 'ALXN\n',
 'ALGN\n',
 'ALLE\n',
 'AGN\n',
 'ADS\n',
 'LNT\n',
 'ALL\n',
 'GOOGL\n',
 'GOOG\n',
 'MO\n',
 'AMZN\n',
 'AMCR\n',
 'AEE\n',
 'AAL\n',
 'AEP\n',
 'AXP\n',
 'AIG\n',
 'AMT\n',
 'AWK\n',
 'AMP\n',
 'ABC\n',
 'AME\n',
 'AMGN\n',
 'APH\n',
 'ADI\n',
 'ANSS\n',
 'ANTM\n',
 'AON\n',
 'AOS\n',
 'APA\n',
 'AIV\n',
 'AAPL\n',
 'AMAT\n',
 'APTV\n',
 'ADM\n',
 'ANET\n',
 'AJG\n',
 'AIZ\n',
 'T\n',
 'ATO\n',
 'ADSK\n',
 'ADP\n',
 'AZO\n',
 'AVB\n',
 'AVY\n',
 'BKR\n',
 'BLL\n',
 'BAC\n',
 'BK\n',
 'BAX\n',
 'BDX\n',
 'BRK.B\n',
 'BBY\n',
 'BIIB\n',
 'BLK\n',
 'BA\n',
 'BKNG\n',
 'BWA\n',
 'BXP\n',
 'BSX\n',
 'BMY\n',
 'AVGO\n',
 'BR\n',
 'BF.B\n',
 'CHRW\n',
 'COG\n',
 'CDNS\n',
 'CPB\n',
 'COF\n',
 'CPRI\n',
 'CAH\n',
 'KMX\n',
 'CCL\n',
 'CARR\n',
 'CAT\n',
 'CBOE\n',
 'CBRE\n',
 'CDW\n',
 'CE\n',
 'CNC\n',
 'CNP\

In [47]:
def get_data_from_yahoo(reload_sp500=False):
    if reload_sp500:
        tickers = save_sp500_tickers()
    else:
        with open("sp500tickers.pickle", "rb") as f:
            tickers = pickle.load(f)
    if not os.path.exists('stock_dfs'):
        os.makedirs('stock_dfs')

    start = dt.datetime(2010, 1, 1)
    end = dt.datetime.now()
    for ticker in tickers:
        print(ticker.rstrip())
        print(format(ticker))
        # just in case your connection breaks, we'd like to save our progress!
        if not os.path.exists('stock_dfs/{}.csv'.format(ticker.splitlines()[0])):
            df = yf.download(ticker, start, end)
            #df.reindex(columns=cols)
            df.reset_index(inplace=True)
            df.set_index("Date", inplace=True)
            df = df.drop("Adj Close", axis=1)
            print(df)
            df.to_csv('stock_dfs/{}.csv'.format(ticker))
        else:
            print('Already have {}'.format(ticker))

get_data_from_yahoo()
            
def compile_data():
    with open("sp500tickers.pickle", "rb") as f:
        tickers = pickle.load(f)

    main_df = pd.DataFrame()

    for count, ticker in enumerate(tickers):
        df = pd.read_csv('stock_dfs/{}.csv'.format(ticker))
        df.set_index('Date', inplace=True)

        df.rename(columns={'Adj Close': ticker}, inplace=True)
        df.drop(['Open', 'High', 'Low', 'Close', 'Volume'], 1, inplace=True)

        if main_df.empty:
            main_df = df
        else:
            main_df = main_df.join(df, how='outer')

        if count % 10 == 0:
            print(count)
    print(main_df.head())
    main_df.to_csv('sp500_joined_closes.csv')


def visualize_data():
    df = pd.read_csv('sp500_joined_closes.csv')
    df_corr = df.corr()
    print(df_corr.head())
    df_corr.to_csv('sp500corr.csv')
    data1 = df_corr.values
    fig1 = plt.figure()
    ax1 = fig1.add_subplot(111)

    heatmap1 = ax1.pcolor(data1, cmap=plt.cm.RdYlGn)
    fig1.colorbar(heatmap1)

    ax1.set_xticks(np.arange(data1.shape[1]) + 0.5, minor=False)
    ax1.set_yticks(np.arange(data1.shape[0]) + 0.5, minor=False)
    ax1.invert_yaxis()
    ax1.xaxis.tick_top()
    column_labels = df_corr.columns
    row_labels = df_corr.index
    ax1.set_xticklabels(column_labels)
    ax1.set_yticklabels(row_labels)
    plt.xticks(rotation=90)
    heatmap1.set_clim(-1, 1)
    plt.tight_layout()
    plt.show()



#visualize_data()

MMM
MMM

[*********************100%***********************]  1 of 1 completed
                  Open        High         Low       Close   Volume
Date                                                               
2010-01-04   83.089996   83.449997   82.669998   83.019997  3043700
2010-01-05   82.800003   83.230003   81.699997   82.500000  2847000
2010-01-06   83.879997   84.599998   83.510002   83.669998  5268500
2010-01-07   83.320000   83.760002   82.120003   83.730003  4470100
2010-01-08   83.690002   84.320000   83.300003   84.320000  3405800
...                ...         ...         ...         ...      ...
2020-04-07  147.500000  150.639999  144.399994  144.600006  5496300
2020-04-08  145.729996  149.720001  143.910004  148.990005  3071100
2020-04-09  148.460007  149.729996  146.539993  147.779999  4784200
2020-04-13  147.500000  148.580002  145.300003  146.399994  2642300
2020-04-14  149.619995  150.979996  148.050003  150.139999  3317700

[2587 rows x 5 columns]


OSError: [Errno 22] Invalid argument: 'stock_dfs/MMM\n.csv'

In [16]:
def analyse_data_from_yahoo():
    with open("sp500tickers.pickle", "rb") as f:
        tickers = pickle.load(f)
    analysis = pd.DataFrame(columns=['drop ratio','pb','roe','market cap','price on 04-09','Debt/Eq','Dividend %'])
    #analysis = pd.DataFrame(columns=['tick', 'ratio'])
    i=0
    for ticker in tickers:
        i=i+1
        print(ticker)
        #print(df.shape)
        ticker = ticker[:-1]
        if os.path.exists('stock_dfs/{}.csv'.format(ticker)):
            df = pd.read_csv('stock_dfs/{}.csv'.format(ticker), parse_dates=True, index_col=0)
            #print(ticker,df.shape,df.shape[0] )
            df['10ma']=(df['Adj Close'].rolling(window=10, min_periods=0).mean())
            #analysis.loc[i] =  [ticker  ,  df['10ma'].iloc[320]/df['10ma'].iloc[1] ]
            #analysis[ticker]  = [  df['10ma'].iloc[320]/df['10ma'].iloc[1] ]
            if df.shape[0]==321:
                analysis.loc[ticker,'drop ratio']  = [  df.loc['2020-04-09','10ma']/df.loc['2020-01-06','10ma'] ]
                analysis.loc[ticker,'price on 04-09']  = [  df.loc['2020-04-09','Adj Close'] ]
            print(ticker,i)
            try:
                temp=pbratio4.get_price2book(ticker)    
                analysis.loc[ticker,'pb']  = [ temp[0] ]
                analysis.loc[ticker,'roe']  = [ temp[1] ]
                analysis.loc[ticker,'market cap']  = [ temp[2] ]
                analysis.loc[ticker,'Debt/Eq']  = [ temp[3] ]
                analysis.loc[ticker,'Dividend %']  = [ temp[4] ]
            except:
                print("An exception occurred")
        else:
            print('Already have {}'.format(ticker))
    
    analysis.to_csv("500_analysis.csv")

In [17]:
analyse_data_from_yahoo()

MMM

Already have MMM
ABT

Already have ABT
ABBV

Already have ABBV
ABMD

Already have ABMD
ACN

Already have ACN
ATVI

Already have ATVI
ADBE

Already have ADBE
AMD

Already have AMD
AAP

Already have AAP
AES

Already have AES
AFL

Already have AFL
A

Already have A
APD

Already have APD
AKAM

Already have AKAM
ALK

Already have ALK
ALB

Already have ALB
ARE

Already have ARE
ALXN

Already have ALXN
ALGN

Already have ALGN
ALLE

Already have ALLE
AGN

Already have AGN
ADS

Already have ADS
LNT

Already have LNT
ALL

Already have ALL
GOOGL

Already have GOOGL
GOOG

Already have GOOG
MO

Already have MO
AMZN

Already have AMZN
AMCR

Already have AMCR
AEE

Already have AEE
AAL

Already have AAL
AEP

Already have AEP
AXP

Already have AXP
AIG

Already have AIG
AMT

Already have AMT
AWK

Already have AWK
AMP

Already have AMP
ABC

Already have ABC
AME

Already have AME
AMGN

Already have AMGN
APH

Already have APH
ADI

Already have ADI
ANSS

Already have ANSS
ANTM

Already have ANTM
AON

A

Already have WHR
WMB

Already have WMB
WLTW

Already have WLTW
WYNN

Already have WYNN
XEL

Already have XEL
XRX

Already have XRX
XLNX

Already have XLNX
XYL

Already have XYL
YUM

Already have YUM
ZBRA

Already have ZBRA
ZBH

Already have ZBH
ZION

Already have ZION
ZTS

Already have ZTS


'0.8.1'