In [9]:
import bs4 as bs
import datetime as dt
import os
import pandas as pd
import numpy as np
import pandas_datareader.data as web
import pickle 
import requests

def save_sp500_tickers():
    resp = requests.get('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    tickers = []
    names = []
    symbols=[[]]
    for row in table.findAll('tr')[1:]:
        ticker = row.findAll('td')[0].text.replace('.', '-')
        ticker = ticker[:-1]
        tickers.append(ticker)
        name = row.findAll('td')[1].text
        name = name[:-1]
        names.append(name)

    with open("sp500tickers.pickle", "wb") as f:
        pickle.dump(symbols, f)
    symbols=np.column_stack((tickers,names))
    return symbols
symbols=save_sp500_tickers()


In [10]:
print(len(symbols))
print(symbols)

505
[['MMM' '3M Compan']
 ['ABT' 'Abbott Laboratorie']
 ['ABBV' 'AbbVie Inc']
 ...
 ['ZBH' 'Zimmer Biomet Holding']
 ['ZION' 'Zions Bancor']
 ['ZTS' 'Zoeti']]


In [11]:

def get_data_from_yahoo(reload_sp500=True):
    if reload_sp500:
        symbols = save_sp500_tickers()
    else:
        with open("sp500tickers.pickle", "rb") as f:
            symbols = pickle.load(f)
    if not os.path.exists('stock_dfs'):
        os.makedirs('stock_dfs')

    start = dt.datetime(2014, 1, 1)
    end = dt.datetime.now()
    for i in range(len(symbols)):
        ticker=symbols[i][0]
    # just in case your connection breaks, we'd like to save our progress!
        if not os.path.exists('stock_dfs/{}.csv'.format(ticker)):
            
            df = web.DataReader(ticker, 'yahoo', start, end)
            df.reset_index(inplace=True)
            df.set_index("Date", inplace=True)
            df['Name'] = symbols[i][1]
            df['Symbol'] = symbols[i][0]
            df.to_csv('stock_dfs/{}.csv'.format(ticker))
        else:
            print('Already have {}'.format(ticker))


get_data_from_yahoo()

Already have MMM
Already have ABT
Already have ABBV
Already have ABMD
Already have ACN
Already have ATVI
Already have ADBE
Already have AMD
Already have AAP
Already have AES
Already have AFL
Already have A
Already have APD
Already have AKAM
Already have ALK
Already have ALB
Already have ARE
Already have ALXN
Already have ALGN
Already have ALLE
Already have ADS
Already have LNT
Already have ALL
Already have GOOGL
Already have GOOG
Already have MO
Already have AMZN
Already have AMCR
Already have AEE
Already have AAL
Already have AEP
Already have AXP
Already have AIG
Already have AMT
Already have AWK
Already have AMP
Already have ABC
Already have AME
Already have AMGN
Already have APH
Already have ADI
Already have ANSS
Already have ANTM
Already have AON
Already have AOS
Already have APA
Already have AIV
Already have AAPL
Already have AMAT
Already have APTV
Already have ADM
Already have ANET
Already have AJG
Already have AIZ
Already have T
Already have ATO
Already have ADSK
Already have AD

In [12]:
def compile_data():
    '''
    with open("sp500tickers.pickle", "rb") as f:
        symbols= pickle.load(f)
    '''
    #symbols = save_sp500_tickers()
    main_df = pd.DataFrame()

    for i in range(len(symbols)):
        ticker=symbols[i][0]
        df = pd.read_csv('stock_dfs/{}.csv'.format(ticker))
        df.set_index('Date', inplace=True)
        df.drop(['Open', 'High', 'Low', 'Adj Close', 'Volume'],1,inplace=True)
       
        if main_df.empty:
            main_df = df
        else:
            main_df=pd.concat([main_df,df])

    print(main_df.head())
    main_df.to_csv('sp500_append_closes.csv')


compile_data()

                 Close       Name Symbol
Date                                    
2014-01-02  138.130005  3M Compan    MMM
2014-01-03  138.449997  3M Compan    MMM
2014-01-06  137.630005  3M Compan    MMM
2014-01-07  137.649994  3M Compan    MMM
2014-01-08  136.630005  3M Compan    MMM
