In [None]:
import datetime
import os

import bs4
import requests

from sqlite_methods import *
def _first_link_text(cell):
    """Return the text of the first <a> in ``cell``, or the cell's stripped text if it has no link."""
    link = cell.find("a")
    return link.text if link is not None else cell.text.strip()


def obtain_parse_wiki_snp500():
    """
    Download and parse the Wikipedia list of S&P500
    constituents using requests and BeautifulSoup.

    Returns a list of tuples, one per constituent row, shaped as
    (ticker, "stock", name, sector, "USD", now, now), where ``now`` is the
    naive UTC timestamp captured when the function is called.

    Raises a requests exception if the download times out or Wikipedia
    answers with an error status, and IndexError if the page has no table.
    """
    # Stores the current time, for the created_at record. This yields the same
    # naive UTC value as the deprecated datetime.utcnow().
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    # Use requests and BeautifulSoup to download the
    # list of S&P500 companies and obtain the symbol table
    response = requests.get(
        "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies",
        timeout=30,  # never hang the notebook on a stalled connection
    )
    # Fail here with a clear HTTP error rather than later while parsing an
    # error page that does not contain the expected table.
    response.raise_for_status()
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = bs4.BeautifulSoup(response.text, "html.parser")
    # This selects the first table, using CSS Selector syntax
    # and then ignores the header row ([1:])
    rows = soup.select("table")[0].select("tr")[1:]
    # Obtain the symbol information for each
    # row in the S&P500 constituent table
    symbols = []
    for row in rows:
        tds = row.select("td")
        # Skip rows that do not carry the cells read below (e.g. extra
        # header or spacer rows) instead of raising IndexError.
        if len(tds) < 4:
            continue
        # NOTE(review): the column positions below are tied to the layout of
        # the Wikipedia table; confirm index 3 is still the sector column.
        symbols.append(
            (
                _first_link_text(tds[0]),  # Ticker
                "stock",
                _first_link_text(tds[1]),  # Name
                tds[3].text.strip(),  # Sector, without the cell's surrounding whitespace
                "USD",
                now,
                now,
            )
        )
    return symbols

def obtain_list_of_db_tickers():
    """
    Fetch every (id, ticker symbol) pair stored in the ``symbol`` table.

    Reads through the module-level connection ``con`` and returns the
    rows as a list of 2-tuples.
    """
    query = "SELECT id, ticketer FROM symbol"
    with con:
        cursor = con.cursor()
        cursor.execute(query)
        rows = cursor.fetchall()
    # Rebuild each row as a plain tuple holding only the two selected columns.
    pairs = []
    for row in rows:
        pairs.append((row[0], row[1]))
    return pairs

In [None]:
import yfinance as yf
import pandas as pd
# Open the database connection; obtain_list_of_db_tickers() reads it through
# the module-level name `con`. Then keep only the symbol of each (id, symbol) pair.
con = create_connection()
tickerSymbols = [symbol for _, symbol in obtain_list_of_db_tickers()]
# Per-ticker caches keyed by symbol, filled in by the scraping cells below.
daily_data = {}
ticker_data = {}

In [None]:
#Scrape trade data for each stock
# Iterate the symbols directly: wrapping them in enumerate() only to discard
# the index rebinds `_`, which IPython uses for the last cell output.
# NOTE(review): Yahoo writes share classes with '-' (e.g. BRK-B) while some
# sources use '.', so confirm the stored symbols match Yahoo's format.
for t in tickerSymbols:
    # Symbols already in the cache are skipped, so re-running this cell after
    # an interruption only downloads what is still missing.
    if t not in daily_data:
        ticker = yf.Ticker(t)
        # 5-minute bars over the last 60 days.
        daily_data[t] = ticker.history(period='60d', interval='5m')

In [None]:
#Scrape ticker metadata
# Iterate the symbols directly: wrapping them in enumerate() only to discard
# the index rebinds `_`, which IPython uses for the last cell output.
for t in tickerSymbols:
    # Symbols already in the cache are skipped, so re-running this cell after
    # an interruption only fetches what is still missing.
    if t not in ticker_data:
        ticker = yf.Ticker(t)
        # Build the two-column (Attribute, Recent) table straight from the
        # info mapping's items. An empty mapping then yields an empty frame
        # with both columns instead of failing when the columns are renamed.
        ticker_data[t] = pd.DataFrame(
            list(ticker.info.items()), columns=['Attribute', 'Recent']
        )

In [None]:
#Save all data
# Each ticker gets its own folder: snp500/<ticker>/data.csv holds the price
# history and snp500/<ticker>/ticker.csv holds the metadata table.
for t in tickerSymbols:
    path = os.path.join('snp500', t)
    # makedirs also creates the missing 'snp500' parent, and exist_ok keeps
    # re-runs idempotent. Real failures (e.g. permissions) are raised here
    # rather than hidden until to_csv() hits a missing directory.
    os.makedirs(path, exist_ok=True)
    # reset_index() turns the history frame's index into a regular column so
    # it is kept in the CSV even though index=False is passed.
    data = daily_data[t].reset_index()
    ticker = ticker_data[t]
    data.to_csv(os.path.join(path, 'data.csv'), index=False)
    ticker.to_csv(os.path.join(path, 'ticker.csv'), index=False)