# In [1]:
import io
import os
from datetime import date

import pandas as pd
import requests
from bs4 import BeautifulSoup
from google.cloud import storage
from tiingo import TiingoClient


#Generate a list of tickers for all S&P 500 companies plus SPY index
def gen_ticker_list():
    """Return the ticker symbols of the S&P 500 constituents plus 'SPY'.

    The symbols are scraped from the "Symbol" column of the first table
    parsed from Wikipedia's "List of S&P 500 companies" page. Every "." in
    a symbol is replaced with "-" (e.g. "BRK.B" becomes "BRK-B"), and 'SPY'
    is appended as the last element.

    Returns:
        list[str]: The normalized ticker symbols, ending with 'SPY'.

    Raises:
        requests.RequestException: If the page cannot be fetched (network
            failure, timeout, or a non-2xx HTTP status).
        ValueError: If the page contains no <table> element.
    """
    #Using BeautifulSoup scrape the Wiki page with a table of all the S&P 500 companies.
    # A timeout keeps the pipeline from hanging forever on a stalled connection.
    page = requests.get(
        "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies#S&P_500_component_stocks",
        timeout=30,
    )
    # Fail loudly on an error response instead of parsing an error page.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')
    tables = soup.find_all('table')
    if not tables:
        raise ValueError("No table found on the S&P 500 Wikipedia page")
    #Convert table to pandas dataframe
    # Newer pandas releases deprecate passing literal HTML to read_html, so
    # the markup is wrapped in a file-like object.
    html = "\n".join(str(table) for table in tables)
    df = pd.read_html(io.StringIO(html))[0]
    #Add all the symbols to a list, swapping "." for "-" in each symbol
    ticker_list = [symbol.replace(".", "-") for symbol in df["Symbol"].tolist()]
    #Add S&P 500 index
    ticker_list.append('SPY')
    return ticker_list
 
def prepare_csv(ticker, start_date='2019-12-31', end_date='2022-12-31', frequency='daily'):
    """Fetch historical prices for *ticker* from Tiingo as a DataFrame.

    Args:
        ticker: Ticker symbol to request from Tiingo.
        start_date: First date of the requested range (YYYY-MM-DD).
            Defaults to the range the pipeline has always used.
        end_date: Last date of the requested range (YYYY-MM-DD).
        frequency: Sampling frequency passed through to Tiingo.

    Returns:
        pandas.DataFrame: The price records returned by Tiingo, with an
        added "ticker" column holding *ticker* on every row.
    """
    #Tiingo API Config
    # SECURITY: credentials should come from the environment, not from source
    # control. The literal below is kept only as a fallback so existing runs
    # keep working; it is exposed in the repository history and should be
    # revoked, after which this fallback should be deleted.
    api_key = os.environ.get('TIINGO_API_KEY') or "0015ea3a0ed951cea8f45258393fd6b595327627"
    config = {
        'session': True,
        'api_key': api_key,
    }
    client = TiingoClient(config)
    #Pull stock data for the requested window
    historical_prices = client.get_ticker_price(
        ticker,
        fmt='json',
        startDate=start_date,
        endDate=end_date,
        frequency=frequency,
    )
    #Convert to pandas dataframe
    df = pd.DataFrame(historical_prices)
    # Tag every row so files for different tickers can be combined later.
    df["ticker"] = ticker
    return df

def load_csv(df, ticker):
    """Upload *df* as CSV text to the 'data_lake_stocks-data-pipeline' bucket.

    The object is stored under the name
    "<ticker> Three Year Daily Price History".

    Args:
        df: DataFrame to serialize with ``DataFrame.to_csv()``.
        ticker: Ticker symbol used to build the object name.
    """
    #GCS Config
    client = storage.Client()
    bucket = client.get_bucket('data_lake_stocks-data-pipeline')
    #Load dataframe to GCS as .csv
    blob = bucket.blob(f'{ticker} Three Year Daily Price History')
    # The second parameter of upload_from_string is the MIME content type,
    # so it must be a real media type rather than a free-text description.
    blob.upload_from_string(df.to_csv(), content_type='text/csv')

def main():
    """Download price history for every ticker and upload each one to GCS.

    Builds the ticker list with gen_ticker_list(), makes sure 'SPY' is in
    it exactly once, then for each ticker fetches a DataFrame with
    prepare_csv() and uploads it with load_csv().
    """
    #Generate list of S&P 500 tickers
    ticker_list = gen_ticker_list()
    #Add SPY index
    # gen_ticker_list() already appends 'SPY'; add it only when missing so
    # it is not downloaded and uploaded twice.
    if 'SPY' not in ticker_list:
        ticker_list.append('SPY')
    for ticker in ticker_list:
        #Prepare dataframe of stock data based on ticker list
        df = prepare_csv(ticker)
        #load df to GCS
        load_csv(df, ticker)
    
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
    
