# In [19]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from google.cloud import storage


def scrape_data(timeout: float = 30) -> pd.DataFrame:
    """Scrape the S&P 500 constituents table from Wikipedia into a DataFrame.

    The page is fetched with ``requests``, its ``<table>`` elements are
    extracted with BeautifulSoup, and the first table pandas can parse is
    used. Several columns are renamed to underscore-separated names, the
    ``CIK`` column is cast to ``int``, and one extra row for the ``SPY``
    ticker is appended at the end.

    Args:
        timeout: Seconds to wait for Wikipedia before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The adjusted company table, including the appended ``SPY`` row.

    Raises:
        requests.HTTPError: If Wikipedia answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
        ValueError: Raised by pandas when no table can be parsed, or when the
            appended row does not match the number of columns.
    """
    # Function-scope import: only needed to hand pandas a file-like object.
    from io import StringIO

    # Use BeautifulSoup to scrape the Wiki page with a table of all the
    # S&P 500 companies.
    page = requests.get(
        "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies#S&P_500_component_stocks",
        timeout=timeout,
    )
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    page.raise_for_status()

    soup = BeautifulSoup(page.content, 'html.parser')
    tables = soup.find_all('table')

    # Convert the table to a pandas DataFrame. Passing literal HTML to
    # read_html is deprecated in recent pandas, so wrap it in StringIO.
    df = pd.read_html(StringIO(str(tables)))[0]

    # Make minor adjustments to the DataFrame. The target names (including
    # the 'SEC_Fillings' spelling) are kept as-is because they end up as CSV
    # headers that other code may read.
    df = df.rename(columns={
        'SEC filings': 'SEC_Fillings',
        'GICS Sector': 'GICS_Sector',
        'GICS Sub-Industry': 'GICS_Sub_Industry',
        'Headquarters Location': 'Headquarters_Location',
        'Date first added': 'Date_First_Added',
    })

    # NOTE(review): positional override of the CIK in row 9. Presumably this
    # patched a missing or malformed value for one company when the script
    # was written; it is fragile if the table order changes - confirm which
    # company is meant and key the override on its symbol instead.
    df.at[9, 'CIK'] = 1551152
    df['CIK'] = df['CIK'].astype('int')

    # Append a row for the SPY ticker after the scraped companies.
    # NOTE(review): the values are positional and assume exactly nine columns
    # in the scraped order - verify against the current page layout.
    df.loc[len(df.index)] = ['SPY', 'S&P 500', 'N/A', 'N/A', 'N/A', 'N/A', '1957-01-01', 0, 1957]
    return df

def load_data(df):
    """Upload a DataFrame to Google Cloud Storage as a CSV object.

    The frame is serialised with ``DataFrame.to_csv()`` (index included) and
    written to the object ``S&P 500 Company Info`` in the bucket
    ``data_lake_stocks-data-pipeline``.

    Args:
        df: The DataFrame to upload.
    """
    client = storage.Client()
    bucket = client.get_bucket('data_lake_stocks-data-pipeline')
    blob = bucket.blob('S&P 500 Company Info')
    # The second positional argument of upload_from_string is the content
    # type, not a name; pass a real MIME type so the object is stored as CSV.
    blob.upload_from_string(df.to_csv(), content_type='text/csv')
    
def main():
    """Run the pipeline: scrape the company table, then upload it to GCS."""
    company_table = scrape_data()
    load_data(company_table)
    
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

