# Import package

In [None]:
#Install module
# %pip install --user --upgrade pandas
%pip install --user SQLAlchemy

In [None]:
#Import package
import numpy as np
import pandas as pd
import requests
import time

# import os
# from tqdm import tqdm
# from datetime import datetime
# import yfinance as yf
# import lxml
# from bs4 import BeautifulSoup #BeutifulSoup

# Data Preparation

# Some useful functions

In [None]:
#Get Alpha Vantage api key
def get_apikey(filename: str):
    """Return the API key stored in a text file.

    Parameters
    ----------
    filename : str
        Path of the file that holds the key.

    Returns
    -------
    str
        The file content with leading and trailing whitespace removed.

    Raises
    ------
    OSError
        If the file cannot be opened.
    """
    # The ``with`` block closes the file on exit, so no explicit close is needed
    with open(filename) as f:
        return f.read().strip()

In [None]:
#Get daily core stock data (open/high/low/close/volume) from Alpha Vantage
def getDailyStockdata(ticker: str, outputsize = 'compact', datatype = 'json'):
    """Download the TIME_SERIES_DAILY series for one ticker as a DataFrame.

    Parameters
    ----------
    ticker : str
        Symbol sent to the API as ``symbol``.
    outputsize : str, default 'compact'
        Forwarded unchanged to the API as ``outputsize``.
    datatype : str, default 'json'
        Forwarded unchanged to the API as ``datatype``. The response is parsed
        with ``r.json()``, so only a JSON response can be processed here.

    Returns
    -------
    pandas.DataFrame
        Columns ``Ticker, Date, Open, High, Low, Close, Volume``; the values
        are kept exactly as they come out of the JSON payload.

    Raises
    ------
    requests.RequestException
        On network failure, timeout, or a non-2xx HTTP status.
    KeyError
        If the payload has no ``'Time Series (Daily)'`` entry; the message
        carries the payload so the API's own explanation is visible.
    """
    alpha_vantage_apikey = get_apikey(filename= 'dist/apikey_AlphaVantage')

    # Let requests build and URL-encode the query string
    params = {
        'function': 'TIME_SERIES_DAILY',
        'symbol': ticker,
        'outputsize': outputsize,
        'apikey': alpha_vantage_apikey,
        'datatype': datatype,
    }
    r = requests.get('https://www.alphavantage.co/query', params=params, timeout=30)
    r.raise_for_status()

    payload = r.json()
    if 'Time Series (Daily)' not in payload:
        # Surface whatever the API sent instead of the series
        raise KeyError(f"No 'Time Series (Daily)' in response for {ticker}: {payload}")

    df = pd.DataFrame.from_dict(payload['Time Series (Daily)'], orient='index')
    df.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    # The index holds the dates; turn it into a regular 'Date' column
    df = df.rename_axis('Date').reset_index()
    df.insert(loc = 0, column = 'Ticker', value = ticker, allow_duplicates=True)
    return df

In [None]:
def getCompanyOverview(ticker: str, delay: float = 15):
    """Fetch the Alpha Vantage OVERVIEW payload for one ticker.

    Parameters
    ----------
    ticker : str
        Symbol sent to the API as ``symbol``.
    delay : float, default 15
        Seconds to sleep before the request. Presumably this keeps calls
        under the API's rate limit; confirm against the plan in use.

    Returns
    -------
    dict
        The decoded JSON body of the response.

    Raises
    ------
    requests.RequestException
        On network failure, timeout, or a non-2xx HTTP status.
    """
    #Delay api call
    if delay > 0:
        time.sleep(delay)
    alpha_vantage_apikey = get_apikey(filename= 'dist/apikey_AlphaVantage')

    # Let requests build and URL-encode the query string
    params = {
        'function': 'OVERVIEW',
        'symbol': ticker,
        'apikey': alpha_vantage_apikey,
    }
    r = requests.get('https://www.alphavantage.co/query', params=params, timeout=30)
    r.raise_for_status()
    return r.json()

In [None]:
def connectToDatabase(db_path: str = 'StockData.db'):
    """Open a SQLite database and return ``(connection, cursor)``.

    Parameters
    ----------
    db_path : str, default 'StockData.db'
        Database location passed to ``sqlite3.connect``. The default keeps the
        original file name; ``':memory:'`` gives a throwaway database.

    Returns
    -------
    tuple
        The ``sqlite3.Connection`` and a cursor created from it.
    """
    import sqlite3
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()

    return conn, cursor

In [None]:
def isInDatabase(ticker: str, table: str, connection, cursor):
    """Tell whether ``table`` has at least one row whose ``Ticker`` equals ``ticker``.

    Parameters
    ----------
    ticker : str
        Value compared with the ``Ticker`` column (bound as a SQL parameter).
    table : str
        Name of the table to search.
    connection
        Open database connection. Kept for interface compatibility; the query
        runs on ``cursor``.
    cursor
        Cursor used both to execute the query and to read its result.

    Returns
    -------
    bool
        True when a matching row exists; False when none exists or when the
        table has not been created yet.

    Raises
    ------
    sqlite3.OperationalError
        For SQL errors other than a missing table.
    """
    import sqlite3

    # Table names cannot be bound as parameters, so quote the identifier
    quoted_table = '"' + table.replace('"', '""') + '"'
    sql = f'SELECT 1 FROM {quoted_table} WHERE Ticker = ? LIMIT 1;'
    try:
        # Execute on the cursor we read from; connection.execute() would put
        # the rows on a different, temporary cursor
        cursor.execute(sql, (ticker,))
    except sqlite3.OperationalError as exc:
        if 'no such table' in str(exc):
            return False
        raise
    return cursor.fetchone() is not None


In [None]:
def pd2sql(table: str, df: pd.DataFrame, connection, append=False):
    """Write a DataFrame to a SQL table.

    Parameters
    ----------
    table : str
        Target table name.
    df : pandas.DataFrame
        Data to write.
    connection
        Database connection handed to ``DataFrame.to_sql`` as ``con``.
    append : bool, default False
        True adds the rows to an existing table; False replaces the table.

    Notes
    -----
    The DataFrame index is not written in either mode, so a table created by
    a replace has the same columns as later appends.
    """
    mode = 'append' if append else 'replace'
    df.to_sql(table, con = connection, if_exists=mode, index = False)

## Web Scraping

This section scrapes the tickers of all S&P 500 companies from Wikipedia: https://en.wikipedia.org/wiki/List_of_S%26P_500_companies

We can simply use pandas to read the table from the Wikipedia page.

In [None]:
#Read the tables on the Wikipedia page; the first one holds the S&P 500 constituents
wiki_url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies#Selected_changes_to_the_list_of_S&P_500_components"
tickers_df_list = pd.read_html(wiki_url)
tickers_df = tickers_df_list[0]
#Keep only the symbol and its GICS classification columns
industry_dimension = tickers_df[['Symbol', 'GICS Sector', 'GICS Sub-Industry']]

In [23]:
#Preview the last rows of the symbol / sector / sub-industry table
industry_dimension.tail()

Unnamed: 0,Symbol,GICS Sector,GICS Sub-Industry
498,YUM,Consumer Discretionary,Restaurants
499,ZBRA,Information Technology,Electronic Equipment & Instruments
500,ZBH,Health Care,Health Care Equipment
501,ZION,Financials,Regional Banks
502,ZTS,Health Care,Pharmaceuticals


In [None]:
#Collect the ticker symbols into a plain Python list
tickers = tickers_df['Symbol'].tolist()
print(tickers)

## Download the financial data

In [None]:
#Get a SQLite3 connection and a cursor from connectToDatabase()

conn, cursor = connectToDatabase()

In [None]:
#Name of the price table in SQLite3 and an empty frame with its columns

table = 'StockPrice'
stock_df = pd.DataFrame(
    columns=[
        'Ticker',
        'Date',
        'Open',
        'High',
        'Low',
        'Close',
        'Volume',
    ]
)

In [None]:
#Download the financial data from Alpha vantage

# Collect one frame per ticker and concatenate once, rather than re-copying
# the growing frame on every iteration.
downloaded_frames = []
try:
    for ticker in tickers:
        # isInDatabase needs the open connection and cursor as well
        if isInDatabase(ticker, table, conn, cursor):
            continue
        try:
            downloaded_frames.append(getDailyStockdata(ticker, outputsize='full'))
        except Exception as exc:
            # Best-effort download: report the failure and move on
            print(f'Next ticker: {ticker} (skipped: {exc!r})')
finally:
    # Keep whatever was downloaded even if the loop is interrupted
    if downloaded_frames:
        stock_df = pd.concat([stock_df, *downloaded_frames], ignore_index = True)

In [None]:
#Use the keys of one sample overview response as the column names
temp_json = getCompanyOverview('IBM')
columns = list(temp_json.keys())

#Change the first column from Symbol to Ticker
columns[0] = 'Ticker'

In [None]:
#Start the stock overview store with one empty list per column
StockOverview_dict = {name: [] for name in columns}

In [None]:
#Download the company overview from Alpha vantage
#Append the value from api result
table = 'StockOverview'

for ticker in tickers:
    # isInDatabase needs the open connection and cursor as well
    if isInDatabase(ticker, table, conn, cursor):
        continue
    try:
        overview = getCompanyOverview(ticker)
    except Exception as exc:
        # Best-effort download: report the failure and move on
        print(f'Skipping overview for {ticker}: {exc!r}')
        continue

    # The API names the field 'Symbol' while the column list calls it 'Ticker'
    record = {('Ticker' if key == 'Symbol' else key): value
              for key, value in overview.items()}

    # Append only complete records so every column list keeps the same length
    if any(column not in record for column in StockOverview_dict):
        print(f'Skipping overview for {ticker}: incomplete response')
        continue
    for column, values in StockOverview_dict.items():
        values.append(record[column])

In [None]:
#Build a DataFrame with one column per key of the overview dict

StockOverview_df = pd.DataFrame(StockOverview_dict)
StockOverview_df.tail()

In [None]:
#Fetch a sample overview again and rebuild the column list from its keys
#(here the first entry is not renamed to 'Ticker')
temp_json = getCompanyOverview('IBM')
columns = list(temp_json.keys())

In [None]:
#Call connectToDatabase() again; this rebinds conn and cursor
conn, cursor = connectToDatabase()