# Install and import packages

In [1]:
#Install SQLAlchemy into the user site-packages (restart the kernel afterwards if prompted)
# %pip install --user --upgrade pandas
%pip install --user SQLAlchemy

Note: you may need to restart the kernel to use updated packages.


In [3]:
#Import package
import numpy as np
import pandas as pd
from pandas_datareader import data as web
import requests
from sqlalchemy import BigInteger, Numeric, Date, Text, String
import time
# import os
# from tqdm import tqdm
# from datetime import datetime
# import yfinance as yf
# import lxml
# from bs4 import BeautifulSoup #BeutifulSoup

# Data Preparation

# Some useful functions

In [4]:
#Get Alpha Vantage api key
def get_apikey(filename: str) -> str:
    """Read an API key from a text file.

    Args:
        filename: Path of the file whose content is the API key.

    Returns:
        The file content with leading and trailing whitespace removed.

    Raises:
        OSError: If the file cannot be opened (raised by ``open``).
    """
    # The ``with`` block closes the file on exit, so no explicit close is needed.
    with open(filename) as f:
        return f.read().strip()

In [5]:
#Get daily core stock data from Alpha Vantage
def getDailyStockdata(ticker: str, outputsize = 'compact', datatype = 'json') -> pd.DataFrame:
    """Download the daily price series of one ticker from Alpha Vantage.

    Args:
        ticker: Symbol sent to the API as ``symbol``.
        outputsize: Passed through to the API as ``outputsize``
            (the notebook uses ``'full'`` for the bulk download).
        datatype: Passed through to the API as ``datatype``. The response is
            parsed with ``r.json()``, so only a JSON response can be handled.

    Returns:
        A DataFrame with the columns ``Ticker``, ``Date``, ``Open``, ``High``,
        ``Low``, ``Close`` and ``Volume``; one row per entry of the
        ``'Time Series (Daily)'`` mapping in the response.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
        KeyError: If the response has no ``'Time Series (Daily)'`` entry
            (for example an error or rate-limit message); the message
            contains the payload that was returned instead.
    """
    query = {
        'function': 'TIME_SERIES_DAILY',
        'symbol': ticker,
        'outputsize': outputsize,
        'apikey': get_apikey(filename= 'dist/apikey_AlphaVantage'),
        'datatype': datatype,
    }
    # Let requests encode the query string, and never wait forever on a stalled connection.
    r = requests.get('https://www.alphavantage.co/query', params=query, timeout=30)
    r.raise_for_status()
    payload = r.json()

    series_key = 'Time Series (Daily)'
    if series_key not in payload:
        # Same exception type as the plain dictionary lookup would raise,
        # but with the API's own explanation attached.
        raise KeyError(f'{series_key} missing in response for {ticker}: {payload}')

    df = pd.DataFrame.from_dict(payload[series_key], orient='index')
    df.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    # The index holds the keys of the time series; expose it as a regular 'Date' column.
    df = df.rename_axis('Date').reset_index()
    df.insert(loc = 0, column = 'Ticker', value = ticker, allow_duplicates=True)
    return df

In [6]:
def getCompanyOverview(ticker: str, delay: float = 15) -> dict:
    """Download the company overview of one ticker from Alpha Vantage.

    Args:
        ticker: Symbol sent to the API as ``symbol``.
        delay: Seconds to sleep before the request is sent, used to space out
            consecutive API calls. Defaults to 15; pass 0 to skip the pause.

    Returns:
        The decoded JSON response as a dictionary.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
    """
    #Delay api call
    if delay > 0:
        time.sleep(delay)

    query = {
        'function': 'OVERVIEW',
        'symbol': ticker,
        'apikey': get_apikey(filename= 'dist/apikey_AlphaVantage'),
    }
    # Let requests encode the query string, and never wait forever on a stalled connection.
    r = requests.get('https://www.alphavantage.co/query', params=query, timeout=30)
    r.raise_for_status()
    return r.json()

In [11]:
def connectToDatabase(db_path: str = 'StockData.db'):
    """Open a SQLite connection and a cursor on it.

    Args:
        db_path: Database file handed to ``sqlite3.connect``. Defaults to
            ``'StockData.db'``; ``':memory:'`` gives a throw-away database.

    Returns:
        A ``(connection, cursor)`` tuple. The caller is responsible for
        closing the connection.
    """
    # Imported here because the notebook's visible import cell does not import sqlite3.
    import sqlite3

    conn = sqlite3.connect(db_path)
    return conn, conn.cursor()

In [15]:
def isInDatabase(ticker: str, table: str) -> bool:
    """Tell whether ``table`` already holds a row for ``ticker``.

    Args:
        ticker: Value looked up in the table's ``Symbol`` column.
        table: Name of the table (or view) to search. It must have a
            ``Symbol`` column.

    Returns:
        True if at least one row has ``Symbol`` equal to ``ticker``.
        False if there is no such row or the table does not exist.
    """
    conn, cursor = connectToDatabase()
    try:
        # A table that has not been created yet simply holds no tickers.
        cursor.execute(
            "SELECT 1 FROM sqlite_master "
            "WHERE type IN ('table', 'view') AND name = ? COLLATE NOCASE",
            (table,),
        )
        if cursor.fetchone() is None:
            return False

        # Identifiers cannot be bound as parameters, so the table name is
        # quoted; the ticker value is bound instead of being pasted into the SQL.
        quoted_table = '"' + table.replace('"', '""') + '"'
        # Execute on the same cursor the result is read from.
        cursor.execute(
            f'SELECT 1 FROM {quoted_table} WHERE Symbol = ? LIMIT 1',
            (ticker,),
        )
        return cursor.fetchone() is not None
    finally:
        conn.close()


## Web Scraping

This section scrapes the tickers of all S&P 500 companies from Wikipedia: https://en.wikipedia.org/wiki/List_of_S%26P_500_companies

We can simply use pandas to read the table from the Wikipedia page.

In [5]:
#Read every HTML table on the S&P 500 Wikipedia page into pandas dataframes
wiki_url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies#Selected_changes_to_the_list_of_S&P_500_components"
tickers_df_list = pd.read_html(wiki_url)

#The first table on the page is the one used as the ticker list
tickers_df = tickers_df_list[0]

#Keep only the symbol and its GICS classification columns
industry_dimension = tickers_df[['Symbol', 'GICS Sector', 'GICS Sub-Industry']]

In [6]:
#Show the last rows of the symbol / sector / sub-industry table as a sanity check
industry_dimension.tail()

Unnamed: 0,Symbol,GICS Sector,GICS Sub-Industry
498,YUM,Consumer Discretionary,Restaurants
499,ZBRA,Information Technology,Electronic Equipment & Instruments
500,ZBH,Health Care,Health Care Equipment
501,ZION,Financials,Regional Banks
502,ZTS,Health Care,Pharmaceuticals


In [7]:
#Flatten the Symbol column into a plain Python list and show it
tickers = tickers_df['Symbol'].tolist()
print(tickers)

['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ATVI', 'ADM', 'ADBE', 'ADP', 'AAP', 'AES', 'AFL', 'A', 'APD', 'AKAM', 'ALK', 'ALB', 'ARE', 'ALGN', 'ALLE', 'LNT', 'ALL', 'GOOGL', 'GOOG', 'MO', 'AMZN', 'AMCR', 'AMD', 'AEE', 'AAL', 'AEP', 'AXP', 'AIG', 'AMT', 'AWK', 'AMP', 'ABC', 'AME', 'AMGN', 'APH', 'ADI', 'ANSS', 'AON', 'APA', 'AAPL', 'AMAT', 'APTV', 'ACGL', 'ANET', 'AJG', 'AIZ', 'T', 'ATO', 'ADSK', 'AZO', 'AVB', 'AVY', 'AXON', 'BKR', 'BALL', 'BAC', 'BBWI', 'BAX', 'BDX', 'WRB', 'BRK.B', 'BBY', 'BIO', 'TECH', 'BIIB', 'BLK', 'BK', 'BA', 'BKNG', 'BWA', 'BXP', 'BSX', 'BMY', 'AVGO', 'BR', 'BRO', 'BF.B', 'BG', 'CHRW', 'CDNS', 'CZR', 'CPT', 'CPB', 'COF', 'CAH', 'KMX', 'CCL', 'CARR', 'CTLT', 'CAT', 'CBOE', 'CBRE', 'CDW', 'CE', 'CNC', 'CNP', 'CDAY', 'CF', 'CRL', 'SCHW', 'CHTR', 'CVX', 'CMG', 'CB', 'CHD', 'CI', 'CINF', 'CTAS', 'CSCO', 'C', 'CFG', 'CLX', 'CME', 'CMS', 'KO', 'CTSH', 'CL', 'CMCSA', 'CMA', 'CAG', 'COP', 'ED', 'STZ', 'CEG', 'COO', 'CPRT', 'GLW', 'CTVA', 'CSGP', 'COST', 'CTRA', 'CCI', 'CSX', 'C

## Download the financial data

In [None]:
#Download the daily price history of every ticker from Alpha Vantage
# tickers_test = ['AAPL','MMM','ABT', 'ABBV', 'ACN', 'ATVI', 'ADM', 'ADBE', 'ADP', 'AAP', 'AES', 'AFL']

table = 'StockPrice'
price_columns = ['Ticker','Date','Open', 'High', 'Low', 'Close', 'Volume']

#Collect one frame per ticker and concatenate once at the end
#(concatenating inside the loop copies the growing frame on every iteration)
price_frames = []
failed_price_tickers = {}
for ticker in tickers:
    try:
        price_frames.append(getDailyStockdata(ticker,outputsize='full'))
    except Exception as err:
        #Keep going on a failed download, but remember why it failed
        failed_price_tickers[ticker] = err

if failed_price_tickers:
    print(f'{len(failed_price_tickers)} of {len(tickers)} tickers could not be downloaded; see failed_price_tickers')

if price_frames:
    stock_df = pd.concat(price_frames, ignore_index = True)
else:
    #Nothing was downloaded: keep an empty frame with the expected columns
    stock_df = pd.DataFrame(columns=price_columns)
stock_df.tail()

In [None]:
#Download the company overview from Alpha vantage

#Define the columns from the keys of one overview response (IBM)
temp_json = getCompanyOverview('IBM')
if 'Symbol' not in temp_json:
    #Without a real overview the column list below would be meaningless
    raise ValueError(f'Unexpected company overview response for IBM: {temp_json}')
columns = list(temp_json)

#Create a new dict to store the stock overview data, one list per column
StockOverview_dict = {column: [] for column in columns}

#Append the value from api result
table = 'StockOverview'
failed_overview_tickers = []

for ticker in tickers:
    try:
        already_stored = isInDatabase(ticker,table)
    except Exception:
        #If the lookup itself fails (e.g. the table does not exist yet), download the ticker
        already_stored = False
    if already_stored:
        continue

    try:
        r = getCompanyOverview(ticker)
    except Exception:
        failed_overview_tickers.append(ticker)
        continue

    if 'Symbol' not in r:
        #Not an overview (e.g. an error or rate-limit message): skip the whole record
        failed_overview_tickers.append(ticker)
        continue

    #Append exactly one value per column so all lists keep the same length;
    #keys missing from this response become None
    for column in columns:
        StockOverview_dict[column].append(r.get(column))

if failed_overview_tickers:
    print(f'{len(failed_overview_tickers)} tickers returned no overview; see failed_overview_tickers')

#Convert the dict to pandas dataframe
StockOverview_df = pd.DataFrame.from_dict(StockOverview_dict)
StockOverview_df.tail()

In [19]:
#Open the SQLite connection and cursor that the storage cells write through
conn, cursor = connectToDatabase()

In [None]:
#Write both dataframes to the SQLite3 database
StockPrice_tableName = 'StockPrice'
StockOverview_tableName = 'StockOverview'

#Options shared by both writes: recreate the table if it exists and
#store the dataframe index as a column named 'Record'
to_sql_options = dict(if_exists='replace', index=True, index_label='Record')

stock_df.to_sql(name=StockPrice_tableName, con=conn, **to_sql_options)
StockOverview_df.to_sql(name=StockOverview_tableName, con=conn, **to_sql_options)
conn.commit()


In [29]:
#Store the StockOverview data in SQLite3

StockOverview_tableName = 'StockOverview'
#index=False writes only the dataframe columns, so no index_label is passed
#(pandas ignores index_label when the index is not written).
#NOTE(review): if_exists='replace' recreates the table on every run, while the
#download cell skips tickers that are already stored - confirm whether
#'append' is intended here.
StockOverview_df.to_sql(StockOverview_tableName,conn,if_exists='replace',
                        index=False)
conn.commit()