In [1]:
#Import package
import numpy as np
import pandas as pd
import requests
import time
import sqlite3

# Some useful functions

In [2]:
#Get Alpha Vantage api key
def get_apikey(filename: str) -> str:
    """Read an API key from a text file.

    Args:
        filename: Path of the file that holds the key.

    Returns:
        The file's full contents with leading and trailing whitespace removed.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The `with` block closes the file on exit; no explicit close call is needed.
    with open(filename) as f:
        return f.read().strip()

In [3]:
#Get daily core stock data from Alpha Vantage from 2000-01 to now
def getDailyStockdata(ticker: str, outputsize = 'compact', datatype = 'json'):
    """Download the daily time series of one ticker from Alpha Vantage.

    Args:
        ticker: Symbol sent as the ``symbol`` query parameter.
        outputsize: Sent as the ``outputsize`` query parameter
            (this notebook uses 'compact' and 'full').
        datatype: Sent as the ``datatype`` query parameter. The response is
            always decoded with ``r.json()``, so only 'json' is usable here.

    Returns:
        A DataFrame with the columns Ticker, Date, Open, High, Low, Close and
        Volume, one row per entry of the 'Time Series (Daily)' payload.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the response has no 'Time Series (Daily)' entry; the
            message carries the payload so the API's explanation is visible.
    """
    # Let requests build and URL-encode the query string.
    params = {
        'function': 'TIME_SERIES_DAILY',
        'symbol': ticker,
        'outputsize': outputsize,
        'apikey': get_apikey(filename= 'dist/apikey_AlphaVantage'),
        'datatype': datatype,
    }
    # A timeout keeps one stalled request from blocking a long download loop.
    r = requests.get('https://www.alphavantage.co/query', params=params, timeout=30)
    r.raise_for_status()

    payload = r.json()
    if 'Time Series (Daily)' not in payload:
        raise KeyError(f'No daily time series returned for {ticker}: {payload}')

    df = pd.DataFrame.from_dict(payload['Time Series (Daily)'], orient='index')
    df.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    # The index holds the dates; turn it into a regular 'Date' column.
    df = df.rename_axis('Date').reset_index()
    df.insert(loc = 0, column = 'Ticker', value = ticker, allow_duplicates=True)
    return df

In [4]:
def getCompanyOverview(ticker: str, delay: float = 15):
    """Fetch the Alpha Vantage OVERVIEW payload for one ticker.

    Args:
        ticker: Symbol sent as the ``symbol`` query parameter.
        delay: Seconds to sleep before the request is sent. Defaults to 15,
            the pause this notebook has always used between calls
            (presumably to respect the API rate limit; verify against your plan).

    Returns:
        The decoded JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    #Delay api call
    time.sleep(delay)

    # Let requests build and URL-encode the query string.
    params = {
        'function': 'OVERVIEW',
        'symbol': ticker,
        'apikey': get_apikey(filename= 'dist/apikey_AlphaVantage'),
    }
    # A timeout keeps one stalled request from blocking the whole loop.
    r = requests.get('https://www.alphavantage.co/query', params=params, timeout=30)
    r.raise_for_status()
    return r.json()

In [5]:
def connectToDatabase(database: str = 'StockData.db'):
    """Open a SQLite3 connection and create a cursor on it.

    Args:
        database: Database file passed to ``sqlite3.connect``. Defaults to
            'StockData.db'; ':memory:' gives a throw-away database.

    Returns:
        A ``(connection, cursor)`` tuple.
    """
    conn = sqlite3.connect(database)
    cursor = conn.cursor()

    print('SQLite3 is connected')
    return conn, cursor

In [6]:
def isInDatabase(ticker: str, table: str, connection: sqlite3.Connection, cursor: sqlite3.Cursor):
    """Report whether `table` holds at least one row for `ticker`.

    Args:
        ticker: Value compared against the table's ``Ticker`` column.
        table: Name of the table to search.
        connection: Open SQLite3 connection used to run the query.
        cursor: Kept for backward compatibility with existing callers; the
            query runs on a fresh cursor so this one is left untouched.

    Returns:
        True if a matching row exists, otherwise False.

    Raises:
        sqlite3.OperationalError: If `table` does not exist or has no
            ``Ticker`` column.
    """
    # Table names cannot be bound as parameters, so quote the identifier;
    # the ticker value itself is bound with a placeholder.
    quoted_table = '"' + table.replace('"', '""') + '"'
    sql = f'''
    SELECT 1
    FROM {quoted_table}
    WHERE Ticker = ?
    LIMIT 1;
    '''
    # Read from the cursor that actually executed the query.
    found = connection.execute(sql, (ticker,)).fetchone() is not None
    if found:
        print(f'{ticker} is in the Database')
    else:
        print(f'{ticker} is not in the Database')
    return found

In [7]:
def pd2sql(table: str, df: pd.DataFrame, connection: sqlite3.Connection, append=False):
    """Write a DataFrame into a SQL table through the given connection.

    Args:
        table: Name of the destination table.
        df: Frame to store; its index is not written.
        connection: Open SQLite3 connection.
        append: When truthy, rows are added to an existing table; otherwise
            the table is replaced.

    Returns:
        None. A confirmation line is printed once the write is done.
    """
    write_mode = 'append' if append else 'replace'
    df.to_sql(table, con=connection, if_exists=write_mode, index=False)
    print('Pandas to SQL finished')
    return None

# Main

In [8]:
#Open the SQLite3 connection and cursor that the cells below share
conn,cursor = connectToDatabase()

SQLite3 is connected


In [21]:
#Load the S&P 500 constituents table from Wikipedia into a pandas dataframe
wiki_url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies#Selected_changes_to_the_list_of_S&P_500_components"
tickers_df_list = pd.read_html(wiki_url)
#The first table on the page is the one used for the constituents
tickers_df = tickers_df_list[0]
#Keep only the symbol and the two GICS columns, renamed to the database column names
industry_dimension = (
    tickers_df[['Symbol', 'GICS Sector', 'GICS Sub-Industry']]
    .rename(columns={
        'Symbol': 'Ticker',
        'GICS Sector': 'Sector',
        'GICS Sub-Industry': 'SubIndustry',
    })
)

In [22]:
#Store industry_dimension in the SQLite3 table 'IndustryDimension' (append is left at False, so an existing table is replaced)
pd2sql('IndustryDimension',industry_dimension,conn)

Pandas to SQL finished


In [10]:
#Turn the Symbol column into a plain Python list and show how many tickers it holds
tickers = tickers_df['Symbol'].values.tolist()
len(tickers)

503

In [11]:
#Name of the target SQLite3 table and an empty dataframe with the stock price columns
table = 'StockPrice'
stock_df = pd.DataFrame(columns=['Ticker','Date','Open', 'High', 'Low', 'Close', 'Volume'])

In [None]:
#Download the financial data from Alpha Vantage
#Frames are collected in a list and concatenated once, instead of re-copying
#stock_df on every iteration.
downloaded_frames = []
try:
    for ticker in tickers:
        if isInDatabase(ticker, table, conn, cursor):
            continue
        try:
            downloaded_frames.append(getDailyStockdata(ticker, outputsize='full'))
        except Exception as exc:
            #Catch Exception (not a bare except) so the kernel can still be
            #interrupted, and report why the ticker was skipped.
            print(f'Skipping {ticker}: {exc!r}')
finally:
    #Keep whatever was downloaded even if the loop is interrupted part-way.
    if downloaded_frames:
        stock_df = pd.concat([stock_df, *downloaded_frames], ignore_index = True)

In [None]:
#Use the IBM overview response as the template for the column names
temp_json = getCompanyOverview('IBM')
columns = list(temp_json.keys())

#Change the first column from Symbol to Ticker
columns[0] = 'Ticker'

In [1]:
#Create a new dict to store the stock overview data: one list per column
StockOverview_dict = {column: [] for column in columns}
table = 'StockOverview'

for ticker in tickers:
    if isInDatabase(ticker, table, conn, cursor):
        continue
    try:
        overview = getCompanyOverview(ticker)
    except Exception as exc:
        #Catch Exception (not a bare except) so the kernel can still be interrupted
        print(f'Skipping {ticker}: {exc!r}')
        continue
    #Assumes a usable overview carries a 'Symbol' key, as the template response
    #used to build `columns` does; other payloads are skipped.
    if 'Symbol' not in overview:
        print(f'No overview returned for {ticker}')
        continue
    #Append one value per column so every list keeps the same length.
    #The 'Ticker' column is filled from the response's 'Symbol' key, because
    #`columns` was renamed but the API response was not.
    for column in columns:
        source_key = 'Symbol' if column == 'Ticker' else column
        StockOverview_dict[column].append(overview.get(source_key))

## Explore the data

In [29]:
#Read every StockPrice row together with its sector and sub-industry.
#The LEFT JOIN keeps price rows even when IndustryDimension has no matching ticker.
sql = '''
SELECT Date, StockPrice.Ticker, Sector, SubIndustry, Open, High, Low, Close
FROM StockPrice
LEFT JOIN IndustryDimension ON StockPrice.Ticker = IndustryDimension.Ticker
'''
#The four price columns are cast to np.double while the result is loaded
StockPrice_df = pd.read_sql(sql=sql, con=conn, dtype={'Open': np.double,'High': np.double, 'Low':np.double,'Close':np.double})
StockPrice_df.head()

Unnamed: 0,Date,Ticker,Sector,SubIndustry,Open,High,Low,Close
0,2023-07-28,MMM,Industrials,Industrial Conglomerates,111.87,112.41,111.045,111.88
1,2023-07-27,MMM,Industrials,Industrial Conglomerates,110.45,113.14,110.45,111.19
2,2023-07-26,MMM,Industrials,Industrial Conglomerates,109.84,113.07,109.84,112.64
3,2023-07-25,MMM,Industrials,Industrial Conglomerates,107.55,110.85,107.0092,109.83
4,2023-07-24,MMM,Industrials,Industrial Conglomerates,104.53,105.6041,103.98,104.27


In [30]:
#Summarise the column dtypes and non-null counts of the joined frame
StockPrice_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 116428 entries, 0 to 116427
Data columns (total 8 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   Date         116428 non-null  object 
 1   Ticker       116428 non-null  object 
 2   Sector       116428 non-null  object 
 3   SubIndustry  116428 non-null  object 
 4   Open         116428 non-null  float64
 5   High         116428 non-null  float64
 6   Low          116428 non-null  float64
 7   Close        116428 non-null  float64
dtypes: float64(4), object(4)
memory usage: 7.1+ MB
