In [1]:
#Import packages
import numpy as np
import pandas as pd
import requests
import time
import sqlite3

# Some useful functions

In [2]:
#Get Alpha Vantage api key
def get_apikey(filename: str) -> str:
    """Read an API key from a text file.

    Args:
        filename: Path of the file that holds the key.

    Returns:
        The file contents with leading and trailing whitespace removed.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The `with` block closes the file on exit, so no explicit close() is needed.
    with open(filename) as f:
        return f.read().strip()

In [3]:
#Get daily core stock data from Alpha Vantage (pass outputsize='full' for the full history; the default 'compact' returns only the most recent data points)
def getDailyStockdata(ticker: str, outputsize = 'compact', datatype = 'json'):
    """Download the daily price series of one ticker from Alpha Vantage.

    Args:
        ticker: Symbol to request.
        outputsize: Passed through as the API's ``outputsize`` parameter.
        datatype: Passed through as the API's ``datatype`` parameter. The
            response is parsed with ``r.json()``, so only a JSON response
            can be handled here.

    Returns:
        A DataFrame with the columns Ticker, Date, Open, High, Low, Close and
        Volume, one row per date in the response. Values are kept exactly as
        delivered by the API (no dtype conversion is done here).

    Raises:
        KeyError: If the response has no 'Time Series (Daily)' entry, e.g.
            when the API answers with a notice instead of data.
        requests.RequestException: On network failure, timeout or HTTP error.
    """
    # Let requests build and URL-encode the query string.
    params = {
        'function': 'TIME_SERIES_DAILY',
        'symbol': ticker,
        'outputsize': outputsize,
        'apikey': get_apikey(filename= 'dist/apikey_AlphaVantage'),
        'datatype': datatype,
    }
    # A timeout keeps a stalled connection from hanging the notebook forever.
    r = requests.get('https://www.alphavantage.co/query', params=params, timeout=30)
    r.raise_for_status()
    payload = r.json()

    if 'Time Series (Daily)' not in payload:
        # Surface the API's own explanation instead of a bare KeyError.
        # NOTE(review): 'Error Message' / 'Note' / 'Information' are the notice
        # keys this API is presumed to use - confirm against the API docs.
        reason = (payload.get('Error Message') or payload.get('Note')
                  or payload.get('Information') or 'unexpected response')
        raise KeyError(f'No daily time series returned for {ticker}: {reason}')

    df = pd.DataFrame.from_dict(payload['Time Series (Daily)'], orient='index')
    df.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    # The index holds the dates; turn it into a regular 'Date' column.
    df = df.rename_axis('Date').reset_index()
    df.insert(loc = 0, column = 'Ticker', value = ticker, allow_duplicates=True)
    return df

In [4]:
def getCompanyOverview(ticker: str, delay: float = 15):
    """Fetch the Alpha Vantage company overview of one ticker.

    Args:
        ticker: Symbol to request.
        delay: Seconds to sleep before the request is sent (default 15).

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    # Delay the api call; presumably this keeps the request rate under the
    # API's quota - confirm the limit that applies to the key in use.
    time.sleep(delay)
    params = {
        'function': 'OVERVIEW',
        'symbol': ticker,
        'apikey': get_apikey(filename= 'dist/apikey_AlphaVantage'),
    }
    # A timeout keeps a stalled connection from hanging the notebook forever.
    r = requests.get('https://www.alphavantage.co/query', params=params, timeout=30)
    print(f'the overview of {ticker} get')
    return r.json()

In [5]:
def connectToDatabase(db_path: str = 'StockData.db'):
    """Open a SQLite database and create a cursor on it.

    Args:
        db_path: Database file to open, passed to ``sqlite3.connect``.
            Defaults to 'StockData.db'.

    Returns:
        A ``(connection, cursor)`` tuple for the opened database.
    """
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()

    print('SQLite3 is connected')
    return conn, cursor

In [6]:
def isInDatabase(ticker: str, table: str, cursor: sqlite3.Cursor):
    """Report whether `table` contains at least one row for `ticker`.

    Args:
        ticker: Value to look up in the table's ``Ticker`` column.
        table: Name of the table to search. It is interpolated into the SQL
            text (identifiers cannot be bound as parameters), so it must be a
            trusted name, never user input.
        cursor: Cursor on the database to query.

    Returns:
        True if a matching row exists, otherwise False. The outcome is also
        printed.

    Raises:
        sqlite3.OperationalError: If the table or its Ticker column is missing.
    """
    # Bind the ticker as a parameter so quotes in it cannot alter the query,
    # and stop at the first match instead of fetching every row.
    sql = f'''
    SELECT 1
    FROM {table}
    WHERE Ticker = ?
    LIMIT 1;
    '''
    found = cursor.execute(sql, (ticker,)).fetchone() is not None
    if found:
        print(f'{ticker} is in the database of {table}')
    else:
        print(f'{ticker} is not in the database of {table}')
    return found

In [7]:
def pd2sql(table: str, df: pd.DataFrame, connection: sqlite3.Connection, append=False):
    """Write a DataFrame into a SQL table without its index.

    Args:
        table: Name of the destination table.
        df: Data to write.
        connection: Open database connection to write through.
        append: When truthy, rows are added to an existing table; otherwise
            an existing table is replaced.

    Returns:
        None. A confirmation message is printed once the write is done.
    """
    write_mode = 'append' if append else 'replace'
    df.to_sql(table, con = connection, if_exists=write_mode, index = False)
    print('Pandas to SQL finished')
    return None

# Main

In [8]:
#Create the SQLite3 connection and cursor used by the cells below
conn, cursor = connectToDatabase()

SQLite3 is connected


In [9]:
#Read the S&P 500 tables from Wikipedia into pandas dataframes
wiki_url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies#Selected_changes_to_the_list_of_S&P_500_components"
tickers_df_list = pd.read_html(wiki_url)
#Keep the first table parsed from the page (presumably the constituents list - verify if the page layout changes)
tickers_df = tickers_df_list[0]
#Select the ticker and industry columns and give them the names used in the database
industry_dimension = (
    tickers_df[['Symbol', 'GICS Sector', 'GICS Sub-Industry']]
    .rename(columns={
        'Symbol': 'Ticker',
        'GICS Sector': 'Sector',
        'GICS Sub-Industry': 'SubIndustry',
    })
)

In [10]:
#Store industry_dimension in SQLite3 as table IndustryDimension (pd2sql replaces an existing table unless append=True)
pd2sql('IndustryDimension',industry_dimension,conn)

Pandas to SQL finished


In [11]:
#Collect the Symbol column as a plain Python list and show how many tickers there are
tickers = tickers_df['Symbol'].tolist()
len(tickers)

503

In [12]:
#Define the SQLite3 table name and an empty dataframe with the stock price columns
table = 'StockPrice'
stock_df = pd.DataFrame(columns=['Ticker','Date','Open', 'High', 'Low', 'Close', 'Volume'])

In [13]:
#Download the financial data from Alpha vantage
# NOTE(review): this downloads only tickers that ARE already in the table, while
# the overview loop further down skips tickers that are already stored. Confirm
# whether `not isInDatabase(...)` was intended here.
downloaded_frames = []
for ticker in tickers:
    if not isInDatabase(ticker, table, cursor):
        continue
    try:
        downloaded_frames.append(getDailyStockdata(ticker, outputsize='full'))
    except Exception as exc:
        # Catch Exception (not a bare except) so Ctrl-C still interrupts the
        # loop, and report the reason instead of hiding it.
        print(f'Failed to download {ticker}: {exc!r}')
        print(f'Next ticker: {ticker}')

# Concatenate once after the loop; concatenating inside it re-copies every
# previously downloaded row on each iteration.
if downloaded_frames:
    stock_df = pd.concat([stock_df, *downloaded_frames], ignore_index = True)

MMM is in the database of StockPrice
AOS is in the database of StockPrice
ABT is in the database of StockPrice
ABBV is in the database of StockPrice
ACN is in the database of StockPrice
ATVI is not in the database of StockPrice
ADM is not in the database of StockPrice
ADBE is not in the database of StockPrice
ADP is not in the database of StockPrice
AAP is not in the database of StockPrice
AES is not in the database of StockPrice
AFL is not in the database of StockPrice
A is not in the database of StockPrice
APD is not in the database of StockPrice
AKAM is not in the database of StockPrice
ALK is not in the database of StockPrice
ALB is not in the database of StockPrice
ARE is not in the database of StockPrice
ALGN is not in the database of StockPrice
ALLE is not in the database of StockPrice
LNT is not in the database of StockPrice
ALL is not in the database of StockPrice
GOOGL is not in the database of StockPrice
GOOG is not in the database of StockPrice
MO is not in the database of 

In [14]:
#Define the column names used as keys of the stock overview dict built in the next cell
columns = ['Symbol', 'AssetType', 'Name', 'Description', 'CIK', 'Exchange', 'Currency', 'Country', 'Sector', 'Industry',
           'Address', 'FiscalYearEnd', 'LatestQuarter', 'MarketCapitalization', 'EBITDA', 'PERatio', 'PEGRatio', 'BookValue', 'DividendPerShare',
           'DividendYield', 'EPS', 'RevenuePerShareTTM', 'ProfitMargin', 'OperatingMarginTTM', 'ReturnOnAssetsTTM', 'ReturnOnEquityTTM',
           'RevenueTTM', 'GrossProfitTTM', 'DilutedEPSTTM', 'QuarterlyEarningsGrowthYOY', 'QuarterlyRevenueGrowthYOY', 'AnalystTargetPrice', 'TrailingPE', 'ForwardPE',
           'PriceToSalesRatioTTM', 'PriceToBookRatio', 'EVToRevenue', 'EVToEBITDA', 'Beta', '52WeekHigh', '52WeekLow', '50DayMovingAverage', '200DayMovingAverage',
           'SharesOutstanding', 'DividendDate', 'ExDividendDate']

In [15]:
#Create a new dict to store the stock overview data: one list of values per column
StockOverview_dict = {column: [] for column in columns}
table = 'StockOverview'
isAPICallOver = False

for ticker in tickers:
    # Tickers already stored in the table are not requested again.
    if isInDatabase(ticker, table, cursor):
        continue

    overview = getCompanyOverview(ticker)
    if not overview:
        # Empty payload: there is nothing to record for this ticker.
        continue
    if 'Symbol' not in overview:
        # A payload without the overview fields is not company data
        # (presumably an API notice such as a quota message), so stop calling.
        isAPICallOver = True
        print(f'API calls stopped at {ticker}: {overview}')
        break

    # Append exactly one value per column so all lists keep the same length,
    # even when the response lacks a field or carries extra ones.
    for column in columns:
        StockOverview_dict[column].append(overview.get(column))

MMM is in the database of StockOverview
AOS is in the database of StockOverview
ABT is in the database of StockOverview
ABBV is in the database of StockOverview
ACN is in the database of StockOverview
ATVI is in the database of StockOverview
ADM is in the database of StockOverview
ADBE is in the database of StockOverview
ADP is in the database of StockOverview
AAP is in the database of StockOverview
AES is in the database of StockOverview
AFL is in the database of StockOverview
A is in the database of StockOverview
APD is in the database of StockOverview
AKAM is in the database of StockOverview
ALK is in the database of StockOverview
ALB is in the database of StockOverview
ARE is in the database of StockOverview
ALGN is in the database of StockOverview
ALLE is in the database of StockOverview
LNT is in the database of StockOverview
ALL is in the database of StockOverview
GOOGL is in the database of StockOverview
GOOG is in the database of StockOverview
MO is in the database of StockOve

## Explore the data

In [16]:
# Join every stock price row with its sector and sub-industry
sql = '''
SELECT Date, StockPrice.Ticker, Sector, SubIndustry, Open, High, Low, Close
FROM StockPrice
LEFT JOIN IndustryDimension ON StockPrice.Ticker = IndustryDimension.Ticker
'''
# Read the four price columns as floating point numbers
StockPrice_df = pd.read_sql(
    sql=sql,
    con=conn,
    dtype={price_column: np.double for price_column in ('Open', 'High', 'Low', 'Close')},
)
StockPrice_df.head()

Unnamed: 0,Date,Ticker,Sector,SubIndustry,Open,High,Low,Close
0,2023-07-28,MMM,Industrials,Industrial Conglomerates,111.87,112.41,111.045,111.88
1,2023-07-27,MMM,Industrials,Industrial Conglomerates,110.45,113.14,110.45,111.19
2,2023-07-26,MMM,Industrials,Industrial Conglomerates,109.84,113.07,109.84,112.64
3,2023-07-25,MMM,Industrials,Industrial Conglomerates,107.55,110.85,107.0092,109.83
4,2023-07-24,MMM,Industrials,Industrial Conglomerates,104.53,105.6041,103.98,104.27


In [20]:
# Pull only the Ticker column of StockPrice.
# The result gets its own name so the joined StockPrice_df loaded in the
# previous cell is not overwritten; otherwise the describe() cell that follows
# would summarise this Ticker-only frame when the notebook is run top to bottom.
sql = '''
Select Ticker
From StockPrice
'''

StockTicker_df = pd.read_sql(sql=sql, con=conn)
StockTicker_df.head()

Unnamed: 0,Ticker
0,MMM
1,MMM
2,MMM
3,MMM
4,MMM


In [17]:
# Summary statistics of StockPrice_df
StockPrice_df.describe()

Unnamed: 0,Open,High,Low,Close
count,116428.0,116428.0,116428.0,116428.0
mean,57.748691,58.374194,57.109222,57.760798
std,40.354628,40.715566,39.998251,40.365872
min,0.96,1.0,0.89,0.91
25%,33.05,33.44,32.66,33.07
50%,49.41,49.97,48.85,49.42
75%,73.8925,74.68,73.07,73.87
max,416.26,417.37,413.68,415.42
