# Import package

In [16]:
#Install module
# %pip install --user --upgrade pandas
%pip install --user SQLAlchemy




In [17]:
#Import package
import numpy as np
import pandas as pd
from pandas_datareader import data as web
import requests
from sqlalchemy import BigInteger, Numeric, Date, Text, String

# import os
# from tqdm import tqdm
# from datetime import datetime
# import yfinance as yf
# import lxml
# from bs4 import BeautifulSoup #BeutifulSoup

# Data Preparation

# Some useful functions

In [2]:
#Get Alpha Vantage api key
def get_apikey(filename: str) -> str:
    """Read an API key from a text file.

    Parameters
    ----------
    filename : str
        Path of the file that holds the key.

    Returns
    -------
    str
        The file's contents with leading and trailing whitespace removed.

    Raises
    ------
    OSError
        If the file cannot be opened (for example, it does not exist).
    """
    # The `with` block closes the file on exit, so no explicit close is needed.
    # (The previous `f.close` had no parentheses and therefore did nothing.)
    with open(filename) as f:
        return f.read().strip()

In [3]:
def get_daily_stockdata(ticker:str, outputsize = 'compact', datatype = 'json'):
    """Download daily OHLCV prices for one ticker from Alpha Vantage.

    Calls the TIME_SERIES_DAILY endpoint and reshapes the JSON response into a
    long-format DataFrame.

    Parameters
    ----------
    ticker : str
        Symbol to request.
    outputsize : str, default 'compact'
        Passed through to the API. Per the Alpha Vantage documentation,
        'compact' returns only the most recent data points and 'full' returns
        the full-length history.
    datatype : str, default 'json'
        Passed through to the API. The response is parsed with ``r.json()``,
        so any value other than 'json' is expected to fail at that step.

    Returns
    -------
    pandas.DataFrame
        Columns ``Ticker, Date, Open, High, Low, Close, Volume``, one row per
        entry of the response's 'Time Series (Daily)' mapping. Values are not
        converted; they keep whatever type the JSON payload provides.

    Raises
    ------
    requests.HTTPError
        If the server answers with an HTTP error status.
    KeyError
        If the response has no 'Time Series (Daily)' entry (e.g. the API
        answered with an error or rate-limit message instead of data).
    """
    alpha_vantage_apikey = get_apikey(filename= 'dist/apikey_AlphaVantage')

    # Let requests build and URL-encode the query string instead of
    # interpolating raw values into the URL.
    params = {
        'function': 'TIME_SERIES_DAILY',
        'symbol': ticker,
        'outputsize': outputsize,
        'apikey': alpha_vantage_apikey,
        'datatype': datatype,
    }
    # A timeout keeps one stalled request from hanging a long download loop.
    r = requests.get('https://www.alphavantage.co/query', params=params, timeout=30)
    r.raise_for_status()
    payload = r.json()

    if 'Time Series (Daily)' not in payload:
        # Report only the keys of the payload: the message text itself may
        # echo the API key and should not end up in notebook output.
        raise KeyError(
            f"Alpha Vantage returned no 'Time Series (Daily)' for {ticker!r}; "
            f"response keys: {sorted(payload)}"
        )

    df = pd.DataFrame.from_dict(payload['Time Series (Daily)'], orient='index')
    # Positional rename: relies on the API returning open/high/low/close/volume
    # in this order.
    df.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    df = df.rename_axis('Date').reset_index()
    df.insert(loc = 0, column = 'Ticker', value = ticker, allow_duplicates=True)
    return df

In [4]:
def get_company_overview(ticker:str):
    """Fetch the company overview for one ticker from Alpha Vantage.

    Parameters
    ----------
    ticker : str
        Symbol to request.

    Returns
    -------
    object
        The decoded JSON body of the OVERVIEW endpoint response, returned
        as-is (no reshaping or validation of its contents).

    Raises
    ------
    requests.HTTPError
        If the server answers with an HTTP error status.
    """
    alpha_vantage_apikey = get_apikey(filename= 'dist/apikey_AlphaVantage')

    params = {
        'function': 'OVERVIEW',
        'symbol': ticker,
        'apikey': alpha_vantage_apikey,
    }
    # A timeout keeps a stalled request from hanging the notebook.
    r = requests.get('https://www.alphavantage.co/query', params=params, timeout=30)
    r.raise_for_status()
    # Parse the body once; the DataFrame previously built here was never used.
    return r.json()

## Web Scraping

This section scrapes the tickers of all S&P 500 companies from Wikipedia: https://en.wikipedia.org/wiki/List_of_S%26P_500_companies

We can simply use pandas to read the table from the Wikipedia page.

In [10]:
#Store the S&P information in pandas dataframe
# pd.read_html is given the page URL and its result is indexed as a list of
# tables; the first table is kept as the ticker table.
wiki_url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies#Selected_changes_to_the_list_of_S&P_500_components"
tickers_df_list = pd.read_html(wiki_url)
# NOTE(review): selecting by position assumes the constituents table stays the
# first table on the page - confirm if the page layout changes.
tickers_df = tickers_df_list[0]
# Preview the first rows to check that the expected columns (e.g. 'Symbol') are present.
tickers_df.head()

Unnamed: 0,Symbol,Security,GICS Sector,GICS Sub-Industry,Headquarters Location,Date added,CIK,Founded
0,MMM,3M,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",1957-03-04,66740,1902
1,AOS,A. O. Smith,Industrials,Building Products,"Milwaukee, Wisconsin",2017-07-26,91142,1916
2,ABT,Abbott,Health Care,Health Care Equipment,"North Chicago, Illinois",1957-03-04,1800,1888
3,ABBV,AbbVie,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
4,ACN,Accenture,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2011-07-06,1467373,1989


In [11]:
# Extract the 'Symbol' column of tickers_df as a plain Python list of tickers
tickers = tickers_df['Symbol'].tolist()
print(tickers)

['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ATVI', 'ADM', 'ADBE', 'ADP', 'AAP', 'AES', 'AFL', 'A', 'APD', 'AKAM', 'ALK', 'ALB', 'ARE', 'ALGN', 'ALLE', 'LNT', 'ALL', 'GOOGL', 'GOOG', 'MO', 'AMZN', 'AMCR', 'AMD', 'AEE', 'AAL', 'AEP', 'AXP', 'AIG', 'AMT', 'AWK', 'AMP', 'ABC', 'AME', 'AMGN', 'APH', 'ADI', 'ANSS', 'AON', 'APA', 'AAPL', 'AMAT', 'APTV', 'ACGL', 'ANET', 'AJG', 'AIZ', 'T', 'ATO', 'ADSK', 'AZO', 'AVB', 'AVY', 'AXON', 'BKR', 'BALL', 'BAC', 'BBWI', 'BAX', 'BDX', 'WRB', 'BRK.B', 'BBY', 'BIO', 'TECH', 'BIIB', 'BLK', 'BK', 'BA', 'BKNG', 'BWA', 'BXP', 'BSX', 'BMY', 'AVGO', 'BR', 'BRO', 'BF.B', 'BG', 'CHRW', 'CDNS', 'CZR', 'CPT', 'CPB', 'COF', 'CAH', 'KMX', 'CCL', 'CARR', 'CTLT', 'CAT', 'CBOE', 'CBRE', 'CDW', 'CE', 'CNC', 'CNP', 'CDAY', 'CF', 'CRL', 'SCHW', 'CHTR', 'CVX', 'CMG', 'CB', 'CHD', 'CI', 'CINF', 'CTAS', 'CSCO', 'C', 'CFG', 'CLX', 'CME', 'CMS', 'KO', 'CTSH', 'CL', 'CMCSA', 'CMA', 'CAG', 'COP', 'ED', 'STZ', 'CEG', 'COO', 'CPRT', 'GLW', 'CTVA', 'CSGP', 'COST', 'CTRA', 'CCI', 'CSX', 'C

## Download the financial data

In [12]:
#Download the financial data from Alpha vantage
# tickers_test = ['AAPL','MMM','ABT', 'ABBV', 'ACN', 'ATVI', 'ADM', 'ADBE', 'ADP', 'AAP', 'AES', 'AFL']

# Collect one frame per ticker and concatenate once at the end; concatenating
# inside the loop re-copies all previously downloaded rows on every iteration.
stock_frames = []
# ticker -> exception class name. Only the class name is kept because request
# error messages can contain the full URL, including the API key.
failed_tickers = {}
for ticker in tickers:
    try:
        stock_frames.append(get_daily_stockdata(ticker, outputsize='full'))
    except Exception as exc:
        # Best-effort download: skip tickers that fail, but remember which ones.
        # Catching Exception (not a bare except) keeps Ctrl-C / kernel
        # interrupt working during this long loop.
        failed_tickers[ticker] = type(exc).__name__

if stock_frames:
    stock_df = pd.concat(stock_frames, ignore_index=True)
else:
    # Nothing downloaded: keep the expected schema so later cells still run.
    stock_df = pd.DataFrame(columns=['Ticker','Date','Open', 'High', 'Low', 'Close', 'Volume'])

if failed_tickers:
    print(f"{len(failed_tickers)} of {len(tickers)} tickers failed to download: {failed_tickers}")

In [18]:
#Connect SQLite
import sqlite3

# Open a connection to the SQLite database file 'StockData.db'; the path is
# relative, so it resolves against the notebook's working directory.
conn = sqlite3.connect('StockData.db')
# Cursor for issuing raw SQL on this connection (not used by the cells visible here).
stock_cursor = conn.cursor()

In [33]:
#Store the data into SQLite3 database
table_name = 'StockPrice'

# Write stock_df to the StockPrice table through the open connection.
# if_exists='replace' overwrites any existing table of that name, and the
# DataFrame index is written as a column labelled 'Record'.
stock_df.to_sql(table_name,conn,if_exists='replace',
                index=True,index_label='Record')
conn.commit()


In [31]:
# Sanity check: read back all columns of StockPrice, limited to 100 rows,
# to confirm the table was written.
sql = '''
SELECT * 
FROM StockPrice
LIMIT 100;  
'''

# Run the query on the open SQLite connection and load the result into a DataFrame.
test = pd.read_sql(sql,conn)
test

Unnamed: 0,Record,Ticker,Date,Open,High,Low,Close,Volume
0,0,MMM,2023-07-28,111.8700,112.4100,111.0450,111.8800,2910759
1,1,MMM,2023-07-27,110.4500,113.1400,110.4500,111.1900,5613488
2,2,MMM,2023-07-26,109.8400,113.0700,109.8400,112.6400,5594986
3,3,MMM,2023-07-25,107.5500,110.8500,107.0092,109.8300,8778880
4,4,MMM,2023-07-24,104.5300,105.6041,103.9800,104.2700,3333380
...,...,...,...,...,...,...,...,...
95,95,MMM,2023-03-13,103.6700,104.6800,102.7300,103.5000,4211092
96,96,MMM,2023-03-10,105.5400,107.0000,103.7400,104.0600,3693856
97,97,MMM,2023-03-09,107.6600,108.5600,105.4200,105.5200,3463725
98,98,MMM,2023-03-08,107.4400,107.7700,106.1800,107.1600,3434665
