In [None]:
import pandas as pd
import yfinance as yf
import os
import time

def fetch_stock_data_to_csv(ticker_list_file:str, ticker_output_path:str, ticker_err_list_file:str, max_ticker_retreival:int=100000):
    """Download price history for each ticker in a list and save one CSV per ticker.

    The function is resumable: a ticker is skipped when its CSV already exists in
    ``ticker_output_path`` or when it is already recorded in the error list.

    Parameters
    ----------
    ticker_list_file : str
        CSV file with a ``Symbol`` column listing the tickers to fetch.
    ticker_output_path : str
        Directory that receives ``<symbol>.csv`` files; created if missing.
    ticker_err_list_file : str
        CSV file with ``Symbol`` and ``Error`` columns recording tickers that
        failed to download or returned no data. Created if missing and rewritten
        when the function exits, including when the run is interrupted.
    max_ticker_retreival : int, default 100000
        Stop after this many successful downloads in a single call.
    """
    err_columns = ['Symbol', 'Error']

    #keep_default_na=False stops pandas from parsing a ticker literally named "NA" as a missing value
    ticker_list = pd.read_csv(ticker_list_file, keep_default_na=False, na_values=[''])

    #create the output directory (and any missing parents) if it does not exist
    os.makedirs(ticker_output_path, exist_ok=True)

    #load the error list, or create it if it does not exist
    if os.path.exists(ticker_err_list_file):
        ticker_err_list = pd.read_csv(ticker_err_list_file, keep_default_na=False, na_values=[''])
        #files written with the DataFrame index carry stray 'Unnamed: N' columns; keep only the real ones
        ticker_err_list = ticker_err_list.reindex(columns=err_columns)
    else:
        ticker_err_list = pd.DataFrame(columns=err_columns)
        ticker_err_list.to_csv(ticker_err_list_file, index=False)
    err_records = ticker_err_list.to_dict('records')
    known_errors = set(ticker_err_list['Symbol'].dropna().astype(str))

    #change symbols ^ and / to - for yahoo finance; regex=False because '^' is a regex metacharacter
    symbols = (
        ticker_list['Symbol']
        .dropna()
        .astype(str)
        .str.replace('^', '-', regex=False)
        .str.replace('/', '-', regex=False)
    )

    downloaded = 0
    err_cnt = 0
    suc = 0
    try:
        #iterate through the tickers and download the data, stopping after max_ticker_retreival successes
        for symbol in symbols:
            if suc >= max_ticker_retreival:
                print('{}/{} Reached max ticker retreival '.format(suc,max_ticker_retreival))
                break
            #check if we already downloaded the data, if not then download it
            if os.path.exists(os.path.join(ticker_output_path, '{}.csv'.format(symbol))):
                downloaded += 1
                print('{}/{} Already  downloaded {}/{}'.format(suc,max_ticker_retreival,ticker_output_path,symbol))
                continue
            if symbol in known_errors:
                print('{}/{} Already  error      {}/{}'.format(suc,max_ticker_retreival,ticker_output_path,symbol))
                continue

            #pause between requests
            time.sleep(1)
            try:
                #measure time to download data
                start = time.time()
                data = yf.download(symbol)
                end = time.time()
            except Exception as exc:
                #Exception (not a bare except) so Ctrl-C / kernel interrupt still stops the run
                err_records.append({'Symbol': symbol, 'Error': 'Error downloading data'})
                known_errors.add(symbol)
                print('{}/{} Error downloading data for {}: {}'.format(suc,max_ticker_retreival,symbol,exc))
                err_cnt += 1
                continue
            if data.empty:
                err_records.append({'Symbol': symbol, 'Error': 'No data'})
                known_errors.add(symbol)
                err_cnt += 1
                print('{}/{} No data for {}/{}'.format(suc,max_ticker_retreival, ticker_output_path,symbol))
                continue
            data.to_csv(os.path.join(ticker_output_path, '{}.csv'.format(symbol)))
            suc += 1
            print('{}/{} Downloaded {} in {:.2f}s ({} values)'.format(suc,max_ticker_retreival,symbol,end-start,data.size))
    finally:
        #persist the error list even when the loop is interrupted; index=False keeps the file stable across runs
        pd.DataFrame(err_records, columns=err_columns).to_csv(ticker_err_list_file, index=False)
    print('Downloaded {} checked {} errors {}'.format(suc,downloaded,err_cnt))


In [None]:
#fetch price history for every ticker in the AMEX list
fetch_stock_data_to_csv(
    ticker_list_file='data/AMEX-TICKER-LIST-20230306.csv',
    ticker_output_path='data/AMEX',
    ticker_err_list_file='data/AMEX-TICKER-ERR-LIST-20230306.csv',
)

In [None]:
#fetch price history for every ticker in the NASDAQ list
fetch_stock_data_to_csv(
    ticker_list_file='data/NASDAQ-TICKER-LIST-20230306.csv',
    ticker_output_path='data/NASDAQ',
    ticker_err_list_file='data/NASDAQ-TICKER-ERR-LIST-20230306.csv',
)

In [None]:
#fetch price history for every ticker in the NYSE list
fetch_stock_data_to_csv(
    ticker_list_file='data/NYSE-TICKER-LIST-20230307.csv',
    ticker_output_path='data/NYSE',
    ticker_err_list_file='data/NYSE-TICKER-ERR-LIST-20230307.csv',
)

In [41]:
#load a {ticker: company name} dictionary from a text file and keep only the tickers
import ast
import pandas as pd
#the file is expected to hold a single Python dict literal
#sample {'AI':  'C3.ai,  Inc.',  'BA':  'Boeing  Company  (The)'}

with open('data/tmp.txt', 'r') as f:
    contents = f.read()
#NOTE(review): this was eval(contents), which executes whatever code the file contains.
#ast.literal_eval only accepts literals (dict/str/number/...) and raises ValueError otherwise.
data_dict = ast.literal_eval(contents)
#convert the dictionary to dataframe; the dictionary keys become the index
data_df = pd.DataFrame.from_dict(data_dict, orient='index', columns=['Name'])
#drop the 'Name' column so only the index built from the keys remains
data_df = data_df.drop(columns=['Name'])
data_df




AI
BA


In [None]:
import sqlite3
import os
import pandas as pd
def put_data_into_db(exchange:str, conn:sqlite3.Connection, data_dir:str='data'):
    """Load every ``*.csv`` file of one exchange into the ``stock_history`` table.

    Each file ``<data_dir>/<exchange>/<ticker>.csv`` must have a ``Date`` column.
    The file name without its extension is stored as ``Ticker`` and ``exchange``
    as ``Exchange``. ``Date`` and ``Ticker`` are written as the leading columns.

    Rows are appended (``if_exists='append'``), so loading the same file twice
    inserts the same rows again; if the target table enforces uniqueness on
    those columns the second insert raises.

    Parameters
    ----------
    exchange : str
        Name of the sub-directory to read; also stored in the ``Exchange`` column.
    conn : sqlite3.Connection
        Open connection. It is committed after each file and ``VACUUM`` is run
        once all files are loaded. The connection is not closed here.
    data_dir : str, default 'data'
        Root directory containing one sub-directory per exchange.
    """
    exchange_dir = os.path.join(data_dir, exchange)
    #os.listdir returns entries in arbitrary order; sort so the load order is reproducible
    for file in sorted(os.listdir(exchange_dir)):
        if not file.endswith('.csv'):
            continue
        print(exchange,'/',file, flush=True)
        #read the csv file
        df = pd.read_csv(os.path.join(exchange_dir, file))
        #the ticker is the file name without the .csv extension
        df['Ticker'] = os.path.splitext(file)[0]
        df['Exchange'] = exchange
        #convert the Date column to datetime
        df['Date'] = pd.to_datetime(df['Date'])
        #set the Date and Ticker as the index so to_sql writes them as the leading columns
        df = df.set_index(['Date','Ticker'])
        #load the data into the database
        df.to_sql('stock_history',conn,if_exists='append')
        conn.commit()
    #compact the database file once the whole exchange is loaded
    conn.execute("VACUUM")


In [None]:
#connect to the database
conn = sqlite3.connect('data/stock_data.db')
try:
    #create the table if not exists
    #"Adj Close" uses double quotes: that is the SQL identifier quoting, single quotes denote string literals
    conn.execute('''CREATE TABLE IF NOT EXISTS stock_history
            (Date DATE NOT NULL,
            Ticker TEXT NOT NULL,
            Open REAL,
            High REAL,
            Low REAL,
            Close REAL,
            "Adj Close" REAL,
            Volume REAL,
            Exchange TEXT NOT NULL,
            PRIMARY KEY (Date,Ticker));''')
    conn.commit()

    #put the data into the database, one exchange at a time
    for exchange in ('AMEX','NYSE','NASDAQ'):
        put_data_into_db(exchange,conn)
finally:
    #close the connection even if one of the loads fails
    conn.close()
        


In [None]:
#sample pulling data from the database

import sqlite3
import pandas as pd

conn = sqlite3.connect('data/stock_data.db')
try:
    #print the sqlite_master entries (name and CREATE statement) of every table
    print(pd.read_sql_query("SELECT * FROM sqlite_master WHERE type='table'", conn))
    #read up to 100000 rows for ticker ACLS from the table
    df = pd.read_sql_query("SELECT * FROM stock_history Where Ticker='ACLS' limit 100000", conn)
    #allow up to 1000 rows to be displayed, then print the last 10 rows of the data
    pd.set_option('display.max_rows', 1000)
    print(df.tail(10))
finally:
    #close the connection even if a query fails
    conn.close()