In [None]:
import pandas as pd
import yfinance as yf
import os
import time

def fetch_stock_data_to_csv(ticker_list_file:str, ticker_output_path:str, ticker_err_list_file:str, max_ticker_retreival:int=100000):
    """Download data for every ticker in a CSV list and store one CSV per ticker.

    The function is resumable: a ticker is skipped when its output CSV already
    exists in ``ticker_output_path`` or when it is already recorded in the
    error list. Tickers whose download raises, or yields an empty frame, are
    appended to the error list so they are not retried on the next run.

    Args:
        ticker_list_file: CSV file with a ``Symbol`` column listing the tickers.
        ticker_output_path: Directory that receives ``<Symbol>.csv`` files.
            Created (including missing parents) when it does not exist.
        ticker_err_list_file: CSV file with ``Symbol`` and ``Error`` columns.
            Created when missing and rewritten when the function exits.
        max_ticker_retreival: Stop once this many tickers have been newly
            downloaded during this call.

    Returns:
        None. Results are written to disk and progress is printed.

    Raises:
        KeyError: If the ticker list or an existing error list has no
            ``Symbol`` column.
        OSError: If an output file cannot be written. The error list is still
            saved before the exception propagates.
    """
    ticker_list = pd.read_csv(ticker_list_file)
    # makedirs copes with nested paths and is a no-op when the directory exists
    os.makedirs(ticker_output_path, exist_ok=True)

    # load the error list, or create it if it does not exist
    if os.path.exists(ticker_err_list_file):
        ticker_err_list = pd.read_csv(ticker_err_list_file)
        # files written with the DataFrame index come back with 'Unnamed: N'
        # columns; drop them so they do not pile up run after run
        stale_cols = [c for c in ticker_err_list.columns if str(c).startswith('Unnamed:')]
        ticker_err_list = ticker_err_list.drop(columns=stale_cols)
    else:
        ticker_err_list = pd.DataFrame(columns=['Symbol','Error'])
        ticker_err_list.to_csv(ticker_err_list_file, index=False)

    # set membership keeps the per-ticker "already failed" check cheap
    known_errors = set(ticker_err_list['Symbol'])
    # new failures are collected here and merged once, not concatenated per row
    new_errors = []
    downloaded = 0
    err_cnt = 0
    suc = 0
    try:
        # iterate through the tickers, stopping after max_ticker_retreival new downloads
        for symbol in ticker_list['Symbol']:
            if suc >= max_ticker_retreival:
                print('Reached max ticker retreival {}/{}'.format(suc,max_ticker_retreival))
                break
            # path separators in a symbol would otherwise point into a
            # sub-directory that does not exist
            file_stem = str(symbol).replace('/', '_').replace(os.sep, '_')
            csv_path = os.path.join(ticker_output_path, '{}.csv'.format(file_stem))
            # check if we already downloaded the data; if not then download it
            if os.path.exists(csv_path):
                downloaded += 1
                print('Already {}/{} downloaded {}/{}'.format(suc,max_ticker_retreival,ticker_output_path,symbol))
                continue
            if symbol in known_errors:
                print('Already {}/{} error      {}/{}'.format(suc,max_ticker_retreival,ticker_output_path,symbol))
                continue
            # sleep for 1 second before each request to avoid getting blocked by yahoo finance
            time.sleep(1)
            try:
                # measure time to download data
                start = time.time()
                data = yf.download(symbol)
                elapsed = time.time() - start
            except Exception as exc:
                # Exception (not a bare except) so Ctrl-C still stops the loop
                new_errors.append({'Symbol': symbol, 'Error': 'Error downloading data'})
                known_errors.add(symbol)
                print('Error downloading data for {}: {}'.format(symbol, exc))
                err_cnt += 1
                continue
            if data.empty:
                new_errors.append({'Symbol': symbol, 'Error': 'No data'})
                known_errors.add(symbol)
                err_cnt += 1
                print('No data for {}'.format(symbol))
                continue
            data.to_csv(csv_path)
            suc += 1
            print('Downloaded {}: {:.2f}s, {} values'.format(symbol, elapsed, data.size))
        print('Downloaded {} checked {} errors {}'.format(suc,downloaded,err_cnt))
    finally:
        # persist the error list even when the run is interrupted or fails, so
        # failures recorded so far are not retried next time
        if new_errors:
            additions = pd.DataFrame(new_errors, columns=['Symbol','Error'])
            if ticker_err_list.empty:
                ticker_err_list = additions
            else:
                ticker_err_list = pd.concat([ticker_err_list, additions], ignore_index=True)
        ticker_err_list.to_csv(ticker_err_list_file, index=False)


In [None]:
# Run the downloader over the AMEX ticker list
fetch_stock_data_to_csv(
    ticker_list_file='data/AMEX/AMEX-TICKER-LIST-20230306.csv',
    ticker_output_path='data/AMEX/',
    ticker_err_list_file='data/AMEX/AMEX-TICKER-ERR-LIST-20230306.csv',
    max_ticker_retreival=100000,
)

In [None]:
# Run the downloader over the NASDAQ ticker list (default download limit)
fetch_stock_data_to_csv(
    ticker_list_file='data/NASDAQ/NASDAQ-TICKER-LIST-20230306.csv',
    ticker_output_path='data/NASDAQ',
    ticker_err_list_file='data/NASDAQ/NASDAQ-TICKER-ERR-LIST-20230306.csv',
)

In [None]:
# Run the downloader over the NYSE ticker list, capped at 100 new downloads
ticker_list_file = 'data/NYSE/NYSE-TICKER-LIST-20230307.csv'
ticker_output_path = 'data/NYSE'
ticker_err_list_file = 'data/NYSE/NYSE-TICKER-ERR-LIST-20230307.csv'

fetch_stock_data_to_csv(
    ticker_list_file=ticker_list_file,
    ticker_output_path=ticker_output_path,
    ticker_err_list_file=ticker_err_list_file,
    max_ticker_retreival=100,
)