In [53]:
import pandas as pd
import yfinance as yf
import os
import time

def fetch_stock_data_to_csv(ticker_list_file:str, ticker_output_path:str, ticker_err_list_file:str, max_ticker_retreival:int=100000, sleep_seconds:float=1.0) -> None:
    """Download data for each listed ticker via ``yf.download`` and cache it as one CSV per symbol.

    The function is resumable: a symbol is skipped when ``<ticker_output_path>/<Symbol>.csv``
    already exists or when the symbol is already recorded in the error list.

    Args:
        ticker_list_file: CSV file with a ``Symbol`` column listing the tickers to fetch.
        ticker_output_path: Directory that receives one ``<Symbol>.csv`` per ticker;
            created (including missing parents) if it does not exist.
        ticker_err_list_file: CSV file with ``Symbol`` and ``Error`` columns recording
            tickers that failed or returned no data; created if it does not exist.
        max_ticker_retreival: Stop once this many tickers have been newly downloaded
            in this call (already-cached and failed tickers do not count).
        sleep_seconds: Pause before each download request, to avoid getting blocked
            by Yahoo Finance. Values <= 0 disable the pause.

    Raises:
        KeyError: if ``ticker_list_file`` has no ``Symbol`` column.
        Any exception from reading the input files propagates unchanged. Download
        failures are recorded in the error list rather than raised.
    """
    err_columns = ['Symbol', 'Error']

    # keep_default_na=False: otherwise pandas turns the literal ticker "NA" into NaN.
    ticker_list = pd.read_csv(ticker_list_file, keep_default_na=False)
    os.makedirs(ticker_output_path, exist_ok=True)

    if os.path.exists(ticker_err_list_file):
        # Files written with the index included carry stray "Unnamed: N" columns
        # that would multiply on every run; keep only the two real columns.
        ticker_err_list = pd.read_csv(ticker_err_list_file, keep_default_na=False).reindex(columns=err_columns)
    else:
        ticker_err_list = pd.DataFrame(columns=err_columns)
        ticker_err_list.to_csv(ticker_err_list_file, index=False)

    # A set gives O(1) "already failed?" checks; new failures are collected in a
    # list and merged once at the end instead of concatenating inside the loop.
    known_errors = set(ticker_err_list['Symbol'].astype(str))
    new_errors = []

    def record_error(symbol, reason):
        known_errors.add(symbol)
        new_errors.append({'Symbol': symbol, 'Error': reason})

    downloaded = 0  # tickers skipped because their CSV already exists
    err_cnt = 0     # tickers that failed during this call
    suc = 0         # tickers newly downloaded during this call
    try:
        for raw_symbol in ticker_list['Symbol']:
            symbol = str(raw_symbol)
            if suc >= max_ticker_retreival:
                print('Reached max ticker retreival {}/{}'.format(suc,max_ticker_retreival))
                break
            if not symbol:
                # blank row in the ticker list: nothing to fetch
                continue
            csv_path = os.path.join(ticker_output_path, '{}.csv'.format(symbol))
            # skip tickers that were already downloaded or are known to fail
            if os.path.exists(csv_path):
                downloaded += 1
                print('Already {}/{} downloaded {}/{}'.format(suc,max_ticker_retreival,ticker_output_path,symbol))
                continue
            if symbol in known_errors:
                print('Already {}/{} error      {}/{}'.format(suc,max_ticker_retreival,ticker_output_path,symbol))
                continue

            # throttle requests to avoid getting blocked by yahoo finance
            if sleep_seconds > 0:
                time.sleep(sleep_seconds)
            try:
                # measure time to download data
                start = time.perf_counter()
                data = yf.download(symbol)
                elapsed = time.perf_counter() - start
            except Exception as exc:
                # `Exception`, not a bare except, so Ctrl-C / kernel interrupt still stops the loop.
                record_error(symbol, 'Error downloading data')
                print('Error downloading data for {}: {}'.format(symbol, exc))
                err_cnt += 1
                continue
            if data.empty:
                record_error(symbol, 'No data')
                err_cnt += 1
                print('No data for {}'.format(symbol))
                continue
            data.to_csv(csv_path)
            suc += 1
            print('Downloaded {}: {:.2f}s, size {}'.format(symbol, elapsed, data.size))

        print('Downloaded {} checked {} errors {}'.format(suc,downloaded,err_cnt))
    finally:
        # Persist the error list even when the loop is interrupted or raises, so
        # failures found so far are not retried on the next run.
        all_errors = ticker_err_list.to_dict('records') + new_errors
        pd.DataFrame(all_errors, columns=err_columns).to_csv(ticker_err_list_file, index=False)


In [None]:
# Fetch data for every ticker in the AMEX list (at most 100000 new downloads).
fetch_stock_data_to_csv(
    ticker_list_file='data/AMEX/AMEX-TICKER-LIST-20230306.csv',
    ticker_output_path='data/AMEX/',
    ticker_err_list_file='data/AMEX/AMEX-TICKER-ERR-LIST-20230306.csv',
    max_ticker_retreival=100000,
)

In [54]:
# Fetch data for every ticker in the NASDAQ list, using the default download cap.
fetch_stock_data_to_csv(
    ticker_list_file='data/NASDAQ/NASDAQ-TICKER-LIST-20230306.csv',
    ticker_output_path='data/NASDAQ',
    ticker_err_list_file='data/NASDAQ/NASDAQ-TICKER-ERR-LIST-20230306.csv',
)

Already 0/100000 downloaded data/NASDAQ/AACG
Already 0/100000 downloaded data/NASDAQ/AACI
Already 0/100000 downloaded data/NASDAQ/AACIU
Already 0/100000 downloaded data/NASDAQ/AACIW
Already 0/100000 downloaded data/NASDAQ/AADI
Already 0/100000 downloaded data/NASDAQ/AAL
Already 0/100000 downloaded data/NASDAQ/AAME
Already 0/100000 downloaded data/NASDAQ/AAOI
Already 0/100000 downloaded data/NASDAQ/AAON
Already 0/100000 downloaded data/NASDAQ/AAPL
Already 0/100000 downloaded data/NASDAQ/AAWW
Already 0/100000 downloaded data/NASDAQ/ABCB
Already 0/100000 downloaded data/NASDAQ/ABCL
Already 0/100000 downloaded data/NASDAQ/ABCM
Already 0/100000 downloaded data/NASDAQ/ABEO
Already 0/100000 downloaded data/NASDAQ/ABIO
Already 0/100000 downloaded data/NASDAQ/ABNB
Already 0/100000 downloaded data/NASDAQ/ABOS
Already 0/100000 downloaded data/NASDAQ/ABSI
Already 0/100000 downloaded data/NASDAQ/ABST
Already 0/100000 downloaded data/NASDAQ/ABUS
Already 0/100000 downloaded data/NASDAQ/ABVC
Already 0

In [55]:
# Fetch data for tickers in the NYSE list, stopping after 100 new downloads.
ticker_output_path = 'data/NYSE'
ticker_list_file = 'data/NYSE/NYSE-TICKER-LIST-20230307.csv'
ticker_err_list_file = 'data/NYSE/NYSE-TICKER-ERR-LIST-20230307.csv'

fetch_stock_data_to_csv(
    ticker_list_file=ticker_list_file,
    ticker_output_path=ticker_output_path,
    ticker_err_list_file=ticker_err_list_file,
    max_ticker_retreival=100,
)

Already 0/100 downloaded data/NYSE/A
Already 0/100 downloaded data/NYSE/AA
Already 0/100 downloaded data/NYSE/AAC
Already 0/100 downloaded data/NYSE/AAIC
Already 0/100 error      data/NYSE/AAIC^B
Already 0/100 error      data/NYSE/AAIC^C
Already 0/100 downloaded data/NYSE/AAIN
Already 0/100 error      data/NYSE/AAM^A
Already 0/100 error      data/NYSE/AAM^B
Already 0/100 downloaded data/NYSE/AAN
Already 0/100 downloaded data/NYSE/AAP
Already 0/100 downloaded data/NYSE/AAT
Already 0/100 downloaded data/NYSE/AB
Already 0/100 downloaded data/NYSE/ABB
Already 0/100 downloaded data/NYSE/ABBV
Already 0/100 downloaded data/NYSE/ABC
Already 0/100 downloaded data/NYSE/ABEV
Already 0/100 downloaded data/NYSE/ABG
Already 0/100 downloaded data/NYSE/ABM
Already 0/100 downloaded data/NYSE/ABR
Already 0/100 error      data/NYSE/ABR^D
Already 0/100 error      data/NYSE/ABR^E
Already 0/100 error      data/NYSE/ABR^F
Already 0/100 downloaded data/NYSE/ABT
Already 0/100 downloaded data/NYSE/AC
Already 0/