## Configs

In [None]:
# History span requested for every symbol.
period = 'max'  # valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max

# Start index into the symbol list and maximum number of symbols for one run;
# the download cell derives `end` from them (a falsy limit means "no cap").
offset, limit = 0, 3000

## Download all NASDAQ traded symbols

In [None]:
import pandas as pd
import os
import yfinance as yf
import contextlib

In [None]:
# Value matched against the listing's 'ETF' column when the symbol list is
# built: 'N' keeps non-ETF securities, 'Y' keeps ETFs.
download_etfs = 'N'

In [None]:

# Load the NASDAQ symbol directory, using a local cache when one exists.
#
# pandas' default NA handling converts cells spelled like an NA marker
# ("NA", "NAN", "NULL", ...) into NaN, which would silently destroy a ticker
# with such a name. Only treat genuinely empty cells as missing instead.
read_opts = dict(keep_default_na=False, na_values=[''])

if os.path.isfile('data_clean.csv'):
    data_clean = pd.read_csv('data_clean.csv', **read_opts)
else:
    data = pd.read_csv(
        "http://www.nasdaqtrader.com/dynamic/SymDir/nasdaqtraded.txt",
        sep='|',
        **read_opts,
    )
    # Drop test issues before caching the listing.
    data_clean = data[data['Test Issue'] == 'N']
    data_clean.to_csv('data_clean.csv', index=False)

print(data_clean.head())

# Keep only the requested security kind, and only rows that actually carry a
# symbol (a cache written with the default NA handling may contain blanks).
# Filtering the frame itself keeps `data_clean` and `symbols` row-aligned.
wanted_kind = data_clean['ETF'] == download_etfs
has_symbol = data_clean['NASDAQ Symbol'].notna()
data_clean = data_clean[wanted_kind & has_symbol]

symbols = data_clean['NASDAQ Symbol'].tolist()
print(f'total number of symbols traded = {len(symbols)}')

## Download historical data

In [None]:
# Create the download target directories from Python instead of the shell:
# exist_ok=True makes this cell safe to re-run, and it does not depend on the
# platform's `mkdir` command.
os.makedirs('hist', exist_ok=True)     # price history for symbols that returned data
os.makedirs('invalid', exist_ok=True)  # markers for symbols that returned no data

In [None]:
def get_symbols(dir):
    """Return the set of symbols that have a ``<symbol>.csv`` file in *dir*.

    Only entries ending in ``.csv`` are considered, and only that trailing
    suffix is removed, so unrelated directory entries (hidden files, notebook
    checkpoints) are not reported as symbols and a name that merely contains
    ``.csv`` is left intact.

    Raises whatever ``os.listdir`` raises when *dir* cannot be listed.
    """
    suffix = '.csv'
    return {
        filename[:-len(suffix)]
        for filename in os.listdir(dir)
        if filename.endswith(suffix)
    }
    
# Symbols already handled by an earlier run: those with saved history in
# "hist" and those recorded under "invalid".
known_good, known_bad = map(get_symbols, ("hist", "invalid"))

In [None]:
%%time

# Download window: only symbols[offset:end] are handled in this run.
# A falsy `limit` (0 / None) means "no cap".
limit = limit or len(symbols)
end = min(offset + limit, len(symbols))
is_valid = [False] * len(symbols)

# Failures are collected and reported after the loop, because anything printed
# while stdout is redirected to devnull would never be seen.
failed = []

# force silencing of verbose API
with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
    for i in range(offset, end):
        symbol = symbols[i]

        # Already downloaded by an earlier run.
        if symbol in known_good:
            is_valid[i] = True
            continue

        # Already known to return no data.
        if symbol in known_bad:
            continue

        try:
            data = yf.download(symbol, period=period, threads=4, progress=False, auto_adjust=True)
        except Exception as exp:
            # Skip this symbol entirely: falling through would reuse the frame
            # left over from the previous symbol. It is not recorded as invalid
            # so that a later run can retry it.
            failed.append((symbol, exp))
            continue

        if len(data.index) == 0:
            # Leave a marker so the symbol is not requested again.
            data.to_csv(f"invalid/{symbol}.csv")
            continue

        is_valid[i] = True
        data = data.reset_index()
        # NOTE(review): presumably the second column level carries the ticker;
        # only drop it when the columns really are multi-level, otherwise
        # droplevel raises.
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = data.columns.droplevel(level=1)
        data.to_csv(f"hist/{symbol}.csv")

for symbol, exp in failed:
    print(f"failed to download {symbol}: {exp!r}")
print('Total number of valid symbols downloaded = {}'.format(sum(is_valid)))

In [None]:
# Keep the listing rows flagged in `is_valid` (one flag per row of
# `data_clean`) and save that metadata.
valid_data = data_clean.loc[is_valid]
valid_data.to_csv('symbols_valid_meta.csv', index=False)

## Separating ETFs and Stocks

In [None]:
# Create the destination directories from Python instead of the shell:
# exist_ok=True makes this cell safe to re-run, and it does not depend on the
# platform's `mkdir` command.
os.makedirs('stocks', exist_ok=True)
os.makedirs('etfs', exist_ok=True)

In [None]:
# Split the valid symbols by the listing's 'ETF' flag ('Y' / 'N').
stocks = valid_data.loc[valid_data['ETF'] == 'N', 'NASDAQ Symbol'].tolist()
etfs = valid_data.loc[valid_data['ETF'] == 'Y', 'NASDAQ Symbol'].tolist()

In [None]:
import shutil
from os.path import join

def move_symbols(symbols, dest):
    """Move the ``<symbol>.csv`` file of every symbol from ``hist`` into *dest*."""
    for csv_name in map('{}.csv'.format, symbols):
        source_path = join('hist', csv_name)
        target_path = join(dest, csv_name)
        shutil.move(source_path, target_path)
        
# Sort the downloaded files into one folder per security kind.
for group, folder in ((etfs, "etfs"), (stocks, "stocks")):
    move_symbols(group, folder)

In [None]:
# Remove the staging directory once its files have been moved out; `rmdir`
# only deletes a directory that is empty.
!rmdir hist