## Configs

In [53]:
offset = 0
limit = 10
period = 'max' # valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max

## Download all NASDAQ traded symbols

In [54]:
import pandas as pd

data = pd.read_csv("http://www.nasdaqtrader.com/dynamic/SymDir/nasdaqtraded.txt", sep='|')
data_clean = data[data['Test Issue'] == 'N']
symbols = data_clean['NASDAQ Symbol'].tolist()
print('total number of symbols traded = {}'.format(len(symbols)))

total number of symbols traded = 11305


## Download Historic data

In [55]:
! pip install yfinance > /dev/null 2>&1
! mkdir stocks

mkdir: stocks: File exists


In [56]:
import yfinance as yf
import os, contextlib

In [60]:
# # Single Stock
s = "AAPL"
folder = "stocks"
os.makedirs(folder, exist_ok=True) 

data = yf.download(s, period='max', auto_adjust=False)

if len(data.index) > 0:
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)

    filename = os.path.join(folder, f"{s}.csv")
    data.to_csv(filename)
    print(f"✔ Downloaded {s} to {filename}")
else:
    print(f"✘ No data found for {s}")

[*********************100%***********************]  1 of 1 completed

✔ Downloaded AAPL to stocks/AAPL.csv





In [57]:
# %%time
import random
random_select = True

if random_select:
    symbols_sample = random.sample(symbols, k=limit)
else:
    limit = limit if limit else len(symbols)
    end = min(offset + limit, len(symbols))
    symbols_sample = symbols[offset:end]

is_valid = [False] * len(symbols_sample)

# force silencing of verbose API
with open(os.devnull, 'w') as devnull:
    with contextlib.redirect_stdout(devnull):
        for i, s in enumerate(symbols_sample):
            data = yf.download(s, period=period, auto_adjust=False)
            if len(data.index) == 0:
                continue
            
            if isinstance(data.columns, pd.MultiIndex):
                data.columns = data.columns.get_level_values(0)
        
            is_valid[i] = True
            data.to_csv('stocks/{}.csv'.format(s))

print('Total number of valid symbols downloaded = {}'.format(sum(is_valid)))

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['CNO-A']: YFTzMissingError('possibly delisted; no timezone found')
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Total number of valid symbols downloaded = 9





In [58]:
print("Length of symbols:", len(symbols))
print("Length of is_valid:", len(is_valid))

Length of symbols: 11305
Length of is_valid: 10


In [59]:
valid_symbols = [s for i, s in enumerate(symbols_sample) if is_valid[i]]
print(valid_symbols)

['PSX', 'FYX', 'USMF', 'GECC', 'BAFE', 'SNCY', 'OTTR', 'AP', 'DFH']
