In [5]:
import requests
import json
import schedule
import time
import datetime
from dotenv import load_dotenv
import os
import yfinance as yf
from pymongo import MongoClient
import requests_cache
from requests import Session
from requests_cache import CacheMixin, SQLiteCache
from requests_ratelimiter import LimiterMixin, MemoryQueueBucket
from pyrate_limiter import Duration, RequestRate, Limiter
class CachedLimiterSession(CacheMixin, LimiterMixin, Session):
    """A ``requests`` ``Session`` with the caching and rate-limiting mixins applied.

    The class adds no behaviour of its own; everything comes from the three
    base classes, in that resolution order.
    """

# Shared HTTP session: responses are cached in the "yfinance.cache" SQLite
# file and outgoing requests are rate limited.
# NOTE: this session used to be rebound to a plain
# ``requests_cache.CachedSession`` a few lines further down, which silently
# threw the rate limiter away. It is now configured once and kept.
# NOTE(review): none of the visible fetch functions pass ``session`` to
# yfinance, so it only takes effect once it is handed over explicitly
# (e.g. ``yf.Ticker(symbol, session=session)``).
session = CachedLimiterSession(
    limiter=Limiter(RequestRate(2, Duration.SECOND*5)),  # max 2 requests per 5 seconds
    bucket_class=MemoryQueueBucket,
    backend=SQLiteCache("yfinance.cache"),
)
# Identify this client with a custom User-agent header.
session.headers['User-agent'] = 'moneybot/1.0'

# Read settings from a .env file / the process environment.
load_dotenv()
uri = os.getenv("MONGO_URL")  # None when the variable is unset
client = MongoClient(uri)
db = client["yahoo_finance"]

# Proxy passed to the yfinance calls below; None when the variable is unset.
PROXY_SERVER=os.getenv("PROXY_SERVER")


### Implement Rate Limiting, Request Caching, and Proxy Server to Prevent Blacklisting

In [6]:
# Relative path of the JSON file that lists the company tickers to process.
file_path = "Colab Notebooks/company_tickers.json"

def read_company_tickers(file_path):
    """Parse the JSON document stored at ``file_path`` and return it.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON content, exactly as produced by ``json.load``.
    """
    with open(file_path, 'r') as handle:
        return json.load(handle)

# Load the ticker file once, then collect the "ticker" value of every entry
# (None for entries that have no such key).
company_tickers = read_company_tickers(file_path)
tickers = [entry.get("ticker") for entry in company_tickers.values()]


### Get Tickers

In [None]:
def fetch_and_store_ticker_data(n1, n2):
    """Download price data for ``tickers[n1:n2]`` and insert it into MongoDB.

    Each symbol is fetched with ``yf.download(symbol, period="max")``. The
    rows of a non-empty result are inserted into the ``tickers`` collection of
    the module-level ``db``. Every inserted row is tagged with a ``ticker``
    field so rows of different symbols can be told apart.

    Args:
        n1: Start index (inclusive) into the module-level ``tickers`` list.
        n2: End index (exclusive) into the module-level ``tickers`` list.

    Returns:
        None. Progress, errors and the batch duration are printed.

    Any exception raised for a symbol is printed and the loop continues with
    the next symbol. If the error text contains "429", the loop pauses for one
    second first.
    """
    yf_col = db["tickers"]
    batch_start_time = time.time()
    for t in tickers[n1:n2]:
        try:
            df = yf.download(t, period="max")
            if df.empty:
                print(f"No data found for {t}, possible delisting")
                continue
            data_dict = df.reset_index().to_dict(orient='records')

            # Without this field the stored rows carry no hint of which
            # symbol they belong to.
            for record in data_dict:
                record['ticker'] = t

            yf_col.insert_many(data_dict)
        except Exception as e:
            print(f"Error fetching data for {t}: {str(e)}")
            if "429" in str(e):
                # HTTP 429 = too many requests: back off before the next symbol.
                print("429 error detected, delaying to respect rate limit...")
                time.sleep(1.0)

    batch_end_time = time.time()
    batch_total_time = batch_end_time - batch_start_time
    print(f"Batch ran for {batch_total_time:.2f} seconds")

In [None]:
# start_time = time.time()
# num_batches = len(tickers/500)
# n1, n2 = 0, 499 
# for b in range(0,num_batches):
#     fetch_and_store_ticker_data(n1, n2)
#     n1 += 500    
#     n2 += 500    
# end_time = time.time()
# total_time = end_time - start_time
# print(f"Program ran for {total_time:.2f} seconds")

### Get Ticker History

In [46]:
def fetch_and_store_history_data(db, n1, n2):
    """Fetch price history for ``tickers[n1:n2]`` and insert it into ``db["tickers"]``.

    For every symbol ``yf.Ticker(symbol).history`` is called with
    ``period="max"``, ``prepost=True``, ``actions=True``, ``keepna=True`` and
    the module-level ``PROXY_SERVER``. Each returned row becomes one document
    whose ``_id`` is ``"<ticker>_<first 8 characters of the dash-stripped Date>"``
    (``YYYYMMDD`` for timestamp values).

    Args:
        db: Database handle; ``db["tickers"]`` must provide ``insert_many``.
        n1: Start index (inclusive) into the module-level ``tickers`` list.
        n2: End index (exclusive) into the module-level ``tickers`` list.

    Returns:
        None. Progress, errors and the batch duration are printed.

    Error handling:
        Any exception for a symbol is printed and the symbol is skipped (it is
        not retried). When the error text contains "429" the per-request delay
        grows by 0.5 s (capped at 3.0 s) and the loop pauses an extra 10 s.
    """
    yf_col = db["tickers"]
    sleep_time = 2.5  # seconds to wait after every request
    batch_start_time = time.time()
    for t in tickers[n1:n2]:
        try:
            df = yf.Ticker(t).history(
               period="max",
               prepost=True,
               actions=True,
               proxy=PROXY_SERVER,
               keepna=True
            )
            if df.empty:
                print(f"No data found for {t}, possible delisting")
            else:
                data_dict = df.reset_index().to_dict(orient='records')

                for record in data_dict:
                    ts = str(record['Date']).replace('-', '')
                    record['_id'] = f'{t}_{ts[:8]}'  # Generate a unique ID for each record

                yf_col.insert_many(data_dict)
        except Exception as e:
            print(f"Error fetching data for {t}: {str(e)}")
            if "429" in str(e):
                # Always report the throttling, even once the delay is capped.
                print("429 error detected, delaying to respect rate limit...")
                if sleep_time<3.0:
                    sleep_time += 0.5
                time.sleep(10.0)
        # Throttle after EVERY request. An empty result still cost a request,
        # so it must not skip the pause (the old ``continue`` did).
        time.sleep(sleep_time)
    batch_end_time = time.time()
    batch_total_time = batch_end_time - batch_start_time
    print(f"Batch ran for {batch_total_time:.2f} seconds")

In [47]:
# Load the price history of every ticker in fixed-size batches and time the run.
batch_size = 500
start_time = time.time()
# Ceiling division: no trailing empty batch when len(tickers) is a multiple of batch_size.
num_batches = (len(tickers) + batch_size - 1) // batch_size
for b in range(0, num_batches):
    n1 = b * batch_size
    # Slice ends are exclusive, so the end must be n1 + 500. The previous
    # bounds (0, 499), (500, 999), ... skipped one ticker per batch.
    n2 = n1 + batch_size
    fetch_and_store_history_data(db, n1, n2)
end_time = time.time()
total_time = end_time - start_time
print(f"Program ran for {total_time:.2f} seconds")


Batch ran for 1347.77 seconds


TMIN: No timezone found, symbol may be delisted


No data found for TMIN, possible delisting


PMVC: No timezone found, symbol may be delisted


No data found for PMVC, possible delisting
Batch ran for 1346.01 seconds


CDAKQ: No timezone found, symbol may be delisted


No data found for CDAKQ, possible delisting


AIDG: No timezone found, symbol may be delisted


No data found for AIDG, possible delisting


Failed to get ticker 'ZLME' reason: Response ended prematurely
ZLME: No timezone found, symbol may be delisted


No data found for ZLME, possible delisting


GCGJ: No timezone found, symbol may be delisted


No data found for GCGJ, possible delisting
Error fetching data for BKHA: index 0 is out of bounds for axis 0 with size 0


ODRS: No timezone found, symbol may be delisted


No data found for ODRS, possible delisting


SSOK.: No timezone found, symbol may be delisted


No data found for SSOK., possible delisting


FLLZ: No timezone found, symbol may be delisted


No data found for FLLZ, possible delisting


MTRS: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for MTRS, possible delisting


MAAI: No timezone found, symbol may be delisted


No data found for MAAI, possible delisting


SCRP: No timezone found, symbol may be delisted


No data found for SCRP, possible delisting


VISTA: No timezone found, symbol may be delisted


No data found for VISTA, possible delisting


KBSG: No timezone found, symbol may be delisted


No data found for KBSG, possible delisting


OMG: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for OMG, possible delisting


SKUB: No timezone found, symbol may be delisted


No data found for SKUB, possible delisting


DOCO: No timezone found, symbol may be delisted


No data found for DOCO, possible delisting


YMAT: No timezone found, symbol may be delisted


No data found for YMAT, possible delisting


GELS: No timezone found, symbol may be delisted


No data found for GELS, possible delisting


RITR: No timezone found, symbol may be delisted


No data found for RITR, possible delisting


HCWC: No timezone found, symbol may be delisted


No data found for HCWC, possible delisting


JBS: Period 'max' is invalid, must be one of ['1d', '5d']
JPO: Period 'max' is invalid, must be one of ['1d', '5d']


No data found for JBS, possible delisting
No data found for JPO, possible delisting


NTCL: No timezone found, symbol may be delisted
XCH: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for NTCL, possible delisting
No data found for XCH, possible delisting


EHGO: No timezone found, symbol may be delisted
GGL: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for EHGO, possible delisting
No data found for GGL, possible delisting


IMSV: No timezone found, symbol may be delisted


No data found for IMSV, possible delisting


ORKT: No timezone found, symbol may be delisted


No data found for ORKT, possible delisting


TRUV: No timezone found, symbol may be delisted


No data found for TRUV, possible delisting


JBDI: No timezone found, symbol may be delisted


No data found for JBDI, possible delisting


HRLR: No timezone found, symbol may be delisted
PSII: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for HRLR, possible delisting
No data found for PSII, possible delisting


YAAS: No timezone found, symbol may be delisted


No data found for YAAS, possible delisting


ENGS: No timezone found, symbol may be delisted
RAN: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for ENGS, possible delisting
No data found for RAN, possible delisting


SPHL: No timezone found, symbol may be delisted


No data found for SPHL, possible delisting


BTOC: No timezone found, symbol may be delisted


No data found for BTOC, possible delisting


WOK: Period 'max' is invalid, must be one of ['1d', '5d']


No data found for WOK, possible delisting


FFFZ: No timezone found, symbol may be delisted


No data found for FFFZ, possible delisting


LNZNY: No timezone found, symbol may be delisted
XXC: Period 'max' is invalid, must be one of ['1d', '5d']
APRI: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for LNZNY, possible delisting
No data found for XXC, possible delisting
No data found for APRI, possible delisting


TURO: No timezone found, symbol may be delisted


No data found for TURO, possible delisting


OPSC: No timezone found, symbol may be delisted


No data found for OPSC, possible delisting


PITA: No timezone found, symbol may be delisted


No data found for PITA, possible delisting


RPGL: No timezone found, symbol may be delisted


No data found for RPGL, possible delisting


IZTC: No timezone found, symbol may be delisted


No data found for IZTC, possible delisting


KAPA: No timezone found, symbol may be delisted


No data found for KAPA, possible delisting


PLUT: No timezone found, symbol may be delisted


No data found for PLUT, possible delisting


WYHG: No timezone found, symbol may be delisted


No data found for WYHG, possible delisting


GAUZ: No timezone found, symbol may be delisted


No data found for GAUZ, possible delisting


PLRZ: No timezone found, symbol may be delisted
SKK: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)
SAG: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)
LSH: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for PLRZ, possible delisting
No data found for SKK, possible delisting
No data found for SAG, possible delisting
No data found for LSH, possible delisting


DLOG: No timezone found, symbol may be delisted
PRGY: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)
QQJ: Period 'max' is invalid, must be one of ['1d', '5d']
CAST: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for DLOG, possible delisting
No data found for PRGY, possible delisting
No data found for QQJ, possible delisting
No data found for CAST, possible delisting


MDRN: No timezone found, symbol may be delisted


No data found for MDRN, possible delisting


IOTR: No timezone found, symbol may be delisted


No data found for IOTR, possible delisting


ORIS: No timezone found, symbol may be delisted


No data found for ORIS, possible delisting


CGBS: No timezone found, symbol may be delisted


No data found for CGBS, possible delisting


HPAI: No timezone found, symbol may be delisted
CUPR: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for HPAI, possible delisting
No data found for CUPR, possible delisting


MJID: No timezone found, symbol may be delisted


No data found for MJID, possible delisting


SQVI: No timezone found, symbol may be delisted


No data found for SQVI, possible delisting


VITT: No timezone found, symbol may be delisted


No data found for VITT, possible delisting
Batch ran for 1213.39 seconds


JDZG: No timezone found, symbol may be delisted
WDSP: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for JDZG, possible delisting
No data found for WDSP, possible delisting


SYNSY: No timezone found, symbol may be delisted


No data found for SYNSY, possible delisting


LBRJ: No timezone found, symbol may be delisted


No data found for LBRJ, possible delisting


MCTR: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for MCTR, possible delisting


BLMZ: No timezone found, symbol may be delisted


No data found for BLMZ, possible delisting


SJA: No timezone found, symbol may be delisted


No data found for SJA, possible delisting


NVAM: No timezone found, symbol may be delisted


No data found for NVAM, possible delisting


DRJT: No timezone found, symbol may be delisted


No data found for DRJT, possible delisting


PGHL: No timezone found, symbol may be delisted


No data found for PGHL, possible delisting


FDSB: No timezone found, symbol may be delisted


No data found for FDSB, possible delisting


JTGEY: No timezone found, symbol may be delisted
SFHG: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for JTGEY, possible delisting
No data found for SFHG, possible delisting


LRTX: No timezone found, symbol may be delisted
OCP: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for LRTX, possible delisting
No data found for OCP, possible delisting


MKDW: No timezone found, symbol may be delisted


No data found for MKDW, possible delisting


SUKE: No timezone found, symbol may be delisted


No data found for SUKE, possible delisting


LZMH: No timezone found, symbol may be delisted


No data found for LZMH, possible delisting


ZENA: No timezone found, symbol may be delisted


No data found for ZENA, possible delisting


DDCIU: No timezone found, symbol may be delisted


No data found for DDCIU, possible delisting


GCDT: No timezone found, symbol may be delisted


No data found for GCDT, possible delisting


ABVE: No timezone found, symbol may be delisted
WCT: Period 'max' is invalid, must be one of ['1d', '5d']


No data found for ABVE, possible delisting
No data found for WCT, possible delisting


SOBO: No timezone found, symbol may be delisted


No data found for SOBO, possible delisting


OLSI: No timezone found, symbol may be delisted


No data found for OLSI, possible delisting


TLIH: No timezone found, symbol may be delisted


No data found for TLIH, possible delisting


CGTL: No timezone found, symbol may be delisted
DSY: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for CGTL, possible delisting
No data found for DSY, possible delisting


NVIQ: No timezone found, symbol may be delisted


No data found for NVIQ, possible delisting


PTHL: No timezone found, symbol may be delisted


No data found for PTHL, possible delisting


AKO.A: No timezone found, symbol may be delisted


No data found for AKO.A, possible delisting


HWEC: No timezone found, symbol may be delisted


No data found for HWEC, possible delisting


LVDW: No timezone found, symbol may be delisted


No data found for LVDW, possible delisting


CPGRA: No timezone found, symbol may be delisted


No data found for CPGRA, possible delisting


TOYO: No timezone found, symbol may be delisted


No data found for TOYO, possible delisting


BDMD: No timezone found, symbol may be delisted


No data found for BDMD, possible delisting


LLLL: No timezone found, symbol may be delisted


No data found for LLLL, possible delisting


PFAB: No timezone found, symbol may be delisted
JCY: Period 'max' is invalid, must be one of ['1d', '5d']


No data found for PFAB, possible delisting
No data found for JCY, possible delisting


LCCTY: No timezone found, symbol may be delisted


No data found for LCCTY, possible delisting


PSUS: No timezone found, symbol may be delisted


No data found for PSUS, possible delisting


UGCC: No timezone found, symbol may be delisted


No data found for UGCC, possible delisting


BOW: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for BOW, possible delisting


FLAI: No timezone found, symbol may be delisted


No data found for FLAI, possible delisting


RMSG: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for RMSG, possible delisting


JPL: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for JPL, possible delisting


AIMA: No timezone found, symbol may be delisted
LUD: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for AIMA, possible delisting
No data found for LUD, possible delisting


GPAT: No timezone found, symbol may be delisted


No data found for GPAT, possible delisting


RECT: No timezone found, symbol may be delisted


No data found for RECT, possible delisting


CNCL: No timezone found, symbol may be delisted


No data found for CNCL, possible delisting


CRGH: No timezone found, symbol may be delisted


No data found for CRGH, possible delisting


ADOB: No timezone found, symbol may be delisted


No data found for ADOB, possible delisting


BNEV: No timezone found, symbol may be delisted


No data found for BNEV, possible delisting


ALEH: No timezone found, symbol may be delisted


No data found for ALEH, possible delisting


TDTH: No timezone found, symbol may be delisted


No data found for TDTH, possible delisting


FLYE: No timezone found, symbol may be delisted
ADVB: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for FLYE, possible delisting
No data found for ADVB, possible delisting


GMHS: No timezone found, symbol may be delisted


No data found for GMHS, possible delisting


LXTM: No timezone found, symbol may be delisted
GEAR: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for LXTM, possible delisting
No data found for GEAR, possible delisting


KDLY: No timezone found, symbol may be delisted


No data found for KDLY, possible delisting


JLJT: No timezone found, symbol may be delisted


No data found for JLJT, possible delisting


FBGL: No timezone found, symbol may be delisted


No data found for FBGL, possible delisting


ADGM: No timezone found, symbol may be delisted


No data found for ADGM, possible delisting


FBLA: No timezone found, symbol may be delisted


No data found for FBLA, possible delisting


RFAI: No timezone found, symbol may be delisted
VEG: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-10)


No data found for RFAI, possible delisting
No data found for VEG, possible delisting
Batch ran for 1217.07 seconds
Batch ran for 1359.24 seconds


QTZM: No timezone found, symbol may be delisted


No data found for QTZM, possible delisting


BRIPF: No timezone found, symbol may be delisted


No data found for BRIPF, possible delisting


GTECW: No timezone found, symbol may be delisted


No data found for GTECW, possible delisting


PGDE: No timezone found, symbol may be delisted


No data found for PGDE, possible delisting


TCMEF: No timezone found, symbol may be delisted


No data found for TCMEF, possible delisting


ALIV: No timezone found, symbol may be delisted


No data found for ALIV, possible delisting


KLDIW: No timezone found, symbol may be delisted


No data found for KLDIW, possible delisting


EPDU: No timezone found, symbol may be delisted


No data found for EPDU, possible delisting


PMVCD: No timezone found, symbol may be delisted


No data found for PMVCD, possible delisting


ZTOEF: No timezone found, symbol may be delisted


No data found for ZTOEF, possible delisting


ICDX: No timezone found, symbol may be delisted


No data found for ICDX, possible delisting


GLEI: No timezone found, symbol may be delisted


No data found for GLEI, possible delisting


ADRTW: No timezone found, symbol may be delisted


No data found for ADRTW, possible delisting


TANAF: No timezone found, symbol may be delisted


No data found for TANAF, possible delisting


ICCRW: No timezone found, symbol may be delisted


No data found for ICCRW, possible delisting


MINR: No timezone found, symbol may be delisted


No data found for MINR, possible delisting


BAKR: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-11)


No data found for BAKR, possible delisting


ALMP: No timezone found, symbol may be delisted


No data found for ALMP, possible delisting


OFSWF: No timezone found, symbol may be delisted


No data found for OFSWF, possible delisting
Batch ran for 1284.74 seconds


HSDTW: No timezone found, symbol may be delisted


No data found for HSDTW, possible delisting


IHETW: No timezone found, symbol may be delisted


No data found for IHETW, possible delisting


AERGP: No timezone found, symbol may be delisted


No data found for AERGP, possible delisting


CRSLF: No timezone found, symbol may be delisted


No data found for CRSLF, possible delisting


DESLF: No timezone found, symbol may be delisted


No data found for DESLF, possible delisting


TMEF: No timezone found, symbol may be delisted


No data found for TMEF, possible delisting


BKFPF: No timezone found, symbol may be delisted


No data found for BKFPF, possible delisting


BKFDF: No timezone found, symbol may be delisted


No data found for BKFDF, possible delisting


BRFPF: No timezone found, symbol may be delisted


No data found for BRFPF, possible delisting


BKFOF: No timezone found, symbol may be delisted


No data found for BKFOF, possible delisting


BRCFF: No timezone found, symbol may be delisted


No data found for BRCFF, possible delisting


FIISP: No timezone found, symbol may be delisted


No data found for FIISP, possible delisting


GFSAY: No timezone found, symbol may be delisted


No data found for GFSAY, possible delisting


BNPZY: No timezone found, symbol may be delisted


No data found for BNPZY, possible delisting


RSGPY: No timezone found, symbol may be delisted


No data found for RSGPY, possible delisting


ATLEW: No timezone found, symbol may be delisted


No data found for ATLEW, possible delisting


XFOWW: No timezone found, symbol may be delisted


No data found for XFOWW, possible delisting


FRTSF: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-11)


No data found for FRTSF, possible delisting


FTPSF: No timezone found, symbol may be delisted


No data found for FTPSF, possible delisting


MPLNW: No timezone found, symbol may be delisted


No data found for MPLNW, possible delisting


FUPEY: No timezone found, symbol may be delisted


No data found for FUPEY, possible delisting


HCIIP: No timezone found, symbol may be delisted


No data found for HCIIP, possible delisting


SNLIF: No timezone found, symbol may be delisted


No data found for SNLIF, possible delisting


SNLFF: No timezone found, symbol may be delisted


No data found for SNLFF, possible delisting


RGPX: No timezone found, symbol may be delisted


No data found for RGPX, possible delisting


CWLXF: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-11)


No data found for CWLXF, possible delisting


EEXAP: No timezone found, symbol may be delisted


No data found for EEXAP, possible delisting


AYWWF: No timezone found, symbol may be delisted


No data found for AYWWF, possible delisting


GLCP: No timezone found, symbol may be delisted


No data found for GLCP, possible delisting


BAZNF: No timezone found, symbol may be delisted


No data found for BAZNF, possible delisting


PSPX: No timezone found, symbol may be delisted


No data found for PSPX, possible delisting


NSA-PB: No timezone found, symbol may be delisted


No data found for NSA-PB, possible delisting


GRFGF: No timezone found, symbol may be delisted


No data found for GRFGF, possible delisting


CSSEZ: No timezone found, symbol may be delisted


No data found for CSSEZ, possible delisting


MQMNW: No timezone found, symbol may be delisted


No data found for MQMNW, possible delisting


IFN-RT: No timezone found, symbol may be delisted


No data found for IFN-RT, possible delisting
Batch ran for 1252.14 seconds


NUWEW: No timezone found, symbol may be delisted


No data found for NUWEW, possible delisting


MCKPF: No timezone found, symbol may be delisted


No data found for MCKPF, possible delisting


SNTUF: No timezone found, symbol may be delisted


No data found for SNTUF, possible delisting


PLSAY: No timezone found, symbol may be delisted


No data found for PLSAY, possible delisting


CTRVP: Period 'max' is invalid, must be one of ['1d', '5d']


No data found for CTRVP, possible delisting


TDDWW: No timezone found, symbol may be delisted


No data found for TDDWW, possible delisting


TEOF: No timezone found, symbol may be delisted


No data found for TEOF, possible delisting


GFRWF: No timezone found, symbol may be delisted


No data found for GFRWF, possible delisting


ICRL: No timezone found, symbol may be delisted


No data found for ICRL, possible delisting


PAAPU: No timezone found, symbol may be delisted


No data found for PAAPU, possible delisting


NXPT: No timezone found, symbol may be delisted


No data found for NXPT, possible delisting


FEAV: No timezone found, symbol may be delisted


No data found for FEAV, possible delisting


STSR: No timezone found, symbol may be delisted


No data found for STSR, possible delisting


HGTPW: No timezone found, symbol may be delisted


No data found for HGTPW, possible delisting


AENPP: No timezone found, symbol may be delisted


No data found for AENPP, possible delisting


MKZR: No timezone found, symbol may be delisted


No data found for MKZR, possible delisting


KUSA: No timezone found, symbol may be delisted


No data found for KUSA, possible delisting


MSTLW: No timezone found, symbol may be delisted


No data found for MSTLW, possible delisting


GERNW: No timezone found, symbol may be delisted


No data found for GERNW, possible delisting


SENEL: No timezone found, symbol may be delisted


No data found for SENEL, possible delisting
Batch ran for 1274.51 seconds


BRFFF: Period 'max' is invalid, must be one of ['1d', '5d']


No data found for BRFFF, possible delisting


CEAI: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-11)


No data found for CEAI, possible delisting


RAJAF: No timezone found, symbol may be delisted


No data found for RAJAF, possible delisting


PPLOF: No timezone found, symbol may be delisted


No data found for PPLOF, possible delisting


TACPF: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-11)


No data found for TACPF, possible delisting


QLUNF: No timezone found, symbol may be delisted


No data found for QLUNF, possible delisting


JMKJD: No timezone found, symbol may be delisted


No data found for JMKJD, possible delisting


TCENF: No timezone found, symbol may be delisted


No data found for TCENF, possible delisting


TCNCF: No timezone found, symbol may be delisted


No data found for TCNCF, possible delisting


GDHLF: No timezone found, symbol may be delisted


No data found for GDHLF, possible delisting


APPDW: No timezone found, symbol may be delisted


No data found for APPDW, possible delisting


BMOLF: No timezone found, symbol may be delisted


No data found for BMOLF, possible delisting


SSPFF: No price data found, symbol may be delisted (1d 1925-06-04 -> 2024-05-11)


No data found for SSPFF, possible delisting


DMTKW: No timezone found, symbol may be delisted


No data found for DMTKW, possible delisting


TTNPW: No timezone found, symbol may be delisted


No data found for TTNPW, possible delisting


MDLVY: No timezone found, symbol may be delisted


No data found for MDLVY, possible delisting


SAAYY: No timezone found, symbol may be delisted


No data found for SAAYY, possible delisting
Batch ran for 892.38 seconds
Program ran for 27825.08 seconds


### Get Actions

In [10]:
def fetch_and_store_actions_data(db):
    """Fetch corporate actions for every ticker and insert them into ``db["actions"]``.

    For each symbol in the module-level ``tickers`` list,
    ``yf.Ticker(symbol).get_actions`` is called through ``PROXY_SERVER``. Each
    returned row becomes one document whose ``_id`` is
    ``"<ticker>_ACTN_<first 8 characters of the dash-stripped Date>"``
    (``YYYYMMDD`` for timestamp values).

    Args:
        db: Database handle; ``db["actions"]`` must provide ``insert_many``.

    Returns:
        None. Progress, errors and the total duration are printed.

    Error handling:
        Any exception for a symbol is printed and the symbol is skipped (it is
        not retried). When the error text contains "429" the per-request delay
        grows by 0.5 s (capped at 3.0 s) and the loop pauses an extra 10 s.
    """
    yf_col = db["actions"]
    sleep_time = 2.5  # seconds to wait after every request
    batch_start_time = time.time()
    for t in tickers:
        try:
            df = yf.Ticker(t).get_actions(
               proxy=PROXY_SERVER
            )
            if df.empty:
                print(f"No data found for {t}, possible delisting")
            else:
                data_dict = df.reset_index().to_dict(orient='records')

                for record in data_dict:
                    ts = str(record['Date']).replace('-', '')
                    record['_id'] = f'{t}_ACTN_{ts[:8]}'  # Generate a unique ID for each record

                yf_col.insert_many(data_dict)
        except Exception as e:
            print(f"Error fetching data for {t}: {str(e)}")
            if "429" in str(e):
                # Always report the throttling, even once the delay is capped.
                print("429 error detected, delaying to respect rate limit...")
                if sleep_time<3.0:
                    sleep_time += 0.5
                time.sleep(10.0)
        # Throttle after EVERY request. An empty result still cost a request,
        # so it must not skip the pause (the old ``continue`` did).
        time.sleep(sleep_time)

    batch_end_time = time.time()
    batch_total_time = batch_end_time - batch_start_time
    print(f"Batch ran for {batch_total_time:.2f} seconds")

In [11]:
# Load the corporate actions of every ticker into the "actions" collection.
fetch_and_store_actions_data(db)


Error fetching data for MSFT: get_actions() got an unexpected keyword argument 'period'
Error fetching data for AAPL: get_actions() got an unexpected keyword argument 'period'


KeyboardInterrupt: 

### Get Dividends

In [None]:
def fetch_and_store_dividend_data(db, n1, n2):
    """Fetch dividends for ``tickers[n1:n2]`` and insert them into ``db["dividends"]``.

    For every symbol ``yf.Ticker(symbol).get_dividends`` is called through
    ``PROXY_SERVER``. Each returned row becomes one document whose ``_id`` is
    ``"<ticker>_DVDND_<first 8 characters of the dash-stripped Date>"``
    (``YYYYMMDD`` for timestamp values).

    Args:
        db: Database handle; ``db["dividends"]`` must provide ``insert_many``.
        n1: Start index (inclusive) into the module-level ``tickers`` list.
        n2: End index (exclusive) into the module-level ``tickers`` list.

    Returns:
        None. Progress, errors and the batch duration are printed.

    Error handling:
        Any exception for a symbol is printed and the symbol is skipped (it is
        not retried). When the error text contains "429" the per-request delay
        grows by 0.5 s (capped at 3.0 s) and the loop pauses an extra 10 s.
    """
    yf_col = db["dividends"]
    # Was never initialised before, so the first ``time.sleep(sleep_time)``
    # raised UnboundLocalError.
    sleep_time = 2.5  # seconds to wait after every request
    batch_start_time = time.time()
    for t in tickers[n1:n2]:
        try:
            # Only ``proxy`` is passed. ``period``/``prepost``/``keepna`` are
            # price-history options; the notebook's own output shows
            # ``get_actions()`` rejecting ``period`` with a TypeError, and
            # ``get_dividends`` is presumably just as strict.
            df = yf.Ticker(t).get_dividends(
               proxy=PROXY_SERVER
            )
            if df.empty:
                print(f"No data found for {t}, possible delisting")
            else:
                data_dict = df.reset_index().to_dict(orient='records')

                for record in data_dict:
                    ts = str(record['Date']).replace('-', '')
                    record['_id'] = f'{t}_DVDND_{ts[:8]}'  # Generate a unique ID for each record

                yf_col.insert_many(data_dict)
        except Exception as e:
            print(f"Error fetching data for {t}: {str(e)}")
            if "429" in str(e):
                # Always report the throttling, even once the delay is capped.
                print("429 error detected, delaying to respect rate limit...")
                if sleep_time<3.0:
                    sleep_time += 0.5
                time.sleep(10.0)
        # Throttle after EVERY request, including ones that returned no rows.
        time.sleep(sleep_time)

    batch_end_time = time.time()
    batch_total_time = batch_end_time - batch_start_time
    print(f"Batch ran for {batch_total_time:.2f} seconds")

In [None]:
# Load the dividends of every ticker in fixed-size batches and time the run.
batch_size = 500
start_time = time.time()
# Ceiling division: no trailing empty batch when len(tickers) is a multiple of batch_size.
num_batches = (len(tickers) + batch_size - 1) // batch_size
for b in range(0, num_batches):
    n1 = b * batch_size
    # Slice ends are exclusive, so the end must be n1 + 500. The previous
    # bounds (0, 499), (500, 999), ... skipped one ticker per batch.
    n2 = n1 + batch_size
    fetch_and_store_dividend_data(db, n1, n2)
end_time = time.time()
total_time = end_time - start_time
print(f"Program ran for {total_time:.2f} seconds")

### Get Splits

In [None]:
def fetch_and_store_split_data(db, n1, n2):
    """Fetch stock splits for ``tickers[n1:n2]`` and insert them into ``db["splits"]``.

    For every symbol ``yf.Ticker(symbol).get_splits`` is called through
    ``PROXY_SERVER``. Each returned row becomes one document whose ``_id`` is
    ``"<ticker>_SPLT_<first 8 characters of the dash-stripped Date>"``
    (``YYYYMMDD`` for timestamp values).

    Args:
        db: Database handle; ``db["splits"]`` must provide ``insert_many``.
        n1: Start index (inclusive) into the module-level ``tickers`` list.
        n2: End index (exclusive) into the module-level ``tickers`` list.

    Returns:
        None. Progress, errors and the batch duration are printed.

    Error handling:
        Any exception for a symbol is printed and the symbol is skipped (it is
        not retried). When the error text contains "429" the per-request delay
        grows by 0.5 s (capped at 3.0 s) and the loop pauses an extra 10 s.
    """
    yf_col = db["splits"]
    # Was never initialised before, so the first ``time.sleep(sleep_time)``
    # raised UnboundLocalError.
    sleep_time = 2.5  # seconds to wait after every request
    batch_start_time = time.time()
    for t in tickers[n1:n2]:
        try:
            # Only ``proxy`` is passed. ``period``/``prepost`` are
            # price-history options; the notebook's own output shows
            # ``get_actions()`` rejecting ``period`` with a TypeError, and
            # ``get_splits`` is presumably just as strict.
            df = yf.Ticker(t).get_splits(
               proxy=PROXY_SERVER
            )
            if df.empty:
                print(f"No data found for {t}, possible delisting")
            else:
                data_dict = df.reset_index().to_dict(orient='records')

                for record in data_dict:
                    ts = str(record['Date']).replace('-', '')
                    record['_id'] = f'{t}_SPLT_{ts[:8]}'  # Generate a unique ID for each record

                yf_col.insert_many(data_dict)
        except Exception as e:
            print(f"Error fetching data for {t}: {str(e)}")
            if "429" in str(e):
                # Always report the throttling, even once the delay is capped.
                print("429 error detected, delaying to respect rate limit...")
                if sleep_time<3.0:
                    sleep_time += 0.5
                time.sleep(10.0)
        # Throttle after EVERY request, including ones that returned no rows.
        time.sleep(sleep_time)

    batch_end_time = time.time()
    batch_total_time = batch_end_time - batch_start_time
    print(f"Batch ran for {batch_total_time:.2f} seconds")

In [None]:
# Load the stock splits of every ticker in fixed-size batches and time the run.
batch_size = 500
start_time = time.time()
# Ceiling division: no trailing empty batch when len(tickers) is a multiple of batch_size.
num_batches = (len(tickers) + batch_size - 1) // batch_size
for b in range(0, num_batches):
    n1 = b * batch_size
    # Slice ends are exclusive, so the end must be n1 + 500. The previous
    # bounds (0, 499), (500, 999), ... skipped one ticker per batch.
    n2 = n1 + batch_size
    fetch_and_store_split_data(db, n1, n2)
end_time = time.time()
total_time = end_time - start_time
print(f"Program ran for {total_time:.2f} seconds")

### Get Balance Sheet

In [82]:
def fetch_and_store_balance_sheet_data(db):
    """Download the balance sheet of every ticker and store it in long form.

    For each ticker in the global ``tickers`` list the balance-sheet frame is
    flattened into one document per (metric, date) pair with the keys
    ``ticker``, ``date``, ``metric`` and ``value``, and the documents are
    bulk-inserted into ``db["balance_sheets"]``. No explicit ``_id`` is set
    on the documents.

    Args:
        db: Database handle providing the ``balance_sheets`` collection.

    Returns:
        None. Failures for a single ticker are printed and skipped.
    """
    yf_col = db["balance_sheets"]
    sleep_time = 2.5

    for t in tickers:
        try:
            df = yf.Ticker(t).get_balance_sheet(proxy=PROXY_SERVER)
            if df.empty:
                print(f"No data found for {t}, possible delisting")
                # Note: this also bypasses the throttle sleep at the loop end.
                continue

            # Move the metric names out of the index, then relabel the
            # remaining columns as 'YYYYMMDD' strings (the original labels
            # must support .strftime).
            df = df.reset_index()
            date_labels = [x.strftime('%Y%m%d') for x in df.columns[1:]]
            df.columns = ['Metric'] + date_labels

            # Row-major flattening: all dates of the first metric, then all
            # dates of the second, and so on.
            data_dicts = [
                {
                    'ticker': t,
                    'date': date,
                    'metric': row['Metric'],
                    'value': row[date],
                }
                for _, row in df.iterrows()
                for date in date_labels
            ]

            yf_col.insert_many(data_dicts)
        except Exception as e:
            print(f"Error fetching data for {t}: {str(e)}")
            if "429" in str(e):
                # Rate limited: lengthen the per-ticker pause while it is
                # still under 3.0s, then wait an additional 10 seconds.
                if sleep_time < 3.0:
                    sleep_time += 0.5
                    print("429 error detected, delaying to respect rate limit...")
                time.sleep(10.0)
        time.sleep(sleep_time)


In [83]:
# Driver cell: run the balance-sheet ingestion over the whole `tickers` list
# and print how long it took.
start_time = time.time()
fetch_and_store_balance_sheet_data(db)
end_time = time.time()
total_time = end_time - start_time
print(f"Program ran for {total_time:.2f} seconds")

No data found for AVBH, possible delisting
No data found for DBE, possible delisting
No data found for FXY, possible delisting
No data found for LION, possible delisting
No data found for EAXR, possible delisting
No data found for BMBN, possible delisting
No data found for RFM, possible delisting
No data found for FOTB, possible delisting
No data found for BAYA, possible delisting
No data found for DWNX, possible delisting
No data found for REDW, possible delisting
No data found for AMIX, possible delisting
No data found for GBUX, possible delisting
No data found for KSBI, possible delisting
No data found for BBUZ, possible delisting
No data found for TECTP, possible delisting
No data found for CPBI, possible delisting
No data found for WBQNL, possible delisting
No data found for FCCI, possible delisting
No data found for PCSV, possible delisting
No data found for CWPE, possible delisting
No data found for SLDX, possible delisting
No data found for PCST, possible delisting
No data foun

KeyboardInterrupt: 

### Get Cashflow

In [109]:
def fetch_and_store_cashflow_data(db):
    """Download the cash-flow statement of every ticker, one document per date.

    For each ticker in the global ``tickers`` list and each reporting-date
    column of its cash-flow frame, a single document is inserted into
    ``db["cashflow"]``. The document holds ``ticker``, ``date`` and one key
    per metric name with that date's value.

    Args:
        db: Database handle providing the ``cashflow`` collection.

    Returns:
        None. Failures for a single ticker are printed and skipped.
    """
    yf_col = db["cashflow"]
    sleep_time = 2.5

    for t in tickers:
        try:
            df = yf.Ticker(t).get_cashflow(proxy=PROXY_SERVER)
            if df.empty:
                print(f"No data found for {t}, possible delisting")
                # Note: this also bypasses the throttle sleep at the loop end.
                continue

            # Turn the metric index into a regular first column.
            df = df.reset_index()
            # Relabel the date columns with str(col.date()); for datetime-like
            # labels that is the ISO form 'YYYY-MM-DD' (not 'YYYYMMDD').
            date_labels = [str(col.date()) for col in df.columns[1:]]
            df.columns = ['Metric'] + date_labels

            for date in date_labels:
                # Collect every metric's value for this date; a metric named
                # 'ticker' or 'date' would overwrite the base keys.
                metrics = {row['Metric']: row[date] for _, row in df.iterrows()}
                data_dict = {'ticker': t, 'date': date, **metrics}
                yf_col.insert_one(data_dict)

        except Exception as e:
            print(f"Error fetching data for {t}: {str(e)}")
            if "429" in str(e):
                # Rate limited: lengthen the per-ticker pause while it is
                # still under 3.0s, then wait an additional 10 seconds.
                if sleep_time < 3.0:
                    sleep_time += 0.5
                    print("429 error detected, delaying to respect rate limit...")
                time.sleep(10.0)
        time.sleep(sleep_time)

In [110]:
# Run the cash-flow ingestion for every ticker in the global `tickers` list.
fetch_and_store_cashflow_data(db)


No data found for SPY, possible delisting
No data found for QQQ, possible delisting
No data found for GLD, possible delisting
No data found for IAU, possible delisting
No data found for GBTC, possible delisting
No data found for DIA, possible delisting
No data found for MDY, possible delisting
No data found for USO, possible delisting
No data found for SLV, possible delisting
No data found for RBRK, possible delisting
No data found for DBC, possible delisting
No data found for EXG, possible delisting
No data found for ADX, possible delisting
No data found for ETY, possible delisting
No data found for USA, possible delisting
No data found for NUV, possible delisting
No data found for CLM, possible delisting
No data found for TY, possible delisting
No data found for BMEZ, possible delisting
No data found for PDO, possible delisting
No data found for ETV, possible delisting
No data found for GSG, possible delisting
No data found for BXMX, possible delisting
No data found for NFJ, possible

### Get Option Chain

In [135]:
def fetch_and_store_option_chain_data(db):
    """Store the option chain returned for every ticker, one document per contract.

    For each ticker in the global ``tickers`` list, ``option_chain()`` is
    called without arguments and every row of its ``calls`` and then ``puts``
    frames is inserted into ``db["options"]`` as its own document, tagged with
    ``ticker`` and ``type`` (``'call'`` or ``'put'``).

    Args:
        db: Database handle providing the ``options`` collection.

    Returns:
        None. Returns early, without printing the summary line, as soon as
        an error message containing "429" is seen; other per-ticker errors
        are printed and skipped.
    """
    yf_col = db["options"]
    sleep_time = 2.5
    count = 0
    # Columns copied verbatim from each contract row, in document key order.
    contract_fields = (
        'contractSymbol',
        'lastTradeDate',
        'strike',
        'lastPrice',
        'bid',
        'ask',
        'change',
        'percentChange',
        'volume',
        'openInterest',
        'impliedVolatility',
        'inTheMoney',
        'contractSize',
        'currency',
    )
    for t in tickers:
        try:
            options = yf.Ticker(t).option_chain()
            # Resolve both frames before inserting anything, so a failure to
            # read either one leaves the collection untouched for this ticker.
            sides = (('call', options.calls), ('put', options.puts))

            for option_type, frame in sides:
                for _, contract in frame.iterrows():
                    document = {'ticker': t, 'type': option_type}
                    document.update(
                        (field, contract[field]) for field in contract_fields
                    )
                    yf_col.insert_one(document)  # One MongoDB insert per contract
            # Only tickers whose calls and puts were all inserted are counted.
            count += 1

        except Exception as e:
            print(f"Error fetching data for {t}: {str(e)}")
            if "429" in str(e):
                # Rate limited: abandon the whole run.
                return
        time.sleep(sleep_time)
    print(f'{count} distinct tickers inserted')


In [136]:
# Run the option-chain ingestion for every ticker in the global `tickers` list.
fetch_and_store_option_chain_data(db)


Error fetching data for LTMAY: 'calls'
Error fetching data for RTNTF: 'calls'
Error fetching data for EADSY: 'calls'
Error fetching data for AIQUY: 'calls'
Error fetching data for TOELY: 'calls'
Error fetching data for ABBNY: 'calls'
Error fetching data for CSLLY: 'calls'
Error fetching data for ATLKY: 'calls'
Error fetching data for BNPQY: 'calls'
Error fetching data for CFRUY: 'calls'
Error fetching data for SHECY: 'calls'
Error fetching data for GLNCY: 'calls'
Error fetching data for PXD: 'calls'
Error fetching data for ANZGY: 'calls'
Error fetching data for BAESY: 'calls'
Error fetching data for DNZOY: 'calls'
Error fetching data for CTA-PA: 'calls'
Error fetching data for CRARY: 'calls'
Error fetching data for OLCLY: 'calls'
Error fetching data for CODYY: 'calls'
Error fetching data for DKILY: 'calls'
Error fetching data for PPERY: 'calls'
Error fetching data for GBTC: 'calls'
Error fetching data for PTCAY: 'calls'
Error fetching data for WTKWY: 'calls'
Error fetching data for MRA