In [27]:
import pandas as pd
import re
import requests
from tqdm import tqdm
import time
from datetime import datetime, timedelta
from io import StringIO

In [30]:
# A python class to download historical stock prices in csv files.
# It makes two requests, one to get the cookie and the crumb and another one to get the data.
# Source : https://stackoverflow.com/questions/44225771/scraping-historical-data-from-yahoo-finance-with-python

class YahooFinanceHistory:
    """Download daily historical prices for one Yahoo Finance symbol.

    Two HTTP requests are made on a shared ``requests.Session``: the first
    loads the symbol's history page to obtain the session cookie and the
    "crumb" token, the second downloads the CSV using that crumb.

    Parameters
    ----------
    symbol : str
        Ticker as it should appear in the Yahoo URLs (e.g. ``'ICICIBANK.NS'``).
        It is inserted verbatim, so special characters must already be
        URL-encoded by the caller.
    days_back : int, default 7
        Length of the look-back window, in days, ending "now".
    """

    # Seconds to wait on each HTTP request before giving up.
    timeout = 5
    # Page whose HTML embeds the crumb; {0} is the symbol.
    crumb_link = 'https://finance.yahoo.com/quote/{0}/history?p={0}'
    # Captures the crumb value from the JSON embedded in that page.
    crumble_regex = r'CrumbStore":{"crumb":"(.*?)"}'
    # CSV download endpoint; period1/period2 are epoch seconds.
    quote_link = 'https://query1.finance.yahoo.com/v7/finance/download/{quote}?period1={dfrom}&period2={dto}&interval=1d&events=history&crumb={crumb}'

    def __init__(self, symbol, days_back=7):
        self.symbol = symbol
        self.session = requests.Session()
        self.dt = timedelta(days=days_back)

    def get_crumb(self):
        """Fetch the history page and store the crumb on ``self.crumb``.

        Raises
        ------
        requests.HTTPError
            If the page request returns an error status.
        ValueError
            If the crumb pattern is not found in the page.
        """
        import json  # local import: only needed to decode the captured JSON string

        response = self.session.get(self.crumb_link.format(self.symbol), timeout=self.timeout)
        response.raise_for_status()
        match = re.search(self.crumble_regex, response.text)
        if not match:
            raise ValueError('Could not get crumb from Yahoo Finance')
        raw_crumb = match.group(1)
        # The crumb is captured from inside a JSON string literal, so it may
        # carry JSON escapes (e.g. "\u002F" for "/"). Decode them so the token
        # sent back matches the real value; fall back to the raw text if the
        # capture is not a valid JSON string body.
        try:
            self.crumb = json.loads('"{}"'.format(raw_crumb))
        except ValueError:
            self.crumb = raw_crumb

    def get_quote(self):
        """Download the price history and return it as a DataFrame.

        The crumb is fetched first when it has not been obtained yet or the
        session holds no cookies.

        Returns
        -------
        pandas.DataFrame
            Parsed CSV response with the ``Date`` column converted to datetimes.

        Raises
        ------
        requests.HTTPError
            If either request returns an error status.
        ValueError
            If the crumb cannot be extracted.
        """
        if not hasattr(self, 'crumb') or len(self.session.cookies) == 0:
            self.get_crumb()
        # Use the epoch clock directly: calling .timestamp() on the naive
        # result of datetime.utcnow() interprets it as local time and shifts
        # the window by the machine's UTC offset.
        dateto = int(time.time())
        datefrom = dateto - int(self.dt.total_seconds())
        url = self.quote_link.format(quote=self.symbol, dfrom=datefrom, dto=dateto, crumb=self.crumb)
        # Same timeout as the crumb request so a stalled download cannot hang forever.
        response = self.session.get(url, timeout=self.timeout)
        response.raise_for_status()
        return pd.read_csv(StringIO(response.text), parse_dates=['Date'])

In [31]:
# Sample data: request a 30-day window for a single ticker and preview
# the first rows of the returned frame.
df = YahooFinanceHistory(symbol='ICICIBANK.NS', days_back=30).get_quote()
df.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-10-19,402.049988,418.75,401.149994,417.100006,417.100006,41072871
1,2020-10-20,413.5,416.899994,411.0,414.600006,414.600006,26158669
2,2020-10-21,416.0,428.5,410.299988,420.299988,420.299988,43124966
3,2020-10-22,416.399994,419.649994,407.649994,412.899994,412.899994,33088074
4,2020-10-23,417.0,420.5,411.75,416.950012,416.950012,22151463


In [32]:
# Load the lookup table from the working directory and preview it.
# Its yfin_ticker column drives the download loops further down.
nifty50_lookup = pd.read_csv(filepath_or_buffer="nifty50_lookuptable.csv")
nifty50_lookup.head(n=5)

Unnamed: 0,Sr.No.,Company Name,Sector,Weightage,thehindu_searchstring,mcontrol_substring,ticker,yfin_ticker
0,1,Reliance Industries Ltd.,Petroleum Products,14.93%,reliance%20petroleum,RI,RELIANCE,RELIANCE
1,2,HDFC Bank Ltd.,Banks,9.69%,hdfc%20bank,HDF01,HDFCBANK,HDFCBANK
2,3,Infosys Limited,Software,7.63%,infosys,IT,INFY,INFY
3,4,Housing Development Fin. Corp. Ltd.,Finance,6.44%,hdfc,HDF,HDFC,HDFC
4,5,Tata Consultancy Services Ltd.,Software,5.41%,tcs,TCS,TCS,TCS


In [13]:
# Smoke-test the yfin_ticker column: attempt a default-window download for
# every ticker. get_quote() calls raise_for_status(), so any HTTP error
# stops the loop at the offending symbol.
for tkr in tqdm(nifty50_lookup.loc[:, 'yfin_ticker']):
    df = YahooFinanceHistory(symbol=tkr + '.NS').get_quote()

100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [01:47<00:00,  2.15s/it]


In [33]:
# Scrape daily prices for the past `yr` years for every ticker in the lookup
# table and pickle one DataFrame per ticker in the working directory.
yr = 3
d = yr*365  # look-back window in days (leap days are ignored)
for tkr in tqdm(nifty50_lookup['yfin_ticker']):
    df = YahooFinanceHistory(tkr+'.NS', days_back=d).get_quote()
    print("Stock Prices Extracted for Past {} Years for {}".format(yr, tkr))
    print("Total Usable Scraped Data :",df.shape)
    # Build the suffix from `yr` so the file name always matches the window
    # that was actually scraped (identical to the old name when yr == 3).
    filename = "stocks_yfinance_{}_{}yrs.pkl".format(tkr, yr)
    df.to_pickle(filename)
    # Rows are price records, not articles. An empty download has no oldest
    # date, so report None rather than failing after the pickle was written.
    oldest = df['Date'].min().date() if not df.empty else None
    print("Oldest Available Date :", oldest)
    print("Data saved for {} : {}".format(tkr, filename))
    # Pause between tickers (presumably to stay under Yahoo's rate limits).
    time.sleep(5)

  0%|                                                                                           | 0/50 [00:00<?, ?it/s]

Stock Prices Extracted for Past 3 Years for RELIANCE
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for RELIANCE : stocks_yfinance_RELIANCE_3yrs.pkl


  2%|█▋                                                                                 | 1/50 [00:07<05:46,  7.07s/it]

Stock Prices Extracted for Past 3 Years for HDFCBANK
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for HDFCBANK : stocks_yfinance_HDFCBANK_3yrs.pkl


  4%|███▎                                                                               | 2/50 [00:14<05:41,  7.11s/it]

Stock Prices Extracted for Past 3 Years for INFY
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for INFY : stocks_yfinance_INFY_3yrs.pkl


  6%|████▉                                                                              | 3/50 [00:22<05:51,  7.48s/it]

Stock Prices Extracted for Past 3 Years for HDFC
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for HDFC : stocks_yfinance_HDFC_3yrs.pkl


  8%|██████▋                                                                            | 4/50 [00:29<05:42,  7.44s/it]

Stock Prices Extracted for Past 3 Years for TCS
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for TCS : stocks_yfinance_TCS_3yrs.pkl


 10%|████████▎                                                                          | 5/50 [00:37<05:38,  7.52s/it]

Stock Prices Extracted for Past 3 Years for ICICIBANK
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for ICICIBANK : stocks_yfinance_ICICIBANK_3yrs.pkl


 12%|█████████▉                                                                         | 6/50 [00:44<05:23,  7.36s/it]

Stock Prices Extracted for Past 3 Years for KOTAKBANK
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for KOTAKBANK : stocks_yfinance_KOTAKBANK_3yrs.pkl


 14%|███████████▌                                                                       | 7/50 [00:52<05:17,  7.38s/it]

Stock Prices Extracted for Past 3 Years for HINDUNILVR
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for HINDUNILVR : stocks_yfinance_HINDUNILVR_3yrs.pkl


 16%|█████████████▎                                                                     | 8/50 [00:59<05:05,  7.28s/it]

Stock Prices Extracted for Past 3 Years for ITC
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for ITC : stocks_yfinance_ITC_3yrs.pkl


 18%|██████████████▉                                                                    | 9/50 [01:09<05:37,  8.22s/it]

Stock Prices Extracted for Past 3 Years for LT
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for LT : stocks_yfinance_LT_3yrs.pkl


 20%|████████████████▍                                                                 | 10/50 [01:16<05:16,  7.91s/it]

Stock Prices Extracted for Past 3 Years for AXISBANK
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for AXISBANK : stocks_yfinance_AXISBANK_3yrs.pkl


 22%|██████████████████                                                                | 11/50 [01:23<04:58,  7.66s/it]

Stock Prices Extracted for Past 3 Years for BHARTIARTL
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for BHARTIARTL : stocks_yfinance_BHARTIARTL_3yrs.pkl


 24%|███████████████████▋                                                              | 12/50 [01:30<04:45,  7.52s/it]

Stock Prices Extracted for Past 3 Years for ASIANPAINT
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for ASIANPAINT : stocks_yfinance_ASIANPAINT_3yrs.pkl


 26%|█████████████████████▎                                                            | 13/50 [01:38<04:42,  7.65s/it]

Stock Prices Extracted for Past 3 Years for MARUTI
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for MARUTI : stocks_yfinance_MARUTI_3yrs.pkl


 28%|██████████████████████▉                                                           | 14/50 [01:45<04:26,  7.41s/it]

Stock Prices Extracted for Past 3 Years for HCLTECH
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for HCLTECH : stocks_yfinance_HCLTECH_3yrs.pkl


 30%|████████████████████████▌                                                         | 15/50 [01:53<04:19,  7.43s/it]

Stock Prices Extracted for Past 3 Years for BAJFINANCE
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for BAJFINANCE : stocks_yfinance_BAJFINANCE_3yrs.pkl


 32%|██████████████████████████▏                                                       | 16/50 [02:00<04:14,  7.49s/it]

Stock Prices Extracted for Past 3 Years for SBIN
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for SBIN : stocks_yfinance_SBIN_3yrs.pkl


 34%|███████████████████████████▉                                                      | 17/50 [02:08<04:10,  7.59s/it]

Stock Prices Extracted for Past 3 Years for DRREDDY
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for DRREDDY : stocks_yfinance_DRREDDY_3yrs.pkl


 36%|█████████████████████████████▌                                                    | 18/50 [02:15<03:56,  7.39s/it]

Stock Prices Extracted for Past 3 Years for M%26M
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for M%26M : stocks_yfinance_M%26M_3yrs.pkl


 38%|███████████████████████████████▏                                                  | 19/50 [02:22<03:42,  7.16s/it]

Stock Prices Extracted for Past 3 Years for NESTLEIND
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for NESTLEIND : stocks_yfinance_NESTLEIND_3yrs.pkl


 40%|████████████████████████████████▊                                                 | 20/50 [02:29<03:32,  7.07s/it]

Stock Prices Extracted for Past 3 Years for SUNPHARMA
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for SUNPHARMA : stocks_yfinance_SUNPHARMA_3yrs.pkl


 42%|██████████████████████████████████▍                                               | 21/50 [02:37<03:36,  7.46s/it]

Stock Prices Extracted for Past 3 Years for TITAN
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for TITAN : stocks_yfinance_TITAN_3yrs.pkl


 44%|████████████████████████████████████                                              | 22/50 [02:44<03:25,  7.35s/it]

Stock Prices Extracted for Past 3 Years for TECHM
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for TECHM : stocks_yfinance_TECHM_3yrs.pkl


 46%|█████████████████████████████████████▋                                            | 23/50 [02:51<03:16,  7.28s/it]

Stock Prices Extracted for Past 3 Years for ULTRACEMCO
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for ULTRACEMCO : stocks_yfinance_ULTRACEMCO_3yrs.pkl


 48%|███████████████████████████████████████▎                                          | 24/50 [03:00<03:20,  7.70s/it]

Stock Prices Extracted for Past 3 Years for WIPRO
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for WIPRO : stocks_yfinance_WIPRO_3yrs.pkl


 50%|█████████████████████████████████████████                                         | 25/50 [03:06<03:03,  7.35s/it]

Stock Prices Extracted for Past 3 Years for BRITANNIA
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for BRITANNIA : stocks_yfinance_BRITANNIA_3yrs.pkl


 52%|██████████████████████████████████████████▋                                       | 26/50 [03:14<02:58,  7.43s/it]

Stock Prices Extracted for Past 3 Years for HDFCLIFE
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for HDFCLIFE : stocks_yfinance_HDFCLIFE_3yrs.pkl


 54%|████████████████████████████████████████████▎                                     | 27/50 [03:21<02:46,  7.24s/it]

Stock Prices Extracted for Past 3 Years for POWERGRID
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for POWERGRID : stocks_yfinance_POWERGRID_3yrs.pkl


 56%|█████████████████████████████████████████████▉                                    | 28/50 [03:28<02:38,  7.20s/it]

Stock Prices Extracted for Past 3 Years for NTPC
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for NTPC : stocks_yfinance_NTPC_3yrs.pkl


 58%|███████████████████████████████████████████████▌                                  | 29/50 [03:35<02:30,  7.18s/it]

Stock Prices Extracted for Past 3 Years for HEROMOTOCO
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for HEROMOTOCO : stocks_yfinance_HEROMOTOCO_3yrs.pkl


 60%|█████████████████████████████████████████████████▏                                | 30/50 [03:42<02:22,  7.13s/it]

Stock Prices Extracted for Past 3 Years for CIPLA
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for CIPLA : stocks_yfinance_CIPLA_3yrs.pkl


 62%|██████████████████████████████████████████████████▊                               | 31/50 [03:49<02:14,  7.06s/it]

Stock Prices Extracted for Past 3 Years for DIVISLAB
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for DIVISLAB : stocks_yfinance_DIVISLAB_3yrs.pkl


 64%|████████████████████████████████████████████████████▍                             | 32/50 [03:56<02:07,  7.10s/it]

Stock Prices Extracted for Past 3 Years for BAJAJ-AUTO
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for BAJAJ-AUTO : stocks_yfinance_BAJAJ-AUTO_3yrs.pkl


 66%|██████████████████████████████████████████████████████                            | 33/50 [04:03<01:57,  6.93s/it]

Stock Prices Extracted for Past 3 Years for BAJAJFINSV
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for BAJAJFINSV : stocks_yfinance_BAJAJFINSV_3yrs.pkl


 68%|███████████████████████████████████████████████████████▊                          | 34/50 [04:10<01:51,  6.97s/it]

Stock Prices Extracted for Past 3 Years for SBILIFE
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for SBILIFE : stocks_yfinance_SBILIFE_3yrs.pkl


 70%|█████████████████████████████████████████████████████████▍                        | 35/50 [04:18<01:51,  7.42s/it]

Stock Prices Extracted for Past 3 Years for EICHERMOT
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for EICHERMOT : stocks_yfinance_EICHERMOT_3yrs.pkl


 72%|███████████████████████████████████████████████████████████                       | 36/50 [04:25<01:42,  7.31s/it]

Stock Prices Extracted for Past 3 Years for INDUSINDBK
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for INDUSINDBK : stocks_yfinance_INDUSINDBK_3yrs.pkl


 74%|████████████████████████████████████████████████████████████▋                     | 37/50 [04:33<01:35,  7.33s/it]

Stock Prices Extracted for Past 3 Years for GRASIM
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for GRASIM : stocks_yfinance_GRASIM_3yrs.pkl


 76%|██████████████████████████████████████████████████████████████▎                   | 38/50 [04:40<01:26,  7.19s/it]

Stock Prices Extracted for Past 3 Years for BPCL
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for BPCL : stocks_yfinance_BPCL_3yrs.pkl


 78%|███████████████████████████████████████████████████████████████▉                  | 39/50 [04:47<01:18,  7.18s/it]

Stock Prices Extracted for Past 3 Years for JSWSTEEL
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for JSWSTEEL : stocks_yfinance_JSWSTEEL_3yrs.pkl


 80%|█████████████████████████████████████████████████████████████████▌                | 40/50 [04:55<01:14,  7.41s/it]

Stock Prices Extracted for Past 3 Years for UPL
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for UPL : stocks_yfinance_UPL_3yrs.pkl


 82%|███████████████████████████████████████████████████████████████████▏              | 41/50 [05:02<01:05,  7.27s/it]

Stock Prices Extracted for Past 3 Years for SHREECEM
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for SHREECEM : stocks_yfinance_SHREECEM_3yrs.pkl


 84%|████████████████████████████████████████████████████████████████████▉             | 42/50 [05:09<00:57,  7.18s/it]

Stock Prices Extracted for Past 3 Years for TATASTEEL
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for TATASTEEL : stocks_yfinance_TATASTEEL_3yrs.pkl


 86%|██████████████████████████████████████████████████████████████████████▌           | 43/50 [05:15<00:49,  7.03s/it]

Stock Prices Extracted for Past 3 Years for HINDALCO
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for HINDALCO : stocks_yfinance_HINDALCO_3yrs.pkl


 88%|████████████████████████████████████████████████████████████████████████▏         | 44/50 [05:22<00:42,  7.02s/it]

Stock Prices Extracted for Past 3 Years for ADANIPORTS
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for ADANIPORTS : stocks_yfinance_ADANIPORTS_3yrs.pkl


 90%|█████████████████████████████████████████████████████████████████████████▊        | 45/50 [05:29<00:34,  6.87s/it]

Stock Prices Extracted for Past 3 Years for ONGC
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for ONGC : stocks_yfinance_ONGC_3yrs.pkl


 92%|███████████████████████████████████████████████████████████████████████████▍      | 46/50 [05:36<00:28,  7.04s/it]

Stock Prices Extracted for Past 3 Years for COALINDIA
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for COALINDIA : stocks_yfinance_COALINDIA_3yrs.pkl


 94%|█████████████████████████████████████████████████████████████████████████████     | 47/50 [05:43<00:21,  7.07s/it]

Stock Prices Extracted for Past 3 Years for TATAMOTORS
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for TATAMOTORS : stocks_yfinance_TATAMOTORS_3yrs.pkl


 96%|██████████████████████████████████████████████████████████████████████████████▋   | 48/50 [05:51<00:14,  7.33s/it]

Stock Prices Extracted for Past 3 Years for IOC
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for IOC : stocks_yfinance_IOC_3yrs.pkl


 98%|████████████████████████████████████████████████████████████████████████████████▎ | 49/50 [05:58<00:07,  7.24s/it]

Stock Prices Extracted for Past 3 Years for GAIL
Total Usable Scraped Data : (738, 7)
Oldest Available Article : 2017-11-20
Data saved for GAIL : stocks_yfinance_GAIL_3yrs.pkl


100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [06:05<00:00,  7.32s/it]
