In [34]:
import yfinance as yf
import pandas as pd
import datetime
import os
from tqdm import tqdm
import requests_cache
from requests import Session
from requests_cache import CacheMixin, SQLiteCache
from requests_ratelimiter import LimiterMixin, MemoryQueueBucket
from pyrate_limiter import Duration, RequestRate, Limiter


class CachedLimiterSession(CacheMixin, LimiterMixin, Session):
    pass
session = CachedLimiterSession(
    limiter=Limiter(RequestRate(2, Duration.SECOND*5)),  # max 2 requests per 5 seconds
    bucket_class=MemoryQueueBucket,
    backend=SQLiteCache("yfinance.cache"),
)

session = requests_cache.CachedSession('yfinance.cache')
session.headers['User-agent'] = 'my-program/1.0'

## Define function to extract historical data for each ticker
def fetch_data(ticker_symbol):
    ticker = yf.Ticker(ticker_symbol)
    today = datetime.date.today().strftime('%Y-%m-%d')
    data = ticker.history(start="2015-01-01", end=today)
    data.reset_index(inplace=True)
    
    # Check if the 'Date' column is of datetime type
    if pd.api.types.is_datetime64_any_dtype(data['Date']):
        data['Date'] = data['Date'].dt.strftime('%Y-%m-%d')
    
    # Drop the unnecessary columns
    data.drop(columns=['Dividends', 'Stock Splits', 'Adj Close'], inplace=True, errors='ignore')
    
    data['ticker'] = ticker_symbol  # Convert to lowercase
    
    # Convert other column names to lowercase
    data.columns = [col.lower() for col in data.columns]

    return data

## Extract all symbols
symbols= pd.read_csv("tickers.txt", sep = "\t", engine="python", header=0,
                     names=("Sr", "Symbol", "Name", "Market_cap"))

NSE = symbols.Symbol + ".NS"

'''
#Loop over each symbol and append data to the master DataFrame
all_data = []
for symbol in tqdm(NSE, desc="Fetching data"):  # tqdm progress bar!
    all_data.append(fetch_data(symbol))

# Concatenate all the individual datasets into one
master_data = pd.concat(all_data, ignore_index=True)

#Save the master DataFrame as a CSV
master_data.to_csv('all_NSE_symbols_data.csv', index=False)
'''

# Save individual CSVs for each index
# Create a directory in the output to store individual datasets
output_dir = 'individual_indices_data'
if not os.path.exists(output_dir):
    os.mkdir(output_dir)

for symbol in tqdm(NSE, desc="Saving individual datasets"):
    #print(symbol)
    # tqdm progress bar!
    single_data = fetch_data(symbol)
    
    # Drop the 'ticker' column if it exists
    if 'ticker' in single_data.columns:
        single_data.drop('ticker', axis=1, inplace=True)
    
    single_data.to_csv(f'{output_dir}/{symbol}_data.csv', index=False)

Saving individual datasets:   3%|█▋                                                              | 61/2255 [00:17<1:18:39,  2.15s/it]$HINDZINC.NS: possibly delisted; No price data found  (1d 2015-01-01 -> 2024-08-26)
Saving individual datasets:   5%|██▉                                                              | 103/2255 [01:21<12:20,  2.91it/s]$MCDOWELL-N.NS: possibly delisted; No timezone found
Saving individual datasets:   5%|███▎                                                             | 113/2255 [01:27<26:56,  1.32it/s]$COLPAL.NS: possibly delisted; No price data found  (1d 2015-01-01 -> 2024-08-26)
Saving individual datasets:   5%|███▏                                                           | 114/2255 [01:37<2:06:15,  3.54s/it]$LUPIN.NS: possibly delisted; No price data found  (1d 2015-01-01 -> 2024-08-26)
Saving individual datasets:   5%|███▏                                                           | 115/2255 [01:47<3:16:03,  5.50s/it]$NAUKRI.NS: possibly delisted; No p

KeyboardInterrupt: 