In [None]:
# 
# Script to Download NSE Listed Companies Data and Store in SQLite Database from yfinance
#

In [5]:
# 1. Python Library Dependencies
# !pip install yfinance sqlalchemy pandas --quiet

import sys
import pandas as pd
import yfinance as yf
import sqlalchemy
import time
import os
import numpy as np


# --- CONFIGURATION ---
os.makedirs("data", exist_ok=True)                  # Ensuring existence of data directory
EXCEL_FILE = 'data/NSE_listed_companies_Filtered.xlsx'       # csv with names of companies
DB_NAME = 'data/NSE_Companies_filtered.db'

# 1. Configuration Parameters
START_DATE = '2004-09-01'            # start date
VOLATILITY_WINDOW = 30               # 30-Day Rolling Volatility

# 2. Connect to New Database
engine = sqlalchemy.create_engine(f'sqlite:///{DB_NAME}')
print(f"Created new database: {DB_NAME}")

# ==========================================
# PART 1: RESTORING SECTOR INFO
# ==========================================
print("\nPART 1: Creating 'stock_info' table from CSV...")

if not os.path.exists(EXCEL_FILE):
    print(f"CRITICAL: '{EXCEL_FILE}' not found.")
    sys.exit(1)

try:
    df = pd.read_excel(EXCEL_FILE)

    # Selecting relevant columns
    # Adjusting column names
    master_df = df[['TickerSymbol', 'Name', 'Sector']].copy()

    # Standardizing Column Names for SQL_smallcaps and using _
    master_df.columns = ['symbol', 'company_name', 'industry']

    # Adding .NS extension to symbol for yfinance
    master_df['symbol'] = master_df['symbol'].apply(lambda x: f"{x}.NS")

    # Saving to DB
    master_df.to_sql('stock_info', engine, if_exists='replace', index=False)

    # Retrieving list of tickers
    tickers = master_df['symbol'].tolist()
    print(f"Restored {len(tickers)} stocks into 'stock_info'.")

except Exception as e:
    print(f"Error reading CSV: {e}")
    sys.exit(1)

# ==========================================
# PART 2: DOWNLOADING 20 YEARS HISTORY
# ==========================================
print(f"\nPART 2: Downloading Daily Data (Start: {START_DATE})...")

success_count = 0
batch_size = 100                       # Downloading in Batches

# Looping through tickers
for i, ticker in enumerate(tickers):
    try:
        # A. Downloading Data
                                                                                # auto_adjust=True to handle Splits/Dividends
        df = yf.download(ticker, start=START_DATE, progress=False, interval = "1d",
                         auto_adjust=True, threads=True)

        if not df.empty:
            # Flatten columns
            if isinstance(df.columns, pd.MultiIndex):
                df.columns = [col[0] for col in df.columns]

            # B. Cleaning the Data
            df.reset_index(inplace=True)
            df.rename(columns={'Date': 'date', 'Open': 'open', 'High': 'high',
                               'Low': 'low', 'Close': 'close', 'Volume': 'volume'}, inplace=True)

            # Ensuring lowercasing of columns
            df.columns = [c.lower() for c in df.columns]

            # C. Feature Engineering (Volatility & Returns)
            # 1. Log Returns
            df['log_return'] = np.log(df['close'] / df['close'].shift(1))

            # 2. 5-30-60_Day Volatility
            df['volatility_5d'] = df['log_return'].rolling(window=5).std()                      # 5 day volatility
            df['volatility_30d'] = df['log_return'].rolling(window=VOLATILITY_WINDOW).std()     # 30 day volatility
            df['volatility_60d'] = df['log_return'].rolling(window=2*VOLATILITY_WINDOW).std()   # 60 day volatility

            # Dropping NaN rows generated by rolling window
            df.dropna(inplace=True)

            # D. Saving to Database
            # Table name = Ticker                                               (will save like "RELIANCE.NS"... so need to handle ticker names wtih EIKON separately)
            df.to_sql(ticker, engine, if_exists='replace', index=False)

            success_count += 1

        # Progress Update
        if i % 100 == 0:
            print(f"Processed {i}/{len(tickers)}: {ticker}")

    except Exception as e:
        print(f"Error {ticker}: {e}")

print(f"\n Download Complete ! Database created with {success_count} stocks.")

Created new database: data/NSE_Companies_filtered.db

PART 1: Creating 'stock_info' table from CSV...
Restored 2159 stocks into 'stock_info'.

PART 2: Downloading Daily Data (Start: 2004-09-01)...
Processed 0/2159: BEL.NS
Processed 100/2159: SPLPETRO.NS
Processed 200/2159: JYOTISTRUC.NS
Processed 300/2159: KSB.NS
Processed 400/2159: KMSUGAR.NS
Processed 500/2159: WSTCSTPAPR.NS
Processed 600/2159: SALSTEEL.NS



1 Failed download:
['TULSYAN.NS']: YFPricesMissingError('possibly delisted; no price data found  (1d 2004-09-01 -> 2025-11-28)')


Processed 700/2159: KSERASERA.NS
Processed 800/2159: IPCALAB.NS


  result = getattr(ufunc, method)(*inputs, **kwargs)


Processed 900/2159: BSOFT.NS


Failed to get ticker 'WESTLIFE.NS' reason: Failed to perform, curl: (28) Operation timed out after 10005 milliseconds with 0 bytes received. See https://curl.se/libcurl/c/libcurl-errors.html first for more details.

1 Failed download:
['WESTLIFE.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['522229.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['513252.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['515043.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['500178.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['523248.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['523638.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['504908.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['505163.NS']: YFTzMissingE

Processed 1000/2159: 513687.NS



1 Failed download:
['500240.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['530711.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['530369.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['517286.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['500389.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['526471.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['507155.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['507435.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['508136.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['508670.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['519532.NS']: YFTzMissingError('possibly delisted; no timez

Processed 1100/2159: 506520.NS



1 Failed download:
['524748.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['531280.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['524046.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['531397.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['530991.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['530433.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['526901.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['530883.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['509895.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['524204.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['500306.NS']: YFTzMissingError('possibly delisted; no timez

Processed 1200/2159: 508941.NS



1 Failed download:
['517467.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['531201.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['517514.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['504084.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['517564.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['517370.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['531460.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['531306.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['531889.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['517423.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['531888.NS']: YFTzMissingError('possibly delisted; no timez

Processed 1300/2159: 526125.NS



1 Failed download:
['519606.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['519506.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['519230.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['530461.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['519242.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['519031.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['519566.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['530953.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['530735.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['530291.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['519064.NS']: YFTzMissingError('possibly delisted; no timez

Processed 1400/2159: 522183.NS



1 Failed download:
['530267.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['522152.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['505320.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['522171.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['508860.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['505693.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['531661.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['531727.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['505893.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['500068.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['505872.NS']: YFTzMissingError('possibly delisted; no timez

Processed 1500/2159: 506003.NS



1 Failed download:
['506528.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['509525.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['513418.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['500399.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['513460.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['513496.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['513513.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['500245.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['500365.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['513566.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['517449.NS']: YFTzMissingError('possibly delisted; no timez

Processed 1600/2159: 530499.NS



1 Failed download:
['511533.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['530695.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['531091.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['532160.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['532053.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['531169.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['531950.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['511626.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['526500.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['530809.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['530249.NS']: YFTzMissingError('possibly delisted; no timez

Processed 1700/2159: 511734.NS



1 Failed download:
['500143.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['530669.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['530289.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['530265.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['511563.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['512020.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['526883.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['531080.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['530439.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['512589.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['511571.NS']: YFTzMissingError('possibly delisted; no timez

Processed 1800/2159: 526349.NS



1 Failed download:
['517548.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['526439.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['531552.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['523120.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['507970.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['519299.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['502901.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['526407.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['526479.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['526481.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['526525.NS']: YFTzMissingError('possibly delisted; no timez

Processed 1900/2159: 531300.NS



1 Failed download:
['531119.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['521244.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['526468.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['532015.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['526965.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['521068.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['514460.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['516110.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['504365.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['530179.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['521238.NS']: YFTzMissingError('possibly delisted; no timez

Processed 2000/2159: 530617.NS



1 Failed download:
['530125.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['524703.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['531637.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['531173.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['531015.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['519574.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['530665.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['524632.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['500672.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['514197.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['519307.NS']: YFTzMissingError('possibly delisted; no timez

Processed 2100/2159: 511644.NS



1 Failed download:
['531769.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['511447.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['517415.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['531533.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['526443.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['531126.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['531960.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['532340.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['519463.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['531917.NS']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['500346.NS']: YFTzMissingError('possibly delisted; no timez


 Download Complete ! Database created with 982 stocks.


In [5]:
print("Current Working Directory:", os.getcwd())

Current Working Directory: c:\Users\Akash Mittal\Documents\GitHub\Quantamental Study\code
