In [1]:
import pandas as pd
import yfinance as yf
from pandas_datareader import DataReader
from concurrent.futures import ThreadPoolExecutor


In [2]:
url = 'https://archives.nseindia.com/content/indices/ind_nifty500list.csv'
nifty_const = pd.read_csv(url)
nifty_const['Symbol'] = nifty_const['Symbol'].apply(lambda x: x + '.NS')


In [3]:
def fetch_market_cap(symbol):
    try:
        info = yf.Ticker(symbol).info
        return info.get('marketCap', None)
    except:
        return None

In [4]:
# nifty_const['MarketCap'] = nifty_const['Symbol'].apply(fetch_market_cap)
with ThreadPoolExecutor() as executor:
    market_caps = list(executor.map(fetch_market_cap, nifty_const['Symbol']))

In [5]:
nifty_const['MarketCap'] = market_caps

In [6]:
large_cap_threshold = int(20_000e7)
mid_cap_threshold = int(50_000e7)
nifty_const.sort_values('MarketCap', ascending=False)

Unnamed: 0,Company Name,Industry,Symbol,Series,ISIN Code,MarketCap
391,Reliance Industries Ltd.,Oil Gas & Consumable Fuels,RELIANCE.NS,EQ,INE002A01018,1.732334e+13
194,HDFC Bank Ltd.,Financial Services,HDFCBANK.NS,EQ,INE040A01034,1.263108e+13
442,Tata Consultancy Services Ltd.,Information Technology,TCS.NS,EQ,INE467B01029,1.232478e+13
213,ICICI Bank Ltd.,Financial Services,ICICIBANK.NS,EQ,INE090A01021,6.975570e+12
207,Hindustan Unilever Ltd.,Fast Moving Consumer Goods,HINDUNILVR.NS,EQ,INE030A01027,6.122212e+12
...,...,...,...,...,...,...
220,IFB Industries Ltd.,Consumer Durables,IFBIND.NS,EQ,INE559A01017,3.384941e+10
468,UFLEX Ltd.,Capital Goods,UFLEX.NS,EQ,INE516A01017,3.186418e+10
304,Mahindra Logistics Ltd.,Services,MAHLOG.NS,EQ,INE766P01016,2.946281e+10
434,TCNS Clothing Co. Ltd.,Textiles,TCNSBRANDS.NS,EQ,INE778U01029,2.624641e+10


In [7]:
def get_stocks(capitalization=None):
    if capitalization is None:
        stocks = nifty_const.Symbol.to_list() 
    elif capitalization == 'LARGECAP':
        stocks = nifty_const.sort_values('MarketCap')[-150:].Symbol.to_list()
    elif capitalization == 'MIDCAP':
        stocks = nifty_const.sort_values('MarketCap')[-300:-150].Symbol.to_list()
    elif capitalization == 'SMALLCAP':
        stocks = nifty_const.sort_values('MarketCap')[:150].Symbol.to_list()
    return stocks

In [8]:
def download_data(stocks, start_date='2015-01-01', end_date='2020-01-01'):
    data = yf.download(stocks, start=start_date, end=end_date)
    return data['Close']

In [25]:
data = download_data(get_stocks('LARGECAP')[:10])

[*********************100%***********************]  10 of 10 completed


1 Failed download:
['NYKAA.NS']: Exception("%ticker%: Data doesn't exist for startDate = 1420050600, endDate = 1577817000")





In [26]:
data

Unnamed: 0_level_0,BHARATFORG.NS,CONCOR.NS,HINDPETRO.NS,INDIANB.NS,LTTS.NS,LUPIN.NS,NYKAA.NS,PAGEIND.NS,RECLTD.NS,UBL.NS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-01-01,468.975006,434.528015,122.344444,220.199997,,1429.199951,,12334.400391,125.887497,834.599976
2015-01-02,471.600006,434.432007,121.388885,215.350006,,1432.300049,,12419.650391,125.587502,835.599976
2015-01-05,473.149994,440.959991,124.822220,212.199997,,1430.300049,,12080.750000,125.231247,832.900024
2015-01-06,446.350006,431.743988,123.588890,200.300003,,1395.849976,,11639.849609,119.943748,832.450012
2015-01-07,450.200012,433.743988,126.355553,196.699997,,1377.599976,,11388.599609,119.193748,897.099976
...,...,...,...,...,...,...,...,...,...,...
2019-12-24,486.200012,575.200012,262.450012,104.750000,1478.250000,765.150024,,21930.449219,103.012497,1299.400024
2019-12-26,492.200012,567.250000,258.600006,104.199997,1485.500000,757.500000,,22284.599609,103.837502,1273.900024
2019-12-27,488.750000,573.049988,266.600006,104.900002,1478.949951,770.950012,,23534.000000,106.162498,1276.000000
2019-12-30,491.500000,576.700012,267.200012,104.599998,1479.650024,769.900024,,23761.550781,105.937500,1278.699951


In [32]:
momentum = data.pct_change(60).shift()
momentum

Unnamed: 0_level_0,BHARATFORG.NS,CONCOR.NS,HINDPETRO.NS,INDIANB.NS,LTTS.NS,LUPIN.NS,NYKAA.NS,PAGEIND.NS,RECLTD.NS,UBL.NS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-01-01,,,,,,,,,,
2015-01-02,,,,,,,,,,
2015-01-05,,,,,,,,,,
2015-01-06,,,,,,,,,,
2015-01-07,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
2019-12-24,0.087571,0.001291,-0.059749,-0.290878,-0.060974,0.038485,,-0.057661,0.057342,-0.096587
2019-12-26,0.099254,-0.041653,-0.080413,-0.263361,-0.071393,0.046144,,-0.014601,0.082348,-0.041599
2019-12-27,0.092928,-0.082936,-0.151157,-0.246838,-0.072084,0.038738,,0.011945,0.076594,-0.059644
2019-12-30,0.094012,-0.030618,-0.128331,-0.214232,-0.063541,0.074869,,0.054263,0.121188,-0.055654


In [35]:
portfolio = pd.DataFrame(0, index=data.index, columns=data.columns)
portfolio

Unnamed: 0_level_0,BHARATFORG.NS,CONCOR.NS,HINDPETRO.NS,INDIANB.NS,LTTS.NS,LUPIN.NS,NYKAA.NS,PAGEIND.NS,RECLTD.NS,UBL.NS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-01-01,0,0,0,0,0,0,0,0,0,0
2015-01-02,0,0,0,0,0,0,0,0,0,0
2015-01-05,0,0,0,0,0,0,0,0,0,0
2015-01-06,0,0,0,0,0,0,0,0,0,0
2015-01-07,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
2019-12-24,0,0,0,0,0,0,0,0,0,0
2019-12-26,0,0,0,0,0,0,0,0,0,0
2019-12-27,0,0,0,0,0,0,0,0,0,0
2019-12-30,0,0,0,0,0,0,0,0,0,0


In [66]:
rebalance_dates = pd.date_range(start=data.index.min(), end=data.index.max(), freq='W-TUE')
rebalance_dates

DatetimeIndex(['2015-01-06', '2015-01-13', '2015-01-20', '2015-01-27',
               '2015-02-03', '2015-02-10', '2015-02-17', '2015-02-24',
               '2015-03-03', '2015-03-10',
               ...
               '2019-10-29', '2019-11-05', '2019-11-12', '2019-11-19',
               '2019-11-26', '2019-12-03', '2019-12-10', '2019-12-17',
               '2019-12-24', '2019-12-31'],
              dtype='datetime64[ns]', length=261, freq='W-TUE')

In [86]:
date = rebalance_dates[16]
date

Timestamp('2015-04-28 00:00:00')

In [87]:
momentum.index = pd.to_datetime(momentum.index)

In [88]:
momentum.head(62)


Unnamed: 0_level_0,BHARATFORG.NS,CONCOR.NS,HINDPETRO.NS,INDIANB.NS,LTTS.NS,LUPIN.NS,NYKAA.NS,PAGEIND.NS,RECLTD.NS,UBL.NS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-01-01,,,,,,,,,,
2015-01-02,,,,,,,,,,
2015-01-05,,,,,,,,,,
2015-01-06,,,,,,,,,,
2015-01-07,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
2015-03-26,,,,,,,,,,
2015-03-27,,,,,,,,,,
2015-03-30,,,,,,,,,,
2015-03-31,,,,,,,,,,


In [90]:
momentum.loc[date]

BHARATFORG.NS    0.173656
CONCOR.NS        0.127102
HINDPETRO.NS     0.030384
INDIANB.NS      -0.298889
LTTS.NS               NaN
LUPIN.NS         0.156904
NYKAA.NS              NaN
PAGEIND.NS       0.082931
RECLTD.NS       -0.013044
UBL.NS          -0.095458
Name: 2015-04-28 00:00:00, dtype: float64

In [94]:
mom = momentum.loc[date]
mom = mom.dropna().sort_values(ascending=False)
winners = mom.head(50)
winners

BHARATFORG.NS    0.173656
LUPIN.NS         0.156904
CONCOR.NS        0.127102
PAGEIND.NS       0.082931
HINDPETRO.NS     0.030384
RECLTD.NS       -0.013044
UBL.NS          -0.095458
INDIANB.NS      -0.298889
Name: 2015-04-28 00:00:00, dtype: float64

In [99]:
portfolio.loc[date, winners.index] = 1 / len(winners)
portfolio.head(80)

Unnamed: 0_level_0,BHARATFORG.NS,CONCOR.NS,HINDPETRO.NS,INDIANB.NS,LTTS.NS,LUPIN.NS,NYKAA.NS,PAGEIND.NS,RECLTD.NS,UBL.NS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-01-01,0.000,0.000,0.000,0.000,0,0.000,0,0.000,0.000,0.000
2015-01-02,0.000,0.000,0.000,0.000,0,0.000,0,0.000,0.000,0.000
2015-01-05,0.000,0.000,0.000,0.000,0,0.000,0,0.000,0.000,0.000
2015-01-06,0.000,0.000,0.000,0.000,0,0.000,0,0.000,0.000,0.000
2015-01-07,0.000,0.000,0.000,0.000,0,0.000,0,0.000,0.000,0.000
...,...,...,...,...,...,...,...,...,...,...
2015-04-24,0.000,0.000,0.000,0.000,0,0.000,0,0.000,0.000,0.000
2015-04-27,0.000,0.000,0.000,0.000,0,0.000,0,0.000,0.000,0.000
2015-04-28,0.125,0.125,0.125,0.125,0,0.125,0,0.125,0.125,0.125
2015-04-29,0.000,0.000,0.000,0.000,0,0.000,0,0.000,0.000,0.000


In [102]:
data.pct_change().head(80)

Unnamed: 0_level_0,BHARATFORG.NS,CONCOR.NS,HINDPETRO.NS,INDIANB.NS,LTTS.NS,LUPIN.NS,NYKAA.NS,PAGEIND.NS,RECLTD.NS,UBL.NS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-01-01,,,,,,,,,,
2015-01-02,0.005597,-0.000221,-0.007810,-0.022025,,0.002169,,0.006912,-0.002383,0.001198
2015-01-05,0.003287,0.015026,0.028284,-0.014627,,-0.001396,,-0.027287,-0.002837,-0.003231
2015-01-06,-0.056642,-0.020900,-0.009881,-0.056079,,-0.024086,,-0.036496,-0.042222,-0.000540
2015-01-07,0.008626,0.004632,0.022386,-0.017973,,-0.013074,,-0.021585,-0.006253,0.077662
...,...,...,...,...,...,...,...,...,...,...
2015-04-24,0.025773,-0.031521,-0.031693,-0.023559,,0.018136,,0.015985,-0.002943,-0.003313
2015-04-27,-0.018512,0.031680,-0.047455,-0.033257,,-0.019350,,-0.015810,-0.024542,-0.119043
2015-04-28,0.030389,0.030738,0.043434,-0.012816,,-0.000812,,0.020183,0.010032,-0.011053
2015-04-29,0.024927,-0.007705,-0.004026,-0.028357,,0.012981,,0.039635,-0.013401,0.029284


In [106]:
returns = (portfolio * data.pct_change()).sum(axis=1)
returns.head(80)

Date
2015-01-01    0.000000
2015-01-02    0.000000
2015-01-05    0.000000
2015-01-06    0.000000
2015-01-07    0.000000
                ...   
2015-04-24    0.000000
2015-04-27    0.000000
2015-04-28    0.013762
2015-04-29    0.000000
2015-04-30    0.000000
Length: 80, dtype: float64

In [151]:
def compute_momentum(prices, lookback_period):
    return prices.pct_change(lookback_period).shift()

def backtest(data, lookback_period=60, rebalance_freq='W-TUE'):
    momentum = compute_momentum(data, lookback_period)

    # convert to date column to datetime, else  .loc on rebalanced dates wont work
    momentum.index = pd.to_datetime(momentum.index)

    # Create a DataFrame to hold our portfolio
    portfolio = pd.DataFrame(index=data.index)

    rebalance_dates = pd.date_range(
        start=data.index.min(), end=data.index.max(), freq=rebalance_freq)

    for date in rebalance_dates:
        if date in momentum.index:
            # Get the momentum for this date
            mom = momentum.loc[date]

            # Drop any stocks that don't have a momentum value
            mom = mom.dropna()
            if len(mom) == 0:
                continue

            # Separate into winners and losers
            # winners = mom[mom > mom.median()]

            # chose top 50
            mom = mom.sort_values(ascending=False)
            
            # # Select top 50 stocks
            winners = mom.head(50)

            # Assign weights
            portfolio.loc[date, winners.index] = 1 / len(winners)
            # print(f'date {date}')
            # print(f'mom {mom}')
            # print(f'winners {len(winners)} ,  {winners}')
            # print(f'portfolio {portfolio.loc[date]}')
            # break

    portfolio.fillna(0, inplace=True)
    print(portfolio)

    # Calculate returns
    returns = (portfolio * data.pct_change()).sum(axis=1)

    return returns

In [113]:
data = download_data(get_stocks('LARGECAP'), start_date='2015-01-01', end_date='2020-01-01')

[*********************100%***********************]  150 of 150 completed


11 Failed downloads:
['AWL.NS', 'LICI.NS', 'ZOMATO.NS', 'NYKAA.NS', 'MANKIND.NS', 'MAXHEALTH.NS', 'PAYTM.NS', 'SBICARD.NS', 'LODHA.NS', 'IRFC.NS']: Exception("%ticker%: Data doesn't exist for startDate = 1420050600, endDate = 1577817000")
['DUMMYREL.NS']: Exception('%ticker%: No timezone found, symbol may be delisted')





In [120]:
data.shape

(1229, 150)

In [152]:
retruns = backtest(data=data, lookback_period=45,rebalance_freq='W-TUE')

            TATAELXSI.NS  SIEMENS.NS  BHARATFORG.NS  HCLTECH.NS  BOSCHLTD.NS  \
Date                                                                           
2015-01-01          0.00         0.0           0.00         0.0         0.00   
2015-01-02          0.00         0.0           0.00         0.0         0.00   
2015-01-05          0.00         0.0           0.00         0.0         0.00   
2015-01-06          0.00         0.0           0.00         0.0         0.00   
2015-01-07          0.00         0.0           0.00         0.0         0.00   
...                  ...         ...            ...         ...          ...   
2019-12-24          0.02         0.0           0.02         0.0         0.02   
2019-12-26          0.00         0.0           0.00         0.0         0.00   
2019-12-27          0.00         0.0           0.00         0.0         0.00   
2019-12-30          0.00         0.0           0.00         0.0         0.00   
2019-12-31          0.02         0.0    

In [156]:
full_data = download_data(get_stocks(), start_date='2015-01-01', end_date='2023-07-10')

[*********************100%***********************]  502 of 502 completed


1 Failed download:
['DUMMYREL.NS']: Exception('%ticker%: No timezone found, symbol may be delisted')





In [158]:
full_data.to_csv('nifty_stock_prices.csv')

In [160]:
nifty_const.to_csv('nifty_const.csv')