In [33]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math
import random
import os 
import sys
import warnings
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import plotly.express as px
import nsepython as nse
from datetime import datetime


## Data Acquisition Component

In [34]:
# Date window requested from the index total-returns endpoint (both given as DD-Mon-YYYY strings).
start_date = "01-May-2014"
end_date = "10-Sep-2023"

# Index names passed to the nsepython client.
nifty50 = "NIFTY 50"
niftyBank = "NIFTY BANK"
niftyIT = "NIFTY IT"

# Fetch the total-returns series of each index for the same date window,
# in the order NIFTY 50, NIFTY BANK, NIFTY IT.
nifty_50, nifty_bank, nifty_it = (
    nse.index_total_returns(index_name, start_date, end_date)
    for index_name in (nifty50, niftyBank, niftyIT)
)

In [35]:
# Wrap each fetched result in a pandas DataFrame, keeping the same variable names.
nifty_50, nifty_bank, nifty_it = (
    pd.DataFrame(raw_result) for raw_result in (nifty_50, nifty_bank, nifty_it)
)

In [36]:
# Order every frame chronologically, so that the latest date is at the bottom.
# Sorting on the parsed date (instead of blindly reversing the rows) keeps this
# cell idempotent: re-running it no longer flips the frames back to newest-first,
# which would silently change the first row used later as the start value.
def sort_by_date(frame):
    """Return ``frame`` sorted by its 'Date' column, oldest first, with a fresh 0..n-1 index.

    The 'Date' strings are parsed with the '%d %b %Y' layout (e.g. '02 May 2014')
    only to build the sort key; the column values themselves are left unchanged.
    """
    ordered = frame.sort_values(
        'Date',
        key=lambda dates: pd.to_datetime(dates, format='%d %b %Y'),
        kind='stable',  # rows sharing a date keep their incoming relative order
    )
    return ordered.reset_index(drop=True)  # drop=True prevents old indices from being added as a column


nifty_50 = sort_by_date(nifty_50)
nifty_bank = sort_by_date(nifty_bank)
nifty_it = sort_by_date(nifty_it)




In [37]:
# Inspect the reordered NIFTY 50 frame; the bare last expression is rendered as the cell output.
nifty_50

Unnamed: 0,Index Name,Date,TotalReturnsIndex
0,Nifty 50,02 May 2014,8727.93
1,Nifty 50,05 May 2014,8733.92
2,Nifty 50,06 May 2014,8754.69
3,Nifty 50,07 May 2014,8672.88
4,Nifty 50,08 May 2014,8682.37
...,...,...,...
2311,NIFTY 50,04 Sep 2023,28647.16
2312,NIFTY 50,05 Sep 2023,28714.78
2313,NIFTY 50,06 Sep 2023,28767.78
2314,NIFTY 50,07 Sep 2023,28937.96


In [38]:
# Inspect the reordered NIFTY BANK frame; the bare last expression is rendered as the cell output.
nifty_bank

Unnamed: 0,Index Name,Date,TotalReturnsIndex
0,Nifty Bank,02 May 2014,16575.34
1,Nifty Bank,05 May 2014,16625.25
2,Nifty Bank,06 May 2014,16735.48
3,Nifty Bank,07 May 2014,16699.52
4,Nifty Bank,08 May 2014,16827.53
...,...,...,...
2311,NIFTY BANK,04 Sep 2023,61132.07
2312,NIFTY BANK,05 Sep 2023,61068.84
2313,NIFTY BANK,06 Sep 2023,60900.04
2314,NIFTY BANK,07 Sep 2023,61543.56


In [39]:
# Inspect the reordered NIFTY IT frame; the bare last expression is rendered as the cell output.
nifty_it

Unnamed: 0,Index Name,Date,TotalReturnsIndex
0,Nifty IT,02 May 2014,10483.14
1,Nifty IT,05 May 2014,10375.15
2,Nifty IT,06 May 2014,10325.79
3,Nifty IT,07 May 2014,10063.57
4,Nifty IT,08 May 2014,10082.73
...,...,...,...
2311,NIFTY IT,04 Sep 2023,43579.42
2312,NIFTY IT,05 Sep 2023,43852.28
2313,NIFTY IT,06 Sep 2023,43780.45
2314,NIFTY IT,07 Sep 2023,43964.79


In [40]:
# Safety copy: write each frame to its own CSV file in the current working directory.
for csv_name, frame in (
    ("nifty_50.csv", nifty_50),
    ("nifty_bank.csv", nifty_bank),
    ("nifty_it.csv", nifty_it),
):
    frame.to_csv(csv_name)

## Data Cleaning and Preparation Component

In [41]:
# Calculate relative percentage change in index return value from start date
# formula : 100 * (return at date of interest - return at start date) / return at start date

def add_relative_percentage_change(frame):
    """Return a new frame with numeric returns and a 'RelativePercentageChange' column.

    'TotalReturnsIndex' is converted with ``pd.to_numeric`` and every value is
    compared against the first row: 100 * (value - first) / first.

    Raises:
        ValueError: if ``frame`` has no rows, so there is no start value.
    """
    returns = pd.to_numeric(frame['TotalReturnsIndex'])
    if returns.empty:
        raise ValueError("cannot compute relative percentage change: frame has no rows")
    # .iloc[0] is positional, so the baseline is always the first row; a plain
    # [0] looks up the index *label* 0 and only works while the index is 0..n-1.
    start = returns.iloc[0]
    return frame.assign(
        TotalReturnsIndex=returns,
        RelativePercentageChange=100 * (returns - start) / start,
    )


nifty_50 = add_relative_percentage_change(nifty_50)
nifty_bank = add_relative_percentage_change(nifty_bank)
nifty_it = add_relative_percentage_change(nifty_it)

# Start-date values, kept as notebook-level names as before.
nifty_50_start = nifty_50['TotalReturnsIndex'].iloc[0]
nifty_bank_start = nifty_bank['TotalReturnsIndex'].iloc[0]
nifty_it_start = nifty_it['TotalReturnsIndex'].iloc[0]


In [42]:
# Inspect NIFTY 50 with the added RelativePercentageChange column (first row should be 0).
nifty_50

Unnamed: 0,Index Name,Date,TotalReturnsIndex,RelativePercentageChange
0,Nifty 50,02 May 2014,8727.93,0.000000
1,Nifty 50,05 May 2014,8733.92,0.068630
2,Nifty 50,06 May 2014,8754.69,0.306602
3,Nifty 50,07 May 2014,8672.88,-0.630734
4,Nifty 50,08 May 2014,8682.37,-0.522002
...,...,...,...,...
2311,NIFTY 50,04 Sep 2023,28647.16,228.223989
2312,NIFTY 50,05 Sep 2023,28714.78,228.998743
2313,NIFTY 50,06 Sep 2023,28767.78,229.605989
2314,NIFTY 50,07 Sep 2023,28937.96,231.555821


In [43]:
# Inspect NIFTY BANK with the added RelativePercentageChange column (first row should be 0).
nifty_bank

Unnamed: 0,Index Name,Date,TotalReturnsIndex,RelativePercentageChange
0,Nifty Bank,02 May 2014,16575.34,0.000000
1,Nifty Bank,05 May 2014,16625.25,0.301110
2,Nifty Bank,06 May 2014,16735.48,0.966134
3,Nifty Bank,07 May 2014,16699.52,0.749185
4,Nifty Bank,08 May 2014,16827.53,1.521477
...,...,...,...,...
2311,NIFTY BANK,04 Sep 2023,61132.07,268.813370
2312,NIFTY BANK,05 Sep 2023,61068.84,268.431899
2313,NIFTY BANK,06 Sep 2023,60900.04,267.413519
2314,NIFTY BANK,07 Sep 2023,61543.56,271.295913


In [44]:
# Inspect NIFTY IT with the added RelativePercentageChange column (first row should be 0).
nifty_it

Unnamed: 0,Index Name,Date,TotalReturnsIndex,RelativePercentageChange
0,Nifty IT,02 May 2014,10483.14,0.000000
1,Nifty IT,05 May 2014,10375.15,-1.030130
2,Nifty IT,06 May 2014,10325.79,-1.500982
3,Nifty IT,07 May 2014,10063.57,-4.002331
4,Nifty IT,08 May 2014,10082.73,-3.819562
...,...,...,...,...
2311,NIFTY IT,04 Sep 2023,43579.42,315.709606
2312,NIFTY IT,05 Sep 2023,43852.28,318.312452
2313,NIFTY IT,06 Sep 2023,43780.45,317.627257
2314,NIFTY IT,07 Sep 2023,43964.79,319.385699


In [45]:
# Build one combined frame keyed on Date. The TotalReturnsIndex and
# RelativePercentageChange columns are first given an index-specific prefix so
# the three indices stay distinguishable after merging.

# Per-index column renames.
nifty_50 = nifty_50.rename(
    columns={
        'TotalReturnsIndex': 'nifty50_totalReturnsIndex',
        'RelativePercentageChange': 'nifty50_relative_percentage_change',
    }
)
nifty_bank = nifty_bank.rename(
    columns={
        'TotalReturnsIndex': 'niftyBank_totalReturnsIndex',
        'RelativePercentageChange': 'niftyBank_relative_percentage_change',
    }
)
nifty_it = nifty_it.rename(
    columns={
        'TotalReturnsIndex': 'niftyIT_totalReturnsIndex',
        'RelativePercentageChange': 'niftyIT_relative_percentage_change',
    }
)

# Inner-join the three frames on Date, then remove the index-name columns.
# The first merge suffixes the clashing 'Index Name' columns with _x / _y; the
# one contributed by nifty_it in the second merge keeps its plain name.
index_name_columns = ['Index Name_x', 'Index Name_y', 'Index Name']
df = (
    nifty_50
    .merge(nifty_bank, how='inner', on='Date')
    .merge(nifty_it, how='inner', on='Date')
    .drop(columns=index_name_columns)
)

df

Unnamed: 0,Date,nifty50_totalReturnsIndex,nifty50_relative_percentage_change,niftyBank_totalReturnsIndex,niftyBank_relative_percentage_change,niftyIT_totalReturnsIndex,niftyIT_relative_percentage_change
0,02 May 2014,8727.93,0.000000,16575.34,0.000000,10483.14,0.000000
1,05 May 2014,8733.92,0.068630,16625.25,0.301110,10375.15,-1.030130
2,06 May 2014,8754.69,0.306602,16735.48,0.966134,10325.79,-1.500982
3,07 May 2014,8672.88,-0.630734,16699.52,0.749185,10063.57,-4.002331
4,08 May 2014,8682.37,-0.522002,16827.53,1.521477,10082.73,-3.819562
...,...,...,...,...,...,...,...
2311,04 Sep 2023,28647.16,228.223989,61132.07,268.813370,43579.42,315.709606
2312,05 Sep 2023,28714.78,228.998743,61068.84,268.431899,43852.28,318.312452
2313,06 Sep 2023,28767.78,229.605989,60900.04,267.413519,43780.45,317.627257
2314,07 Sep 2023,28937.96,231.555821,61543.56,271.295913,43964.79,319.385699


## Data Visualization Component

In [46]:
# Use plotly express to create line plots of the relative percentage change in the three index funds over time, on a single chart.

# The 'Date' column holds strings such as "02 May 2014". Plotly is not expected
# to recognise that layout as a date, in which case the x-axis would be
# categorical rather than a time axis. Parse the dates for the figure only;
# df itself is left untouched.
plot_df = df.assign(Date=pd.to_datetime(df["Date"], format="%d %b %Y"))

# plotly express line plot
fig = px.line(
    plot_df,
    x="Date",
    y=[
        "nifty50_relative_percentage_change",
        "niftyBank_relative_percentage_change",
        "niftyIT_relative_percentage_change",
    ],
    title='Relative Percentage Change in Index Funds over Time',
    # Replace the default wide-form "value"/"variable" captions with readable ones.
    labels={"value": "Relative change from start date (%)", "variable": "Index"},
)
fig.show()