### Import packages

In [1]:
import yfinance as yf
import pandas as pd
pd.set_option('float_format', '{:.2f}'.format)

### Import the list of companies from the NSE India website

In [2]:
mcap_class = ['largecap', 'midcap', 'smallcap']

# Nifty 50 constituents, tagged as the large-cap segment.
# NOTE(review): this URL is served from the "www1" host while the two lists
# below come from "archives" — confirm this is the intended source.
largecap = pd.read_csv(
    'https://www1.nseindia.com/content/indices/ind_nifty50list.csv'
).assign(**{'MCAP Class': 'largecap'})

# Nifty Midcap 50 constituents, tagged as the mid-cap segment.
midcap = pd.read_csv(
    'https://archives.nseindia.com/content/indices/ind_niftymidcap50list.csv'
).assign(**{'MCAP Class': 'midcap'})

# Nifty Smallcap 50 constituents, tagged as the small-cap segment.
smallcap = pd.read_csv(
    'https://archives.nseindia.com/content/indices/ind_niftysmallcap50list.csv'
).assign(**{'MCAP Class': 'smallcap'})

### Populate Market capitalization

In [3]:
def populate_marketcap(df, top_n=5, ticker_factory=None):
  """Return the ``top_n`` rows of ``df`` with the largest market capitalisation.

  For every distinct value in ``df.Symbol`` the ticker ``<symbol>.NS`` is
  looked up and the ``"marketCap"`` entry of its ``info`` mapping is stored
  in a new ``MCAP`` column. The column is added to a copy, so the frame
  passed in by the caller is left unchanged.

  Parameters
  ----------
  df : pandas.DataFrame
      Frame with a ``Symbol`` column holding the bare exchange symbols.
  top_n : int, default 5
      Number of rows to keep.
  ticker_factory : callable, optional
      Called with the ticker string; must return an object exposing an
      ``info`` mapping. Defaults to ``yf.Ticker``. Supplying a stub makes
      the function usable without network access.

  Returns
  -------
  pandas.DataFrame
      The ``top_n`` rows ordered by descending ``MCAP``. Symbols for which
      no market cap is reported are excluded from the ranking instead of
      aborting the whole run.
  """
  if ticker_factory is None:
    # Resolved at call time so the default keeps using yfinance.
    ticker_factory = yf.Ticker

  # One lookup per distinct symbol, collected in a dict and mapped back in a
  # single pass instead of rebuilding a boolean mask for every symbol.
  market_caps = {}
  for tick in df.Symbol.unique():
    info = ticker_factory(tick + '.NS').info or {}
    cap = info.get("marketCap")
    market_caps[tick] = float('nan') if cap is None else float(cap)

  ranked = df.assign(MCAP=df.Symbol.map(market_caps).astype('float64'))
  return ranked.dropna(subset=['MCAP']).nlargest(top_n, ['MCAP'])

#### Choose top 5 companies in each marketcap segment 

In [4]:
# Rank each segment on its own, in large/mid/small order, keeping the
# frame returned by populate_marketcap for each one.
largecap_top_5, midcap_top_5, smallcap_top_5 = [
    populate_marketcap(segment) for segment in (largecap, midcap, smallcap)
]

#### List of top 5 largecap companies

In [5]:
# Bare expression: the notebook renders the large-cap top-5 frame as the cell output.
largecap_top_5

Unnamed: 0,Company Name,Industry,Symbol,Series,ISIN Code,MCAP Class,MCAP
36,Reliance Industries Ltd.,OIL & GAS,RELIANCE,EQ,INE002A01018,largecap,17408949485568.0
41,Tata Consultancy Services Ltd.,IT,TCS,EQ,INE467B01029,largecap,13131627626496.0
16,HDFC Bank Ltd.,FINANCIAL SERVICES,HDFCBANK,EQ,INE040A01034,largecap,8589652000768.0
26,Infosys Ltd.,IT,INFY,EQ,INE009A01021,largecap,7595713626112.0
20,Hindustan Unilever Ltd.,CONSUMER GOODS,HINDUNILVR,EQ,INE030A01027,largecap,5690706886656.0


#### List of top 5 midcap companies

In [6]:
# Bare expression: the notebook renders the mid-cap top-5 frame as the cell output.
midcap_top_5

Unnamed: 0,Company Name,Industry,Symbol,Series,ISIN Code,MCAP Class,MCAP
32,MindTree Ltd.,IT,MINDTREE,EQ,INE018I01017,midcap,812352798720.0
42,Tata Power Co. Ltd.,POWER,TATAPOWER,EQ,INE245A01021,midcap,772874043392.0
20,Godrej Properties Ltd.,CONSTRUCTION,GODREJPROP,EQ,INE484J01027,midcap,648513388544.0
33,MphasiS Ltd.,IT,MPHASIS,EQ,INE356A01018,midcap,641255669760.0
26,L&T Technology Services Ltd.,IT,LTTS,EQ,INE010V01017,midcap,559459991552.0


#### List of top 5 smallcap companies

In [7]:
# Bare expression: the notebook renders the small-cap top-5 frame as the cell output.
smallcap_top_5

Unnamed: 0,Company Name,Industry,Symbol,Series,ISIN Code,MCAP Class,MCAP
23,H.E.G. Ltd.,INDUSTRIAL MANUFACTURING,HEG,EQ,INE545A01016,smallcap,1016817319936.0
25,IDBI Bank Ltd.,FINANCIAL SERVICES,IDBI,EQ,INE008A01015,smallcap,568802017280.0
29,Indian Overseas Bank,FINANCIAL SERVICES,IOB,EQ,INE565A01014,smallcap,397895532544.0
28,Indian Energy Exchange Ltd.,FINANCIAL SERVICES,IEX,EQ,INE022Q01020,smallcap,243515817984.0
24,Happiest Minds Technologies Ltd.,IT,HAPPSTMNDS,BE,INE419U01012,smallcap,193544716288.0


#### Build the final stock list

In [8]:
# Stack the three per-segment frames into one frame with a fresh 0..n-1 index.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0.
final_df = pd.concat(
    [largecap_top_5, midcap_top_5, smallcap_top_5],
    ignore_index=True,
)

#### Add the Yahoo symbol (ticker name) to the final_df DataFrame

In [9]:
# Yahoo Finance ticker = exchange symbol followed by the '.NS' suffix.
final_df['Yahoo Symbol'] = final_df['Symbol'].add('.NS')

### Final Stock list

In [10]:
# Bare expression: the notebook renders the combined stock list as the cell output.
final_df

Unnamed: 0,Company Name,Industry,Symbol,Series,ISIN Code,MCAP Class,MCAP,Yahoo Symbol
0,Reliance Industries Ltd.,OIL & GAS,RELIANCE,EQ,INE002A01018,largecap,17408949485568.0,RELIANCE.NS
1,Tata Consultancy Services Ltd.,IT,TCS,EQ,INE467B01029,largecap,13131627626496.0,TCS.NS
2,HDFC Bank Ltd.,FINANCIAL SERVICES,HDFCBANK,EQ,INE040A01034,largecap,8589652000768.0,HDFCBANK.NS
3,Infosys Ltd.,IT,INFY,EQ,INE009A01021,largecap,7595713626112.0,INFY.NS
4,Hindustan Unilever Ltd.,CONSUMER GOODS,HINDUNILVR,EQ,INE030A01027,largecap,5690706886656.0,HINDUNILVR.NS
5,MindTree Ltd.,IT,MINDTREE,EQ,INE018I01017,midcap,812352798720.0,MINDTREE.NS
6,Tata Power Co. Ltd.,POWER,TATAPOWER,EQ,INE245A01021,midcap,772874043392.0,TATAPOWER.NS
7,Godrej Properties Ltd.,CONSTRUCTION,GODREJPROP,EQ,INE484J01027,midcap,648513388544.0,GODREJPROP.NS
8,MphasiS Ltd.,IT,MPHASIS,EQ,INE356A01018,midcap,641255669760.0,MPHASIS.NS
9,L&T Technology Services Ltd.,IT,LTTS,EQ,INE010V01017,midcap,559459991552.0,LTTS.NS


In [11]:
# Plain Python list of the Yahoo tickers, in the same row order as final_df.
final_tickers_list = final_df['Yahoo Symbol'].tolist()
final_tickers_list

['RELIANCE.NS',
 'TCS.NS',
 'HDFCBANK.NS',
 'INFY.NS',
 'HINDUNILVR.NS',
 'MINDTREE.NS',
 'TATAPOWER.NS',
 'GODREJPROP.NS',
 'MPHASIS.NS',
 'LTTS.NS',
 'HEG.NS',
 'IDBI.NS',
 'IOB.NS',
 'IEX.NS',
 'HAPPSTMNDS.NS']

### Saving the final stock list (Make sure you do not overwrite the data)

In [17]:
# Set the data directory.
# Raw string literal: the Windows path contains backslash pairs (\S, \A, \M,
# \d, \s) that are not valid escape sequences in a normal string literal. The
# resulting value is unchanged.
DATADIR = r"C:\Soumen\AIML\MSc\StockProject\datasets\stock_price"
# File extension for the saved data files.
EXTN = "csv"

In [18]:
# final_df.to_csv(DATADIR+'\\final_stock_list.csv')

### Saving the stock prices

In [19]:
# For every selected ticker, request price history with start 2010-01-01 and
# end 2019-12-31, and work out the CSV path it would be stored under.
for ticker in final_tickers_list:
    ticker_data = yf.download(ticker, start="2010-01-01", end="2019-12-31")
    # Drop the last three characters (the '.NS' suffix) so the file is named
    # after the bare symbol, e.g. 'TCS.NS' -> '<DATADIR>\TCS.csv'.
    file_name = DATADIR +'\\{}.csv'.format(ticker.rstrip()[:-3])
    
    # Write the DataFrame to CSV file.
    # NOTE: the write below is commented out, so running this cell downloads
    # the data but saves nothing to disk; file_name is computed but unused.
#     with open(file_name, 'w') as f:
#         ticker_data.to_csv(f)

C:\Soumen\AIML\MSc\StockProject\datasets\stock_price\RELIANCE.csv
C:\Soumen\AIML\MSc\StockProject\datasets\stock_price\TCS.csv
C:\Soumen\AIML\MSc\StockProject\datasets\stock_price\HDFCBANK.csv
C:\Soumen\AIML\MSc\StockProject\datasets\stock_price\INFY.csv
C:\Soumen\AIML\MSc\StockProject\datasets\stock_price\HINDUNILVR.csv
C:\Soumen\AIML\MSc\StockProject\datasets\stock_price\MINDTREE.csv
C:\Soumen\AIML\MSc\StockProject\datasets\stock_price\TATAPOWER.csv
C:\Soumen\AIML\MSc\StockProject\datasets\stock_price\GODREJPROP.csv
C:\Soumen\AIML\MSc\StockProject\datasets\stock_price\MPHASIS.csv
C:\Soumen\AIML\MSc\StockProject\datasets\stock_price\LTTS.csv
C:\Soumen\AIML\MSc\StockProject\datasets\stock_price\HEG.csv
C:\Soumen\AIML\MSc\StockProject\datasets\stock_price\IDBI.csv
C:\Soumen\AIML\MSc\StockProject\datasets\stock_price\IOB.csv
C:\Soumen\AIML\MSc\StockProject\datasets\stock_price\IEX.csv
C:\Soumen\AIML\MSc\StockProject\datasets\stock_price\HAPPSTMNDS.csv


### Manually download the data for HAPPSTMNDS and CAMS

In [20]:
# ticker_data_HAPPSTMNDS = yf.download('HAPPSTMNDS.NS', start="2020-09-17", end="2021-07-30")
# ticker_data_HAPPSTMNDS.to_csv(r'C:\Soumen\AIML\MSc\StockProject\datasets\stock_price\HAPPSTMNDS.csv')
# ticker_data_HAPPSTMNDS

In [21]:
# ticker_data_CAMS = yf.download('CAMS.NS', start="2020-10-05", end="2021-07-30")
# ticker_data_CAMS.to_csv(r'C:\Soumen\AIML\MSc\StockProject\datasets\stock_price\CAMS.csv')
# ticker_data_CAMS

#### Manually download the Index data: NIFTY 50 (^NSEI)

In [29]:
# Download NIFTY 50 index (^NSEI) data for the same start/end window as the
# individual stocks.
ticker_data_NIFTY = yf.download('^NSEI', start="2010-01-01", end="2019-12-31")
# Build the path from DATADIR rather than repeating the absolute directory,
# so the index file follows the same location as the per-stock file names.
# NOTE: to_csv replaces any existing NIFTY.csv in that directory.
ticker_data_NIFTY.to_csv(DATADIR + '\\NIFTY.csv')
ticker_data_NIFTY

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,5200.90,5238.45,5167.10,5232.20,5232.20,0
2010-01-05,5277.15,5288.35,5242.40,5277.90,5277.90,0
2010-01-06,5278.15,5310.85,5260.05,5281.80,5281.80,0
2010-01-07,5281.80,5302.55,5244.75,5263.10,5263.10,0
2010-01-08,5264.25,5276.75,5234.70,5244.75,5244.75,0
...,...,...,...,...,...,...
2019-12-23,12235.45,12287.15,12213.25,12262.75,12262.75,604800
2019-12-24,12269.25,12283.70,12202.10,12214.55,12214.55,470300
2019-12-26,12211.85,12221.55,12118.85,12126.55,12126.55,520300
2019-12-27,12172.90,12258.45,12157.90,12245.80,12245.80,383800
