In [1]:
%%time
# Underlying details extracted from nse
# Program takes 5 mins

# STATUS: Completed

import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup as bs4
import json
import datetime as datetime
import sys

#....        Initializations       ....
#......................................

load_from_disk = True   # For underlying symbol list


#......  Error catch in list comprehension  ...

def catch(func, handle=lambda e : e, *args, **kwargs):
    '''List comprehension error catcher'''
    try:
        return func(*args, **kwargs)
    except Exception as e:
        pass

#.....    get / make the symbols list  .....

if load_from_disk:
    try:
        symbols = list(pd.read_csv('./zdata/nse_eq_symbols.csv'))
    except FileNotFoundError as e:
        print('csv file not found')
        sys.exit(0)

#.....    get the underlying symbols   .....

def get_underlying(symbol):
    
    '''Gets the underlying data
    Arg: (symbol as string)
    Returns: df as dataframe'''

    # URLs
    url_base1 = "https://www.nseindia.com/live_market/dynaContent/live_watch/"
    url = url_base1 + "get_quote/GetQuote.jsp?symbol="+symbol

    page = requests.get(url).text

    # soup out the json dict
    bs_nse = bs4(page, 'html.parser')
    json_nse = json.loads(bs_nse.find(id='responseDiv').text.strip())

    df = pd.DataFrame.from_dict(json_nse['data'][0], orient='index').T
    
    return df

nse_underlyings = [catch(lambda: get_underlying(symbol)) for symbol in symbols]

# indexed by symbols
df_nse_und = pd.concat(nse_underlyings).set_index('symbol')

# clean up the commas, dashes and empty strs
df_nse_und1 = df_nse_und.replace(',|-' , '', regex=True)
df_nse_und1 = df_nse_und1.replace('', np.nan)

# convert date columns to datetime.date format
filtin = ['Date', 'dt']
filtout = ['isExDateFlag']
date_columns_mask = df_nse_und1.columns.str.contains('|'.join(filtin)) | (df_nse_und1.columns.str.contains('|'.join(filtout)))
dt_col_list = df_nse_und1.loc[:, date_columns_mask].columns.tolist()
dt_col_list = [item for item in dt_col_list if item not in filtout]  #filterout

df_nse_und1.loc[:, dt_col_list] = df_nse_und1.loc [:, dt_col_list].apply( \
                                  pd.to_datetime, errors= 'coerce').applymap(pd.Timestamp.date)

dict_df = df_nse_und1.to_dict(orient='index')  # dictionary based on index of symbols

# store data for next program
df_nse_und1.to_pickle('./zdata/underlying_df.pkl')

Wall time: 3min 52s


In [96]:
# ignored because dataframe pickle occupies lesser space!
# import pickle
# with open('./zdata/underlying_dict.pkl', 'wb') as handle:
#     pickle.dump(dict_df, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [2]:
df_nse_und1.head()

Unnamed: 0_level_0,pricebandupper,applicableMargin,bcEndDate,totalSellQuantity,adhocMargin,companyName,marketType,exDate,bcStartDate,css_status_desc,...,buyQuantity3,buyQuantity2,buyQuantity1,series,faceValue,buyQuantity5,closePrice,open,isinCode,lastPrice
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
DHFL,260.9,55.88,2018-06-27,20565,14.0,Dewan Housing Finance Corporation Limited,N,2018-06-19,2018-06-21,Listed,...,,,,EQ,10.0,,238.9,239.1,INE202B01012,237.5
AUROPHARMA,852.15,13.61,0001-01-01,2177,,Aurobindo Pharma Limited,N,2018-11-20,0001-01-01,Listed,...,,,,EQ,1.0,,783.1,779.8,INE406A01037,782.0
DALMIABHA,2338.6,12.5,2017-09-20,5,,Dalmia Bharat Limited,N,2017-09-12,2017-09-14,Listed,...,,,,EQ,2.0,,2123.05,2147.9,INE439L01019,2134.9
JISLJALEQS,80.9,19.53,2018-09-28,4437,4.0,Jain Irrigation Systems Limited,N,2018-09-12,2018-09-17,Listed,...,,,,EQ,2.0,,73.6,73.85,INE175A01038,73.5
HEXAWARE,347.25,14.58,0001-01-01,926,,Hexaware Technologies Limited,N,2018-11-02,0001-01-01,Listed,...,,,,EQ,2.0,,318.15,316.0,INE093A01033,318.0
