In [34]:
%%time
# Underlying details extracted from nse
# Program takes 5 mins

import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup as bs4
import json
import datetime as datetime
import sys

#....        Initializations       ....
#......................................

load_from_disk = True   # For underlying symbol list: when True it is read from the csv on disk


#......  Error catch in list comprehension  ...

def catch(func, handle=lambda e : e, *args, **kwargs):
    '''List comprehension error catcher

    Calls func(*args, **kwargs) and returns its result. If the call raises
    an Exception, the error is reported on stderr and None is returned, so
    one failing item does not abort a whole comprehension.

    Args:
        func: callable to invoke
        handle: accepted for backward compatibility; it is not invoked.
                It is the second positional parameter, so positional
                arguments meant for func must come after it
        *args, **kwargs: forwarded to func
    Returns: the value returned by func, or None if func raised'''
    try:
        return func(*args, **kwargs)
    except Exception as e:
        # report the failure instead of dropping it silently; None is still
        # returned so failed items show up as None in the caller's list
        print('catch: {}: {}'.format(type(e).__name__, e), file=sys.stderr)
        return None

#.....    get / make the symbols list  .....

if load_from_disk:
    try:
        # iterating a DataFrame yields its column labels, so the symbols are
        # taken from the header row of the csv
        symbols = list(pd.read_csv('./zdata/nse_equity_symbols.csv'))
    except FileNotFoundError:
        print('csv file not found')
        # a missing input file is a failure, so exit with a non-zero status
        sys.exit(1)
# NOTE(review): when load_from_disk is False nothing here assigns `symbols`

#.....    get the underlying symbols   .....

def get_underlying(symbol, timeout=30):
    
    '''Gets the underlying data for one symbol from the NSE quote page
    Arg: (symbol as string)
         (timeout in seconds, handed to requests.get so that a stalled
          connection cannot hang the run)
    Returns: df as dataframe, a single row built from the first entry of
             the 'data' list in the json embedded in the page
    Raises: requests exceptions on network / HTTP errors; AttributeError,
            KeyError, IndexError or ValueError when the page does not
            contain the expected json'''

    # URLs
    url_base1 = "https://www.nseindia.com/live_market/dynaContent/live_watch/"
    url = url_base1 + "get_quote/GetQuote.jsp"

    # let requests build the query string so that a symbol containing a
    # reserved character such as '&' is percent-encoded instead of
    # cutting the parameter short
    response = requests.get(url, params={'symbol': symbol}, timeout=timeout)
    response.raise_for_status()   # fail early on an HTTP error status
    page = response.text

    # soup out the json dict
    bs_nse = bs4(page, 'html.parser')
    json_nse = json.loads(bs_nse.find(id='responseDiv').text.strip())

    # one key per column: build it index-oriented, then transpose to a row
    df = pd.DataFrame.from_dict(json_nse['data'][0], orient='index').T
    
    return df

# fetch every symbol; catch() gives None for a symbol whose fetch failed
nse_underlyings = [catch(lambda: get_underlying(symbol)) for symbol in symbols]

# indexed by symbols (pd.concat drops the None entries)
df_nse_und = pd.concat(nse_underlyings).set_index('symbol')

# clean up the commas, dash placeholders and empty strs
# only cells made up entirely of dashes are blanked, so a leading minus sign
# or a hyphen inside text is kept
df_nse_und1 = df_nse_und.replace(',|^-+$', '', regex=True)
df_nse_und1 = df_nse_und1.replace('', np.nan)

# convert date columns to datetime.date format
filtin = ['Date', 'dt']        # name fragments that select a column
filtout = ['isExDateFlag']     # matches 'Date' but is excluded from conversion
date_columns_mask = df_nse_und1.columns.str.contains('|'.join(filtin))
dt_col_list = [item for item in df_nse_und1.columns[date_columns_mask]
               if item not in filtout]  #filterout

# values that cannot be parsed are coerced to NaT rather than raising
parsed_dates = df_nse_und1.loc[:, dt_col_list].apply(pd.to_datetime, errors='coerce')

# DataFrame.map is the newer name of applymap; use whichever this pandas has
if hasattr(pd.DataFrame, 'map'):
    to_each_cell = parsed_dates.map
else:
    to_each_cell = parsed_dates.applymap

# NOTE(review): missing dates are shown as 0001-01-01 in the displayed output;
# confirm that the next program expects that value
df_nse_und1.loc[:, dt_col_list] = to_each_cell(pd.Timestamp.date)

dict_df = df_nse_und1.to_dict(orient='index')  # dictionary based on index of symbols

# store data for next program
df_nse_und1.to_pickle('./zdata/underlying_df.pkl')

Wall time: 5min 56s


In [96]:
# not used because the dataframe pickle occupies less space!
# import pickle
# with open('./zdata/underlying_dict.pkl', 'wb') as handle:
#     pickle.dump(dict_df, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [35]:
# show the cleaned dataframe, indexed by symbol
df_nse_und1

Unnamed: 0_level_0,pricebandupper,applicableMargin,bcEndDate,totalSellQuantity,adhocMargin,companyName,marketType,exDate,bcStartDate,css_status_desc,...,buyQuantity3,buyQuantity2,buyQuantity1,series,faceValue,buyQuantity5,closePrice,open,isinCode,lastPrice
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ACC,1680.65,12.59,2018-05-25,269,,ACC Limited,N,2018-05-17,2018-05-21,Listed,...,,,,EQ,10.00,,1478.60,1515.00,INE012A01025,1470.15
ADANIENT,146.20,19.54,0001-01-01,4334,,Adani Enterprises Limited,N,2018-09-06,0001-01-01,Listed,...,,,,EQ,1.00,,130.20,131.55,INE423A01024,129.35
ADANIPORTS,348.75,13.29,2018-08-06,4186,,Adani Ports and Special Economic Zone Limited,N,2018-07-26,2018-07-30,Listed,...,,,,EQ,2.00,,301.80,317.10,INE742F01042,298.00
ADANIPOWER,26.75,24.33,2018-08-06,,,Adani Power Limited,N,2018-07-26,2018-07-30,Listed,...,,,29314,EQ,10.00,,23.45,24.30,INE814H01011,23.70
AJANTPHARM,1105.85,13.87,2018-07-05,67,,Ajanta Pharma Limited,N,2018-06-26,2018-06-28,Listed,...,,,,EQ,2.00,,1019.50,1004.00,INE031B01049,1023.00
ALBK,39.65,13.42,2018-06-27,,,Allahabad Bank,N,2018-06-19,2018-06-21,Listed,...,,,6374,EQ,10.00,,35.35,35.60,INE428A01015,35.15
AMARAJABAT,802.15,12.78,2018-08-11,511,,Amara Raja Batteries Limited,N,2018-08-02,2018-08-06,Listed,...,,,,EQ,1.00,,719.35,733.65,INE885A01032,714.95
AMBUJACEM,235.05,12.83,2018-04-13,1849,,Ambuja Cements Limited,N,2018-04-05,2018-04-09,Listed,...,,,,EQ,2.00,,208.45,213.00,INE079A01024,208.25
ANDHRABANK,32.80,16.37,2018-07-09,8134,,Andhra Bank,N,2018-06-29,2018-07-03,Listed,...,,,,EQ,10.00,,26.85,27.60,INE434A01013,26.70
APOLLOHOSP,1151.90,13.68,2018-09-27,,,Apollo Hospitals Enterprise Limited,N,2018-09-12,2018-09-15,Listed,...,,,1973,EQ,5.00,,1057.20,1039.70,INE437A01024,1065.30
