In [1]:
%%time
# Underlying details extracted from nse
# Program takes 5 mins

# STATUS: Completed

import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup as bs4
import json
import datetime as datetime
import sys

#******         Paths and variables         ****
#_______________________________________________

datapath = r'./zdata/'


#******   Error catch in list comprehension  ****
#________________________________________________

def catch(func, handle=lambda e : e, *args, **kwargs):
    '''List comprehension error catcher'''
    try:
        return func(*args, **kwargs)
    except Exception as e:
        pass

Wall time: 4.44 s


In [2]:
#******               Symbols list          ****
#________________________________________________
eq_symbols = list(pd.read_pickle(datapath+'df_nse_eq_symbols.pkl').localSymbol.values)

idx_symbols = list(pd.read_pickle(datapath+'df_nse_idx_symbols.pkl').localSymbol.values)

symbols = eq_symbols + idx_symbols

In [3]:
#******     Underlying data extraction     *****
#_______________________________________________

def get_underlying(symbol):
    
    '''Gets the underlying data
    Arg: (symbol as string)
    Returns: df as dataframe'''

    # URLs
    url_base1 = "https://www.nseindia.com/live_market/dynaContent/live_watch/"
    url = url_base1 + "get_quote/GetQuote.jsp?symbol="+symbol

    page = requests.get(url).text

    # soup out the json dict
    bs_nse = bs4(page, 'html.parser')
    json_nse = json.loads(bs_nse.find(id='responseDiv').text.strip())

    df = pd.DataFrame.from_dict(json_nse['data'][0], orient='index').T
    
    return df

nse_underlyings = [catch(lambda: get_underlying(symbol)) for symbol in symbols]

# indexed by symbols
df_nse_und = pd.concat(nse_underlyings).set_index('symbol')

# clean up the commas, dashes and empty strs
df_nse_und1 = df_nse_und.replace(',|-' , '', regex=True)
df_nse_und1 = df_nse_und1.replace('', np.nan)

# convert date columns to datetime.date format
filtin = ['Date', 'dt']
filtout = ['isExDateFlag']
date_columns_mask = df_nse_und1.columns.str.contains('|'.join(filtin)) | (df_nse_und1.columns.str.contains('|'.join(filtout)))
dt_col_list = df_nse_und1.loc[:, date_columns_mask].columns.tolist()
dt_col_list = [item for item in dt_col_list if item not in filtout]  #filterout

df_nse_und1.loc[:, dt_col_list] = df_nse_und1.loc [:, dt_col_list].apply( \
                                  pd.to_datetime, errors= 'coerce').applymap(pd.Timestamp.date)

# store data for next program
df_nse_und1.to_pickle(datapath+r'df_underlying.pkl')

In [11]:
df_nse_und1.reset_index().symbol.unique()

array(['ACC', 'ADANIENT', 'ADANIPORTS', 'ADANIPOWER', 'AJANTPHARM',
       'ALBK', 'AMARAJABAT', 'AMBUJACEM', 'APOLLOHOSP', 'APOLLOTYRE',
       'ARVIND', 'ASHOKLEY', 'ASIANPAINT', 'AUROPHARMA', 'AXISBANK',
       'BAJAJ-AUTO', 'BAJAJFINSV', 'BAJFINANCE', 'BALKRISIND',
       'BANKBARODA', 'BANKINDIA', 'BATAINDIA', 'BEL', 'BEML',
       'BERGEPAINT', 'BHARATFIN', 'BHARATFORG', 'BHARTIARTL', 'BHEL',
       'BIOCON', 'BOSCHLTD', 'BPCL', 'BRITANNIA', 'CADILAHC', 'CANBK',
       'CANFINHOME', 'CAPF', 'CASTROLIND', 'CEATLTD', 'CENTURYTEX',
       'CESC', 'CGPOWER', 'CHENNPETRO', 'CHOLAFIN', 'CIPLA', 'COALINDIA',
       'COLPAL', 'CONCOR', 'CUMMINSIND', 'DABUR', 'DALMIABHA', 'DCBBANK',
       'DHFL', 'DISHTV', 'DIVISLAB', 'DLF', 'DRREDDY', 'EICHERMOT',
       'ENGINERSIN', 'EQUITAS', 'ESCORTS', 'EXIDEIND', 'FEDERALBNK',
       'GAIL', 'GLENMARK', 'GMRINFRA', 'GODFRYPHLP', 'GODREJCP',
       'GODREJIND', 'GRANULES', 'GRASIM', 'GSFC', 'HAVELLS', 'HCC',
       'HCLTECH', 'HDFC', 'HDFCBANK', 'HE

In [13]:
df_nse_und1

Unnamed: 0_level_0,pricebandupper,applicableMargin,bcEndDate,totalSellQuantity,adhocMargin,companyName,marketType,exDate,bcStartDate,css_status_desc,...,buyQuantity3,buyQuantity2,buyQuantity1,series,faceValue,buyQuantity5,closePrice,open,isinCode,lastPrice
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ACC,1651.50,12.50,2018-05-25,10640,,ACC Limited,N,2018-05-17,2018-05-21,Listed,...,,,,EQ,10.00,,1501.65,1502.60,INE012A01025,1489.90
ADANIENT,177.55,15.13,0001-01-01,2,,Adani Enterprises Limited,N,2018-09-06,0001-01-01,Listed,...,,,,EQ,1.00,,162.10,162.40,INE423A01024,162.00
ADANIPORTS,382.15,14.52,2018-08-06,,,Adani Ports and Special Economic Zone Limited,N,2018-07-26,2018-07-30,Listed,...,,,35,EQ,2.00,,350.55,348.20,INE742F01042,351.00
ADANIPOWER,52.30,35.02,2018-08-06,,8.02,Adani Power Limited,N,2018-07-26,2018-07-30,Listed,...,,,153443,EQ,10.00,,47.40,47.55,INE814H01011,48.00
AJANTPHARM,1231.55,12.69,0001-01-01,,,Ajanta Pharma Limited,N,2018-11-09,0001-01-01,Listed,...,,,2485,EQ,2.00,,1136.55,1120.00,INE031B01049,1143.75
ALBK,47.65,20.87,2018-06-27,,,Allahabad Bank,N,2018-06-19,2018-06-21,Listed,...,,,55739,EQ,10.00,,43.55,43.40,INE428A01015,44.00
AMARAJABAT,807.10,12.50,0001-01-01,,,Amara Raja Batteries Limited,N,2018-11-20,0001-01-01,Listed,...,,,298,EQ,1.00,,732.55,737.90,INE885A01032,732.15
AMBUJACEM,238.95,12.50,2018-04-13,91,,Ambuja Cements Limited,N,2018-04-05,2018-04-09,Listed,...,,,,EQ,2.00,,220.40,218.00,INE079A01024,221.00
APOLLOHOSP,1356.85,14.73,2018-09-27,741,,Apollo Hospitals Enterprise Limited,N,2018-09-12,2018-09-15,Listed,...,,,,EQ,5.00,,1225.05,1235.00,INE437A01024,1225.50
APOLLOTYRE,248.85,14.37,2018-08-01,,,Apollo Tyres Limited,N,2018-07-18,2018-07-20,Listed,...,,,512,EQ,1.00,,229.15,226.60,INE438A01022,229.50
