In [2]:
%%time

# Expiry, symbol and option info extracted from NSE and pickled
# Takes 4 minutes

# STATUS: Complete

import requests
import lxml.html as lh
import datetime as dt
import pandas as pd
import numpy as np
import csv
import os

#******         Paths and variables         ****
#_______________________________________________

datapath = r'./zdata/'

#******   Error catch in list comprehension  ****
#________________________________________________

def catch(func, handle=lambda e : e, *args, **kwargs):
    '''List comprehension error catcher'''
    try:
        return func(*args, **kwargs)
    except Exception as e:
        pass

#******               Symbols list          ****
#________________________________________________
eq_symbols = list(pd.read_pickle(datapath+'df_nse_eq_symbols.pkl').localSymbol.values)

idx_symbols = list(pd.read_pickle(datapath+'df_nse_idx_symbols.pkl').localSymbol.values)

symbols = eq_symbols + idx_symbols

#****         Get Options and Expiries      ****
#_______________________________________________

# generic url for equity and index options
url_base = "https://nseindia.com/live_market/dynaContent/live_watch/option_chain/optionKeys.jsp?symbol="
url_end = '&date='

def get_opt_data(symbol):
    '''Gets the options data
    Arg: (symbol) as str
    Returns: scraped dataframe'''
    
#     symbol = 'NIFTY'
    url = url_base + symbol
    
    while_flag = True   # Flag to loop over expiries for a symbol
    while_counter = 0   # Counter for expiry
    expiry = ''         # Initialize expiry for first run
    df = pd.DataFrame() # Initialize return dataframe

    while while_flag:

        # If it is not first run, append next expiry
        if expiry != '':  # This is not the first run
            try:
                expiry = expiry[while_counter]
                url = url_base+symbol+url_end+expiry  # Check to see if this works
            except IndexError:
                while_flag = True   # Get out of the while loop
                break

        # Get raw data from the url
        page = requests.get(url)
        doc = lh.fromstring(page.content)

        #...    Get the Expiries ...#
        #............................

        the_path= "//form"   # Get the form content. There are two forms
        expiries = doc.xpath(the_path)[0].text_content()
        strip_chars = [ord('\n'), ord('\xa0'), ord('\t'), ord('\r')] # characters to be stripped
        char_table = {s: ' ' for s in strip_chars} # table for translate to locate the chars

        sym_exp = expiries.translate(char_table).split() # split converts the translated dict into list

        # Get the expiries only, from the table
        expiry = [sym_exp[k+i] 
         for k, v in enumerate(sym_exp) 
         if v in 'Select' 
         for i in range(len(sym_exp) - k)][1:]

        # Convert expiry to datetime
        expiry_dt = [dt.datetime.strptime(date, "%d%b%Y").date() for date in expiry]

        #...   Get the Options Data .... #
        #.................................

        tbl_path = "//*[@id='octable']"  # xpath for the table

        # The headers
        opt_head = ['cOI', 'cChnginOI', 'cVolume', 'cIV', 'cLTP', 
                    'cNetChng', 'cBidQty', 'cBidPrice', 'cAskPrice', 'cAskQty', 'Strike', 
                    'pBidQty', 'pBidPrice', 'pAskPrice', 'pAskQty', 'pNetChng', 'pLTP', 
                    'pIV', 'pVolume', 'pChnginOI', 'pOI']

#         # Extract the rows within the header in the table
#         opt_data = [[tr.text_content().strip() 
#                     for table in doc.xpath(tbl_path)] 
#                     for th in table.xpath('//thead') 
#                     for tr in th.xpath('//tr')][5:]  # First 5 rows are junk

#         df1 = pd.DataFrame(data=[a.translate(char_table).split() 
#                                     for d in opt_data for a in d], columns=opt_head)

        opt_data = [tr.text_content() for table in doc.xpath(tbl_path) for tr in table[1:]]

        df1 = pd.DataFrame([d.split() for d in opt_data], columns=opt_head)

        df2 = df1.replace(',', '', regex=True) # Remove comma from numbers
        df2 = df2.apply(pd.to_numeric, errors='coerce') # Convert to numeric
        df2.insert(0, 'Expiry', expiry_dt[while_counter]) # Insert the Expiry column
        df2.insert(0, 'Symbol', symbol) # Insert the Symbol

        # Rearrange the columns
        cols_beginning = ['Symbol', 'Expiry', 'Strike']
        df2 = df2[cols_beginning + [c for c in df2 if c not in cols_beginning]]
        while_counter = while_counter + 1
        df = df.append(df2)
        return df

# %%time
nse_options = [catch(lambda: get_opt_data(symbol)) for symbol in symbols]

df_options = pd.concat(nse_options).reset_index(drop=True)

#****        Closing tasks     ****
#___________________________________

# write to pickle file - for the next program   
df_options.to_pickle('./zdata/df_nse_options.pkl')   # Pickle the dataframe for later use

Wall time: 18min 11s


In [3]:
len(df_options.Symbol.unique())

199

In [4]:
len(df_options)

6861