In [1]:
%%time

# Expiry, symbol and option info extracted from NSE and pickled

# STATUS: Complete
# Runtime: 20 mins

import lxml.html as lh
import datetime as dt
import pandas as pd
import numpy as np
import time

import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry


# Retry policy shared by every request made through `session`:
# up to 3 connection retries with a backoff factor of 0.5.
retry = Retry(connect=3, backoff_factor=0.5)
adapter = HTTPAdapter(max_retries=retry)

# One session for the whole script; the same adapter serves both URL schemes.
session = requests.Session()
session.mount('https://', adapter)
session.mount('http://', adapter)

#******         Paths and variables         ****
#_______________________________________________

# Folder holding the input symbol pickles (and the output pickle)
datapath = './zdata/'

#******   Error catch in list comprehension  ****
#________________________________________________

def catch(func, handle=lambda e : e, *args, **kwargs):
    '''List comprehension error catcher.

    Calls func(*args, **kwargs) and returns its result. If the call raises
    an Exception, the exception is passed to `handle` (when `handle` is
    callable) and None is returned, so a single failure does not abort the
    surrounding list comprehension.

    Args: (func)   = callable to run
          (handle) = callable taking the exception, e.g. a logger or
                     list.append; its return value is ignored. A
                     non-callable placeholder such as None is accepted
                     and simply skipped.
          (*args, **kwargs) = forwarded to func
    Returns: func's result, or None if func raised'''
    try:
        return func(*args, **kwargs)
    except Exception as e:
        # Report the error to the caller-supplied handler, if there is one.
        if callable(handle):
            handle(e)
        # Always fall back to None: callers filter / concat on None.
        return None

#******               Symbols list          ****
#________________________________________________
# Equity and index symbols come from two pickled frames in `datapath`;
# only their nseSymbol column is used. Equity symbols are read first.
eq_symbols, idx_symbols = (
    list(pd.read_pickle(datapath + pkl_name).nseSymbol.values)
    for pkl_name in ('df_nse_eq_symbols.pkl', 'df_nse_idx_symbols.pkl')
)

# Full scrape list: equities followed by indexes
symbols = [*eq_symbols, *idx_symbols]

# Option-chain URL = url_base + <symbol> [+ url_end + <expiry>]
# (the same address is used for equity and index options)
url_base = 'https://nseindia.com/live_market/dynaContent/live_watch/option_chain/optionKeys.jsp?symbol='
url_end = '&date='

# XPath expressions used on the downloaded page
the_path = '//form'               # form contents; the page has two forms
tbl_path = "//*[@id='octable']"   # the option-chain table

def get_expiry(symbol):
    '''Get expiry dates for the symbol from its NSE option-chain page.

    The text of the first <form> on the page is split into whitespace
    separated tokens, and the tokens that follow the 'Select' token are
    returned.
    NOTE(review): this presumes the expiry drop-down is the last thing in
    that form, so everything after 'Select' is an expiry - confirm against
    the live page.

    Arg: (symbol) as string
    Returns: a list of expiries (empty if no 'Select' token is present)
    Raises: IndexError if the page contains no <form> element; request
            errors from session.get are not handled here'''

    url = url_base + symbol

    page = session.get(url)
    doc = lh.fromstring(page.content)

    # Text of the first form on the page
    form_text = doc.xpath(the_path)[0].text_content()

    # Turn line breaks, tabs and non-breaking spaces into plain spaces
    # so that split() gives clean tokens
    char_table = {ord(c): ' ' for c in '\n\xa0\t\r'}
    tokens = form_text.translate(char_table).split()

    # Collect everything from each 'Select' token to the end of the form.
    # Equality is required here: a substring test (token in 'Select') would
    # also fire on tokens such as 'S', 'e' or 'lect' and add bogus tails.
    expiry_list = []
    for k, token in enumerate(tokens):
        if token == 'Select':
            expiry_list.extend(tokens[k:])

    # Drop the leading 'Select' placeholder itself
    return expiry_list[1:]

def get_opt_chains(symbol, expiry, max_tries=4, timeout=None):
    ''' Gets the option chains for symbol and expiry from nse webpage

        The page is downloaded once. A failed request is retried up to
        max_tries times in total, with a 0.2 s pause between attempts.

        Args: (symbol) = str
              (expiry) = str in 27DEC2018 form
              (max_tries) = int, total number of download attempts (min 1)
              (timeout) = seconds passed to session.get; None = no timeout
        Returns: option chain dataframe with columns nseSymbol, Expiry,
                 Strike, undPrice followed by the call (c*) and put (p*)
                 columns. An empty dataframe is returned when the table
                 only says 'No contracts traded today'.
        Raises: the last request error if every attempt fails; parsing
                errors if the page layout is not the expected one'''

    df2 = pd.DataFrame()  # Initialize a return dataframe

    # Make the URL
    url = url_base + symbol + url_end + expiry

    # Get the page: stop at the first successful response, give up
    # (re-raising the error) after the last attempt
    tries = max(1, max_tries)
    for attempt in range(1, tries + 1):
        try:
            page = session.get(url, timeout=timeout)
            break
        except (TimeoutError, requests.exceptions.RequestException):
            print('NSE Connection Exception ', attempt)
            if attempt == tries:
                raise
            time.sleep(0.2)

    doc = lh.fromstring(page.content)

    # The headers: call-side columns, the strike, then put-side columns
    opt_head = ['cOI', 'cChnginOI', 'cVolume', 'cIV', 'cLTP',
                'cNetChng', 'cBidQty', 'cBidPrice', 'cAskPrice', 'cAskQty', 'Strike',
                'pBidQty', 'pBidPrice', 'pAskPrice', 'pAskQty', 'pNetChng', 'pLTP',
                'pIV', 'pVolume', 'pChnginOI', 'pOI']

    # Text of every child of the option table except the first one
    opt_data = [tr.text_content() for table in doc.xpath(tbl_path) for tr in table[1:]]

    # return empty dataframe if no contracts
    if opt_data != ['No contracts traded today']:

        # One row per table line, one column per whitespace separated value
        df1 = pd.DataFrame([d.split() for d in opt_data], columns=opt_head)

        df2 = df1.replace(',', '', regex=True) # Remove comma from numbers
        df2 = df2.apply(pd.to_numeric, errors='coerce') # Convert to numeric; non-numbers become NaN
        df2.insert(0, 'Expiry', expiry) # Insert the Expiry column
        df2.insert(0, 'nseSymbol', symbol) # Insert the Symbol

        # Get the underlying's price: 8th space separated token of the
        # printed column 1 of the first table pandas finds on the page
        # NOTE(review): depends on the exact page layout - confirm if NSE changes it
        und_price = float(str(pd.read_html(page.text)[0][1]).split(' ')[7])
        df2.insert(loc=0, column='undPrice', value=und_price)

        # Rearrange the columns
        cols_beginning = ['nseSymbol', 'Expiry', 'Strike', 'undPrice']
        df2 = df2[cols_beginning + [c for c in df2 if c not in cols_beginning]]

    return df2

# Get the option chain dataframe
# Build every (symbol, expiry) pair to download.
# A symbol whose expiry page cannot be read is reported and skipped, so one
# bad page does not abort the whole (roughly 20 minute) run.
sym_exp = []
for symbol in symbols:
    try:
        expiries = get_expiry(symbol)
    except Exception as err:
        print('Could not get expiries for', symbol, '-', err)
        continue
    sym_exp.extend((symbol, expiry) for expiry in expiries)

# Get the option chain dataframes; catch() gives None for a failed download
df_list = [catch(lambda: get_opt_chains(*pair)) for pair in sym_exp]

# pd.concat drops the None entries (and raises ValueError if all are None)
df = pd.concat(df_list).reset_index(drop=True)

#****        Closing tasks     ****
#___________________________________

# write to pickle file - for the next program
df.to_pickle(datapath + 'df_nse_options.pkl')   # Pickle the dataframe for later use

Wall time: 19min 46s
