In [1]:
%%time

# Expiry, symbol and option info extracted from NSE and pickled
# Takes about 5 minutes (last recorded wall time: 4min 53s)

# STATUS: Completed

import requests
import lxml.html as lh
import datetime as dt
import pandas as pd
import numpy as np
import csv
import os

#******         Paths and variables         ****
#_______________________________________________

# Folder (relative to the notebook) that holds the input csv and every output file
datapath = './zdata/'

#******   Error catch in list comprehension  ****
#________________________________________________

def catch(func, handle=lambda e : e, *args, **kwargs):
    '''Runs func(*args, **kwargs) and gives back None if it raises.

    Lets a list comprehension carry on when one item fails.

    Args:
       (func) as the callable to run
       (handle) as a callable taking the exception. It is accepted but never
                called, so the exception is simply discarded
       (*args, **kwargs) are passed straight to func
    Returns: the value returned by func, or None when func raised an Exception'''
    try:
        result = func(*args, **kwargs)
    except Exception:
        # Best effort: the failure is dropped and the caller sees None
        return None
    return result

#******               Symbols list          ****
#________________________________________________

# copy and paste table from #
# https://www.nseindia.com/products/content/derivatives/equities/fo_underlying_home.htm
# Steps:
# 1. copy and paste to spreadsheet
# 2. copy the 'Symbol' column to notepad++. The first row should be 'Symbol'
# 3. Delete index symbols
# 4. Store the file as _raw_equity.csv in the data folder (datapath)

# Equity underlyings come from the hand-prepared csv; index underlyings are listed by hand
eq_symbols = pd.read_csv(datapath + '_raw_equity.csv').loc[:, 'Symbol'].tolist()

idx_symbols = ['NIFTY', 'BANKNIFTY']

#****         Get Options and Expiries      ****
#_______________________________________________

#......      Data Limiter   .......
# Everything in this list gets scraped: equities first, then indexes
symbols = [*eq_symbols, *idx_symbols]
#..................................

# URL pieces shared by equity and index options: url_base + symbol [+ url_end + expiry]
url_base = "https://nseindia.com/live_market/dynaContent/live_watch/option_chain/optionKeys.jsp?symbol="
url_end = '&date='

def _parse_expiries(doc):
    '''Extracts the expiry strings from the first form on the page
    Arg: (doc) as the lxml element parsed from the option-chain page
    Returns: list of str holding every token that follows 'Select' in the form text'''

    form_text = doc.xpath("//form")[0].text_content()   # only the first form is read

    # Turn layout whitespace (incl. non-breaking space) into blanks before tokenizing
    char_table = {ord(c): ' ' for c in '\n\xa0\t\r'}
    tokens = form_text.translate(char_table).split()

    # Everything from 'Select' onwards, then drop the leading 'Select' itself.
    # Equality (not `in`) so that fragments such as 'S' or 'le' cannot match.
    # NOTE(review): assumes 'Select' occurs once in this form - confirm against the page
    return [tokens[k+i]
            for k, v in enumerate(tokens)
            if v == 'Select'
            for i in range(len(tokens) - k)][1:]


def _parse_option_table(doc, symbol, expiry_date):
    '''Builds the option-chain dataframe held in the 'octable' element of one page
    Args:
       (doc) as the lxml element parsed from the option-chain page
       (symbol) as str, written into the Symbol column
       (expiry_date) as datetime.date, written into the Expiry column
    Returns: dataframe with Symbol, Expiry, Strike first, then the remaining columns'''

    # Call-side columns, the strike, then put-side columns, in page order
    opt_head = ['cOI', 'cChnginOI', 'cVolume', 'cIV', 'cLTP',
                'cNetChng', 'cBidQty', 'cBidPrice', 'cAskPrice', 'cAskQty', 'Strike',
                'pBidQty', 'pBidPrice', 'pAskPrice', 'pAskQty', 'pNetChng', 'pLTP',
                'pIV', 'pVolume', 'pChnginOI', 'pOI']

    # The first child of the table is skipped; every other child is read as one row
    rows = [row.text_content()
            for table in doc.xpath("//*[@id='octable']")
            for row in table[1:]]
    raw = pd.DataFrame([row.split() for row in rows], columns=opt_head)

    # Strip thousands separators, then coerce: cells that are not numbers become NaN
    chain = raw.replace(',', '', regex=True).apply(pd.to_numeric, errors='coerce')
    chain.insert(0, 'Expiry', expiry_date)
    chain.insert(0, 'Symbol', symbol)

    lead = ['Symbol', 'Expiry', 'Strike']
    return chain[lead + [c for c in chain if c not in lead]]


def get_opt_data(symbol, all_expiries=False):
    '''Gets the options data
    Args:
       (symbol) as str
       (all_expiries) as bool. False (default) scrapes only the page served
                      without a date and labels it with the first listed expiry.
                      True also scrapes one page for every remaining expiry.
    Returns: scraped dataframe

    Any error (network failure, unexpected page layout, no expiry listed) is
    raised to the caller.'''

    url = url_base + symbol

    # The page without a date also carries the list of available expiries
    doc = lh.fromstring(requests.get(url).content)
    expiries = _parse_expiries(doc)
    expiry_dates = [dt.datetime.strptime(date, "%d%b%Y").date() for date in expiries]

    # NOTE(review): presumably the undated page shows the first listed expiry - confirm
    frames = [_parse_option_table(doc, symbol, expiry_dates[0])]

    # The original returned from inside its loop, so later expiries were never
    # fetched. They stay opt-in here to keep the default output unchanged.
    if all_expiries:
        for expiry, expiry_date in zip(expiries[1:], expiry_dates[1:]):
            doc = lh.fromstring(requests.get(url + url_end + expiry).content)
            frames.append(_parse_option_table(doc, symbol, expiry_date))

    # DataFrame.append no longer exists in pandas 2.x; concat the pieces once instead
    return pd.concat(frames)

# %%time
# Scrape each underlying; a symbol that fails contributes None instead of stopping the run
nse_options = [catch(get_opt_data, symbol=symbol) for symbol in symbols]

# concat skips the None placeholders; the result gets a fresh 0..n-1 index
df_options = (
    pd.concat(nse_options)
    .reset_index(drop=True)
)

#****        Closing tasks     ****
#___________________________________

# write list of successful symbols to a csv file - for the next program

def write_csv(the_list, filenm):
    '''Writes the list as a single row of a csv file inside the data folder
    Args: 
       (the_list) as list
       (filenm) as the name of file, with or without extension
    Returns: None'''
    
    # Drop only the trailing extension, so dots inside the name survive
    stem = os.path.splitext(filenm)[0]
    target = os.path.join(datapath, stem + '.csv')
    
    # Check the very path that is about to be written
    if os.path.exists(target):
        print(stem+'.csv'+' exists and is overwritten!')
    
    # newline='' leaves line endings to the csv module
    with open(target, 'w', newline='') as csvfile:
        csv.writer(csvfile).writerow(the_list)

# Symbols that actually produced option data. They are filtered in their
# original list order (duplicates removed) so the csv files are identical from
# run to run; iterating a set of strings gives no stable order.
scraped_symbols = set(df_options.Symbol.unique())
good_idx = [s for s in dict.fromkeys(idx_symbols) if s in scraped_symbols]
good_equity = [s for s in dict.fromkeys(eq_symbols) if s in scraped_symbols]

write_csv(filenm='nse_idx_symbols', the_list=good_idx)
write_csv(filenm='nse_eq_symbols', the_list=good_equity)

# write to pickle file - for the next program
# Same data folder as the csv files, taken from datapath rather than repeated here
df_options.to_pickle(datapath + 'df_nse_options.pkl')   # Pickle the dataframe for later use

nse_idx_symbols.csv exists and is overwritten!
nse_eq_symbols.csv exists and is overwritten!
Wall time: 4min 53s


In [2]:
# Preview the first rows only; the complete frame was pickled by the previous cell
df_options.head(10)

Unnamed: 0,Symbol,Expiry,Strike,cOI,cChnginOI,cVolume,cIV,cLTP,cNetChng,cBidQty,...,pBidQty,pBidPrice,pAskPrice,pAskQty,pNetChng,pLTP,pIV,pVolume,pChnginOI,pOI
0,ACC,2018-11-29,1140.0,,,,,,,,...,,,,,,,,,,
1,ACC,2018-11-29,1200.0,,,,,,,,...,2400.0,0.05,1.00,400.0,,0.05,,,,2400.0
2,ACC,2018-11-29,1220.0,,,,,,,,...,2800.0,0.10,,,,1.25,,,,800.0
3,ACC,2018-11-29,1240.0,,,,,,,,...,2400.0,0.10,,,,1.75,,,,800.0
4,ACC,2018-11-29,1260.0,400.0,,,,209.60,,,...,400.0,1.00,1.10,2000.0,,1.60,,,,10800.0
5,ACC,2018-11-29,1280.0,2800.0,,,,99.75,,800.0,...,400.0,1.00,,,,2.60,,,,3600.0
6,ACC,2018-11-29,1300.0,6800.0,,,,167.30,,1600.0,...,400.0,1.10,1.85,400.0,-0.25,1.00,40.36,4.0,-1200.0,27200.0
7,ACC,2018-11-29,1320.0,400.0,,,,84.00,,1600.0,...,1600.0,0.50,,,,3.90,,,,5200.0
8,ACC,2018-11-29,1340.0,2000.0,,,,80.75,,2400.0,...,400.0,0.35,2.00,800.0,-0.10,1.30,34.88,2.0,-400.0,8000.0
9,ACC,2018-11-29,1360.0,6000.0,,,,150.85,,800.0,...,400.0,0.80,3.80,400.0,2.10,3.80,39.22,2.0,-400.0,9200.0
