In [1]:
# NSE scanner and pickler

# STATUS: Completed
# Run-time: 5 hours

#***          Start ib_insync (run once)       *****
#_______________________________________________

from ib_insync import *
# Event-loop setup needed before ib_insync is used from a notebook
# (see the ib_insync documentation for util.startLoop)
util.startLoop()
# Open the API connection on localhost, port 3000, as client id 1;
# the resulting `ib` object is used by every request further down
ib = IB().connect('127.0.0.1', 3000, clientId=1)

In [None]:
%%time
import datetime
import itertools
import os
from math import sqrt, exp, log, erf

import numpy as np
import pandas as pd

#... assignments (settings read by the rest of the notebook)
exchange = 'NSE'     # exchange given to every Stock / Index / Option contract
fspath = './zdata/'  # prefix for the pickle and csv files written at the end

putsigma = 1.5   # puts are kept only if strike < undPrice - putsigma * stdev
callsigma = 2    # calls are kept only if strike > undPrice + callsigma * stdev
maxdte = 65  # options with more days to expiry than this are dropped

tradingdays = 252  # scaling factor used for volatility and for return-on-margin

#... Error catching for list comprehension
#_________________________________________

def catch(func, handle=lambda e : e, *args, **kwargs):
    '''List comprehension error catcher

    Calls func(*args, **kwargs) and turns any exception into np.nan, so that
    one failing element does not abort a whole list comprehension.

    Args: 
        (func) as the function to call
        (handle) as a lambda taking the exception; kept in the signature for
                 backward compatibility, it is not called
        <*args | **kwargs> as arguments passed through to func
    Outputs:
        output of the function | <np.nan> on error
    Usage:
        eggs = [1,3,0,3,2]
        [catch(lambda: 1/egg) for egg in eggs]'''
    try:
        return func(*args, **kwargs)
    except Exception:
        # np.nan has to be returned explicitly: a bare `np.nan` expression
        # would be discarded and the caller would receive None instead
        return np.nan

#... build underlying contracts
#______________________________

# Margin table from 5paisa: the second table on the page, one row per Symbol,
# with the symbol / margin / lot columns renamed and numeric text converted
paisaurl = "https://www.5paisa.com/5pit/spma.asp"
df_paisa = (
    pd.read_html(paisaurl, header=0)[1]
    .drop_duplicates(subset='Symbol')
    .rename(columns={'Symbol': 'nseSymbol', 'TotMgn%': 'marginpct', 'Mlot': 'lot'})
    .apply(pd.to_numeric, errors='ignore')
)

# margin arrives as a percentage; store it as a fraction
df_paisa['marginpct'] = df_paisa['marginpct'].div(100)

# nseSymbol to ibSymbol dictionary for the symbols that differ between the two
ntoi = {'M&M': 'MM', 'M&MFIN': 'MMFIN', 'L&TFH': 'LTFH', 'NIFTY': 'NIFTY50'}

# ibSymbol = first 9 characters of the nseSymbol, then remapped through ntoi
df_paisa['ibSymbol'] = df_paisa['nseSymbol'].str.slice(0, 9).replace(ntoi)

# indexes are requested as Index contracts, discards are left out altogether,
# everything else in the table is treated as an equity
indexes = ['NIFTY50', 'BANKNIFTY']
discards = ['NIFTYMID5', 'NIFTYIT', 'LUPIN']
equities = sorted(s for s in df_paisa['ibSymbol'] if s not in indexes + discards)

symbols = equities + indexes

# equities first, then indexes - the same order as `symbols`
c = [Stock(s, exchange) for s in equities] + [Index(s, exchange) for s in indexes]
contracts = ib.qualifyContracts(*c)

#############################################
# contracts = [c for c in contracts if c.symbol in ['ACC', 'NIFTY50']] # DATA LIMITER!!!
#............................................

#... get ohlc
#____________

#...function to get historical data
def get_hist(contract, duration):
    '''Fetches 1-day bars of one contract as a dataframe
    Args:
        (contract) as obj, passed to ib.reqHistoricalData
        (duration) as int, number of days requested (sent as '<duration> D')
    Returns: dataframe built by util.df from the bars, with the contract's
             symbol inserted as the first column
    '''

    bars = ib.reqHistoricalData(
        contract=contract,
        endDateTime='',
        durationStr='{} D'.format(duration),
        barSizeSetting='1 day',
        whatToShow='Trades',
        useRTH=True)

    frame = util.df(bars)
    frame.insert(0, column='symbol', value=contract.symbol)

    return frame

# get 365 days of history for every contract, one request after the other
# (walking the list in slices of `blks` visits the contracts in this same order)
blks = 50
hists = [get_hist(con, 365) for con in contracts]

# stack the per-contract frames into one frame with a fresh 0..n-1 index
df_ohlc = pd.concat(hists).reset_index(drop=True)

# volatility and standard deviation
#__________________________________

# close prices indexed by (symbol, date), newest date first within each symbol
close_desc = (df_ohlc.set_index(['symbol', 'date'])['close']
              .sort_index(ascending=[True, False]))

# every statistic is computed per symbol: without the grouping, pct_change and
# the expanding window would run across symbol boundaries and mix the prices
# of different underlyings into each other's numbers
by_symbol = close_desc.groupby(level='symbol')

# expanding (population) std of daily returns, scaled by sqrt(tradingdays);
# row n of a symbol covers its n+1 most recent bars
df_vol = (by_symbol.transform(lambda s: s.pct_change().expanding(1).std(ddof=0))
          * sqrt(tradingdays)).to_frame('volatility')

# expanding (population) std of the close price itself, same windowing
df_stdev = by_symbol.transform(lambda s: s.expanding(1).std(ddof=0)).to_frame('stdev')

df_vstd = df_vol.join(df_stdev)

def get_vol(symbol, dte):
    '''Looks up the volatility held in df_vstd for a symbol
    Args:
       (symbol) symbol as string
       (dte) dte as int, used as the row position among the symbol's rows
    Returns: the 'volatility' value of that row'''

    # the first two rows are never selected: positions below 2 become 2
    row = max(dte, 2)

    return df_vstd.loc[symbol].iloc[row].volatility

def get_std(symbol, dte):
    '''Looks up the standard deviation held in df_vstd for a symbol
    Args:
       (symbol) symbol as string
       (dte) dte as int, used as the row position among the symbol's rows
    Returns: the 'stdev' value of that row'''

    # the first two rows are never selected: positions below 2 become 2
    row = max(dte, 2)

    return df_vstd.loc[symbol].iloc[row].stdev

#... get the underlyings
#___________________________

#...function to get price and dividend ticker
def get_dividend_ticker(contract):
    '''Gets ticker of the contract, waiting for close price and dividends
    Arg: (contract) as a qualified contract object with conId
    Returns: ticker, once both ticker.close and ticker.dividends are filled'''

    ib.reqMktData(contract, '456', snapshot=False, regulatorySnapshot=False) # request ticker stream

    ticker = ib.ticker(contract)

    try:
        # Ensure the ticker is filled.
        # Both conditions are checked in one loop so that every pass goes
        # through ib.sleep(); a wait that loops without sleeping (close filled,
        # dividends still missing) would spin without pausing
        while ticker.dividends is None or np.isnan(ticker.close):
            ib.sleep(0.1)
    finally:
        # always release the market data subscription, also when interrupted
        ib.cancelMktData(contract)

    return ticker

# one ticker per underlying, requested in contract order
tickers = [get_dividend_ticker(con) for con in contracts]

# make dataframe of tickers
df_und = util.df(tickers)

# undPrice is the last price when there is one, otherwise the close
df_und['undPrice'] = np.where(df_und['last'].notna(), df_und['last'], df_und.close)

# dividend rate = first field of the dividends record / undPrice;
# rows where that cannot be computed are absorbed by catch
df_und['divrate'] = [catch(lambda: dividends[0]/price)
                     for dividends, price in zip(df_und.dividends, df_und.undPrice)]

# symbol of each ticker's contract becomes the first column
df_und.insert(0, 'symbol', [con.symbol for con in df_und.contract])

# bring in lot and margin from the 5paisa table, then index by symbol
df_paisa1 = df_paisa[['ibSymbol', 'lot', 'marginpct']].rename(columns={'ibSymbol': 'symbol'})
df_und = df_und.merge(df_paisa1).set_index('symbol')

#... get the options
#___________________

right = ['P', 'C'] # rights: every expiry/strike is listed once as a put and once as a call

# get the chains: one reqSecDefOptParams request per underlying contract
chains = [ib.reqSecDefOptParams(underlyingSymbol=c.symbol, futFopExchange='', 
                      underlyingConId=c.conId, underlyingSecType=c.secType) for c in contracts]

# for every chain entry: all (tradingClass, expiry, strike, right) combinations
sekr = [list(itertools.product([c.tradingClass], c.expirations, c.strikes, right)) for cs in chains for c in cs]

# flatten the per-chain lists into one row per combination
df_opt = pd.DataFrame([x for s in sekr for x in s], columns=['symbol', 'expiry', 'strike', 'right'])

# translate the symbols listed in ntoi, leave all other symbols as they are
df_opt.symbol = df_opt.symbol.map(ntoi).fillna(df_opt.symbol)

# underlying price looked up by symbol (nan when the symbol is not in df_und)
df_opt['undPrice'] = df_opt.symbol.map(df_und.undPrice)

# days to expiry: parsed expiry minus today's date, in whole days
df_opt['dte'] = (df_opt.expiry.apply(util.parseIBDatetime) - datetime.datetime.now().date()).dt.days

# remove options that are more than maxdte
df_opt = df_opt[df_opt.dte <= maxdte].reset_index(drop=True)

# standard deviation for each option row, looked up by symbol and dte
vf = np.vectorize(get_std)
df_opt['stdev'] = vf(df_opt.symbol, df_opt.dte)

# keep only the far strikes: puts below undPrice - putsigma * stdev and
# calls above undPrice + callsigma * stdev
put_floor = df_opt.undPrice - df_opt.stdev * putsigma
call_ceiling = df_opt.undPrice + df_opt.stdev * callsigma
far_puts = (df_opt.right == 'P') & (df_opt.strike < put_floor)
far_calls = (df_opt.right == 'C') & (df_opt.strike > call_ceiling)
mask = far_puts | far_calls

df_opt = df_opt[mask].reset_index(drop=True)

# lot size per symbol and the exchange, added as the last two columns
df_opt = df_opt.assign(lot=df_opt.symbol.map(df_und.lot), exchange=exchange)

# get the options and qualify them
opts = [Option(s, e, k, r, x) for s, e, k, r, x in zip(df_opt.symbol, df_opt.expiry, df_opt.strike, df_opt.right, df_opt.exchange)]

# qualify a whole block of `blks` contracts per call instead of one contract
# per call; qualifyContracts fills the contracts in place, so a contract that
# could not be qualified is recognised afterwards by its unset conId.
# catch keeps a failing block from aborting the run - its contracts simply
# stay unqualified
for i in range(0, len(opts), blks):
    catch(lambda: ib.qualifyContracts(*opts[i: i+blks]))

qual_opts = [o for o in opts if o.conId]  # the successfully qualified contracts

df_opt['option'] = [o if o.conId else np.nan for o in opts] # unqualified options become nan

df_opt1 = df_opt.dropna().reset_index(drop=True) # drop rows with nan (includes unqualified options)

df_opt1 = df_opt1.assign(order=[Order(action='SELL', 
                                      totalQuantity=lot, orderType='MKT') 
                                for lot in df_opt1.lot]) # orders

# dividend rate of the underlying, 0 where none is known
df_opt1['divrate'] = df_opt1.symbol.map(df_und.divrate).fillna(0)

# get the volatilities
vfv = np.vectorize(get_vol)
df_opt1['volatility'] = vfv(df_opt1.symbol, df_opt1.dte)

#...get probability of profit (pop)
#__________________________________

#... Get risk-free rate from 91 day T-bills
rate_url = 'https://rbi.org.in/home.aspx'

# read every html table on the page; the fifth one (index 4) is used
li = pd.read_html(rate_url)
# name its first two columns so they can be addressed as Cat / Values
li_df = li[4].rename(columns = {0: 'Cat', 1: 'Values'})
# the Values entry of the row whose category is '91 day T-bills'
li_val = li_df.loc[li_df.Cat == '91 day T-bills', 'Values']
# parse the number out of the printed form of that selection: first printed
# line, text before the '%' sign, last space-separated token; then turn the
# percentage into a fraction
# NOTE(review): this depends on the page layout and on how pandas prints a Series
rate = float((str(li_val).split('\n')[0].split('%')[0].split(' ')[-1:])[0])/100

# same rate on every option row
df_opt1['rate'] = rate

#... Black-Scholes
# Ref: - https://ideone.com/fork/XnikMm - Brian Hyde

def get_bsm(undPrice, strike, dte, rate, volatility, divrate):
    ''' Gets Black Scholes call and put prices and N(d1)
    Args:
        (undPrice)   : Current price of the underlying in float
        (strike)     : Strike Price in float
        (dte)        : Days to expiration (turned into years with dte/365)
        (rate)       : Annual risk-free rate as a fraction in float
        (volatility) : Annual standard deviation of returns in float
        (divrate)    : Annual dividend rate as a fraction in float
    Returns:
        dict with keys 'bsmCall' and 'bsmPut' (prices rounded to 2 decimals)
        and 'bsmDelta' (N(d1), not rounded)
    '''
    def cdf(x):
        # cumulative standard normal distribution through the error function
        return (1+erf(x/sqrt(2)))/2

    # building blocks: volatility over the period and the two discount factors
    vol_root_t = sqrt(dte/365)*volatility
    div_discount = exp((-divrate*dte)/365)
    rate_discount = exp((-rate*dte)/365)

    d1 = (log(undPrice*div_discount/strike)+(rate+.5*(volatility**2))*dte/365)/vol_root_t
    d2 = d1-vol_root_t

    call_value = undPrice*div_discount*cdf(d1)-strike*rate_discount*cdf(d2)
    put_value = strike*rate_discount*cdf(-d2)-undPrice*div_discount*cdf(-d1)

    return {'bsmCall': round(call_value, 2),
            'bsmPut': round(put_value, 2),
            'bsmDelta': cdf(d1)}

# get bsm
bsm_rows = [catch(lambda: get_bsm(u, k, e, r, v, d)) 
            for u, k, e, r, v, d in zip(df_opt1.undPrice, df_opt1.strike, df_opt1.dte, 
                                        df_opt1.rate, df_opt1.volatility, df_opt1.divrate)]

# a row for which get_bsm failed (for example dte == 0 divides by zero) does
# not come back as a dict; it is replaced by an empty record so that the frame
# can still be built, stays aligned with df_opt1 row for row, and shows nan in
# the bsm columns of that row. Naming the columns keeps them present even when
# no row succeeded
df_bsm = pd.DataFrame([row if isinstance(row, dict) else {} for row in bsm_rows],
                      columns=['bsmCall', 'bsmPut', 'bsmDelta'])

# side by side with the option rows (both frames carry a 0..n-1 index)
df_opt2 = pd.concat([df_opt1, df_bsm], axis=1)

# theoretical price of the option's own right
df_opt2['bsmPrice'] = np.where(df_opt2.right == 'P', df_opt2.bsmPut, df_opt2.bsmCall)
# probability of profit: bsmDelta for puts, 1 - bsmDelta for calls
df_opt2['pop'] = np.where(df_opt2.right == 'C', 1-df_opt2.bsmDelta, df_opt2.bsmDelta)

# get option price
#___________________________

# get live option tickers
def get_price(contract):
    '''Gets a price for the contract from a live ticker
    Arg: (contract) as a qualified contract object with conId
    Returns: the last price when it is available, otherwise the close price'''

    ib.reqMktData(contract, '', snapshot=False, regulatorySnapshot=False) # request ticker stream
    tick = ib.ticker(contract)

    # poll until the close price has been filled in
    while True:
        if not np.isnan(tick.close):
            break
        ib.sleep(0.1)

    ib.cancelMktData(contract)

    # prefer last; fall back to close when last is nan
    return tick.close if np.isnan(tick.last) else tick.last

# one price per option, requested row by row in frame order
df_opt3 = df_opt2.assign(price=[get_price(opt) for opt in df_opt2.option])

#...get return-on-margins
#_________________________

# margin from whatiforders (very slowwwww)
# whatifs = [z for z in zip(df_opt3.option, df_opt3.order)]
# df_opt3 = df_opt3.insert(8, 'margin', [catch(lambda: float(ib.whatIfOrder(*w).initMarginChange)) for w in whatifs])

# margin from 5paisa: margin fraction of the symbol x lot x underlying price
df_opt3['margin'] = df_opt3.symbol.map(df_und.marginpct).mul(df_opt3.lot).mul(df_opt3.undPrice)

# return on margin: premium of one lot over margin, scaled by tradingdays / dte
df_opt3['rom'] = df_opt3.price.mul(df_opt3.lot).div(df_opt3.margin).mul(tradingdays).div(df_opt3.dte)

#...limit band
#_____________

# per symbol: (lowest low, highest high) taken from the tail of its ohlc rows.
# The low looks at the last int(maxdte*1.5) rows, the high at the last maxdte*2
# rows - the two windows differ in length.
# NOTE(review): assumes the rows of each symbol in df_ohlc are in date order,
# so that the tail holds the most recent bars - confirm
hilo = {s.symbol: (df_ohlc[df_ohlc.symbol == s.symbol][-int(maxdte*1.5):].low.min(), 
            df_ohlc[df_ohlc.symbol == s.symbol][-maxdte*2:].high.max()) 
 for s in contracts}

# symbols as rows, the two tuple members as columns
df_hilo = pd.DataFrame(hilo).T

df_hilo.columns=['lo', 'hi']

# the limit of a put is the symbol's lo, the limit of a call is the symbol's hi
df_opt3['limit'] = np.where(df_opt3.right == 'P', df_opt3.symbol.map(df_hilo.lo), df_opt3.symbol.map(df_hilo.hi))

# select those options which fall outside the limits
# (puts struck below lo, calls struck above hi), best return-on-margin first
df_opt4 = df_opt3[((df_opt3.right == 'P') & (df_opt3.strike < df_opt3.limit)) |
        ((df_opt3.right == 'C') & (df_opt3.strike > df_opt3.limit))].sort_values(by='rom', ascending=False)

Started to throttle requests
Stopped to throttle requests


In [None]:
# make the pickles & csv
# create the output folder first: the writes below do not create it, and a
# missing folder would otherwise fail only after the whole scan has run
os.makedirs(os.path.dirname(fspath) or '.', exist_ok=True)

df_ohlc.to_pickle(fspath+'ohlc.pkl')                   # daily bars of all underlyings
df_und.to_pickle(fspath+'underlying.pkl')              # underlying tickers with lot and margin
df_opt4.to_pickle(fspath+'selected_options.pkl')       # the selected options
df_opt4.to_csv(fspath+'selected_options.csv', index=False)  # same selection as csv, without the index column