# Deeper look to fix `nakeds.py`
* Why is it not estimating sdMult properly?
* Why is it not choosing the `edge` trades?

In [1]:
# Market code: lower-cased to pick the data folder, upper-cased to select the IB parameters
MARKET = 'NSE'

In [2]:
import sys
import pathlib
import numpy as np
import pandas as pd
import yaml
import asyncio

from ib_insync import IB, util, Option, MarketOrder, Contract
from typing import Callable, Coroutine, Union

In [3]:
# Notebook-only setup: the body runs only under a Jupyter (ZMQ) kernel
import IPython as ipy

if ipy.get_ipython().__class__.__name__ == 'ZMQInteractiveShell':
    # Patch asyncio for nested use, then start ib_insync's loop integration
    import nest_asyncio

    nest_asyncio.apply()
    util.startLoop()

    # Do not truncate columns when a DataFrame is displayed
    pd.set_option('display.max_columns', None)

    THIS_FOLDER = ''  # Dummy for jupyter notebook's current folder
    BAR_FORMAT = "{l_bar}{bar:-20}{r_bar}"

In [4]:
# Resolve data / log locations and make the `asyncib` programs importable
cwd = pathlib.Path.cwd()  # working directory from where python was initiated

DATAPATH = cwd / 'data' / MARKET.lower()  # path to store data files
LOGFILE = cwd / THIS_FOLDER / 'data' / 'log' / 'temp.log'  # path to store log files

# ib programs live in `asyncib`, two levels above the working directory
IBPATH = cwd.parent.parent / 'asyncib'
IBDATAPATH = IBPATH / 'data' / MARKET.lower()

# sys.path holds strings, so compare and append the string form of IBPATH
if str(IBPATH) not in sys.path:
    sys.path.append(str(IBPATH))

In [5]:
# Connection settings (host, port, client id) for the selected market
from engine import Vars

ibp = Vars(MARKET.upper())  # IB Parameters from var.yml

HOST = ibp.HOST
PORT = ibp.PORT
CID = ibp.CID

In [6]:
# Load every pickle in DATAPATH into a notebook variable named after the file
from os import listdir
fs = listdir(DATAPATH)

files = [f for f in fs if f.endswith('.pkl')]
for f in files:
    # Bind through globals() rather than exec(): a file name is data, not code to run.
    # NOTE(review): unpickling can execute arbitrary code - only load trusted files.
    globals()[f.split('.')[0]] = pd.read_pickle(DATAPATH.joinpath(f))

# Show the loaded file names in sorted order
np.sort(np.array(files))

array(['df_chains.pkl', 'df_nakeds.pkl', 'df_ohlcs.pkl',
       'df_opt_margins.pkl', 'df_opt_prices.pkl', 'df_opts.pkl',
       'df_symlots.pkl', 'df_unds.pkl', 'dfrq.pkl', 'qopt_rejects.pkl',
       'qopts.pkl'], dtype='<U18')

## Let's write some code

### Which symbols are the most volatile?

In [None]:
# Underlyings ranked by `iv`, highest first, restricted to the columns of interest
cols1 = ['symbol', 'undPrice', 'margin', 'high52week', 'low52week', 'histVolatility', 'iv']
df_unds[cols1].sort_values(by='iv', ascending=False)

### Pick up a symbol

In [None]:
# Symbol whose options are inspected in the cells that follow
symbol = 'NIFTY50'

In [None]:
# Options of the chosen symbol, limited to the columns used in this analysis
cols2 = [
    'symbol', 'strike', 'undPrice', 'dte', 'right', 'und_iv',
    'bid', 'ask', 'close', 'last', 'margin', 'iv',
    'intrinsic', 'timevalue', 'prob', 'sdMult', 'price', 'rom',
]
df = df_opts.loc[df_opts.symbol == symbol, cols2]

In [None]:
# Display the sdMult threshold applied to calls by the filter in the next cell
ibp.CALLSTDMULT

In [None]:
# Keep calls struck above the underlying price and puts struck below it
m1 = (df.right.eq('C') & df.strike.gt(df.undPrice)) | \
    (df.right.eq('P') & df.strike.lt(df.undPrice))

# Keep options whose sdMult exceeds the threshold configured for their right
m2 = (df.right.eq('C') & df.sdMult.gt(ibp.CALLSTDMULT)) | \
    (df.right.eq('P') & df.sdMult.gt(ibp.PUTSTDMULT))

# Apply both masks and renumber the surviving rows from 0
df = df.loc[m1 & m2].reset_index(drop=True)

### `rom` for the best `sdMult`

In [None]:
# Order the options by dte (ascending), right (descending) and sdMult (ascending).
# Sorting directly replaces the earlier groupby().cumcount() detour, whose result
# was only ever used for its (sorted) index.
df = df.sort_values(['dte', 'right', 'sdMult'], ascending=[True, False, True])

# Highest rom within each (symbol, dte, right) group, broadcast back to every row.
# The string 'max' is passed instead of the builtin `max`, which recent pandas
# deprecates as a groupby.transform argument.
best_rom = df.groupby(['symbol', 'dte', 'right']).rom.transform('max')

# Option with the largest rom for the dte and right (ties are all kept)
df_max_rom = df[df.rom == best_rom]

In [None]:
# Display the max-rom picks, highest rom first
df_max_rom.sort_values('rom', ascending=False)

In [None]:
# Build a {symbol: remq} dictionary from dfrq, with entries ordered by symbol
dfrq.sort_values('symbol').set_index('symbol').remq.to_dict()

## For `all` symbols
### Let us try this using sort, filter and groupby sequence

In [None]:
# Read all opts (to reset from single opt above)
df = df_opts
cols2 = ['symbol', 'strike', 'undPrice', 'dte', 'right', 'und_iv', 'bid', 'ask', 'close', 'last', 'margin', 'iv', 'intrinsic', 'timevalue', 'prob', 'sdMult', 'price', 'rom']

# ...keep calls struck above the underlying price and puts struck below it
m1 = ((df.right == 'C') & (df.strike > df.undPrice)) | \
    ((df.right == 'P') & (df.strike < df.undPrice))

# ...keep options whose sdMult exceeds the threshold configured for their right
m2 = ((df.right == 'C') & (df.sdMult > ibp.CALLSTDMULT)) | \
    ((df.right == 'P') & (df.sdMult > ibp.PUTSTDMULT))

df = df[m1 & m2].reset_index(drop=True)

# Order by dte (ascending), right (descending) and sdMult (ascending).
# Sorting directly replaces the earlier groupby().cumcount() detour, whose result
# was only ever used for its (sorted) index.
df = df.sort_values(['dte', 'right', 'sdMult'], ascending=[True, False, True])

# Highest rom within each (symbol, dte, right) group, broadcast back to every row.
# The string 'max' is passed instead of the builtin `max`, which recent pandas
# deprecates as a groupby.transform argument.
best_rom = df.groupby(['symbol', 'dte', 'right']).rom.transform('max')

# Option with the largest rom for the dte and right (ties kept), best rom first
df_max_rom = df[df.rom == best_rom].sort_values('rom', ascending=False)

In [None]:
# Number of rows currently held in df
len(df)

In [None]:
# Display the max-rom picks, restricted to the cols2 columns
df_max_rom[cols2]

In [None]:
# * SORT
# ... symbol and dte ascending, right descending, sdMult ascending
df1 = df_opts.sort_values(
    by=['symbol', 'dte', 'right', 'sdMult'],
    ascending=[True, True, False, True],
)

# * FILTER
# ... calls struck above the underlying price, puts struck below it
f1 = (df1.right.eq('C') & df1.strike.gt(df1.undPrice)) | \
    (df1.right.eq('P') & df1.strike.lt(df1.undPrice))

# ... sdMult above the threshold configured for the option's right
f2 = (df1.right.eq('C') & df1.sdMult.gt(ibp.CALLSTDMULT)) | \
    (df1.right.eq('P') & df1.sdMult.gt(ibp.PUTSTDMULT))

# Rows passing both filters, renumbered from 0
df2 = df1.loc[f1 & f2].reset_index(drop=True)



In [None]:
# Calls struck above the underlying price, puts struck below it
m1 = (df_opts.right.eq('C') & df_opts.strike.gt(df_opts.undPrice)) | \
    (df_opts.right.eq('P') & df_opts.strike.lt(df_opts.undPrice))

# sdMult above the threshold configured for the option's right
m2 = (df_opts.right.eq('C') & df_opts.sdMult.gt(ibp.CALLSTDMULT)) | \
    (df_opts.right.eq('P') & df_opts.sdMult.gt(ibp.PUTSTDMULT))

# Filter, sort by dte / right / sdMult, then keep the first row of each symbol
df1 = (
    df_opts.loc[m1 & m2]
    .sort_values(by=['dte', 'right', 'sdMult'], ascending=[True, False, True])
    .groupby('symbol')
    .head(1)
)

### Experiments with sort and groupby

In [None]:
# Build a reproducible 50-row toy frame for the sort / groupby experiments
import random
import pandas as pd

random.seed(999)
sz = 50

qty = dict(one=1, two=2, three=3)

# Lazy generators: values are drawn only when the DataFrame below consumes them
thing = (random.choice(('one', 'two', 'three')) for _ in range(sz))
order = (random.choice(('ascending', 'descending')) for _ in range(sz))
value = (random.randrange(0, 101) for _ in range(sz))

df = pd.DataFrame(dict(thing=thing, order=order, value=value))

In [None]:
# Flip the sign of `value` on every row whose order is 'descending'
df.loc[df.order.eq('descending'), 'value'] *= -1

In [None]:
# Preview the first rows of df
df.head()

In [None]:
# Sort by value, then number the rows within each (thing, order) group starting at 0
df.sort_values('value').groupby(['thing', 'order']).cumcount()