# Refactoring exec_pool as a core engine
We are refactoring the execution of a contract pool by building a new `async exec_pool` function (implemented below as `executeAsync`).

`async exec_pool`:
1. processes sets of contracts to run specific algos 
2. with controlled concurrency 
3. with an option to produce df outputs
   - which provides the capability to checkpoint to a pickle file...
   - ... and thereby to `re-start` from near the point of failure


In [1]:
# Market key: selects the data sub-folder (MARKET.lower()) and the PORT entry
# in var.yml (MARKET.upper()); 'SNP' and 'NSE' are the values handled in this notebook.
MARKET = 'SNP'

In [2]:
import sys
import pathlib
import pandas as pd
import yaml
import asyncio

from ib_insync import IB, util, Option, MarketOrder
from typing import Callable, Coroutine

In [3]:
# Notebook-only setup: the branch below runs only inside a Jupyter (ZMQ) kernel
# and is skipped in an IDE or on the command line.
import IPython as ipy

if type(ipy.get_ipython()).__name__ == 'ZMQInteractiveShell':
    import nest_asyncio

    nest_asyncio.apply()
    util.startLoop()
    pd.options.display.max_columns = None  # show every DataFrame column

In [8]:
# Folder layout, and making the programs in the `asyncib` folder importable
cwd = pathlib.Path.cwd()  # directory python was started from
FSPATH = cwd / 'data'  # data files are stored here
LOGPATH = FSPATH  # log files share the data folder

IBPATH = cwd.parent.parent / 'asyncib'  # location of the ib programs

# sys.path holds strings, so compare and register the string form (once only)
if str(IBPATH) not in sys.path:
    sys.path.append(str(IBPATH))

IBDATAPATH = IBPATH / 'data' / MARKET.lower()

In [5]:
# local imports
from base import ohlcs, chains, unds, qualify, prices, margins

In [6]:
# Read HOST, PORT and CID from the yaml config kept next to the ib programs
with IBPATH.joinpath('var.yml').open() as fi:
    data = yaml.safe_load(fi)

HOST = data["COMMON"]["HOST"]
PORT = data[MARKET.upper()]["PORT"]  # the port is looked up per market
CID = data["COMMON"]["CID"]

# Route ib_insync logging to a file in the data folder (level 30 == logging.WARNING)
util.logToFile(FSPATH.joinpath('./engine.log'), level=30)

# The ``async exec_pool`` algo
### with concurrency control and post-processing checkpoints

In [12]:
def _contract_name(c) -> str:
    """Return the display / task name for a contract or a (contract, order) pair."""
    # The margin algo is fed (contract, order) tuples; other algos get bare contracts.
    contract = c if hasattr(c, 'symbol') else c[0]
    return (contract.symbol
            + contract.lastTradeDateOrContractMonth[-4:]
            + contract.right
            + str(contract.strike)
            + '..')


async def executeAsync(ib: "IB",
                       algo: Callable[..., Coroutine],  # coro name
                       cts: list, # list of contracts
                       post_process: Callable[[set, pathlib.Path, str], pd.DataFrame]=None, # If checkpoint is needed
                       FSPATH: pathlib.Path=None, # Necessary for post_process
                       CONCURRENT: int=40, # adjust to prevent overflows
                       TIMEOUT: float=None, # if None, no progress messages shown
                       OP_FILENAME: str='', # output file name
                       **kwargs, # keyword inputs for algo
                       ):
    """Run `algo(ib, contract, **kwargs)` for every item of `cts` in batches.

    At most `CONCURRENT` tasks are started at a time; the next batch starts
    once the current one has completed. After every `asyncio.wait` round the
    finished tasks are added to the result set and, when `post_process` is
    given, it is called as `post_process(results, FSPATH, OP_FILENAME)` so the
    caller can checkpoint intermediate output.

    Args:
        ib: connection object handed through to `algo` as first argument.
        algo: coroutine function called as `algo(ib, c, **kwargs)`.
        cts: contracts, or (contract, order) pairs; any iterable is accepted.
        post_process: optional checkpoint callback (see above).
        FSPATH: folder passed to `post_process`.
        CONCURRENT: maximum number of tasks per batch; must be >= 1.
        TIMEOUT: seconds passed to `asyncio.wait`; when falsy the wait blocks
            until the batch is complete and no progress message is printed.
        OP_FILENAME: file name passed to `post_process`.
        **kwargs: forwarded to `algo`.

    Returns:
        The value of the last `post_process` call, or - without
        `post_process` - the set of finished `asyncio.Task` objects.

    Raises:
        ValueError: if `CONCURRENT` is smaller than 1 (would never progress).
    """
    if CONCURRENT < 1:
        raise ValueError(f'CONCURRENT must be at least 1, got {CONCURRENT}')

    contracts = list(cts)  # materialise once; avoids truth-testing arrays
    total = len(contracts)
    results = set()

    # Nothing to run: return the same kind of object a normal run would
    if not contracts:
        return post_process(results, FSPATH, OP_FILENAME) if post_process else results

    names = [_contract_name(c) for c in contracts]

    # Progress is tracked by input position, not by name, so contracts whose
    # display names collide are still each executed exactly once.
    pending = list(range(total))
    output = results

    while pending:

        # Start the next batch, limited by concurrency
        batch = {asyncio.create_task(algo(ib, contracts[i], **kwargs), name=names[i]): i
                 for i in pending[:CONCURRENT]}
        tasks = set(batch)

        # Wait until the whole batch is done, waking up every TIMEOUT seconds
        while tasks:

            done, tasks = await asyncio.wait(tasks,
                                             timeout=TIMEOUT,
                                             return_when=asyncio.ALL_COMPLETED)

            # Drop the finished positions from the pending list
            finished = {batch[t] for t in done}
            pending = [i for i in pending if i not in finished]

            results.update(done)

            # Checkpoint the results gathered so far
            output = post_process(results, FSPATH, OP_FILENAME) if post_process else results

            if TIMEOUT:
                done_names = [t.get_name() for t in done]
                print(f'\nCompleted {done_names[:2]} {len(results)} out of {total} .. remaining {[names[i] for i in pending[:2]]}')

    return output


def save_df(results: set, FSPATH: pathlib.Path, file_name: str='') -> pd.DataFrame:
    """Concatenate the DataFrames held by finished tasks and optionally pickle them.

    Args:
        results: objects exposing `.result()` (e.g. finished asyncio tasks).
        FSPATH: folder the pickle is written to; only used when `file_name` is set.
        file_name: pickle file name; an empty string means "do not write".

    Returns:
        One DataFrame with a fresh index, or an empty DataFrame when there is
        nothing to concatenate yet.
    """
    # Filter on the task *result*: a task object itself is always truthy,
    # and pd.concat refuses an input made only of None.
    frames = [frame for frame in (r.result() for r in results) if frame is not None]

    if not frames:
        return pd.DataFrame([])  # results are not yet ready!

    df = pd.concat(frames, ignore_index=True)
    if file_name:
        df.to_pickle(FSPATH.joinpath(file_name))
    return df

## Testing `async exec_pool` algo

In [None]:
# Load the symbol/lot table and pull out the distinct underlying contracts
df_symlots = pd.read_pickle(IBDATAPATH / 'df_symlots.pkl')
und_cts = df_symlots['contract'].unique()

In [None]:
## Uncomment for !!! DATA LIMITING underlying contracts
# und_cts = und_cts[:10]

#### Get the OHLCs

In [None]:
%%time
with IB().connect(HOST, PORT, CID) as ib:

    # Get the OHLCs of the underlyings; DURATION and OHLC_DELAY are forwarded
    # to the `ohlcs` algo, and the empty file name means nothing is pickled.
    df_ohlcs = ib.run(
        executeAsync(
            ib=ib,
            algo=ohlcs,
            cts=und_cts,
            post_process=save_df,
            FSPATH=FSPATH,
            OP_FILENAME='',
            CONCURRENT=20,
            TIMEOUT=10.0,
            DURATION=365,
            OHLC_DELAY=20,
        )
    )

### Get underlyings

In [None]:
%%time
with IB().connect(HOST, PORT, CID) as ib:

    # Run the `unds` algo over the underlyings and checkpoint to df_unds.pkl;
    # FILL_DELAY is forwarded to the algo.
    df_unds = ib.run(
        executeAsync(
            ib=ib,
            algo=unds,
            cts=und_cts,
            post_process=save_df,
            FSPATH=FSPATH,
            OP_FILENAME='df_unds.pkl',
            CONCURRENT=40,
            TIMEOUT=2.0,
            FILL_DELAY=8,
        )
    )

### Make the chains

In [None]:
%%time
# Run the `chains` algo over the underlyings and checkpoint to df_chains.pkl
with IB().connect(HOST, PORT, CID) as ib:
    df_chains = ib.run(
        executeAsync(
            ib=ib,
            algo=chains,
            cts=und_cts,
            post_process=save_df,
            FSPATH=FSPATH,
            OP_FILENAME='df_chains.pkl',
            CONCURRENT=44,
            TIMEOUT=5,
        )
    )

### Qualify ready-made options

In [None]:
# Rebuild unqualified Option contracts from the saved option table
opts = pd.read_pickle(IBDATAPATH.joinpath('df_qopts.pkl'))

# Zip only the four option columns: zip stops at its shortest input, so an
# extra counter that is one element short would silently drop the last row.
opts_cts = [Option(symbol=s, lastTradeDateOrContractMonth=e,
                   strike=k, right=r, exchange='SMART', conId='')
            for s, e, k, r in zip(opts.symbol, opts.expiry, opts.strike, opts.right)]
len(opts_cts)

In [None]:
%%time
# Qualify the rebuilt option contracts (empty file name: no pickle is written)
with IB().connect(HOST, PORT, CID) as ib:
    df_qopts = ib.run(
        executeAsync(
            ib=ib,
            algo=qualify,
            cts=opts_cts,
            post_process=save_df,
            FSPATH=FSPATH,
            OP_FILENAME='',
            CONCURRENT=200,
            TIMEOUT=2.0,
        )
    )

### Prepare and qualify fresh set of options (MEGA)
* Run this code to qualify the ENTIRE set of options available in the market, for ALL underlyings

In [None]:
## Assemble the option contracts that are to be qualified

# One put and one call per (symbol, expiry, strike) row of the chains
puts = [Option(s, e, k, 'P', 'SMART')
        for s, e, k in zip(df_chains.symbol, df_chains.expiry, df_chains.strike)]
calls = [Option(s, e, k, 'C', 'SMART')
         for s, e, k in zip(df_chains.symbol, df_chains.expiry, df_chains.strike)]
cts = puts + calls

# Tabulate the first six contract fields and keep the contract object alongside
df_cts = (
    util.df(cts)
    .iloc[:, :6]
    .rename(columns={'lastTradeDateOrContractMonth': 'expiry'})
    .assign(contract=cts)
)

df_cts.conId = None # Replace `conId` as None to track completeness

# Shuffle (!!! TO BE DELETED in live run), then renumber: the index tracks progress
df_cts = df_cts.sample(len(df_cts)).reset_index(drop=True)

# *** Done once only in the TEST ***!!!
df_cts.to_pickle(FSPATH.joinpath('all_raw_opts.pkl'))

df_cts = pd.read_pickle(FSPATH.joinpath('all_raw_opts.pkl'))

# Summary of what was built
s = sorted(df_cts.symbol.unique())
print(f"# of symbols: {len(s)}, # of option contracts: {len(df_cts)}, # no of expiries: {len(df_cts.expiry.unique())}")

In [None]:
%%time

# !!! DATA LIMITER - 500 contracts, drawn at random from the raw option table
fresh_opts = df_cts['contract'].sample(n=500).to_list()

# Qualify the sample and checkpoint to df_qualopts1.pkl
with IB().connect(HOST, PORT, CID) as ib:
    df_qopts = ib.run(
        executeAsync(
            ib=ib,
            algo=qualify,
            cts=fresh_opts,
            post_process=save_df,
            FSPATH=FSPATH,
            OP_FILENAME='df_qualopts1.pkl',
            CONCURRENT=200,
            TIMEOUT=5.0,
        )
    )

### Get the price of qualified options

In [None]:
%%time
# Reload the qualified options from their checkpoint file
df_qopts = pd.read_pickle(FSPATH / 'df_qualopts1.pkl')

# Run the `prices` algo on them (empty file name: no pickle is written)
with IB().connect(HOST, PORT, CID) as ib:
    df_price = ib.run(
        executeAsync(
            ib=ib,
            algo=prices,
            cts=df_qopts['contract'].to_list(),
            post_process=save_df,
            FSPATH=FSPATH,
            OP_FILENAME='',
            CONCURRENT=200,
            TIMEOUT=10.0,
        )
    )

### Prepare cos for margins from qualified options

In [9]:
# Load the lot table and the qualified options
df_symlots = pd.read_pickle(IBDATAPATH.joinpath('df_symlots.pkl'))
df_raw_opts = pd.read_pickle(IBDATAPATH.joinpath('df_qopts.pkl'))

# MARKET is compared case-insensitively, as in the path and port lookups
if MARKET.upper() == 'NSE':
    # Lots are looked up per symbol and expiry month ('YYYY-MM')
    df_raw_opts['expiryM'] = df_raw_opts.expiry.apply(
        lambda d: d[:4] + '-' + d[4:6])
    cols1 = ['symbol', 'expiryM']
    df_raw_opts = df_raw_opts.set_index(cols1).join(
        df_symlots[cols1 + ['lot']].set_index(cols1)).reset_index()
    # Keyword form: the positional `axis` argument is gone in pandas 2.x
    df_raw_opts = df_raw_opts.drop(columns='expiryM')
else:
    df_raw_opts['lot'] = 100

# ... build cos (contract, orders)
opts = df_raw_opts.contract.to_list()

# SNP orders are placed for a quantity of one; other markets use the lot size
if MARKET.upper() == 'SNP':
    orders = [MarketOrder('SELL', 1.0) for _ in df_raw_opts.lot]
else:
    orders = [MarketOrder('SELL', lot) for lot in df_raw_opts.lot]

cos = list(zip(opts, orders))

In [10]:
# Keep only the first 500 (contract, order) pairs for this test run
cos = cos[:500] # !!! DATA LIMITER

### Get option margins

In [13]:
%%time
# Run the `margins` algo on the (contract, order) pairs; checkpoint to df_margins.pkl
with IB().connect(HOST, PORT, CID) as ib:
    df_margins = ib.run(
        executeAsync(
            ib=ib,
            algo=margins,
            cts=cos,
            post_process=save_df,
            FSPATH=FSPATH,
            OP_FILENAME='df_margins.pkl',
            CONCURRENT=200,
            TIMEOUT=5.0,
        )
    )


Completed [] 0 out of 500 .. remaining ['AIG1127P20.0..', 'XLF1030C30.5..']

Completed ['BLK1106P535.0..', 'AZO1030P990.0..'] 200 out of 500 .. remaining ['BLK1106P532.5..', 'AKAM1204C119.0..']

Completed ['SWK1120C202.5..', 'BLK1106P527.5..'] 400 out of 500 .. remaining ['STX1204C65.0..', 'FFIV1106P65.0..']

Completed ['KLAC1204P115.0..', 'NOC1113P240.0..'] 500 out of 500 .. remaining []
Wall time: 21.8 s


In [15]:
df_margins

Unnamed: 0,secType,conId,symbol,expiry,strike,right,margin,lot,comm
0,OPT,447050536,BLK,20201106,535.00,P,5947.38,100,2.19
1,OPT,444728587,AZO,20201030,990.00,P,9819.86,100,2.19
2,OPT,451151328,CAT,20201127,85.00,P,-15.67,100,1.59
3,OPT,449742764,BLK,20201106,790.00,C,909.33,100,2.19
4,OPT,444728207,AZO,20201030,1000.00,P,10577.91,100,2.19
...,...,...,...,...,...,...,...,...,...
495,OPT,451788650,STX,20201204,60.00,C,223.37,100,1.59
496,OPT,448435625,HD,20201113,220.00,P,1967.81,100,1.59
497,OPT,452871518,DPZ,20201127,235.00,P,1026.47,100,2.19
498,OPT,447394142,FFIV,20201106,105.00,P,874.34,100,2.19


### Get option prices

In [None]:
%%time
# Run the `prices` algo on the option contracts; checkpoint to df_optprices.pkl
with IB().connect(HOST, PORT, CID) as ib:
    df_price = ib.run(
        executeAsync(
            ib=ib,
            algo=prices,
            cts=opts,
            post_process=save_df,
            FSPATH=FSPATH,
            OP_FILENAME='df_optprices.pkl',
            CONCURRENT=200,
            TIMEOUT=10.0,
        )
    )

#### Verifying data integrity

In [None]:
# Keep only the rows that actually carry a price
df = df_price[df_price.price.notnull()]

In [None]:
# Attach margin, lot and commission to the priced options, matched on conId
df1 = (
    df.set_index('conId')
    .join(df_margins[['conId', 'margin', 'lot', 'comm']].set_index('conId'))
    .reset_index()
)