# Refactoring exec_pool as a core engine
We are refactoring execution of a contract pool by building a new `async exec_pool` function.

`async exec_pool`:
1. processes sets of contracts to run specific algos 
2. with controlled concurrency 
3. with an option to produce df outputs
   - which provides the capability to checkpoint to a pickle file...
   - ... thereby `re-start` from near a point of failure


In [2]:
MARKET = 'NSE'

In [3]:
import sys
import os
import pathlib
import pandas as pd
import yaml
import asyncio

from ib_insync import IB, util, Option, MarketOrder, Contract
from typing import Callable, Coroutine, Union

In [4]:
# ** LOCAL IMPORTS
from engine import ohlc, chain, qualify, und, Vars

In [5]:
# ** JUPYTER SPECIFIC
# .... Will be ignored in IDE / command-lines
import IPython as ipy
if ipy.get_ipython().__class__.__name__ == 'ZMQInteractiveShell':
    import nest_asyncio
    nest_asyncio.apply()
    util.startLoop()
    pd.options.display.max_columns = None

In [6]:
# ** SET PATHS
cwd = pathlib.Path.cwd() # working directory from where python was initiated

# ...set up data and log path for local (learn)
THIS_FOLDER = '' # ! DUMMY setup for Jupyter. In .py file it is ``os.path.dirname(os.path.abspath(__file__))``
LOGPATH = pathlib.Path.cwd().joinpath(THIS_FOLDER, "data", "log")
DATAPATH = pathlib.Path.cwd().joinpath(THIS_FOLDER, "data", MARKET.lower())

# ...get capability to import programs from `asyncib` folder
IBPATH = cwd.parent.parent.joinpath('asyncib') # where ib programs are stored
if str(IBPATH) not in sys.path:  # Convert it to string!
    sys.path.append(str(IBPATH))
    
IBDATAPATH = IBPATH.joinpath('data', MARKET.lower())

In [7]:
# ** SET VARIABLES & LOGS
ibp = Vars(MARKET.upper())

HOST, PORT, CID = ibp.HOST, ibp.PORT, ibp.CID

LOGFILE = LOGPATH.joinpath(MARKET.lower() + "_base.log")
util.logToFile(path=LOGFILE, level=30)
with open(LOGFILE, "w"):
    pass

# The ``async exec_pool`` algo
### with concurrency control and post-processing checkpoints

In [22]:
def remains(cts):
    """Generates tuples for tracking remaining contracts"""
    if isinstance(cts, Contract): # Single contract
        remaining = cts, None # Convert to tuple with None
        
    elif isinstance(cts, pd.Series): # Contracts given as a series
        if len(cts) == 1: # Single contract given as a series
            remaining = list(cts)[0], None # Convert to a tuple with None
        else:
            remaining = ((c, None) for c in cts)
            
    elif isinstance(cts, list): # List of Contracts or (c, o) tuples
        if len(cts) == 1: # Single contract or (c, o)
            if isinstance(cts[0], tuple): # (c, o) tuple
                remaining = tuple(cts[0])
            else: # Single contract
                remaining = cts[0].iloc[0], None
        else: # Multiple contracts or (c, o)
            if isinstance(cts[0], tuple): # (c, o) tuples
                remaining = tuple((c, o) for c, o in cts)
            else: # Multiple contracts
                remaining = tuple((c, None) for c in cts)
    else:
        remaining = None
        
    return remaining

async def executeAsync(ib: IB(),
                       algo: Callable[..., Coroutine],  # coro name
                       cts: Union[Contract, pd.Series, list, tuple], # list of contracts
                       post_process: Callable[[set, pathlib.Path, str], pd.DataFrame]=None, # If checkpoint is needed
                       FSPATH: pathlib.Path=None, # Necessary for post_process
                       CONCURRENT: int=40, # adjust to prevent overflows
                       TIMEOUT: None=None, # if None, no progress messages shown
                       OP_FILENAME: str='', # output file name
                       **kwargs, # keyword inputs for algo
                       ):
    
    tasks = set()
    results = set()
    remaining = remains(cts)
    
    # Determine unique names for tasks
    ct_name="c[0].symbol+c[0].lastTradeDateOrContractMonth[-4:]+c[0].right+str(c[0].strike)+'..'"
    
    """try:
        remaining[0].symbol
        
    except AttributeError: # It is a (contract, order) tuple for margin algo!
        ct_name="c[0].symbol+c[0].lastTradeDateOrContractMonth[-4:]+c[0].right+str(c[0].strike)+'..'"
        
    else: # for all algos, except margin algo
        ct_name="c.symbol+c.lastTradeDateOrContractMonth[-4:]+c.right+str(c.strike)+'..'"""

    # Get the results
    while len(remaining):
    
        # Tasks limited by concurrency
        if len(remaining) <= CONCURRENT:
            print([c[0].symbol+c[0].lastTradeDateOrContractMonth[-4:]+c[0].right+str(c[0].strike)+'..' for c in remaining]) # !!!TEMPORARY
            tasks.update(asyncio.create_task(algo(ib, c, **kwargs), name=eval(ct_name)) for c in remaining)
        else:
            tasks.update(asyncio.create_task(algo(ib, c, **kwargs), name=eval(ct_name)) for c in remaining[:CONCURRENT])
            
        print(f"\nTasks: {tasks}\n") # !!! TEMPORARY

        # Execute tasks
        while len(tasks):

            done, tasks = await asyncio.wait(tasks,
                                            timeout=TIMEOUT,
                                            return_when=asyncio.ALL_COMPLETED)

            # Remove dones from remaining
            done_names = [d.get_name() for d in done]
            remaining = [c for c in remaining if eval(ct_name) not in done_names]
            
            # Update results and checkpoint
            results.update(done)
            
            # Checkpoint the results
            if post_process:
                output = post_process(results, FSPATH, OP_FILENAME)
            else:
                output = results
            
            if TIMEOUT:
                print(f'\nCompleted {done_names[:2]} {len(results)} out of {len(cts)} .. remaining {[eval(ct_name) for c in remaining][:2]}')
    
    return output


def save_df(results: set, FSPATH: pathlib.Path, file_name: str='') -> pd.DataFrame():

    """if results:
        df = pd.concat([r.result() for r in results if r], ignore_index=True)
        if file_name:
            df.to_pickle(FSPATH.joinpath(file_name))
    else:
        df = pd.DataFrame([]) # results are not yet ready!
    return df"""
    
    print(results) #!!! TEMPORARY

## Testing `async exec_pool` algo

In [23]:
# Get symlots
df_symlots = pd.read_pickle(DATAPATH.joinpath('df_symlots.pkl'))

und_cts = df_symlots.contract.unique()

In [24]:
# Uncomment for !!! DATA LIMITING underlying contracts
und_cts = list(und_cts[:4])
und_cts

[Contract(secType='IND', conId=56994300, symbol='BANKNIFTY', exchange='NSE', currency='INR', localSymbol='BANKNIFTY'),
 Contract(secType='IND', conId=51497778, symbol='NIFTY50', exchange='NSE', currency='INR', localSymbol='NIFTY50'),
 Contract(secType='STK', conId=64769335, symbol='BHARTIART', exchange='NSE', primaryExchange='NSE', currency='INR', localSymbol='BHARTIARTL', tradingClass='BHARTIART'),
 Contract(secType='STK', conId=56986798, symbol='ADANIENT', exchange='NSE', primaryExchange='NSE', currency='INR', localSymbol='ADANIENT', tradingClass='ADANIENT')]

#### Get the OHLCs

In [27]:
%%time
with IB().connect(HOST, PORT, CID) as ib:
    
    # Get the underlyings
    df_ohlcs = ib.run(executeAsync(ib=ib, algo=ohlc, cts=und_cts, 
                                  CONCURRENT=20, TIMEOUT=10.0, 
                                  post_process=save_df, FSPATH=DATAPATH, OP_FILENAME='',
                                  **{'DURATION': 365, 'OHLC_DELAY': 20},
                                  ))

['BANKNIFTY0.0..', 'NIFTY500.0..', 'BHARTIART0.0..', 'ADANIENT0.0..']

Tasks: {<Task pending name='BANKNIFTY0.0..' coro=<ohlc() running at C:\Users\User\Documents\Business\Projects\learn\ib\engine.py:197>>, <Task pending name='BHARTIART0.0..' coro=<ohlc() running at C:\Users\User\Documents\Business\Projects\learn\ib\engine.py:197>>, <Task pending name='NIFTY500.0..' coro=<ohlc() running at C:\Users\User\Documents\Business\Projects\learn\ib\engine.py:197>>, <Task pending name='ADANIENT0.0..' coro=<ohlc() running at C:\Users\User\Documents\Business\Projects\learn\ib\engine.py:197>>}

{<Task finished name='BANKNIFTY0.0..' coro=<ohlc() done, defined at C:\Users\User\Documents\Business\Projects\learn\ib\engine.py:197> exception=AttributeError("'tuple' object has no attribute 'includeExpired'")>, <Task finished name='BHARTIART0.0..' coro=<ohlc() done, defined at C:\Users\User\Documents\Business\Projects\learn\ib\engine.py:197> exception=AttributeError("'tuple' object has no attribute 'includ

In [29]:
[MarketOrder('SELL', 100)] * 15

[MarketOrder(action='SELL', totalQuantity=100),
 MarketOrder(action='SELL', totalQuantity=100),
 MarketOrder(action='SELL', totalQuantity=100),
 MarketOrder(action='SELL', totalQuantity=100),
 MarketOrder(action='SELL', totalQuantity=100),
 MarketOrder(action='SELL', totalQuantity=100),
 MarketOrder(action='SELL', totalQuantity=100),
 MarketOrder(action='SELL', totalQuantity=100),
 MarketOrder(action='SELL', totalQuantity=100),
 MarketOrder(action='SELL', totalQuantity=100),
 MarketOrder(action='SELL', totalQuantity=100),
 MarketOrder(action='SELL', totalQuantity=100),
 MarketOrder(action='SELL', totalQuantity=100),
 MarketOrder(action='SELL', totalQuantity=100),
 MarketOrder(action='SELL', totalQuantity=100)]

### Get underlyings

In [None]:
%%time
with IB().connect(HOST, PORT, CID) as ib:
    
    # Get the underlyings
    df_unds = ib.run(executeAsync(ib=ib, algo=unds, cts=und_cts, 
                                  CONCURRENT=40, TIMEOUT=2.0, 
                                  post_process=save_df, FSPATH=DATAPATH, OP_FILENAME='df_unds.pkl',
                                  **{'FILL_DELAY': 8},
                                  ))

### Make the chains

In [None]:
%%time
# Make the chains
with IB().connect(HOST, PORT, CID) as ib:
    df_chains = ib.run(executeAsync(ib=ib, algo=chains, cts=und_cts,
                                  CONCURRENT=44, TIMEOUT=5,
                                  post_process=save_df, FSPATH=FSPATH, OP_FILENAME='df_chains.pkl',
                                  ))

### Qualify ready-made options

In [None]:
opts = pd.read_pickle(IBDATAPATH.joinpath('df_qopts.pkl'))
opts_cts = [Option(symbol=s, lastTradeDateOrContractMonth=e, 
                   strike=k, right=r, exchange='SMART', conId='') 
            for s, e, k, r, cid in zip(opts.symbol, opts.expiry, opts.strike, opts.right, range(1, len(opts)))]
len(opts_cts)

In [None]:
%%time
with IB().connect(HOST, PORT, CID) as ib:
    df_qopts = ib.run(executeAsync(ib=ib, algo=qualify, cts=opts_cts, 
                                  CONCURRENT=200, TIMEOUT=2.0,
                                  post_process=save_df, FSPATH=FSPATH, OP_FILENAME='',))

### Prepare and qualify fresh set of options (MEGA)
* Run this code if the ENTIRE set of options available in the market for ALL options

In [None]:
## Let us pick assemble the contracts to get our qualifications

# Build the option contracts

puts = [Option(s, e, k, 'P', 'SMART')
        for s, e, k
        in zip(df_chains.symbol, df_chains.expiry, df_chains.strike)]

calls = [Option(s, e, k, 'C', 'SMART')
        for s, e, k
        in zip(df_chains.symbol, df_chains.expiry, df_chains.strike)]

cts = puts + calls

df_cts = util.df(cts).iloc[:, :6].\
            rename(columns={'lastTradeDateOrContractMonth': 'expiry'}).\
                assign(contract=cts)
                
df_cts.conId = None # Replace `conId` as None to track completeness

df_cts = df_cts.sample(len(df_cts)) # !!! TO BE DELETED in live run
df_cts = df_cts.reset_index(drop=True) # Index used to track progress

# *** Done once only in the TEST ***!!!
df_cts.to_pickle(FSPATH.joinpath('all_raw_opts.pkl'))

df_cts = pd.read_pickle(FSPATH.joinpath('all_raw_opts.pkl'))

s = sorted(df_cts.symbol.unique())
print(f"# of symbols: {len(s)}, # of option contracts: {len(df_cts)}, # no of expiries: {len(df_cts.expiry.unique())}")

In [None]:
%%time

fresh_opts = df_cts.contract.sample(500).to_list() # !!! DATA LIMITER - 500 contracts
with IB().connect(HOST, PORT, CID) as ib:
    df_qopts = ib.run(executeAsync(ib=ib, algo=qualify, cts=fresh_opts, 
                                  CONCURRENT=200, TIMEOUT=5.0,
                                  post_process=save_df, FSPATH=FSPATH, OP_FILENAME='df_qualopts1.pkl',))

### Get the price of qualified options

In [None]:
%%time
df_qopts = pd.read_pickle(FSPATH.joinpath('df_qualopts1.pkl'))

with IB().connect(HOST, PORT, CID) as ib:
    df_price = ib.run(executeAsync(ib=ib, algo=prices, cts=df_qopts.contract.to_list(), 
                                  CONCURRENT=200, TIMEOUT=10.0,
                                  post_process=save_df, FSPATH=FSPATH, OP_FILENAME='',))

### Prepare cos for margins from qualified options

In [None]:
df_symlots = pd.read_pickle(IBDATAPATH.joinpath('df_symlots.pkl'))
df_raw_opts = pd.read_pickle(IBDATAPATH.joinpath('df_qopts.pkl'))

if MARKET == 'NSE':
    df_raw_opts['expiryM'] = df_raw_opts.expiry.apply(
        lambda d: d[:4] + '-' + d[4:6])
    cols1 = ['symbol', 'expiryM']
    df_raw_opts = df_raw_opts.set_index(cols1).join(
        df_symlots[cols1 + ['lot']].set_index(cols1)).reset_index()
    df_raw_opts = df_raw_opts.drop('expiryM', 1)
else:
    df_raw_opts['lot'] = 100

# ... build cos (contract, orders)
opts = df_raw_opts.contract.to_list()
orders = [MarketOrder('SELL', lot / lot) if MARKET.upper() ==
          'SNP' else MarketOrder('SELL', lot) for lot in df_raw_opts.lot]
cos = [(c, o) for c, o in zip(opts, orders)]

In [None]:
cos = cos[:500] # !!! DATA LIMITER

### Get option margins

In [None]:
%%time
with IB().connect(HOST, PORT, CID) as ib:
    df_margins = ib.run(executeAsync(ib=ib, algo=margins, cts=cos, 
                                  CONCURRENT=200, TIMEOUT=5.0,
                                  post_process=save_df, FSPATH=FSPATH, OP_FILENAME='df_margins.pkl',))

In [None]:
df_margins

### Get option prices

In [None]:
%%time
with IB().connect(HOST, PORT, CID) as ib:
    df_price = ib.run(executeAsync(ib=ib, algo=prices, cts=opts, 
                                  CONCURRENT=200, TIMEOUT=10.0,
                                  post_process=save_df, FSPATH=FSPATH, OP_FILENAME='df_optprices.pkl',))

#### Verifying data integrity

In [None]:
df = df_price[~df_price.price.isnull()]

In [None]:
df1 = df.set_index('conId').join(df_margins[['conId', 'margin', 'lot', 'comm']].set_index('conId')).reset_index()