# CloudEx Algo Main Script

#### Default Imports

In [1]:
# Import packages.
import datetime
import json
import os
import sys
import threading
import time

import pandas as pd
import numpy as np
import redis
from pandas.core.common import SettingWithCopyWarning

import warnings
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

# CloudEx imports.
import cloud_ex

# Import AlgorithmicTrader helper class.
from algorithmic_trader_shay import AlgorithmicTrader
from algorithmic_trader import summarize_historical_trades_df

# Start a local Redis server in the background and give it a moment to come up.
os.system("redis-server --daemonize yes")
time.sleep(1)

# Get CloudEX and VM-specific config. 
# NOTE: gateway_ip will be null when the exchange is not online 
def get_vm_config():
    """Load the CloudEx / VM-specific settings from ``vm_config.json``.

    Returns:
        The parsed JSON content of ``vm_config.json``, read from the current
        working directory.
    """
    with open("vm_config.json", "r") as config_file:
        return json.load(config_file)

config = get_vm_config()


# utilities  

# Column names for order DataFrames. OrderDF / OutstandingOrderDF assign them,
# in this order, to the '|'-separated fields produced by SerializeOrder().
ORDER_FIELDS_LIST = [
    'Symbol', 'OrderID', 'CancelID', 'ClientID', 'OrderType', 'OrderAction',
    'SubmitTimestamp', 'GatewayTimestamp', 'EnqueueTimestamp',
    'DequeueTimestamp', 'OrderSerialNum', 'LimitPrice', 'ResultType','NumShares'
]

# Column names for trade DataFrames. TradeDF assigns them, in this order, to
# the '|'-separated fields produced by SerializeTrade().
TRADE_FIELDS_LIST = [
    "Symbol", "BuyerSerialNum", "SellerSerialNum", "BuyerOrderID",
    "SellerOrderID", "BuyerClientID", "SellerClientID", "ExecPrice",
    "CashTraded", "SharesTraded", "CreationTimestamp", "ReleaseTimestamp",
    "TradeSerialNum"
]

def OrderDF(order_vec):
    """Convert a vector of orders into a DataFrame.

    Args:
        order_vec: A ``cloud_ex.VectorOrder`` (or any sized iterable) whose
            elements expose ``SerializeOrder()``, returning the order's
            fields as a single '|'-separated string.

    Returns:
        A DataFrame with one row per order and ``ORDER_FIELDS_LIST`` as its
        columns. The timestamp, serial-number, price and share-count columns
        are converted to numbers; values that fail to parse become NaN. An
        empty input yields an empty DataFrame with the same columns.
    """
    if not len(order_vec):
        return pd.DataFrame(columns=ORDER_FIELDS_LIST)

    serialized = pd.Series([order.SerializeOrder() for order in order_vec])
    df = serialized.str.split('|', expand=True)
    df.columns = ORDER_FIELDS_LIST

    numeric_labels = ['SubmitTimestamp', 'GatewayTimestamp', 'EnqueueTimestamp',
                      'DequeueTimestamp', 'OrderSerialNum', 'LimitPrice',
                      'NumShares']
    for label in numeric_labels:
        # Replace the whole column rather than writing through df.loc[:, label]:
        # recent pandas performs .loc column assignment in place, which would
        # leave the column with its original string/object dtype.
        df[label] = pd.to_numeric(df[label], errors='coerce')
    return df

def TradeDF(trade_vec):
    """Convert a vector of trades into a DataFrame.

    Args:
        trade_vec: A ``cloud_ex.VectorTrade`` (or any sized iterable) whose
            elements expose ``SerializeTrade()``, returning the trade's
            fields as a single '|'-separated string.

    Returns:
        A DataFrame with one row per trade and ``TRADE_FIELDS_LIST`` as its
        columns. The price, cash, share-count, timestamp and trade-serial
        columns are converted to numbers; values that fail to parse become
        NaN. An empty input yields an empty DataFrame with the same columns.
    """
    if not len(trade_vec):
        return pd.DataFrame(columns=TRADE_FIELDS_LIST)

    serialized = pd.Series([trade.SerializeTrade() for trade in trade_vec])
    df = serialized.str.split('|', expand=True)
    df.columns = TRADE_FIELDS_LIST

    numeric_labels = ["ExecPrice", "CashTraded", "SharesTraded",
                      "CreationTimestamp", "ReleaseTimestamp", "TradeSerialNum"]
    for label in numeric_labels:
        # Replace the whole column rather than writing through df.loc[:, label]:
        # recent pandas performs .loc column assignment in place, which would
        # leave the column with its original string/object dtype.
        df[label] = pd.to_numeric(df[label], errors='coerce')
    return df

def OutstandingOrderDF(outstanding_orders):
    """Convert a map of outstanding orders into a DataFrame.

    Args:
        outstanding_orders: A ``cloud_ex.MapStringOrder`` (or any sized
            mapping with ``items()``) from order ID strings to the
            corresponding orders. Each order must expose ``SerializeOrder()``,
            returning its fields as a single '|'-separated string.

    Returns:
        A DataFrame with one row per outstanding order and
        ``ORDER_FIELDS_LIST`` as its columns. The timestamp, serial-number,
        price and share-count columns are converted to numbers; values that
        fail to parse become NaN. An empty input yields an empty DataFrame
        with the same columns.
    """
    if not len(outstanding_orders):
        return pd.DataFrame(columns=ORDER_FIELDS_LIST)

    # Only the mapped orders are needed; the order-ID keys are discarded.
    serialized = pd.Series(
        [order.SerializeOrder() for _, order in outstanding_orders.items()])
    df = serialized.str.split('|', expand=True)
    df.columns = ORDER_FIELDS_LIST

    numeric_labels = ['SubmitTimestamp', 'GatewayTimestamp', 'EnqueueTimestamp',
                      'DequeueTimestamp', 'OrderSerialNum', 'LimitPrice',
                      'NumShares']
    for label in numeric_labels:
        # Replace the whole column rather than writing through df.loc[:, label]:
        # recent pandas performs .loc column assignment in place, which would
        # leave the column with its original string/object dtype.
        df[label] = pd.to_numeric(df[label], errors='coerce')
    return df

#### Custom Imports

In [3]:
import importlib 
import sys 
#importlib.reload(sys.modules['mean_reversion_shay'])

In [5]:
import strategies_shay   
import utilities as u 
default_symbols = ['AA', 'AB', 'AC', 'AD', 'AE', 'AF', 'AG', 'AH', 'AI', 'AJ', 'AK', 'AL', 'AM', 'AN', 'AO', 'AP', 'AQ', 'AR', 'AS', 'AT', 'AU', 'AV', 'AW', 'AX', 'AY', 'AZ', 'BA', 'BB', 'BC', 'BD', 'BE', 'BF', 'BG', 'BH', 'BI', 'BJ', 'BK', 'BL', 'BM', 'BN', 'BO', 'BP', 'BQ', 'BR', 'BS', 'BT', 'BU', 'BV', 'BW', 'BX', 'BY', 'BZ', 'CA', 'CB', 'CC', 'CD', 'CE', 'CF', 'CG', 'CH', 'CI', 'CJ', 'CK', 'CL', 'CM', 'CN', 'CO', 'CP', 'CQ', 'CR', 'CS', 'CT', 'CU', 'CV', 'CW', 'CX', 'CY', 'CZ', 'DA', 'DB', 'DC', 'DD', 'DE', 'DF', 'DG', 'DH', 'DI', 'DJ', 'DK', 'DL', 'DM', 'DN', 'DO', 'DP', 'DQ', 'DR', 'DS', 'DT', 'DU', 'DV']
default_top_symbols = ['CQ', 'BV', 'BP', 'BY', 'CR', 'BL', 'BO', 'CX', 'DE', 'BF']

In [None]:
trader = None

#### Get the trader object

In [3]:
def getTrader() :
    """Return the shared CloudEx trader, creating it on first use.

    The trader is cached in the module-level ``trader`` variable. When a
    truthy trader is already cached it is returned as-is. Otherwise the local
    Redis data is flushed and a new ``cloud_ex.Trader`` is built from the
    gateway IP, client id and client token found in ``config``.
    """
    global trader
    if not trader:
        # Connection details come from the VM-specific config. The token is
        # private to this client, so it must not be made public.
        gateway_ip = config["gateway_ip"]
        client_id = config["client_id"]
        client_token = config["client_token"]

        # Wipe any data left in the local Redis instance.
        redis.Redis().flushall()

        # Build and cache the CloudEx base trader object.
        trader = cloud_ex.Trader(gateway_ip, client_id, client_token)
        return trader

    u.debug("Returning saved trader")
    return trader

def getSymbols() :
    """Return the result of ``GetSymbols()`` on the shared trader."""
    return getTrader().GetSymbols()


def getPortfolio():
    """Query the portfolio matrix through the shared trader.

    The trader is obtained via ``getTrader()`` rather than read from the
    module-level ``trader`` variable, so this also works when the trader has
    not been created yet (the global starts out as ``None``).

    Returns:
        Whatever ``GetPortfolioMatrix`` returns.
        NOTE(review): the ``MapStringInt`` passed in is not returned, so if
        ``GetPortfolioMatrix`` fills it as an out-parameter the filled map is
        lost to the caller. Confirm against the cloud_ex API.
    """
    portfolio_mat = cloud_ex.MapStringInt()
    return getTrader().GetPortfolioMatrix(portfolio_mat)
    

In [5]:
T = getTrader()

#### Main Todos



#### TRADING PARAMETERS

In [None]:
# Shared trading/backtesting parameters, looked up by key throughout this notebook.
GLOBALS = { 
    # presumably the number of shares per submitted order -- TODO confirm
    'NUM_SHARES' :  1 , 
    'BIN_INTERVAL_MS' : 500,  # interval (ms) used to bin the data; passed to each strategy constructor
    # passed to algo.trade() in run_and_evaluate_algorithm
    'WAIT_INTERVAL_MS' : 500, 
    'BACKTEST_LOOKBACK_PERIOD_SECONDS' : 60*5  , # seconds of historical trades pulled for backtesting
    'MAX_NUM_ORDERS' : 60*5*2 , # passed as max_num_orders to algo.trade(); intended as the trading duration in number of bins
} 

### Selecting symbols to trade on based on aggregate volume over last n seconds  

Before trading starts, we need to decide which symbols to trade. A natural approach is to rank the symbols by some metric (for example, traded volume) and take the top N.

In [57]:
def aggregate_volume(symbol, seconds_in_past) :
    """Sum the 'CashTraded' column over a symbol's most recent trades.

    Pulls the trades for ``symbol`` between ``seconds_in_past`` seconds ago
    and now via the market data API, converts them with ``TradeDF`` and
    returns the total of the 'CashTraded' column.
    """
    u.debug("Getting aggregate volume for {}".format(symbol))

    # Query window in epoch milliseconds: [now - seconds_in_past, now].
    now_ms = int(time.time()*1e3)
    window_start_ms = now_ms - int(seconds_in_past*1e3)

    pulled_trades = cloud_ex.VectorTrade()
    cloud_ex.MarketDataAPI.PullTrades(
        config['project_id'],
        config['bigtable_id'],
        config['table_name'],
        symbol,
        window_start_ms,
        now_ms,
        pulled_trades,
    )

    trades_df = TradeDF(pulled_trades)
    return np.sum(trades_df['CashTraded'])

In [62]:
def rank_symbols_by_volume(symbols , seconds_in_past) :
    """Rank symbols by their aggregate volume, highest first.

    Returns a list of ``[symbol, volume]`` pairs, where the volume is
    ``aggregate_volume(symbol, seconds_in_past)``, sorted in descending
    order of volume.
    """
    ranked = []
    for sym in symbols:
        ranked.append([sym, aggregate_volume(sym, seconds_in_past)])
    return sorted(ranked, key=lambda pair: pair[1], reverse=True)

In [68]:
def get_top_n_symbols_by_volume(symbols,seconds_in_past, n )  :
    """Pick the ``n`` symbols with the highest aggregate volume.

    Returns a tuple ``(top_symbols, ranking)`` where ``ranking`` is the full
    output of ``rank_symbols_by_volume`` and ``top_symbols`` holds the symbol
    names of its first ``n`` entries.
    """
    ranking = rank_symbols_by_volume(symbols, seconds_in_past)
    top_symbols = [entry[0] for entry in ranking[:n]]
    return (top_symbols, ranking)

In [69]:
top_symbols, symbol_ranks = get_top_n_symbols_by_volume(default_symbols, 60, 10)

Getting aggregate volume for AA
Getting aggregate volume for AB
Getting aggregate volume for AC
Getting aggregate volume for AD
Getting aggregate volume for AE
Getting aggregate volume for AF
Getting aggregate volume for AG
Getting aggregate volume for AH
Getting aggregate volume for AI
Getting aggregate volume for AJ
Getting aggregate volume for AK
Getting aggregate volume for AL
Getting aggregate volume for AM
Getting aggregate volume for AN
Getting aggregate volume for AO
Getting aggregate volume for AP
Getting aggregate volume for AQ
Getting aggregate volume for AR
Getting aggregate volume for AS
Getting aggregate volume for AT
Getting aggregate volume for AU
Getting aggregate volume for AV
Getting aggregate volume for AW
Getting aggregate volume for AX
Getting aggregate volume for AY
Getting aggregate volume for AZ
Getting aggregate volume for BA
Getting aggregate volume for BB
Getting aggregate volume for BC
Getting aggregate volume for BD
Getting aggregate volume for BE
Getting 

In [70]:
top_symbols

['CQ', 'BV', 'BP', 'BY', 'CR', 'BL', 'BO', 'CX', 'DE', 'BF']

In [71]:
symbol_ranks

[['CQ', 116859],
 ['BV', 36186],
 ['BP', 35567],
 ['BY', 34071],
 ['CR', 33011],
 ['BL', 29829],
 ['BO', 28591],
 ['CX', 28353],
 ['DE', 27214],
 ['BF', 27095],
 ['BK', 26853],
 ['BM', 26104],
 ['BU', 25758],
 ['BJ', 25662],
 ['CZ', 25658],
 ['DR', 25089],
 ['BZ', 25003],
 ['CM', 24784],
 ['DM', 24540],
 ['DD', 23301],
 ['DA', 23160],
 ['BN', 22428],
 ['DU', 22140],
 ['DB', 21907],
 ['DF', 21496],
 ['BI', 21310],
 ['DG', 20080],
 ['AZ', 19960],
 ['DC', 19859],
 ['DT', 19339],
 ['AI', 18526],
 ['DO', 18425],
 ['CK', 18321],
 ['BE', 18303],
 ['DP', 18301],
 ['AW', 18241],
 ['CU', 18182],
 ['AX', 17262],
 ['CE', 17248],
 ['CN', 16980],
 ['DV', 16758],
 ['AV', 16631],
 ['DJ', 15214],
 ['AJ', 14427],
 ['AT', 14214],
 ['AA', 13607],
 ['AY', 13604],
 ['CG', 12927],
 ['AN', 12845],
 ['CF', 12398],
 ['AU', 12317],
 ['BQ', 12292],
 ['CO', 12150],
 ['AF', 12103],
 ['CS', 12090],
 ['DS', 11948],
 ['CT', 11428],
 ['CH', 11195],
 ['AO', 10617],
 ['CP', 10589],
 ['AH', 10527],
 ['DL', 10489],
 ['CL',

### QUERY ORDER HISTORY

In [6]:
### Order Monitoring 
def outstanding_orders() :
    """Fetch this client's outstanding orders as a DataFrame.

    The trader is obtained via ``getTrader()`` rather than read from the
    module-level ``trader`` variable, so this also works when the trader has
    not been created yet (the global starts out as ``None``).

    Returns:
        The DataFrame built by ``OutstandingOrderDF`` from the orders that
        ``GetOutstandingOrders`` wrote into a fresh ``MapStringOrder``.
    """
    # Named differently from the function so the local does not shadow it.
    open_orders = cloud_ex.MapStringOrder()
    getTrader().GetOutstandingOrders(open_orders)
    u.debug("You have {} outstanding orders.".format(len(open_orders)))
    # Transform outstanding orders into a DataFrame.
    return OutstandingOrderDF(open_orders)
    
def historical_orders() :
    """Fetch every order this client has submitted as a DataFrame.

    The trader is obtained via ``getTrader()`` rather than read from the
    module-level ``trader`` variable, so this also works when the trader has
    not been created yet (the global starts out as ``None``).

    Returns:
        The DataFrame built by ``OrderDF`` from the orders that
        ``GetAllHistoricalOrders`` wrote into a fresh ``VectorOrder``.
    """
    my_historical_orders = cloud_ex.VectorOrder()
    getTrader().GetAllHistoricalOrders(my_historical_orders)
    u.debug("You have submitted a total of {} order(s).".format(len(my_historical_orders)))
    return OrderDF(my_historical_orders)

def historical_trades() :
    """Fetch every trade this client has made as a DataFrame.

    The trader is obtained via ``getTrader()`` rather than read from the
    module-level ``trader`` variable, so this also works when the trader has
    not been created yet (the global starts out as ``None``).

    Returns:
        The DataFrame built by ``TradeDF`` from the trades that
        ``GetAllHistoricalTrades`` wrote into a fresh ``VectorTrade``.
    """
    my_historical_trades = cloud_ex.VectorTrade()
    getTrader().GetAllHistoricalTrades(my_historical_trades)
    u.debug("You have made a total of {} trade(s).".format(len(my_historical_trades)))
    return TradeDF(my_historical_trades)

# 1. Backtesting

Let's see how we can backtest our trading algorithms to get them ready for live trading. In the following cells we will download historical data and evaluate how well a mean reversion trader would have done.

## 1.1 Get our bank of strategies

### Note on strategy and strategy parameters terminology

A strategy is always run with a specific set of parameters. <b>A strategy paired with its parameters is called an "algo".</b>
The backtest logic takes a dictionary that maps string identifiers to (strategy, params) tuples. This dictionary is called the "algo_bank".

For example => 

In [6]:
# Note: a fully specified strategy is called an "algo". It is a tuple of (strategy, kwargs),
# where kwargs provides the parameters for the strategy.

def mean_reversion_algo(ma,t) :
    """Build a mean-reversion algo.

    Returns a TUPLE ``(strategy, kwargs)``: the 'mean_reversion' strategy from
    ``strategies_shay`` and a parameter dict with ``ma`` under 'ma' and ``t``
    under 'threshold'.
    """
    strategy = strategies_shay.strategies['mean_reversion'].strategy
    params = {'ma': ma, 'threshold': t}
    return (strategy, params)

def momentum_algo(p1,p2,t) :
    """Build a momentum algo.

    Returns a TUPLE ``(strategy, kwargs)``: the 'momentum' strategy from
    ``strategies_shay`` and a parameter dict with 'p1', 'p2' and 'threshold'
    (set from ``t``).
    """
    strategy = strategies_shay.strategies['momentum'].strategy
    params = {'p1': p1, 'p2': p2, 'threshold': t}
    return (strategy, params)

def random_buy_algo(p) :
    """Build a random-buy algo: the 'random_buy' strategy paired with ``{'p': p}``."""
    strategy = strategies_shay.strategies['random_buy'].strategy
    params = {'p': p}
    return (strategy, params)

def random_sell_algo(p) :
    """Build a random-sell algo: the 'random_sell' strategy paired with ``{'p': p}``."""
    strategy = strategies_shay.strategies['random_sell'].strategy
    params = {'p': p}
    return (strategy, params)


# Define an "Algo Bank", which holds the (strategy, kwargs) pair, indexed by a unique identifier.
# Identifier prefixes: mr = mean reversion, mo = momentum, rb = random buy, rs = random sell.
algo_bank = { 
    # mean reversion: (ma, threshold)
    'mr_10_3' : mean_reversion_algo(10,3),
    'mr_10_5' : mean_reversion_algo(10,5), 
    'mr_10_10' : mean_reversion_algo(10,10),
    # momentum: (p1, p2, threshold)
    'mo_5_5_1' : momentum_algo(0.5,0.5,1) , 
    'mo_5_5_3' : momentum_algo(0.5,0.5,3) , 
    'mo_8_2_1' : momentum_algo(0.8,0.2,1) , 
    'mo_8_2_3' : momentum_algo(0.8,0.2,3) , 
    # random buy / random sell: (p)
    'rb_10'    : random_buy_algo(0.1) , 
    'rb_50'    : random_buy_algo(0.5) , 
    'rs_10'    : random_sell_algo(0.1) , 
    'rs_50'    : random_sell_algo(0.5) , 
 
}


In [1]:
def backtest_strategy_with_symbol(data,strategy, symbol, kwargs = None ) :
    """Backtest one (strategy, kwargs) pair on a single symbol.

    Args:
        data: Historical trade data handed unchanged to the strategy's
            ``backtest`` method (the caller in this file passes a trades
            DataFrame sorted by 'CreationTimestamp').
        strategy: Callable that builds the algorithmic trader; invoked as
            ``strategy(trader, [symbol], bin_interval_ms)``.
        symbol: The symbol to backtest on.
        kwargs: Optional dict of strategy parameters, forwarded to
            ``backtest`` as keyword arguments. ``None`` means no parameters.

    Returns:
        The ``(roi, action_list)`` pair returned by ``algo.backtest``.
    """
    # Use a fresh dict per call instead of a shared mutable default argument.
    strategy_kwargs = {} if kwargs is None else kwargs

    # Initial capital and shares used for the backtest.
    init_capital = 100000
    init_shares = 1000

    # Number of shares handed to the backtest.
    num_shares = 10

    # No live trader is passed: the strategy object is only used to backtest.
    algo = strategy(None, [symbol], GLOBALS['BIN_INTERVAL_MS'])

    # Run the backtest.
    roi, action_list = algo.backtest(data,
                                     num_shares,
                                     init_capital,
                                     init_shares,
                                     **strategy_kwargs)

    u.debug("Algo ROI={}%".format(roi))
    return roi, action_list
    
    

In [10]:
def _pull_symbol_trades_df(symbol, start_time_ms, end_time_ms) :
    """Pull a symbol's trades for the given window, sorted by 'CreationTimestamp'."""
    symbol_trades_vec = cloud_ex.VectorTrade()
    cloud_ex.MarketDataAPI.PullTrades(config['project_id'], config['bigtable_id'],
                                      config['table_name'], symbol, start_time_ms,
                                      end_time_ms, symbol_trades_vec)
    u.debug("There are a total of {} trades for {} symbol".format(len(symbol_trades_vec), symbol))
    return TradeDF(symbol_trades_vec).sort_values(by="CreationTimestamp")


def backtest_algobank_on_symbols(algobank, symbols) :
    """Backtest every algo in ``algobank`` on every symbol in ``symbols``.

    Each symbol's historical trades are pulled once, covering the last
    ``GLOBALS['BACKTEST_LOOKBACK_PERIOD_SECONDS']`` seconds, and every
    (strategy, kwargs) pair in ``algobank`` is then backtested on that data.

    Args:
        algobank: Dict mapping an algo name to a ``(strategy, kwargs)`` tuple.
        symbols: Iterable of symbols to backtest on.

    Returns:
        A list of ``[roi, symbol, algoname, action_list]`` entries, sorted by
        ``roi`` in descending order.
    """
    results = []

    # One shared time window so every symbol is evaluated over the same period.
    end_time_ms = int(time.time()*1e3)
    start_time_ms = end_time_ms - int(GLOBALS['BACKTEST_LOOKBACK_PERIOD_SECONDS']*1e3)

    for symbol in symbols :
        u.debug("\n\nBacktesting symbol: {}".format(symbol) )
        # The data must be pulled inside the loop: it is specific to the
        # symbol, and `symbol` is not defined before the loop starts.
        symbol_historical_trades_df = _pull_symbol_trades_df(symbol, start_time_ms, end_time_ms)

        for (algoname,algo) in algobank.items() :
            strategy, kwargs = algo
            u.debug("algo={}".format(algoname))
            roi,action_list = backtest_strategy_with_symbol(symbol_historical_trades_df,strategy, symbol, kwargs)
            results.append( [  roi, symbol, algoname,action_list ])
            u.debug("Made {} trades with roi: {}".format(len(action_list), roi))

    results.sort(key=lambda x : x[0], reverse=True)
    return results

            

In [9]:
backtest_results = backtest_algobank_on_symbols(algo_bank, default_top_symbols[0:5])

Backtesting symbol: CQ

algo=mr_10_3
There are a total of 310 trades for CQ symbol
Running in offline mode. Could not set active symbols.
Algo ROI=4.291845493562235%
Made 0 trades with roi: 4.291845493562235
algo=mr_10_5
There are a total of 310 trades for CQ symbol
Running in offline mode. Could not set active symbols.
Algo ROI=6.060606060606062%
Made 0 trades with roi: 6.060606060606062
algo=mr_10_10
There are a total of 310 trades for CQ symbol
Running in offline mode. Could not set active symbols.
Algo ROI=6.986899563318772%
Made 0 trades with roi: 6.986899563318772
algo=mo_5_5_1
There are a total of 308 trades for CQ symbol
Running in offline mode. Could not set active symbols.
Algo ROI=8.38157894736841%
Made 5 trades with roi: 8.38157894736841
algo=mo_5_5_3
There are a total of 308 trades for CQ symbol
Running in offline mode. Could not set active symbols.
Algo ROI=8.810572687224678%
Made 0 trades with roi: 8.810572687224678
algo=mo_8_2_1
There are a total of 311 trades for CQ sy

In [11]:
backtest_results

[[11.6591928251121, 'CQ', 'mo_8_2_3', []],
 [10.393805309734503,
  'CQ',
  'mo_8_2_1',
  [(111, 'Buy', 107.0),
   (112, 'Buy', 107.0),
   (113, 'Buy', 107.0),
   (296, 'Buy', 135.0),
   (297, 'Buy', 135.0)]],
 [8.810572687224678, 'CQ', 'mo_5_5_3', []],
 [8.38157894736841,
  'CQ',
  'mo_5_5_1',
  [(121, 'Buy', 107.0),
   (122, 'Buy', 107.0),
   (305, 'Buy', 135.0),
   (306, 'Buy', 135.0),
   (307, 'Buy', 135.0)]],
 [6.986899563318772, 'CQ', 'mr_10_10', []],
 [6.060606060606062, 'CQ', 'mr_10_5', []],
 [4.291845493562235, 'CQ', 'mr_10_3', []],
 [0.21929824561404132, 'CR', 'mo_5_5_1', []],
 [0.21881838074398274, 'CR', 'mo_8_2_1', []],
 [0.21881838074398274, 'CR', 'mo_8_2_3', []],
 [0.0, 'CR', 'mr_10_3', []],
 [0.0, 'CR', 'mr_10_5', []],
 [0.0, 'CR', 'mr_10_10', []],
 [0.0, 'CR', 'mo_5_5_3', []],
 [-4.072398190045249, 'BP', 'mr_10_3', []],
 [-4.298642533936643, 'BP', 'mr_10_5', []],
 [-4.7404063205417515, 'BP', 'mr_10_10', []],
 [-4.966139954853276, 'BP', 'mo_5_5_1', []],
 [-4.9661399548532

# Deploying the trading threads 

The deployment logic is now complete -- see the <b>trade_algorithmically</b> function below. Nothing has been tested yet, so expect bugs. For now, each thread logs the IDs of the orders it submits to a log file named after the algo and a timestamp.

In [None]:
def run_and_evaluate_algorithm(**kwargs) :
    """Run one algo on one symbol and log the order ids it submitted.

    Intended as the target of a new thread.
    1. Launches the algo on the symbol and trades until ``algo.trade`` returns.
    2. Writes the submitted order ids to a log file.
    ROI calculation and ROI logging are not implemented yet (see the TODOs).

    Keyword Args:
        name: Algo identifier; used as the prefix of the trader id.
        strategy: Callable that builds the algorithmic trader; invoked as
            ``strategy(trader, [symbol], bin_interval_ms=...)``.
        strategy_parameters: Parameters for the strategy. The legacy key
            ``parameters`` is accepted as a fallback.
        num_shares: Passed through to ``algo.trade``.
        max_num_orders: Passed through to ``algo.trade``.
        symbol: The symbol to trade.
        trader: Reference to the single trader instance connected to CloudEx.

    Raises:
        KeyError: If a required keyword argument is missing.
    """
    name     = kwargs['name']
    strategy = kwargs['strategy']
    # deploy_top_N_algorithms passes the parameters as 'strategy_parameters';
    # fall back to 'parameters' for callers that use the older key.
    try:
        strategy_parameters = kwargs['strategy_parameters']
    except KeyError:
        strategy_parameters = kwargs['parameters']
    num_shares = kwargs['num_shares']
    max_num_orders = kwargs['max_num_orders']
    symbol   = kwargs['symbol']
    trader   = kwargs['trader']

    # Create the AlgorithmicTrader object. The GLOBALS key is 'BIN_INTERVAL_MS'.
    algo = strategy(trader, [symbol], bin_interval_ms=GLOBALS['BIN_INTERVAL_MS'])

    # Give this trader an id for logging: <algo name>_<epoch seconds>.
    trader_id = name + "_" + str(time.time()).split(".")[0]
    algo.set_id(trader_id)

    # TODO: calculate portfolio state pre-trading.

    # Start and finish trading.
    # NOTE(review): the parameters are passed as one positional dict here,
    # whereas backtesting unpacks them as keyword arguments -- confirm against
    # the signature of trade().
    order_ids = algo.trade(symbol,num_shares,max_num_orders,GLOBALS['WAIT_INTERVAL_MS'] ,strategy_parameters)

    # TODO: calculate portfolio state post-trading.

    # Write the order ids to a log file named after this trader's id.
    u.logfile(trader_id + "_order_ids", json.dumps(order_ids) )

    # TODO: write the ROI information to a log file.

def run_algorithm_in_thread(kwargs) :
    """Start ``run_and_evaluate_algorithm`` in a new thread.

    Args:
        kwargs: Dict of keyword arguments forwarded to
            ``run_and_evaluate_algorithm``.

    Returns:
        The started ``threading.Thread``; callers may ``join()`` it.
    """
    worker = threading.Thread(target=run_and_evaluate_algorithm,
                              kwargs=kwargs)
    worker.start()
    return worker

def deploy_top_N_algorithms(trader,ranked_algos, N,  num_shares, max_num_orders) :
    """Launch one trading thread for each of the first ``N`` ranked algos.

    Each entry of ``ranked_algos`` is unpacked as
    ``(roi, symbol, algoname, action_list)``. The strategy and its parameters
    are looked up in the module-level ``algo_bank`` by ``algoname``.

    Returns:
        The list of thread objects returned by ``run_algorithm_in_thread``,
        in deployment order.
    """
    threads = []
    for _roi, symbol, algoname, _actions in ranked_algos[:N]:
        strategy, strategy_parameters = algo_bank[algoname]
        thread_kwargs = dict(
            name=algoname,
            strategy=strategy,
            strategy_parameters=strategy_parameters,
            num_shares=num_shares,
            max_num_orders=max_num_orders,
            symbol=symbol,
            trader=trader,
        )
        threads.append(run_algorithm_in_thread(thread_kwargs))
    return threads


def trade_algorithmically(trader,algobank,symbols,N=5) :
    """Repeatedly backtest, deploy the top ``N`` algos, and wait for them.

    Each round backtests every algo in ``algobank`` on ``symbols``, deploys
    the ``N`` best-ranked (algo, symbol) results in their own threads, and
    joins all of those threads before starting the next round. The loop has
    no exit condition, so this function does not return.

    Args:
        trader: The trader instance handed to every deployed algo.
        algobank: Dict mapping an algo name to a ``(strategy, kwargs)`` tuple.
        symbols: Symbols to backtest and trade on.
        N: Number of top-ranked results to deploy per round.
    """
    while True :
        ranked = backtest_algobank_on_symbols(algobank,symbols)
        # The GLOBALS key is upper-case 'NUM_SHARES'; 'num_shares' does not exist.
        algo_threads = deploy_top_N_algorithms(trader,ranked,N, GLOBALS['NUM_SHARES'] , GLOBALS['MAX_NUM_ORDERS'])
        for t in algo_threads :
            t.join()

### Notes 


Before instantiating traders: 
1. We should record the portfolio value of the symbol each trader is going to trade on.


Taking down traders: 
1. Before deleting a trader instance, we should check its order IDs to see which of its orders were executed,
and record the new portfolio value in order to track its performance.

