In [14]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import math
import datetime
import glob
import copy

import pandas as pd
import seaborn as sns
import tensorflow as tf
from matplotlib import pyplot as plt

from ml.opt_weights import *
from ml.model_inputs import *
from ml.model import *
import ml.utils as ut
from custom_objectives import *
#from constructions import *
from tutils import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [15]:
from itertools import product
from toolz.dicttoolz import keymap

In [56]:
params = (('look_ahead_per', (10, 22, 44)),
          ('cash', (True, False)),
          ('as_ts_score', (True, False)),
          ('anti_signals', (False,)),
          ('boosting', (True,)),
          ('sigmoid_gain', (.5, .75, 1.)),
          ('layer_size', (10, 25, 100)),
          ('num_layers', (1, 2, 3,)),
          ('num_iterations', (10000, 50000, 500000)),
          ('batch_size', (250, 500, 1000)),
          ('learning_rate', (.01, .1, 1.)),
          ('penalty_alpha', (0., .01, .1, 1.)),
          ('train_dropout_rate', (0., .1, .25)),
          ('holdings_penalty_alpha', (.5, 1., 1.5)),
          ('gearing_alpha', (.2,)),)

iters = list(product(*[i[1] for i in params]))

In [57]:
len(iters)

314928

In [54]:
fn = 'data/backtest_grid.csv'
with open(fn, 'w') as csvfile:
    for ind, i in enumerate(iters):
        if ind > 0:
            break
        print ind, i
        row = dict(zip([p[0] for p in params], i))
        row['num_iterations'] = 100
        try:
            res = train_model(paths, row['look_ahead_per'], row['cash'], row['as_ts_score'], row['anti_signals'], 
                              row['boosting'], row['sigmoid_gain'], row['layer_size'], row['num_layers'], 
                              row['num_iterations'], row['batch_size'], row['learning_rate'], row['penalty_alpha'], 
                              row['train_dropout_rate'], row['holdings_penalty_alpha'], row['gearing_alpha'])
            row.update(res)
        except:
            continue
            
        if not ind:
            writer = csv.DictWriter(csvfile, fieldnames=row.keys())
            writer.writeheader()
        writer.writerow(row)

0 (10, True, True, False, True, 0.5, 10, 1, 10000, 250, 0.01, 0.0, 0.0, 0.5, 0.2)
train loss:	0.41994	test loss:	0.46704


	DataFrame.rolling(min_periods=1,window=10,center=False).mean()
	DataFrame.rolling(min_periods=1,window=22,center=False).mean()


In [55]:
pd.read_csv(fn)

Unnamed: 0,train_irraw,learning_rate,boosting,as_ts_score,batch_size,train_dropout_rate,look_ahead_per,test_irraw,test_to_one_month,anti_signals,...,sigmoid_gain,train_to_two_weeks,test_to_two_weeks,train_ir2_weeks,test_to,train_ir1_month,layer_size,train_to_one_month,test_ir1_month,penalty_alpha
0,2.364775,0.01,True,True,250,0.0,10,0.966529,0.309771,False,...,0.5,0.656881,0.666027,2.364775,6.606979,2.364775,10,0.299463,0.966529,0.0


In [10]:
paths = [i for i in glob.iglob('data/bagging/portfolio_returns_*.csv')]
paths.append('data/returns.csv')

In [43]:
def train_model(paths, look_ahead_per, cash, as_ts_score, anti_signals, boosting,
               sigmoid_gain, layer_size, num_layers, num_iterations, batch_size, learning_rate,
               penalty_alpha, train_dropout_rate, holdings_penalty_alpha, gearing_alpha):
    
    sel_paths = paths[-1:] if not boosting else paths
    ret = [load_returns(i, anti_signals) for i in sel_paths]
    
    if cash: 
        ret = [add_cash(r) for r in ret]


    # set Xs to forward looking returns
    fwd_ret = []
    for r in ret:
        fr = (get_fwd_ret(r, look_ahead_per).dropna(how='all')
        if cash:
            fr = fr.drop('cash', axis=1)
        fwd_ret.append(fr)
        
    Xs = copy.deepcopy(fwd_ret)
    
    # add noise
    Xs = [df_to_corr_panel(i) for i in Xs]

    # ts score (scaled to look similar to returns)
    if as_ts_score:
        ts_scaler = fwd_ret[-1].stack().std()
        Xs = [panel_ts_score(i, ts_scaler) for i in Xs]
        
    # concat data sets
    ret, fwd_ret, Xs = map(lambda x: pd.concat(x), [ret, fwd_ret, Xs])
    
    ys = pd.DataFrame(np.ones([Xs.shape[0], Xs.shape[1]]), index=Xs.items, columns=Xs.major_axis)
    
    inputs = [Xs, ys, fwd_ret]
    inputs = validate_and_format_inputs(inputs)
    inputs = split_inputs_by_date(inputs, datetime.date(2013,1,1), look_ahead_per)
    
    structure = [[1,],[layer_size for i in range(num_layers)]]
    data = pd.DataFrame(inputs).applymap(lambda x: x.values).values
    cost_func = lambda l, y, y_, r_, act: sigmoid_ir(l, y, y_, r_, act, look_ahead_per, sigmoid_gain, holdings_penalty_alpha, gearing_alpha)
    preds, stats = train_nn(data, 
                            structure, num_iterations, batch_size, learning_rate, penalty_alpha=penalty_alpha, 
                            train_dropout_rate=train_dropout_rate, verbosity=10000,
                            fc_final_layer_activation=None,
                            fc_hidden_layer_activation=tf.nn.relu,
                            conv_layer_activation=None,
                            loss_func=cost_func,)
    
    fret = select_final_data_set(ret)
    
    row = {}
    for i in ('train', 'test'):

        ind = 0 if i=='train' else 1
        probs = pd.DataFrame(preds[i]['weights'], columns=inputs[1][ind].columns, index=inputs[1][ind].index)
        probs = select_final_data_set(probs)
        probs_10 = pd.rolling_mean(probs,window=10, min_periods=1)
        probs_22 = pd.rolling_mean(probs,window=22, min_periods=1)
        
        perf = pd.DataFrame({'raw': fret.mul(probs).sum(axis=1),
                             '2_weeks': fret.mul(probs).sum(axis=1),
                             '1_month': fret.mul(probs).sum(axis=1)})

        irs = ut.get_ir(perf)
        irs = keymap(lambda x: '{0}_ir'.format(i) + x, irs.to_dict())
        row.update(irs)
        row[i + '_to'] = ut.calc_annual_turnover(probs)
        row[i + '_to_two_weeks'] = ut.calc_annual_turnover(probs_10)
        row[i + '_to_one_month'] = ut.calc_annual_turnover(probs_22)
        
    return row

train loss:	1.49686	test loss:	1.49917


	DataFrame.rolling(min_periods=1,window=10,center=False).mean()
	DataFrame.rolling(min_periods=1,window=22,center=False).mean()


In [58]:
look_ahead_per = 10
cash = False
as_ts_score = False
anti_signals = True
boosting = True
sigmoid_gain = .5
layer_size = 10
num_layers = 2
num_iterations = 100
batch_size = 100
learning_rate = .01
penalty_alpha = .05
train_dropout_rate = .1
holdings_penalty_alpha = 1.
gearing_alpha = .2

row = train_model(paths, look_ahead_per, cash, as_ts_score, anti_signals, boosting,
                  sigmoid_gain, layer_size, num_layers, num_iterations, batch_size, learning_rate,
                  penalty_alpha, train_dropout_rate, holdings_penalty_alpha, gearing_alpha)

train loss:	1.24010	test loss:	1.21910


	DataFrame.rolling(min_periods=1,window=10,center=False).mean()
	DataFrame.rolling(min_periods=1,window=22,center=False).mean()
