# How to generate long-short trading signals with a Random Forest

## Imports & Settings

In [1]:
!pip install ipython-autotime
!pip install lightgbm
!conda install -y -c quantopian ta-lib
!pip install pyfolio
# https://github.com/stefan-jansen/zipline-reloaded
!pip install zipline-reloaded
!pip install Logbook

Collecting lightgbm
  Downloading lightgbm-3.2.1-py3-none-manylinux1_x86_64.whl (2.0 MB)
[K     |████████████████████████████████| 2.0 MB 8.2 MB/s eta 0:00:01
Installing collected packages: lightgbm
Successfully installed lightgbm-3.2.1
Collecting package metadata (current_repodata.json): done
Solving environment: done


  current version: 4.9.2
  latest version: 4.10.1

Please update conda by running

    $ conda update -n base conda



# All requested packages already installed.



In [2]:
# modify timeseries.py of pyfolio
# src: https://github.com/quantopian/pyfolio/pull/634
!sed -i 's/valley = np.argmin(underwater)/valley = underwater.idxmin()/g' /opt/conda/lib/python3.7/site-packages/pyfolio/timeseries.py
!sed -i 's/if (len(returns) == 0) or (len(underwater) == 0):/if (len(returns) == 0) or (len(underwater) == 0) or np.min(underwater) == 0:/g' /opt/conda/lib/python3.7/site-packages/pyfolio/timeseries.py

In [1]:
%matplotlib inline
%load_ext autotime

from collections import defaultdict
from io import StringIO
from itertools import product
from logbook import Logger, StderrHandler, INFO
from sklearn.linear_model import LinearRegression
from scipy.stats import spearmanr
from time import time
from utils import MultipleTimeSeriesCV, format_time

import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_datareader.data as web
import seaborn as sns
import statsmodels.api as sm
import sys, os
import warnings

from zipline import run_algorithm
from zipline.api import (attach_pipeline, pipeline_output,
                         date_rules, time_rules, record,
                         schedule_function, commission, slippage,
                         set_slippage, set_commission,
                         get_open_orders, cancel_order,
                         order_target, order_target_percent)
from zipline.data import bundles
from zipline.utils.run_algo import load_extensions
from zipline.pipeline import Pipeline, CustomFactor
from zipline.pipeline.data import Column, DataSet
from zipline.pipeline.domain import JP_EQUITIES
from zipline.pipeline.filters import StaticAssets
from zipline.pipeline.loaders.frame import DataFrameLoader

import pyfolio as pf
from pyfolio.plotting import plot_rolling_returns, plot_rolling_sharpe
from pyfolio.timeseries import forecast_cone_bootstrap

warnings.filterwarnings('ignore')
sns.set_style('whitegrid')
np.random.seed(42) # for cv

idx = pd.IndexSlice
YEAR = 252

time: 1.36 s (started: 2021-05-02 10:10:28 +00:00)


In [2]:
### set path
import os
from pathlib import Path
path_home = os.getcwd()
path_home = Path(path_home)
path_data = path_home / 'data'
path_result = path_home / 'result'
if not path_result.exists():
    path_result.mkdir()

time: 928 µs (started: 2021-05-02 10:10:31 +00:00)


In [3]:
### zipline path
import os, sys

zipline_root = path_home / 'zipline'
os.environ['ZIPLINE_ROOT'] = str(zipline_root)
sys.path.append(zipline_root.expanduser().as_posix())

custom_data_path = zipline_root / 'custom_data'
if not custom_data_path.exists():
    custom_data_path.mkdir()

time: 872 µs (started: 2021-05-02 10:10:35 +00:00)


## Get Data

In [None]:
!gsutil cp gs://finance_data_bucket/lbk/rf/stooq_jp_data.zip .
!unzip stooq_jp_data.zip

In [6]:
# get cv result from 02_random_forest_return_signals
!gsutil cp gs://finance_data_bucket/lbk/rf/stooq_jp_result.zip .
!unzip stooq_jp_result.zip result/daily_ic_avg.csv

Copying gs://finance_data_bucket/lbk/rf/stooq_jp_result.zip...
- [1 files][ 47.5 MiB/ 47.5 MiB]                                                
Operation completed over 1 objects/47.5 MiB.                                     
Archive:  stooq_jp_result.zip
replace result/lin_reg_performance.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: ^C
time: 12.1 s (started: 2021-05-01 14:33:58 +00:00)


In [16]:
!ln -s ../../data/stooq_jp_tse_stocks_prices.csv zipline/custom_data/.
!ln -s ../../data/stooq_jp_tse_stocks_tickers.csv zipline/custom_data/.

time: 265 ms (started: 2021-05-02 07:40:23 +00:00)


## Generate predictions

In [4]:
train_lengths = [5 * YEAR, 3 * YEAR, YEAR, 126, 63]
test_lengths = [5, 21]
val_length = 2 * YEAR

time: 581 µs (started: 2021-05-02 10:10:40 +00:00)


In [5]:
param_cols = ['train_length', 'test_length', 'bagging_fraction',
              'feature_fraction', 'min_data_in_leaf', 'rounds']

# get params from descending ic orders
def get_params(data, t=5, best=0):
    df = data[data.t == t].sort_values('ic', ascending=False).iloc[best]
    df = df.loc[param_cols]
    rounds = int(df.rounds)
    params = pd.to_numeric(df.drop('rounds'))
    return params, rounds


def train_test_periods_msg(train_idx, test_idx, data):
    train = data.iloc[train_idx]
    train_dates = train.index.get_level_values('date')
    test = data.iloc[test_idx]
    test_dates = test.index.get_level_values('date')
    df = train.reset_index().append(test.reset_index())
    n = len(df)
    assert n== len(df.drop_duplicates())
    msg = f'Training: {train_dates.min().date()}-{train_dates.max().date()} '
    msg += f' ({train.groupby(level="ticker").size().value_counts().index[0]:,.0f} days) | '
    msg += f'Test: {test_dates.min().date()}-{test_dates.max().date()} '
    msg += f'({test.groupby(level="ticker").size().value_counts().index[0]:,.0f} days)'
    print(msg)
    return test_dates.min(), test_dates.max()
    

cpu_count = os.cpu_count()
print('# of cpus:', cpu_count)

base_params = dict(boosting_type='rf',
                   objective='regression',
                   bagging_freq=1,
                   n_jobs=cpu_count,
                   verbose=-1)

# of cpus: 2
time: 2.63 ms (started: 2021-05-02 10:10:42 +00:00)


In [11]:
daily_ic_avg = pd.read_csv(path_result / 'daily_ic_avg.csv')
daily_ic_avg.head()

Unnamed: 0,t,train_length,test_length,bagging_fraction,feature_fraction,min_data_in_leaf,rounds,ic
0,1,63,5,0.5,0.75,250,25,0.005485
1,1,63,5,0.5,0.75,250,50,0.004826
2,1,63,5,0.5,0.75,250,75,0.005358
3,1,63,5,0.5,0.75,250,100,0.004369
4,1,63,5,0.5,0.75,250,125,0.003739


time: 34.2 ms (started: 2021-05-02 10:13:03 +00:00)


In [12]:
num_models = 10
test_start = test_end = None

for lookahead in [1, 5, 10, 21]:
    if lookahead > 1: 
        continue
    print(f'\nLookahead: {lookahead:02}')
    data = (pd.read_csv(path_data / 'stooq_jp_equities.csv', parse_dates=['date'])
              .set_index(['ticker','date'])
              .sort_index(level=['ticker','date'])
              .loc[idx[:, '2017':], :]
              )
    labels = sorted(data.filter(like='fwd').columns)
    features = data.columns.difference(labels).tolist()
    label = f'fwd_ret_{lookahead:02}'
    #data = data.loc[:, features + [label]].dropna()
    data = data.loc[:, features + [label]] # keep rows of no label for trading purpose

    categoricals = ['year', 'weekday', 'month']
    for feature in categoricals:
        data[feature] = pd.factorize(data[feature], sort=True)[0]

    lgb_data = lgb.Dataset(data=data[features],
                           label=data[label],
                           categorical_feature=categoricals,
                           free_raw_data=False)
    
    for position in range(num_models):
        params, num_boost_round = get_params(daily_ic_avg,
                                             t=lookahead,
                                             best=position)
        params = params.to_dict()
        params['min_data_in_leaf'] = int(params['min_data_in_leaf'])
        train_length = int(params.pop('train_length'))
        test_length = int(params.pop('test_length'))
        params.update(base_params)

        print(f'Position: {position:02}')

        n_splits = int(val_length / test_length)
        cv = MultipleTimeSeriesCV(n_splits=n_splits,
                                  test_period_length=test_length,
                                  lookahead=lookahead,
                                  train_period_length=train_length)

        predictions = []
        start = time()
        for i, (train_idx, test_idx) in enumerate(cv.split(X=data), 1):
            if i > 1: # get latest period only
                break
            train_set = lgb_data.subset(used_indices=train_idx.tolist(),
                                        #params=params).construct()
                                        params=params)
    
            model = lgb.train(params=params,
                              train_set=train_set,
                              num_boost_round=num_boost_round,
                              categorical_feature=categoricals, 
                              verbose_eval=False)
        
            test_set = data.iloc[test_idx, :]
            y_test = test_set.loc[:, label].to_frame('y_test')
            y_pred = model.predict(test_set.loc[:, model.feature_name()])
            predictions.append(y_test.assign(prediction=y_pred))
            
            test_start_, test_end_ = train_test_periods_msg(train_idx, test_idx, data)
            if (test_start is None) or (test_start_ > test_start):
                test_start = test_start_
            if (test_end is None):
                test_end = test_end_
            elif test_end != test_end_:
                print('WARNING: Check date for prediction')
            
        if position == 0:
            test_predictions = (pd.concat(predictions)
                                .rename(columns={'prediction': position}))
        else:
            test_predictions[position] = pd.concat(predictions).prediction
        
    by_day = test_predictions.groupby(level='date')
    for position in range(num_models):
        if position == 0:
            ic_by_day = by_day.apply(lambda x: spearmanr(x.y_test, x[position])[0]).to_frame()
        else:
            ic_by_day[position] = by_day.apply(lambda x: spearmanr(x.y_test, x[position])[0])
    test_predictions = test_predictions.sort_index(level=['ticker','date'])
    test_predictions.to_csv(path_result / f'prediction_{lookahead:02}.csv')


Lookahead: 01
Position: 00
Training: 2019-05-29-2019-11-29  (126 days) | Test: 2019-12-02-2019-12-30 (21 days)
Position: 01
Training: 2019-05-29-2019-11-29  (126 days) | Test: 2019-12-02-2019-12-30 (21 days)
Position: 02
Training: 2019-05-29-2019-11-29  (126 days) | Test: 2019-12-02-2019-12-30 (21 days)
Position: 03
Training: 2019-05-29-2019-11-29  (126 days) | Test: 2019-12-02-2019-12-30 (21 days)
Position: 04
Training: 2019-05-29-2019-11-29  (126 days) | Test: 2019-12-02-2019-12-30 (21 days)
Position: 05
Training: 2019-05-29-2019-11-29  (126 days) | Test: 2019-12-02-2019-12-30 (21 days)
Position: 06
Training: 2019-05-29-2019-11-29  (126 days) | Test: 2019-12-02-2019-12-30 (21 days)
Position: 07
Training: 2019-05-29-2019-11-29  (126 days) | Test: 2019-12-02-2019-12-30 (21 days)
Position: 08
Training: 2019-05-29-2019-11-29  (126 days) | Test: 2019-12-02-2019-12-30 (21 days)
Position: 09
Training: 2019-05-29-2019-11-29  (126 days) | Test: 2019-12-02-2019-12-30 (21 days)
time: 45.8 s (s

In [13]:
test_predictions.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 19761 entries, ('1332.JP', Timestamp('2019-12-02 00:00:00')) to ('9990.JP', Timestamp('2019-12-30 00:00:00'))
Data columns (total 11 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   y_test  18820 non-null  float64
 1   0       19761 non-null  float64
 2   1       19761 non-null  float64
 3   2       19761 non-null  float64
 4   3       19761 non-null  float64
 5   4       19761 non-null  float64
 6   5       19761 non-null  float64
 7   6       19761 non-null  float64
 8   7       19761 non-null  float64
 9   8       19761 non-null  float64
 10  9       19761 non-null  float64
dtypes: float64(11)
memory usage: 1.8+ MB
time: 16.5 ms (started: 2021-05-02 10:13:50 +00:00)


In [14]:
test_predictions.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,y_test,0,1,2,3,4,5,6,7,8,9
ticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
9990.JP,2019-12-24,-0.009687,-0.006863,-0.006745,-0.006752,-0.006625,-0.008662,-0.006622,-0.008952,-0.009186,-0.009274,-0.011906
9990.JP,2019-12-25,0.001071,-0.006879,-0.00673,-0.00672,-0.006581,-0.001607,-0.006569,-0.001854,-0.002041,-0.001986,-0.001519
9990.JP,2019-12-26,0.0,-0.006435,-0.006531,-0.006573,-0.00645,-0.008612,-0.006482,-0.008913,-0.009159,-0.009233,-0.011906
9990.JP,2019-12-27,-0.003257,-0.008419,-0.008416,-0.008392,-0.008201,-0.007015,-0.008131,-0.007324,-0.007515,-0.007553,-0.009308
9990.JP,2019-12-30,,0.002966,0.003232,0.003242,0.003208,0.002296,0.003314,0.001988,0.001817,0.001899,0.003217


time: 28.2 ms (started: 2021-05-02 10:13:50 +00:00)


## Zipline Custom Bundle

In [15]:
load_extensions(default=True,
                extensions=[],
                strict=True,
                environ=None)

log_handler = StderrHandler(format_string='[{record.time:%Y-%m-%d %H:%M:%S.%f}]: ' +
                            '{record.level_name}: {record.func_name}: {record.message}',
                            level=INFO)
log_handler.push_application()
log = Logger('Algorithm')

time: 2.09 ms (started: 2021-05-02 10:13:50 +00:00)


In [16]:
from importlib import reload  
import stooq_preprocessing

time: 721 µs (started: 2021-05-02 10:13:50 +00:00)


In [17]:
import stooq_preprocessing
stooq_preprocessing.load(test_start, test_end) 

# take time for large universe
!zipline ingest -b stooq

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 73815 entries, ('1301.JP', Timestamp('2019-12-02 00:00:00+0000', tz='UTC')) to ('9997.JP', Timestamp('2019-12-30 00:00:00+0000', tz='UTC'))
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   open    73815 non-null  float64
 1   high    73815 non-null  float64
 2   low     73815 non-null  float64
 3   close   73815 non-null  float64
 4   volume  73815 non-null  float64
dtypes: float64(5)
memory usage: 3.2+ MB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3515 entries, 0 to 3514
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   sid     3515 non-null   int64 
 1   ticker  3515 non-null   object
 2   name    3515 non-null   object
dtypes: int64(1), object(2)
memory usage: 82.5+ KB
None
[2021-05-02 10:14:06.232567] INFO: zipline.data.bundles.core: Ingesting stooq.
[?25lMerging daily equity files:  [################

In [18]:
# run this line on diffeent cell from inges command
bundle_data = bundles.load('stooq')

time: 420 ms (started: 2021-05-02 11:06:08 +00:00)


In [19]:
### ML Predictions
def load_predictions(bundle, positions=5):
    """
    positions: num of top positions to use
    """
    t = 1
    df = (pd.read_csv(path_result / f'prediction_{t:02}.csv', parse_dates=['date'])
            .set_index(['ticker','date'])
            .sort_index(level=['ticker','date'])      
         )
    df = df[~df.index.duplicated()].drop('y_test', axis=1)
    predictions = df.iloc[:, :positions].mean(1).to_frame('predictions')

    tickers = predictions.index.get_level_values('ticker').unique().tolist()

    assets = bundle.asset_finder.lookup_symbols(tickers, as_of_date=None)
    predicted_sids = pd.Int64Index([asset.sid for asset in assets])
    ticker_map = dict(zip(tickers, predicted_sids))

    return (predictions
            .unstack('ticker')
            .rename(columns=ticker_map)
            .predictions
            .tz_localize('UTC')), assets

predictions, assets = load_predictions(bundle_data)

time: 3.13 s (started: 2021-05-02 11:06:15 +00:00)


In [20]:
predictions

ticker,1,5,6,14,16,37,38,39,40,47,...,3482,3483,3484,3487,3490,3503,3504,3506,3507,3508
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-12-02 00:00:00+00:00,0.002531,0.003244,0.002523,0.002487,0.003229,0.003231,0.003392,0.002207,0.003239,0.003192,...,0.002534,0.003224,0.003395,0.002308,0.003241,0.003212,0.003068,0.006115,0.002507,0.003001
2019-12-03 00:00:00+00:00,-0.001535,-0.006883,-0.007151,-0.007114,-0.006975,-0.007017,-0.006981,-0.007151,-0.006977,-0.007159,...,-0.006994,-0.006864,-0.006836,-0.006949,-0.006942,-0.007158,-0.007001,-0.006593,-0.007152,-0.007152
2019-12-04 00:00:00+00:00,-0.005376,-0.00576,-0.005939,-0.005839,-0.005836,-0.005821,-0.00572,-0.005668,-0.005803,-0.005913,...,-0.005742,-0.005705,-0.005734,-0.005989,-0.005631,0.000345,-0.005654,-0.005463,-0.005948,-0.005804
2019-12-05 00:00:00+00:00,-0.006425,-0.006966,-0.007053,-0.007108,-0.006953,-0.007121,-0.006941,-0.007146,-0.007118,-0.007093,...,-0.00695,-0.006965,-0.006971,-0.007037,-0.006947,-0.006468,-0.007153,-0.006808,-0.007144,-0.006961
2019-12-06 00:00:00+00:00,-0.011369,-0.00786,-0.008325,-0.008276,-0.008285,-0.008354,-0.008745,-0.008138,-0.00828,-0.008286,...,-0.00787,-0.007885,-0.007968,-0.007944,-0.00845,-0.009227,-0.008257,-0.00869,-0.008285,-0.007909
2019-12-09 00:00:00+00:00,0.003163,0.003227,0.00253,0.002393,0.002506,0.002499,0.002529,0.002494,0.002502,0.002484,...,0.003199,0.003084,0.003245,0.003118,0.003081,0.003242,0.002823,0.006403,0.002496,0.003126
2019-12-10 00:00:00+00:00,-0.006914,-0.006977,-0.007044,-0.007149,-0.007114,-0.007136,-0.007085,-0.007019,-0.007128,-0.007125,...,-0.007043,-0.00697,-0.007071,-0.007139,-0.006981,-0.00687,-0.007161,-0.006618,-0.00704,-0.006936
2019-12-11 00:00:00+00:00,-0.005931,-0.00567,-0.005635,-0.005723,-0.005795,-0.005712,-0.005797,-0.005938,-0.005829,-0.00585,...,-0.005749,-0.005479,-0.005621,-0.00591,-0.005484,-0.005785,-0.00579,-0.005472,-0.005726,-0.005608
2019-12-12 00:00:00+00:00,0.000949,-0.006958,-0.006947,-0.00715,-0.007149,-0.006925,-0.007124,-0.007015,-0.007148,-0.007148,...,-0.007041,-0.006878,-0.006918,-0.007119,-0.006976,-0.007142,-0.007138,-0.001815,-0.00689,-0.007145
2019-12-13 00:00:00+00:00,-0.008024,-0.008118,-0.008108,-0.008113,-0.00813,-0.008275,-0.008188,-0.00832,-0.00832,-0.008332,...,-0.007898,-0.008045,-0.008104,-0.0081,-0.008118,-0.008264,-0.008342,-0.009351,-0.008142,-0.007859


time: 46.5 ms (started: 2021-05-02 11:06:20 +00:00)


In [78]:
# Algo parmas
# The strategy will hold the 25 stocks with the highest positive and lowest negative predictions each as long as there are at least 15 on each side.
N_LONGS = 25
N_SHORTS = 25
#MIN_POSITIONS = 15
MIN_POSITIONS = 1

time: 476 µs (started: 2021-05-02 11:22:39 +00:00)


In [79]:
### Define Pipeline Loaders

class SignalData(DataSet):
    predictions = Column(dtype=float)
    domain = JP_EQUITIES
    
signal_loader = {SignalData.predictions:
                 DataFrameLoader(SignalData.predictions, predictions)}

time: 6.66 ms (started: 2021-05-02 11:22:39 +00:00)


In [80]:
class MLSignal(CustomFactor):
    """Converting signals to Factor
        so we can rank and filter in Pipeline"""
    inputs = [SignalData.predictions]
    window_length = 1

    def compute(self, today, assets, out, preds):
        out[:] = preds

        
def compute_signals():
    signals = MLSignal()
    predictions = SignalData.predictions.latest
    return Pipeline(columns={
        'longs': signals.top(N_LONGS, mask=signals > 0),
        'shorts': signals.bottom(N_SHORTS, mask=signals < 0)},
        screen=StaticAssets(assets)
    )


def initialize(context):
    """
    Called once at the start of the algorithm.
    """
    context.n_longs = N_LONGS
    context.n_shorts = N_SHORTS
    context.min_positions = MIN_POSITIONS
    context.universe = assets
    context.trades = pd.Series()

    set_slippage(slippage.FixedSlippage(spread=0.00))
    set_commission(commission.PerShare(cost=0.05, min_trade_cost=1))

    schedule_function(rebalance,
                      date_rules.every_day(),
                      time_rules.market_open(hours=1, minutes=30))

    schedule_function(record_vars,
                      date_rules.every_day(),
                      time_rules.market_close())

    pipeline = compute_signals()
    attach_pipeline(pipeline, 'signals')
    
    
def before_trading_start(context, data):
    """
    Called every day before market open.
    """
    output = pipeline_output('signals')
    context.trades = (output['longs'].astype(int)
                      .append(output['shorts'].astype(int).mul(-1))
                      .reset_index()
                      .drop_duplicates()
                      .set_index('index')
                      .squeeze())
    
    
def rebalance(context, data):
    """
    Execute orders according to schedule_function() date & time rules.
    """
    trades = defaultdict(list)
    for symbol, open_orders in get_open_orders().items():
        for open_order in open_orders:
            cancel_order(open_order)

    positions = context.portfolio.positions
    s = (pd.Series({s: v.amount*v.last_sale_price for s,
                    v in positions.items()})
         .sort_values(ascending=False))
    
    for stock, trade in context.trades.items():
        if trade == 0:
            order_target(stock, target=0)
        else:
            trades[trade].append(stock)

    context.longs, context.shorts = len(trades[1]), len(trades[-1])
    if context.longs > context.min_positions and context.shorts > context.min_positions:
        for stock in trades[-1]:
            order_target_percent(stock, -1 / context.shorts)
        for stock in trades[1]:
            order_target_percent(stock, 1 / context.longs)
    else:
        for stock in trades[-1] + trades[1]:
            if stock in positions:
                order_target(stock, 0)
                

def record_vars(context, data):
    """
    Plot variables at the end of each day.
    """
    record(leverage=context.account.leverage,
           longs=context.longs,
           shorts=context.shorts)

time: 2.6 ms (started: 2021-05-02 11:22:41 +00:00)


In [81]:
dates = predictions.index.get_level_values('date')
#start_date = dates.min() + pd.DateOffset(day=1)
start_date = dates.min()
end_date = dates.max()
print('Start:\t{}\nEnd:\t{}'.format(start_date.date(), end_date.date()))

Start:	2019-12-02
End:	2019-12-30
time: 893 µs (started: 2021-05-02 11:22:43 +00:00)


In [82]:
start = time()
results = run_algorithm(start=start_date,
                        end=end_date,
                        initialize=initialize,
                        before_trading_start=before_trading_start,
                        capital_base=1e6,
                        data_frequency='daily',
                        bundle='stooq',
                        custom_loader=signal_loader)

print('Duration: {:.2f}s'.format(time() - start))

[2021-05-02 11:22:45.172570]: INFO: handle_simulation_end: Simulated 20 trading days
first open: 2019-12-02 14:31:00+00:00
last close: 2019-12-30 21:00:00+00:00


Duration: 0.97s
time: 969 ms (started: 2021-05-02 11:22:44 +00:00)


In [86]:
long_cnt = results.longs_count.sum()
short_cnt = results.shorts_count.sum()
if long_cnt + short_cnt > 0:
    print(f'# of long: {long_cnt}, short: {short_cnt}')
    returns, positions, transactions = pf.utils.extract_rets_pos_txn_from_zipline(results)

# of long: 42, short: 201
time: 124 ms (started: 2021-05-02 11:24:48 +00:00)


In [87]:
transactions

Unnamed: 0,sid,symbol,price,order_id,amount,commission,dt,txn_dollars
2019-12-04 21:00:00+00:00,Equity(196 [2146.JP]),Equity(196 [2146.JP]),3225.00,91a0768998eb4bf583ce34ee5b6c2e5b,-12,,2019-12-04 21:00:00+00:00,38700.00
2019-12-04 21:00:00+00:00,Equity(210 [2168.JP]),Equity(210 [2168.JP]),1721.17,08b2782aea4746bd91f1c77e7f62ae00,-23,,2019-12-04 21:00:00+00:00,39586.91
2019-12-04 21:00:00+00:00,Equity(221 [2183.JP]),Equity(221 [2183.JP]),1170.94,6bc78dfc55624605a3edda8ca15fd6e8,-34,,2019-12-04 21:00:00+00:00,39811.96
2019-12-04 21:00:00+00:00,Equity(317 [2413.JP]),Equity(317 [2413.JP]),3011.37,d257aed10b624fd889261139fcd5cb5b,-13,,2019-12-04 21:00:00+00:00,39147.81
2019-12-04 21:00:00+00:00,Equity(354 [2491.JP]),Equity(354 [2491.JP]),2052.80,6d13f504442e4d8588f01487fbe7c41a,-19,,2019-12-04 21:00:00+00:00,39003.20
...,...,...,...,...,...,...,...,...
2019-12-23 21:00:00+00:00,Equity(2668 [7821.JP]),Equity(2668 [7821.JP]),2139.77,8c5693e9cfdb4346b47a3a0d2f5ac566,18,,2019-12-23 21:00:00+00:00,-38515.86
2019-12-23 21:00:00+00:00,Equity(3037 [8698.JP]),Equity(3037 [8698.JP]),266.16,362de2f266654f46b69f9694038249cf,145,,2019-12-23 21:00:00+00:00,-38593.20
2019-12-23 21:00:00+00:00,Equity(3145 [9037.JP]),Equity(3145 [9037.JP]),3486.35,1b9a9594406047089a2b223412612e1a,11,,2019-12-23 21:00:00+00:00,-38349.85
2019-12-23 21:00:00+00:00,Equity(3305 [9509.JP]),Equity(3305 [9509.JP]),519.56,434f48284a6245deb506e84a9743711e,74,,2019-12-23 21:00:00+00:00,-38447.44


time: 26.7 ms (started: 2021-05-02 11:24:57 +00:00)
