In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

from timelab import MultiColumn, from_data, from_xy_data

In [3]:
df = pd.read_pickle('tests/test_data/multi_asset.pkl')
df = df[['LNC', 'MAS', 'CSX']]

In [4]:
panel = from_data(df, 10, 5, gap=0)

100%|██████████| 86/86 [00:00<00:00, 32989.77it/s]


In [5]:
# xdata preprocessing

xdata = df.pct_change().dropna()
xdata = MultiColumn(xdata)
xdata.head()

Unnamed: 0_level_0,LNC,LNC,LNC,LNC,LNC,MAS,MAS,MAS,MAS,MAS,CSX,CSX,CSX,CSX,CSX
Unnamed: 0_level_1,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
2005-12-22,0.027751,1.001588e-07,0.019908,-0.004281,-0.499795,0.00495,0.007526,0.000331,-0.008211,0.139214,0.021713,0.003367,0.018466,0.00757,-0.360517
2005-12-23,-0.003166,0.001855704,0.002065,0.00729,-0.462529,-0.006896,-0.017862,-0.011932,-0.00596,0.325702,-0.013108,0.015595,-0.009962,0.009095,-0.320835
2005-12-27,0.011583,0.002963881,0.000562,-0.002226,0.584069,-0.001323,0.001984,0.006373,0.001665,-0.230328,0.035419,0.000972,0.011471,-0.01058,0.35124
2005-12-28,-0.001478,0.00461623,0.006738,0.001859,-0.541975,-0.003973,-0.00297,-0.003,-0.002328,0.259354,-0.015549,-0.010291,-0.000995,0.00297,-0.270459
2005-12-29,-0.003884,-0.005882202,-0.008181,-0.009467,0.246021,-0.002659,-0.000331,-0.001337,-0.000667,-0.240734,-0.000593,0.01236,0.00717,0.008687,0.277498


In [6]:
# ydata preprocessing

ydata = xdata.copy()

ydata = ydata.channel_apply(lambda x: x['High'] > 0, 'Raise')
ydata = ydata.loc[xdata.index]
ydata = MultiColumn(ydata)
ydata.head()

Unnamed: 0_level_0,LNC,MAS,CSX
Unnamed: 0_level_1,Raise,Raise,Raise
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2005-12-22,True,True,True
2005-12-23,True,False,True
2005-12-27,True,True,True
2005-12-28,True,False,False
2005-12-29,False,False,True


In [44]:
lookback = 8
horizon = 1
gap = 0

In [45]:
panel = from_xy_data(xdata, ydata, lookback=lookback, horizon=horizon, gap=gap)

100%|██████████| 91/91 [00:00<00:00, 63286.63it/s]


In [46]:
panels = panel.split_units(split_yunits=True)

100%|██████████| 91/91 [00:00<00:00, 19783.43it/s]
100%|██████████| 91/91 [00:00<00:00, 70984.13it/s]
100%|██████████| 91/91 [00:00<00:00, 52660.27it/s]
100%|██████████| 3/3 [00:00<00:00, 31.17it/s]


In [10]:
stock_panel = panels[0]

In [11]:
x = stock_panel.xflat()
y = stock_panel.yflat()

x_train = x.iloc[:50]
y_train = y.iloc[:50][0] # first column

x_test = x.iloc[50:]
y_test = y.iloc[50:][0] # first column

# A split by date here (e.g. y_train = y.loc[:'2006']) could cause problems, since predictions occur in the future.
# A solution would be to map y indexes with x indexes, but we lose the prediction date information

In [12]:
# Best solution is using time panel split:
train, test = stock_panel.split_train_test(split_size=0.2)

In [13]:
x_train = train.xflat()
y_train = train.yflat()

x_test = test.xflat()
y_test = test.yflat()

In [14]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score

In [22]:
rf = RandomForestClassifier(class_weight={1:0.5, 0:10})
rf.fit(x_train, y_train)
rf.predict_proba(x_test)

  rf.fit(x_train, y_train)


array([[0.11, 0.89],
       [0.27, 0.73],
       [0.39, 0.61],
       [0.17, 0.83],
       [0.48, 0.52],
       [0.49, 0.51],
       [0.76, 0.24],
       [0.68, 0.32],
       [0.87, 0.13],
       [0.51, 0.49],
       [0.55, 0.45],
       [0.67, 0.33],
       [0.7 , 0.3 ],
       [0.45, 0.55],
       [0.34, 0.66],
       [0.46, 0.54],
       [0.65, 0.35],
       [0.62, 0.38]])

In [80]:
# pred_data = pd.DataFrame(index=x_test.index,
#                          data={'ytrue': y_test,
#                                'ypred': preds}).astype(float)

# pred_data['pred'] = pred_data.ypred > 0.5
# roc_auc_score(pred_data.ytrue, pred_data.ypred)
# print(classification_report(pred_data.ytrue, pred_data.pred))

In [47]:
panel.xframe(0)

Unnamed: 0_level_0,LNC,LNC,LNC,LNC,LNC,MAS,MAS,MAS,MAS,MAS,CSX,CSX,CSX,CSX,CSX
Unnamed: 0_level_1,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume
2005-12-22,0.027751,1.001588e-07,0.019908,-0.004281,-0.499795,0.00495,0.007526,0.000331,-0.008211,0.139214,0.021713,0.003367,0.018466,0.00757,-0.360517
2005-12-23,-0.003166,0.001855704,0.002065,0.00729,-0.462529,-0.006896,-0.017862,-0.011932,-0.00596,0.325702,-0.013108,0.015595,-0.009962,0.009095,-0.320835
2005-12-27,0.011583,0.002963881,0.000562,-0.002226,0.584069,-0.001323,0.001984,0.006373,0.001665,-0.230328,0.035419,0.000972,0.011471,-0.01058,0.35124
2005-12-28,-0.001478,0.00461623,0.006738,0.001859,-0.541975,-0.003973,-0.00297,-0.003,-0.002328,0.259354,-0.015549,-0.010291,-0.000995,0.00297,-0.270459
2005-12-29,-0.003884,-0.005882202,-0.008181,-0.009467,0.246021,-0.002659,-0.000331,-0.001337,-0.000667,-0.240734,-0.000593,0.01236,0.00717,0.008687,0.277498
2005-12-30,-0.012811,-0.01682683,-0.013123,-0.006184,0.1161,-0.000667,0.002649,0.003348,0.007004,0.086383,0.004544,-0.011046,0.000791,-0.006263,-0.351358
2006-01-03,0.001881,0.007711192,-0.002849,0.009429,0.251136,0.020014,0.030714,0.007675,0.033787,1.17243,0.001376,-0.000784,-0.022921,-0.016151,2.48624
2006-01-04,0.009011,0.009331692,0.023623,0.006912,0.375622,0.01764,-0.000968,0.016781,-0.009029,-0.343349,-0.018068,-0.016081,0.005662,-0.002603,-0.041152


In [48]:
from timelab.utils import smash_array

In [49]:
# Pipeline for one unit
panel = panels[0]

# Remove unit dimension - it is already being processed separately.
X = smash_array(panel.X)

# y is 1D so flatten is enough
y = panel.y.flatten()

In [53]:
X.shape

(91, 8, 5)

In [68]:
inputs = tf.keras.Input(shape=(X.shape[1:]))

# Convoluting on the time dimension
x = tf.keras.layers.SeparableConv1D(10, 8)(inputs)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(10, activation=tf.nn.relu)(x)
x = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)(x)
outputs = tf.keras.layers.Reshape(train.y.shape[1:])(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

model.compile(optimizer="Adam", loss="binary_crossentropy", metrics=["binary_crossentropy", 'AUC'])
model.summary()

Model: "model_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_15 (InputLayer)        [(None, 8, 5)]            0         
_________________________________________________________________
separable_conv1d_1 (Separabl (None, 1, 10)             100       
_________________________________________________________________
flatten_12 (Flatten)         (None, 10)                0         
_________________________________________________________________
dense_24 (Dense)             (None, 10)                110       
_________________________________________________________________
dense_25 (Dense)             (None, 1)                 11        
_________________________________________________________________
reshape_12 (Reshape)         (None, 1, 1, 1)           0         
Total params: 221
Trainable params: 221
Non-trainable params: 0
____________________________________________________________

In [21]:



x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(10, activation=tf.nn.relu)(x)
x = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)(x)
outputs = tf.keras.layers.Reshape(train.y.shape[1:])(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

model.compile(optimizer="Adam", loss="binary_crossentropy", metrics=["binary_crossentropy", 'AUC'])
model.summary()

TimePanel
size                              94
lookback                           5
horizon                            1
gap                                0
num_xunits                         1
num_yunits                         1
num_xchannels                      5
num_ychannels                      1
start            2005-12-22 00:00:00
end              2006-05-17 00:00:00
Name: TimePanel, dtype: object
TimePanel
size                              94
lookback                           5
horizon                            1
gap                                0
num_xunits                         1
num_yunits                         1
num_xchannels                      5
num_ychannels                      1
start            2005-12-22 00:00:00
end              2006-05-17 00:00:00
Name: TimePanel, dtype: object
TimePanel
size                              94
lookback                           5
horizon                            1
gap                                0
num_xunits          

[<TimePanel, size 94>, <TimePanel, size 94>, <TimePanel, size 94>]

In [None]:
import plotly.graph_objects as go
import pandas as pd

fig = go.Figure(data=[go.Candlestick(x=data.index,
                open=data['Open'],
                high=data['High'],
                low=data['Low'],
                close=data['Close'])
                ])


fig.update_layout(xaxis_rangeslider_visible=False)

shapes = []
for index in trues.index:
    shapes.append(dict(type='line', yref='paper', y0=0, y1=1, xref='x',
                 x0=index, x1=index))

fig.update_layout(shapes=shapes)

fig.show()

In [None]:
train, test = stock_panel.split_train_test(0.2)
print(train.X.shape)
print(train.y.shape)

In [None]:
inputs = tf.keras.Input(shape=(train.X.shape[1:]))

x = tf.keras.layers.Conv1D(2, 2)(inputs)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(10, activation=tf.nn.relu)(x)
x = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)(x)
outputs = tf.keras.layers.Reshape(train.y.shape[1:])(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

model.compile(optimizer="Adam", loss="binary_crossentropy", metrics=["binary_crossentropy", 'AUC'])
model.summary()

In [None]:
H = model.fit(train.X, train.y, epochs=120, validation_data=(test.X, test.y), verbose=0)

In [None]:
losses = pd.DataFrame({'train_loss': H.history['loss'], 'val_loss': H.history['val_loss']})
losses.plot()

In [None]:
aucs = pd.DataFrame({'train_auc': H.history['auc'], 'val_auc': H.history['val_auc']})
aucs.plot()

In [None]:
preds = model.predict(test.X)

pred_data = pd.DataFrame(index=test.yindex, 
                         data={'ytrue': test.y.flatten(), 
                               'ypred':preds.flatten()}).astype(float)

pred_data['pred'] = pred_data.ypred > 0.9

In [None]:
pred_data.ypred.plot(kind='scatter')

In [None]:
thresh = 0.9
pred_data[pred_data.ypred > thresh]

In [None]:
print(classification_report(pred_data.ytrue, pred_data.ypred > thresh))

In [None]:
# Option to add multilevel - one per column instead of one above all
# Label: if (tomorrow high - today close) > 0

In [None]:
# xdata.loc[:, (slice(None), ['Volume'])] = xdata.loc[:, (slice(None), ['Volume'])].shift()

In [None]:
import plotly.graph_objects as go

def pair_plot(pair, xunit, yunit, xchannels=None, ychannels=None):
    
    
    x = pair.xframe
    y = pair.yframe
    
    x = select(x, units=[xunit], channels=xchannels)
    y = select(y, units=[yunit], channels=ychannels)

    fig = go.Figure()

    for _, channel in enumerate(xchannels):
        c = random.choice(cmap1)
        fig.add_trace(go.Scatter(x=x.index, y=x[channel], name="x_" + channel,
                                 line=dict(width=2, color=c)))
        
    for _, channel in enumerate(ychannels):
        c = random.choice(cmap1)
        fig.add_trace(go.Scatter(x=y.index, y=y[channel], name="y_" + channel,
                                 line=dict(width=2, dash='dot', color=c)))

    fig.update_layout(title='', xaxis_title='Timestamps', yaxis_title='Values')
    fig.show()

In [None]:
pair_plot(panel.pairs[0], 'MSFT', 'MSFT')

In [None]:
xdata['QCOM']['High'].plot()