<img src='http://hilpisch.com/taim_logo.png' width="350px" align="right">

# AI-First Finance

**Time Series Features with `tsfresh`**

Dr Yves J Hilpisch | The AI Machine

http://aimachine.io | http://twitter.com/dyjh

## Imports

In [None]:
!git clone https://github.com/tpq-classes/ai_in_finance.git
import sys
sys.path.append('ai_in_finance')


In [None]:
import tpqoa
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
plt.style.use('seaborn-v0_8')

In [None]:
import warnings
# Suppress every warning category for the remainder of the session so that
# library warnings do not clutter the cell output.
warnings.filterwarnings('ignore')

## The Data

### EOD Data

In [None]:
# Remote CSV file with the end-of-day data.
fl = 'http://hilpisch.com/tr_eikon_eod_data.csv'
# The first column is used as the index and parsed as dates.
raw = pd.read_csv(fl, index_col=0, parse_dates=True)
# Discard every row that contains at least one missing value.
raw = raw.dropna()
raw.head()

In [None]:
# Work with the 'EUR=' column only, held as a one-column DataFrame.
data_ = raw['EUR='].to_frame()

### Intraday Data

### Log Returns

In [None]:
data_['r'] = np.log(data_ / data_.shift())
data_.dropna(inplace=True)

In [None]:
data_.info()

## Time Series Features

### Extracting Features

`pip install tsfresh`

In [None]:
# !pip install tsfresh

In [None]:
from tsfresh import extract_features
# from tsfresh import extract_relevant_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.utilities.dataframe_functions import make_forecasting_frame

In [None]:
# Number of most recent rows to work with. With n = 0 the slice `-n:` is
# the same as `0:`, so every row is selected.
n = 0

In [None]:
# One subplot per column of the selected rows.
data_.iloc[-n:].plot(subplots=True, figsize=(10, 8));

In [None]:
# make_forecasting_frame?

In [None]:
# Build the rolled input frame (`basis`) and the target series (`labels`)
# from the column at position 1 of `data_`, i.e. the log returns 'r'.
# NOTE(review): max_timeshift=5 presumably caps each rolling window at five
# observations -- confirm against the tsfresh documentation.
basis, labels = make_forecasting_frame(
                            data_.iloc[-n:, 1],
                            kind='return',
                            max_timeshift=5,
                            rolling_direction=1)

In [None]:
# Peek at the rolled frame in time order.
basis.sort_values('time').head(10)

In [None]:
# Peek at the target values.
labels.head()

In [None]:
%%time
# Compute the tsfresh feature matrix from `basis`: rows are grouped by the
# 'id' column, ordered by 'time', and the series values are read from
# 'value'. `impute` is passed as the function applied to the result.
data = extract_features(basis,
                        column_id='id',
                        column_sort='time',
                        column_value='value',
                        impute_function=impute,
                        show_warnings=False)

In [None]:
# Overview of the extracted feature columns.
data.info()

In [None]:
index = [c[1] for c in data.index]
index = pd.DatetimeIndex(index)
index

In [None]:
data.index = index

In [None]:
lags = 10
for lag in range(1, lags + 1):
    col = f'd_lag_{lag}'
    data[col] = np.sign(labels).shift(lag)

In [None]:
data.iloc[:7].T.head(10)

### Dropping Constant Features

In [None]:
data.shape

In [None]:
data.apply(pd.Series.nunique).head(7)

In [None]:
data = data.loc[:, data.apply(pd.Series.nunique) != 1]

In [None]:
data.shape

### Adding Last Value

In [None]:
# Label shifted down by one row, added as a further feature column.
data['feature_last_value'] = labels.shift(1)

In [None]:
# Snapshot of the column names at this point; it is used further down as
# the list of feature columns.
cols = data.columns

### Dropping First Line

In [None]:
data = data.iloc[1:, ]
data['r'] = labels[1:]
data['d'] = np.where(labels[1:] > 0, 1, 0)

In [None]:
data.shape

## Normalization

In [None]:
split = int(len(data) * 0.6)
val_size = int(split * 0.15)
train = data.iloc[:split]

In [None]:
data[cols] = (data[cols] - train[cols].mean()) / train[cols].std()

In [None]:
# impute?

In [None]:
data = impute(data)

## Train-Validation-Test Split

In [None]:
# Chronological three-way split by row position:
#   train: [0, split - val_size)
#   val:   [split - val_size, split)
#   test:  [split, end)
# Explicit bounds are used instead of negative slices so that val_size == 0
# gives an empty validation set and a full training set (a `-0` slice would
# do the opposite).
train = data.iloc[:split - val_size]
val = data.iloc[split - val_size:split]
# Copy, because new columns are added to `test` later on.
test = data.iloc[split:].copy()

## Selection

In [None]:
from sklearn.feature_selection import SelectKBest, f_classif

In [None]:
selector = SelectKBest(f_classif, k=25)

In [None]:
cols_sel = selector.fit(train[cols], train['d']).get_support(indices=True)

In [None]:
cols_sel

In [None]:
csel = data.columns[cols_sel]
csel

## MLP Classifier

In [None]:
# Columns fed to the classifier: the complete feature column list.
c = cols

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier

In [None]:
# Dense network with three hidden layers of 128 units each. Samples are not
# shuffled between iterations and the random state is fixed.
model = MLPClassifier(
    hidden_layer_sizes=(128, 128, 128),
    learning_rate_init=0.02,
    max_iter=1000,
    shuffle=False,
    random_state=100,
)

In [None]:
%time model.fit(train[c], train['d'], verbose=0)

In [None]:
pred = model(tf.convert_to_tensor(test[c], dtype=tf.float32), training=False).numpy()

In [None]:
accuracy_score(test['d'], pred)

In [None]:
test['p'] = model(tf.convert_to_tensor(test[c], dtype=tf.float32), training=False).numpy()
test['p'] = np.where(test['p'] > 0, 1, -1)

In [None]:
test['s'] = test['p'] * test['r']

In [None]:
test[['r', 's']].sum().apply(np.exp)

In [None]:
sum(test['p'].diff() != 0)

In [None]:
test['p'].value_counts()

In [None]:
test[['r', 's']].cumsum().apply(np.exp).plot(figsize=(10, 6));

## Keras

In [None]:
import os
# Set TensorFlow's C++ log level through the environment; this happens
# before TensorFlow is imported in the following cell.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [None]:
import tensorflow as tf
from keras.models import Sequential
from keras.regularizers import l1, l2
from keras.layers import Dense, Dropout
from keras.callbacks import ModelCheckpoint

In [None]:
# Fix the NumPy and TensorFlow random seeds for repeatable runs.
np.random.seed(100)
tf.random.set_seed(100)

In [None]:
# Columns fed to the Keras model: the complete feature column list.
c = cols

In [None]:
# Binary classifier: two hidden ReLU layers of 128 units with L2 weight
# regularization, each followed by 30% dropout, and a single sigmoid output.
model = Sequential()

# Input width equals the number of feature columns in `c`.
model.add(Dense(128, activation='relu',
                kernel_regularizer=l2(0.001),
                input_shape=(len(c),)))
model.add(Dropout(0.3, seed=100))
model.add(Dense(128, activation='relu',
                kernel_regularizer=l2(0.001)
               ))
model.add(Dropout(0.3, seed=100))
# One sigmoid unit, matching the binary cross-entropy loss below.
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# model.summary()

In [None]:
# Save the weights whenever the validation accuracy improves. The check runs
# at the end of every epoch ('epoch'), because `val_accuracy` is only
# reported then; an integer save_freq would count batches instead.
callbacks = [ModelCheckpoint(filepath='../../../data/.weights.h5',
                             monitor='val_accuracy',
                             verbose=0,
                             save_best_only=True,
                             save_weights_only=True,
                             mode='auto',
                             save_freq='epoch')]

In [None]:
%%time
# Train on the training rows and monitor the validation rows; the samples
# are kept in their original order (shuffle=False) and progress output is off.
model.fit(train[c], train['d'],
          epochs=250,
          batch_size=32,
          verbose=False,
          validation_data=(val[c], val['d']),
          shuffle=False,
          callbacks=callbacks)

In [None]:
# Per-epoch training history as a DataFrame (one column per recorded metric).
res = pd.DataFrame(model.history.history)

In [None]:
res.tail(3)

In [None]:
# Training vs. validation accuracy across epochs.
res[['accuracy', 'val_accuracy']].plot(figsize=(10, 6), style=['--']);

In [None]:
# model.load_weights('../../../data/.weights.h5')

In [None]:
# Loss and accuracy (the compiled metric) on the test rows.
model.evaluate(test[c], test['d'])

In [None]:
# First six raw model outputs for the test rows, computed in inference mode.
model(tf.convert_to_tensor(test[c], dtype=tf.float32), training=False).numpy()[:6]

In [None]:
test['p'] = model(tf.convert_to_tensor(test[c], dtype=tf.float32), training=False).numpy()
test['p'] = np.where(test['p'] > 0.5, 1, -1)

In [None]:
test['s'] = test['p'] * test['r']

In [None]:
test[['r', 's']].sum().apply(np.exp)

In [None]:
sum(test['p'].diff() != 0)

In [None]:
test['p'].value_counts()

In [None]:
test[['r', 's']].cumsum().apply(np.exp).plot(figsize=(10, 6));

<img src='http://hilpisch.com/taim_logo.png' width="350px" align="right">