# Running the strategy Backtests

In [None]:
# Backtest simple_strategy_2 over four FX pairs; this variant passes no --ml_log flag.
! python /home/jovyan/work/q_pack/q_run/run_BT.py \
--strat_name=simple_strategy_2 \
--mode=backtest \
--tickers=EUR_USD,GBP_USD,USD_JPY,NZD_USD

In [1]:
# Backtest simple_strategy_2 on EUR_USD and GBP_USD with --ml_log=True.
# The recorded output of this run reports the log being saved to the Minio
# bucket model-support-files as 1_ml_log.csv (Run ID 1).
! python /home/jovyan/work/q_pack/q_run/run_BT.py \
--strat_name=simple_strategy_2 \
--mode=backtest \
--tickers=EUR_USD,GBP_USD \
--ml_log=True

Strategy run finished with Run ID: 1
ML Log Saved in Minio Bucket: model-support-files as 1_ml_log.csv
Profit ... or Loss: 32.62


In [2]:
import os
import warnings
import sys
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
import mlflow
import mlflow.sklearn
import mlflow.pyfunc
import io
import boto3

# Preprocessing the Log file
The log file generated by the backtest is in a simple form that can be preprocessed into a format suitable for feeding into our ML model.

In [3]:
# Connect to the Minio object store through its S3-compatible API.
# Credentials are taken from the standard AWS environment variables when they
# are set; the literals are only a fallback for the local docker stack, so real
# secrets never have to be written into the notebook.
s3 = boto3.client(
    's3',
    endpoint_url="http://minio-image:9000",
    aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID", "minio-image"),
    aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY", "minio-image-pass"),
)
# The 'Body' of the get_object response is a file-like stream that pandas reads directly.
data = pd.read_csv(s3.get_object(Bucket="model-support-files", Key='1_ml_log.csv')['Body'],sep=',')
data.head(10)

Unnamed: 0,security,datetime,close,RSI,STOCHASTIC,ATR
0,EUR_USD,2010-12-30 22:00:00,1.32855,,,
1,EUR_USD,2010-12-31 22:00:00,1.33815,,,
2,EUR_USD,2011-01-01 22:00:00,1.3328,,,
3,EUR_USD,2011-01-02 22:00:00,1.32512,,,
4,EUR_USD,2011-01-03 22:00:00,1.32919,,,
5,EUR_USD,2011-01-04 22:00:00,1.31258,,,0.007796
6,EUR_USD,2011-01-05 22:00:00,1.29978,,,0.007135
7,EUR_USD,2011-01-06 22:00:00,1.29057,,,0.006176
8,EUR_USD,2011-01-08 22:00:00,1.28607,,,0.005193
9,EUR_USD,2011-01-09 22:00:00,1.28742,,,0.006228


In [4]:
from ml_pack.preprocessing.ml_preprocessing import ml_preprocessing
# Preprocess the raw backtest log. fwd_returns=5 is presumably the horizon of
# the forward-return target column — confirm in ml_preprocessing.
# The call returns the name of the processed file (shown as the cell output),
# which is used as the object key when the file is read back from Minio.
preprocessed_file=ml_preprocessing(input_file="1_ml_log.csv",fwd_returns=5)
preprocessed_file

'processed_1_ml_log.csv'

In [5]:
# Connect to the Minio object store through its S3-compatible API.
# Credentials are taken from the standard AWS environment variables when they
# are set; the literals are only a fallback for the local docker stack, so real
# secrets never have to be written into the notebook.
s3 = boto3.client(
    's3',
    endpoint_url="http://minio-image:9000",
    aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID", "minio-image"),
    aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY", "minio-image-pass"),
)
# Load the preprocessed file (features plus target) written by ml_preprocessing.
data = pd.read_csv(s3.get_object(Bucket="model-support-files", Key=preprocessed_file)['Body'],sep=',')

Now the preprocessed file is in a format that we can feed into the ML model.
The target variable is the 5-day forward return and the features are the technical indicators.

In [6]:
data.head(10)

Unnamed: 0,RSI,STOCHASTIC,fwd_returns
0,56.991619,78.882076,-0.005146
1,59.448606,77.279931,0.003864
2,67.632318,67.385341,0.01912
3,60.945457,79.322084,0.013553
4,56.546196,72.484186,-8.8e-05
5,67.38774,72.292758,0.016796
6,54.832174,60.95709,-0.002328
7,64.459591,63.829216,0.01341
8,55.613371,49.955854,0.000744
9,65.796292,58.935306,0.018234


# Train a ML-model using MLflow

### Point to the MLflow tracking endpoint

In [7]:
# Send all MLflow tracking calls in this notebook to the tracking server at
# this address (hostname and port are hard-coded for the docker setup).
mlflow.tracking.set_tracking_uri('http://mlflow-image:5500')

### Create a MLflow experiment

In [8]:
# Create the experiment only if it does not exist yet: create_experiment raises
# when the name is already taken, which would break a re-run of this notebook.
existing_experiment = mlflow.tracking.MlflowClient().get_experiment_by_name('simple_trading_models')
if existing_experiment is None:
    # Model artifacts of this experiment are stored in the s3://mlflow-models bucket.
    experiment_id = mlflow.create_experiment(name='simple_trading_models', artifact_location='s3://mlflow-models')
else:
    experiment_id = existing_experiment.experiment_id
# Display the experiment ID, as the bare create_experiment call did.
experiment_id

'1'

In [9]:
# Make 'simple_trading_models' the active experiment so that the params,
# metrics and model logged later in this notebook are recorded under it.
mlflow.set_experiment('simple_trading_models')

### Training the model on the preprocessed data and logging it to MLflow

In [10]:
def eval_metrics(actual, pred):
    """Score predictions against the observed values.

    Returns a ``(rmse, mae, r2)`` tuple: root mean squared error, mean
    absolute error and R^2, each computed with the scikit-learn metric
    function of the same name.
    """
    root_mse = np.sqrt(mean_squared_error(actual, pred))
    abs_err = mean_absolute_error(actual, pred)
    r_squared = r2_score(actual, pred)
    return (root_mse, abs_err, r_squared)

warnings.filterwarnings("ignore")
# Seed NumPy's global RNG: neither train_test_split nor the forest is given a
# random_state below, so scikit-learn draws from this global state.
np.random.seed(40)
# Split the data into training and test sets. (0.75, 0.25) split.
train, test = train_test_split(data)

# The predicted column is "fwd_returns"; every other column is used as a feature.
train_x = train.drop(["fwd_returns"], axis=1)
test_x = test.drop(["fwd_returns"], axis=1)
train_y = train[["fwd_returns"]]
test_y = test[["fwd_returns"]]

# Hyperparameters of the random forest (also logged to MLflow below).
n_estimators=300
max_depth=10

lr = RandomForestRegressor(n_estimators=n_estimators,max_depth=max_depth)
# fit expects a 1-D target; train_y is a single-column frame, so flatten it
# instead of relying on scikit-learn's implicit (and warned-about) conversion.
lr.fit(train_x, train_y.values.ravel())
predicted_qualities = lr.predict(test_x)
(rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

print("RandomForest Model (n_estimators=%f, max_depth=%f):" % (n_estimators, max_depth))
print("  RMSE: %s" % rmse)
print("  MAE: %s" % mae)
print("  R2: %s" % r2)

# Log inside an explicit run that is closed when the block exits. Without it
# MLflow starts an implicit run that stays active, so re-running this cell
# would keep logging into the same run.
with mlflow.start_run():
    mlflow.log_param("n_estimators", n_estimators)
    mlflow.log_param("max_depth", max_depth)
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2", r2)
    mlflow.log_metric("mae", mae)
    # Stores the fitted model under the run's artifact path "model".
    mlflow.sklearn.log_model(lr, "model")

RandomForest Model (n_estimators=300.000000, max_depth=10.000000):
  RMSE: 0.008593323344445921
  MAE: 0.006673740830905886
  R2: 0.3790275404533735


## Model Serving

In [11]:
import mlflow.pyfunc

In [12]:
# Load the logged model back as a generic pyfunc model.
# NOTE(review): the run ID in this URI is hard-coded; after retraining it has
# to be replaced with the ID of the run whose model should be served.
model_predict=mlflow.pyfunc.load_model(model_uri="s3://mlflow-models/adebcab9b2d949289e24bd0afb4b3846/artifacts/model")

In [13]:
# Predict for a single sample with two feature values.
# NOTE(review): presumably ordered as (RSI, STOCHASTIC) like the training columns — confirm.
model_predict.predict([[80,20]])

array([0.00771085])

## Running the strategy with the model
Look at the simple_strategy_2.py Backtrader strategy file to see how the ML model is being served 

In [15]:
# Re-run the backtest with ml_serving=True. model_uri is the hard-coded MLflow
# run ID of the model to serve; the recorded output shows it resolved to
# s3://mlflow-models/<run id>/artifacts/model. ML logging is off for this run.
!python /home/jovyan/work/q_pack/q_run/run_BT.py \
--strat_name=simple_strategy_2 \
--strat_param=ml_serving=True,model_uri=adebcab9b2d949289e24bd0afb4b3846 \
--ml_log=False \
--mode=backtest \
--tickers=EUR_USD,GBP_USD

s3://mlflow-models/adebcab9b2d949289e24bd0afb4b3846/artifacts/model
Strategy run finished with Run ID: 3
Profit ... or Loss: -1621.91


## Packaging the model using MLflow (BONUS)
For reproducibility and for sharing

In [55]:
# Run the MLflow project stored at this (absolute, container-specific) path,
# passing the two hyperparameters as project parameters.
mlflow.projects.run("/home/jovyan/work/BT/mlflow_project",parameters={'n_estimators':200,'max_depth':10})


## Serving the model as a rest API using MLflow (BONUS)
Serve the model from inside the MLflow tracking container using:

docker exec ekholabs-mlflow /bin/sh -c "mlflow models serve -m /ekholabs-mlflow/mlruns/0/a85ab97a393045afaea2b550a79686e8/artifacts/model --host=0.0.0.0 -p 2349"

Then you can call the served model from a program or from the command line, for example using curl:

In [None]:
# POST one pandas-split formatted record to the served model's /invocations endpoint.
# NOTE(review): the column names in this payload (alcohol, chlorides, ...) do not
# match the RSI / STOCHASTIC features of the preprocessed data used in this
# notebook — confirm which model is served on port 2349 and adjust the payload.
! curl -X POST -H "Content-Type:application/json; format=pandas-split" --data '{"columns":["alcohol", "chlorides", "citric acid", "density", "fixed acidity", "free sulfur dioxide", "pH", "residual sugar", "sulphates", "total sulfur dioxide", "volatile acidity"],"data":[[12.8, 2, 10, 0.98, 1, 45, 2, 1.2, 44, 4, 0.66]]}' http://mlflow-image:2349/invocations