In [19]:
import sys
sys.path.append('/Users/laurenthericourt/projets/trading/trading')

import psycopg2
import numpy as np
import pandas as pd
from pandas.io.sql import read_sql
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 100)

from config.load import load_conf
from db.utils import get_uri_db
from utils.utils import AnnualGranularity, get_candles
from indicator.oscillator import Atr, Macd

In [2]:
load_conf('../config/configuration.yaml')
dsn = get_uri_db()
schema = 'trading'

# Get data

In [110]:
start_date = '2010-01-01'
end_date = '20218-01-01'

all_candles = get_candles(dsn, schema, start_date, end_date)

In [112]:
candles = all_candles[(all_candles['table'] == 'candle') & (all_candles['symbol'] == 'EUR/USD')]

# Compute indicators
## ATR

In [113]:
atr = Atr(candles, 'close')
for avg_type in ['ma', 'ewm', 'wws']:
    candles.loc[:, f'atr_7_{avg_type}'], _ = atr.compute(7, avg_type)
    candles.loc[:, f'atr_14_{avg_type}'], _ = atr.compute(14, avg_type)
    candles.loc[:, f'atr_28_{avg_type}'], _ = atr.compute(28, avg_type)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


## MACD

In [114]:
macd = Macd(candles, 'close')
_, _, candles.loc[:, 'macd_hist'] = macd.compute()

In [115]:
def compute_sign_changement(data, col, span):
    data['sign'] = np.where(data[col] < 0, -1, 1)
    sign_sum = data['sign'].rolling(span).sum()
    change_sign = np.where(np.abs(sign_sum) != span, 1, 0)
    change_sign_pos = np.where((change_sign == 1) & (data[col] > 0), 1, 0)
    change_sign_neg = np.where((change_sign == 1) & (data[col] < 0), 1, 0)
    del data['sign']
    return change_sign_pos, change_sign_neg

In [116]:
for span in [2, 5, 10]:
    candles[f'macd_change_sign_pos_{span}'], candles[f'macd_change_sign_neg_{span}'] = compute_sign_changement(candles, 'macd_hist', span)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['sign'] = np.where(data[col] < 0, -1, 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candles[f'macd_change_sign_pos_{span}'], candles[f'macd_change_sign_neg_{span}'] = compute_sign_changement(candles, 'macd_hist', span)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['sign'] = np.where(

## Moyennes mobiles

In [117]:
for span in [5, 10, 20, 50, 100, 200]:
    candles[f'ma_{span}'] = candles['close'].rolling(span, min_periods=span).mean()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candles[f'ma_{span}'] = candles['close'].rolling(span, min_periods=span).mean()


In [118]:
# tendances haussières
candles['above_ma_5'] = np.where(candles['close'] > candles['ma_5'], 1, 0)
candles['above_ma_200'] = np.where(candles['close'] > candles['ma_200'], 1, 0)
candles['ma_10_above_ma_100'] = np.where(candles['ma_10'] > candles['ma_100'], 1, 0)
candles['ma_20_above_ma_50'] = np.where(candles['ma_20'] > candles['ma_50'], 1, 0)

# tendances baissières
candles['below_ma_5'] = np.where(candles['close'] < candles['ma_5'], 1, 0)
candles['below_ma_200'] = np.where(candles['close'] < candles['ma_200'], 1, 0)
candles['ma_10_velow_ma_100'] = np.where(candles['ma_10'] < candles['ma_100'], 1, 0)
candles['ma_20_below_ma_50'] = np.where(candles['ma_20'] < candles['ma_50'], 1, 0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candles['above_ma_5'] = np.where(candles['close'] > candles['ma_5'], 1, 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candles['above_ma_200'] = np.where(candles['close'] > candles['ma_200'], 1, 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candles['ma_10_above_ma_100'] = np.where(candles['m

In [119]:
# Signaux d'achats / ventes
candles['close_minus_ma_20'] = candles['close'] - candles['ma_20']
candles['close_minus_ma_200'] = candles['close'] - candles['ma_200']
candles['ma_50_ma_100'] = candles['ma_50'] - candles['ma_100']
candles['ma_50_ma_200'] = candles['ma_50'] - candles['ma_200']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candles['close_minus_ma_20'] = candles['close'] - candles['ma_20']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candles['close_minus_ma_200'] = candles['close'] - candles['ma_200']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candles['ma_50_ma_100'] = candles['ma_50'] - candles['ma_100']
A valu

In [120]:
for span in [2, 5, 10]:
    candles[f'close_ma_20_change_sign_pos_{span}'], candles[f'close_ma_20_change_sign_neg_{span}'] = compute_sign_changement(candles, 'close_minus_ma_20', span)
    candles[f'close_ma_200_change_sign_pos_{span}'], candles[f'close_ma_200_change_sign_neg_{span}'] = compute_sign_changement(candles, 'close_minus_ma_200', span)
    candles[f'ma_50_ma_100_change_sign_pos_{span}'], candles[f'ma_50_ma_100_change_sign_neg_{span}'] = compute_sign_changement(candles, 'ma_50_ma_100', span)
    candles[f'ma_50_ma_200_change_sign_pos_{span}'], candles[f'ma_50_ma_200_change_sign_neg_{span}'] = compute_sign_changement(candles, 'ma_50_ma_200', span)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['sign'] = np.where(data[col] < 0, -1, 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candles[f'close_ma_20_change_sign_pos_{span}'], candles[f'close_ma_20_change_sign_neg_{span}'] = compute_sign_changement(candles, 'close_minus_ma_20', span)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  da

In [129]:
for span in [5, 10, 20, 50, 100, 200]:
    del candles[f'ma_{span}']

# ML model
## Compute y

In [121]:
stop_gain = 50 * 1e-4
stop_loss = 10 * 1e-4
span = 100
#candles[f'min_{period}'] = np.min(candles['open'].rolling(span, min_periods=span).min(),)

In [122]:
candles_desc = candles[['date', 'open', 'close', 'high', 'low']].sort_values('date', ascending=False)

In [123]:
candles_desc[f'low_{span}'] = candles_desc['low'].rolling(span, min_periods=span).min()
candles_desc[f'high_{span}'] = candles_desc['high'].rolling(span, min_periods=span).max()

In [124]:
mask_buy = ((candles_desc['close'] + stop_gain) < candles_desc[f'high_{span}']) & ((candles_desc['close'] - stop_loss) > candles_desc[f'low_{span}'])
candles_desc[mask_buy].shape

(74611, 7)

In [125]:
mask_sell = ((candles_desc['close'] - stop_gain) > candles_desc[f'low_{span}']) & ((candles_desc['close'] + stop_loss) < candles_desc[f'high_{span}'])
candles_desc[mask_sell].shape

(74912, 7)

In [144]:
classes = ['do_nothing', 'buy', 'sell']
candles_desc['y'] = classes.index('do_nothing')
candles_desc.loc[mask_buy, 'y'] = classes.index('buy')
candles_desc.loc[mask_sell, 'y'] = classes.index('sell')

In [127]:
candles['y'] = candles_desc[['date', 'y']].sort_values('date')['y']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candles['y'] = candles_desc[['date', 'y']].sort_values('date')['y']


## Split train / test

In [131]:
candles.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  candles.dropna(inplace=True)


In [134]:
train = candles[(candles['date'] >= '2010-01-01') & (candles['date'] < '2017-01-01')]
test = candles[(candles['date'] >= '2017-01-01') & (candles['date'] < '2018-01-01')]

In [135]:
cols = [x for x in candles if x not in ['table', 'date', 'symbol', 'open', 'close', 'low', 'high', 'tickqty', 'y']]
y_train, y_test = train['y'], test['y']
X_train, X_test = train[cols], test[cols]

## Train model

In [151]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler

In [137]:
rf = RandomForestClassifier(n_estimators=100)
rf.fit(X_train, y_train)

RandomForestClassifier()

In [158]:
mms = MinMaxScaler()
mms.fit(X_train)
X_train_n = mms.transform(X_train)
X_test_n = mms.transform(X_test)

In [157]:
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train_n, y_train)

LogisticRegression(max_iter=1000)

## Model Performances

In [149]:
from sklearn.metrics import precision_recall_fscore_support

In [159]:
model = lr

In [160]:
y_pred = model.predict(X_train_n)
res = precision_recall_fscore_support(y_train, y_pred, average=None)
pd.DataFrame(res, columns=classes, index=['precision', 'recall', 'f1-score', 'avg'])

Unnamed: 0,do_nothing,buy,sell
precision,0.787632,0.31519,0.37604
recall,0.989707,0.009391,0.061718
f1-score,0.877182,0.018239,0.106034
avg,408142.0,53027.0,63725.0


In [161]:
y_pred = model.predict(X_test_n)
res = precision_recall_fscore_support(y_test, y_pred, average=None)
pd.DataFrame(res, columns=classes, index=['precision', 'recall', 'f1-score', 'avg'])

Unnamed: 0,do_nothing,buy,sell
precision,0.930292,0.028571,0.161702
recall,0.997393,0.000361,0.015133
f1-score,0.962675,0.000714,0.027677
avg,69431.0,2767.0,2511.0


Unnamed: 0,do_nothing,buy,sell
precision,0.999998,1.0,1.0
recall,1.0,0.999981,1.0
f1-score,0.999999,0.999991,1.0
avg,408142.0,53027.0,63725.0
