In [1]:
import sys
sys.path.append('/Users/laurenthericourt/projets/trading/trading')
import warnings
warnings.filterwarnings('ignore')

import psycopg2
import numpy as np
import pandas as pd
from pandas.io.sql import read_sql
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 100)

from config.load import load_conf
from db.utils import get_uri_db
from utils.utils import AnnualGranularity, get_candles
from indicator.oscillator import Atr, Macd, Rsi, Stochastic, Obv
from indicator.trend import BollingerBands, Adx, Slope

In [2]:
load_conf('../config/configuration.yaml')
dsn = get_uri_db()
schema = 'trading'

# Get data

In [3]:
start_date = '2015-01-01'
end_date = '2021-01-01'

all_candles = get_candles(dsn, schema, start_date, end_date)

In [15]:
candles = all_candles[(all_candles['table'] == 'candle') & (all_candles['symbol'] == 'EUR/USD')]

# Compute indicators

In [8]:
def compute_sign_changement(data, col, span):
    data['sign'] = np.where(data[col] < 0, -1, 1)
    sign_sum = data['sign'].rolling(span).sum()
    change_sign = np.where(np.abs(sign_sum) != span, 1, 0)
    change_sign_pos = np.where((change_sign == 1) & (data[col] > 0), 1, 0)
    change_sign_neg = np.where((change_sign == 1) & (data[col] < 0), 1, 0)
    del data['sign']
    return change_sign_pos, change_sign_neg

## ATR

In [6]:
def atr_features(candles):
    atr = Atr(candles, 'close')
    for avg_type in ['ma', 'ewm', 'wws']:
        candles.loc[:, f'atr_7_{avg_type}'], _ = atr.compute(7, avg_type)
        candles.loc[:, f'atr_14_{avg_type}'], _ = atr.compute(14, avg_type)
        candles.loc[:, f'atr_28_{avg_type}'], _ = atr.compute(28, avg_type)
    return candles

## MACD

In [7]:
def macd_features(candles):
    macd = Macd(candles, 'close')
    _, _, candles.loc[:, 'macd_hist'] = macd.compute()

    for span in [2, 5, 10]:
        candles[f'macd_change_sign_pos_{span}'], candles[f'macd_change_sign_neg_{span}'] = compute_sign_changement(candles, 'macd_hist', span)
    return candles    

## Moyennes mobiles

In [5]:
def moyenne_mobile_features(candles):
    for span in [5, 10, 20, 50, 100, 200]:
        candles[f'ma_{span}'] = candles['close'].rolling(span, min_periods=span).mean()
        
    # tendances haussières
    candles['above_ma_5'] = np.where(candles['close'] > candles['ma_5'], 1, 0)
    candles['above_ma_200'] = np.where(candles['close'] > candles['ma_200'], 1, 0)
    candles['ma_10_above_ma_100'] = np.where(candles['ma_10'] > candles['ma_100'], 1, 0)
    candles['ma_20_above_ma_50'] = np.where(candles['ma_20'] > candles['ma_50'], 1, 0)

    # tendances baissières
    candles['below_ma_5'] = np.where(candles['close'] < candles['ma_5'], 1, 0)
    candles['below_ma_200'] = np.where(candles['close'] < candles['ma_200'], 1, 0)
    candles['ma_10_velow_ma_100'] = np.where(candles['ma_10'] < candles['ma_100'], 1, 0)
    candles['ma_20_below_ma_50'] = np.where(candles['ma_20'] < candles['ma_50'], 1, 0)
    
    # Signaux d'achats / ventes
    candles['close_minus_ma_20'] = candles['close'] - candles['ma_20']
    candles['close_minus_ma_200'] = candles['close'] - candles['ma_200']
    candles['ma_50_ma_100'] = candles['ma_50'] - candles['ma_100']
    candles['ma_50_ma_200'] = candles['ma_50'] - candles['ma_200']
    
    for span in [2, 5, 10]:
        candles[f'close_ma_20_change_sign_pos_{span}'], candles[f'close_ma_20_change_sign_neg_{span}'] = compute_sign_changement(candles, 'close_minus_ma_20', span)
        candles[f'close_ma_200_change_sign_pos_{span}'], candles[f'close_ma_200_change_sign_neg_{span}'] = compute_sign_changement(candles, 'close_minus_ma_200', span)
        candles[f'ma_50_ma_100_change_sign_pos_{span}'], candles[f'ma_50_ma_100_change_sign_neg_{span}'] = compute_sign_changement(candles, 'ma_50_ma_100', span)
        candles[f'ma_50_ma_200_change_sign_pos_{span}'], candles[f'ma_50_ma_200_change_sign_neg_{span}'] = compute_sign_changement(candles, 'ma_50_ma_200', span)
    
    for span in [5, 10, 20, 50, 100, 200]:
        del candles[f'ma_{span}']
    
    return candles

### Analyse moyenne mobile

In [26]:
candles = all_candles[(all_candles['table'] == 'candle1h') & (all_candles['symbol'] == 'EUR/USD')]
candles = moyenne_mobile_features(candles)

In [27]:
candles.head()

Unnamed: 0,table,date,symbol,open,close,low,high,tickqty,above_ma_5,above_ma_200,ma_10_above_ma_100,ma_20_above_ma_50,below_ma_5,below_ma_200,ma_10_velow_ma_100,ma_20_below_ma_50,close_minus_ma_20,close_minus_ma_200,ma_50_ma_100,ma_50_ma_200,close_ma_20_change_sign_pos_2,close_ma_20_change_sign_neg_2,close_ma_200_change_sign_pos_2,close_ma_200_change_sign_neg_2,ma_50_ma_100_change_sign_pos_2,ma_50_ma_100_change_sign_neg_2,ma_50_ma_200_change_sign_pos_2,ma_50_ma_200_change_sign_neg_2,close_ma_20_change_sign_pos_5,close_ma_20_change_sign_neg_5,close_ma_200_change_sign_pos_5,close_ma_200_change_sign_neg_5,ma_50_ma_100_change_sign_pos_5,ma_50_ma_100_change_sign_neg_5,ma_50_ma_200_change_sign_pos_5,ma_50_ma_200_change_sign_neg_5,close_ma_20_change_sign_pos_10,close_ma_20_change_sign_neg_10,close_ma_200_change_sign_pos_10,close_ma_200_change_sign_neg_10,ma_50_ma_100_change_sign_pos_10,ma_50_ma_100_change_sign_neg_10,ma_50_ma_200_change_sign_pos_10,ma_50_ma_200_change_sign_neg_10
5456054,candle1h,2015-01-02 03:00:00,EUR/USD,1.21008,1.20586,1.20576,1.21008,1068,0,0,0,0,0,0,0,0,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5456055,candle1h,2015-01-02 04:00:00,EUR/USD,1.20586,1.2054,1.20501,1.20609,4281,0,0,0,0,0,0,0,0,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5456056,candle1h,2015-01-02 05:00:00,EUR/USD,1.2054,1.20531,1.2047,1.20566,4253,0,0,0,0,0,0,0,0,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5456057,candle1h,2015-01-02 06:00:00,EUR/USD,1.20531,1.20556,1.20496,1.20587,4324,0,0,0,0,0,0,0,0,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5456058,candle1h,2015-01-02 07:00:00,EUR/USD,1.20556,1.20571,1.20356,1.20591,11343,1,0,0,0,0,0,0,0,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [28]:
for w in [3, 5, 10, 20, 40]:
    candles[f'highest_{w}'] = candles['high'][::-1].rolling(window=w).max()[::-1]
    candles[f'lowest_{w}'] = candles['low'][::-1].rolling(window=w).min()[::-1]
    candles[f'diff_highest_{w}'] = candles[f'highest_{w}'] - candles['close']
    candles[f'diff_lowest_{w}'] = candles['close'] - candles[f'lowest_{w}']
    

In [49]:
candles['indicator'] = candles['close_ma_20_change_sign_neg_2']
print(candles[candles['indicator'] == 1].shape)
candles['indicator_roll'] = candles['indicator'].rolling(window=100).max()
candles['indicator'] = np.where((candles['indicator'] == 1) & (candles['indicator_roll'] == 1), 1, 0)
print(candles[candles['indicator'] == 1].shape)

(2245, 66)
(2240, 66)


In [50]:
candles.loc[candles['indicator'] == 1, ['diff_highest_3', 'diff_lowest_3',
                                                            'diff_highest_5', 'diff_lowest_5', 
                                                            'diff_highest_10', 'diff_lowest_10',
                                                            'diff_highest_20', 'diff_lowest_20',
                                                            'diff_highest_40', 'diff_lowest_40']].describe()

Unnamed: 0,diff_highest_3,diff_lowest_3,diff_highest_5,diff_lowest_5,diff_highest_10,diff_lowest_10,diff_highest_20,diff_lowest_20,diff_highest_40,diff_lowest_40
count,2240.0,2240.0,2240.0,2240.0,2240.0,2240.0,2238.0,2238.0,2237.0,2237.0
mean,0.002093,0.001329,0.002469,0.001848,0.003264,0.002733,0.004279,0.003709,0.006029,0.005444
std,0.001728,0.001396,0.001953,0.002084,0.002731,0.003008,0.003666,0.003652,0.005297,0.005055
min,0.00016,0.0,0.00016,0.0,0.00016,0.0,0.00016,0.0,0.00016,0.0
25%,0.001027,0.00046,0.00119,0.0006,0.00147,0.00087,0.00186,0.00125,0.00238,0.0019
50%,0.00162,0.00091,0.0019,0.00123,0.002485,0.001865,0.0032,0.00266,0.00454,0.00415
75%,0.002563,0.001743,0.00315,0.0024,0.004283,0.00356,0.00569,0.005098,0.00817,0.00754
max,0.0177,0.0153,0.0177,0.04213,0.046,0.04412,0.046,0.04412,0.04944,0.04763


In [48]:
candles.loc[candles['indicator'] == 1, ['diff_highest_3', 'diff_lowest_3',
                                                            'diff_highest_5', 'diff_lowest_5', 
                                                            'diff_highest_10', 'diff_lowest_10',
                                                            'diff_highest_20', 'diff_lowest_20',
                                                            'diff_highest_40', 'diff_lowest_40']].describe()

Unnamed: 0,diff_highest_3,diff_lowest_3,diff_highest_5,diff_lowest_5,diff_highest_10,diff_lowest_10,diff_highest_20,diff_lowest_20,diff_highest_40,diff_lowest_40
count,2175.0,2175.0,2175.0,2175.0,2175.0,2175.0,2173.0,2173.0,2172.0,2172.0
mean,0.002082,0.001315,0.002459,0.001829,0.00325,0.002675,0.004256,0.003633,0.006007,0.005335
std,0.001731,0.001365,0.001957,0.002044,0.002723,0.002898,0.003632,0.00351,0.005307,0.004812
min,0.00016,0.0,0.00016,0.0,0.00016,0.0,0.00016,0.0,0.00016,0.0
25%,0.001015,0.00045,0.001185,0.0006,0.00146,0.00086,0.00185,0.00123,0.002358,0.001858
50%,0.00161,0.00091,0.00189,0.00122,0.00248,0.00184,0.0032,0.00263,0.00454,0.00411
75%,0.00255,0.001715,0.00315,0.00238,0.00427,0.00351,0.00569,0.00502,0.008132,0.007443
max,0.0177,0.0153,0.0177,0.04213,0.046,0.04412,0.046,0.04412,0.04944,0.04412


## RSI

In [9]:
def rsi_features(candles):
    rsi = Rsi(candles, 'close')
    rsi_val = rsi.compute()
    candles['rsi'] = rsi_val
    candles['rsi_minus_70'] = candles['rsi'] - 70
    candles['rsi_minus_30'] = candles['rsi'] - 30
    for span in [2, 5]:
        _, candles[f'rsi_back_below_70_{span}'] = compute_sign_changement(candles, 'rsi_minus_70', span)
        candles[f'rsi_back_above_30_{span}'], _ = compute_sign_changement(candles, 'rsi_minus_30', span)
    del candles['rsi_minus_70']
    del candles['rsi_minus_30']
    
    return candles

## Stochastic

In [10]:
def stochastic_features(candles):
    stoch = Stochastic(candles)
    candles['stoch'], candles['stoch_ma'], candles['stoch_hist'] = stoch.compute()
    for span in [2, 5]:
        candles[f'stoch_change_sign_pos_{span}'], candles[f'stoch_change_sign_neg_{span}'] = compute_sign_changement(candles, 'stoch_hist', span)

    candles['stoch_minus_80'] = candles['stoch'] - 80
    candles['stoch_minus_20'] = candles['stoch'] - 20
    for span in [2, 5]:
        _, candles[f'stoch_back_below_80_{span}'] = compute_sign_changement(candles, 'stoch_minus_80', span)
        candles[f'stoch_back_above_20_{span}'], _ = compute_sign_changement(candles, 'stoch_minus_20', span)

    del candles['stoch_minus_80']
    del candles['stoch_minus_20']
    
    return candles

## Obv

In [11]:
def obv_features(candles):
    obv = Obv(candles)
    candles['obv'] = obv.compute()
    candles['obv_pct'] = candles['obv'].pct_change()
    del candles['obv']
    return candles

## Bollinger Bands

In [12]:
def bollinger_bands_features(candles):
    _, bb_up, bb_down = BollingerBands(candles, 'close').compute()
    candles['bb_up_minus_close'] = bb_up - candles['close']
    candles['close_minus_bb_down'] = candles['close'] - bb_down
    candles['above_bb_up'] = np.where(candles['close'] > bb_up, 1, 0) 
    candles['below_bb_down'] = np.where(candles['close'] < bb_down, 1, 0)
    return candles

## ADX

In [13]:
def adx_features(candles):
    candles['dm_plus_norm'], candles['dm_minus_norm'], candles['adx'] = Adx(candles).compute()
    return candles

## Slope

In [207]:
candles['slope_5'] = Slope(candles).compute(5)

KeyboardInterrupt: 

## All features

In [110]:
def compute_features_one_time_level(candles):
    candles = atr_features(candles)
    candles = macd_features(candles)
    candles = moyenne_mobile_features(candles)
    candles = rsi_features(candles)
    candles = stochastic_features(candles)
    candles = obv_features(candles)
    candles = bollinger_bands_features(candles)
    candles = adx_features(candles)
    
    return candles

In [16]:
candles_features = candles.copy()
candles_features = compute_features_one_dimension(candles_features)

In [54]:
def decompose_date(candles):
    candles['15min'] = 15 * (candles['date'].dt.minute / 15).astype(int)
    candles['30min'] = 30 * (candles['date'].dt.minute / 30).astype(int)
    candles['1h'] = (candles['date'].dt.hour).astype(int)
    candles['4h'] = 4 * (candles['date'].dt.hour / 4).astype(int)
    candles['date_only'] = candles['date'].dt.date
    return candles

In [48]:
candles1h = all_candles[(all_candles['table'] == 'candle1h') & (all_candles['symbol'] == 'EUR/USD')]

In [104]:
candles15min = all_candles[(all_candles['table'] == 'candle15m') & (all_candles['symbol'] == 'EUR/USD')]

In [105]:
candles15min = compute_features_one_dimension(candles15min)

In [107]:
candles15min = decompose_date(candles15min)

In [108]:
candles15min.tail()

Unnamed: 0,table,date,symbol,open,close,low,high,tickqty,atr_7_ma,atr_14_ma,atr_28_ma,atr_7_ewm,atr_14_ewm,atr_28_ewm,atr_7_wws,atr_14_wws,atr_28_wws,macd_hist,macd_change_sign_pos_2,macd_change_sign_neg_2,macd_change_sign_pos_5,macd_change_sign_neg_5,macd_change_sign_pos_10,macd_change_sign_neg_10,above_ma_5,above_ma_200,ma_10_above_ma_100,ma_20_above_ma_50,below_ma_5,below_ma_200,ma_10_velow_ma_100,ma_20_below_ma_50,close_minus_ma_20,close_minus_ma_200,ma_50_ma_100,ma_50_ma_200,close_ma_20_change_sign_pos_2,close_ma_20_change_sign_neg_2,close_ma_200_change_sign_pos_2,close_ma_200_change_sign_neg_2,ma_50_ma_100_change_sign_pos_2,ma_50_ma_100_change_sign_neg_2,ma_50_ma_200_change_sign_pos_2,ma_50_ma_200_change_sign_neg_2,close_ma_20_change_sign_pos_5,close_ma_20_change_sign_neg_5,close_ma_200_change_sign_pos_5,close_ma_200_change_sign_neg_5,ma_50_ma_100_change_sign_pos_5,ma_50_ma_100_change_sign_neg_5,ma_50_ma_200_change_sign_pos_5,ma_50_ma_200_change_sign_neg_5,close_ma_20_change_sign_pos_10,close_ma_20_change_sign_neg_10,close_ma_200_change_sign_pos_10,close_ma_200_change_sign_neg_10,ma_50_ma_100_change_sign_pos_10,ma_50_ma_100_change_sign_neg_10,ma_50_ma_200_change_sign_pos_10,ma_50_ma_200_change_sign_neg_10,rsi,rsi_back_below_70_2,rsi_back_above_30_2,rsi_back_below_70_5,rsi_back_above_30_5,stoch,stoch_ma,stoch_hist,stoch_change_sign_pos_2,stoch_change_sign_neg_2,stoch_change_sign_pos_5,stoch_change_sign_neg_5,stoch_back_below_80_2,stoch_back_above_20_2,stoch_back_below_80_5,stoch_back_above_20_5,obv_pct,bb_up_minus_close,close_minus_bb_down,above_bb_up,below_bb_down,dm_plus_norm,dm_minus_norm,adx,15min,30min,1h,4h,date_only
3367187,candle15m,2021-01-05 22:45:00,EUR/USD,1.22977,1.22977,1.22946,1.22986,118,0.000483,0.000484,0.000596,0.000458,0.000502,0.000588,0.000496,0.000583,0.000664,-8.5e-05,0,0,0,0,0,0,0,1,1,1,1,0,0,0,-4.5e-05,0.002779,0.001039,0.00167,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,55.763368,0,0,0,0,46.91358,47.325103,-0.4115226,0,0,0,1,0,0,0,0,0.0,0.000708,0.000618,0,0,23.234245,17.074413,21.857044,45,30,22,20,2021-01-05
3367188,candle15m,2021-01-05 23:00:00,EUR/USD,1.22975,1.22977,1.22963,1.2299,851,0.000451,0.000469,0.000583,0.000411,0.000471,0.000566,0.000463,0.00056,0.00065,-8.2e-05,0,0,0,0,0,0,0,1,1,1,1,0,0,0,-3.2e-05,0.002753,0.001023,0.001675,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,55.763368,0,0,0,0,46.91358,46.91358,-2.621903e-12,0,0,0,1,0,0,0,0,0.0,0.000686,0.000623,0,0,22.944459,16.486734,21.465627,0,0,23,20,2021-01-05
3367189,candle15m,2021-01-05 23:15:00,EUR/USD,1.22976,1.22983,1.22952,1.23006,1507,0.000487,0.000474,0.000574,0.000443,0.000481,0.000564,0.000474,0.000559,0.000646,-7.6e-05,0,0,0,0,0,0,1,1,1,1,0,0,0,0,5.2e-05,0.002786,0.001018,0.001688,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,56.681791,0,0,0,0,50.617284,48.148148,2.469136,1,0,1,0,0,0,0,0,0.002601,0.000559,0.000663,0,0,23.405844,15.348883,21.417341,15,0,23,20,2021-01-05
3367190,candle15m,2021-01-05 23:30:00,EUR/USD,1.22983,1.22986,1.22971,1.23003,908,0.000433,0.000447,0.000539,0.000413,0.000459,0.000547,0.000452,0.000542,0.000634,-6.9e-05,0,0,0,0,0,0,1,1,1,1,0,0,0,0,8.1e-05,0.002789,0.001005,0.001696,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,57.160705,0,0,0,0,61.594203,53.041689,8.552514,0,0,1,0,0,0,0,0,0.001563,0.000531,0.000693,0,0,22.418435,14.701368,21.372504,30,30,23,20,2021-01-05
3367191,candle15m,2021-01-05 23:45:00,EUR/USD,1.22986,1.22993,1.22973,1.23022,950,0.000379,0.000456,0.000533,0.000432,0.000463,0.000543,0.000458,0.000538,0.000629,-5.9e-05,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0.000124,0.00283,0.000989,0.001698,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,58.318656,0,0,0,0,66.666667,59.626051,7.040615,0,0,1,0,0,0,0,0,0.001633,0.000464,0.000713,0,0,23.482336,13.745158,21.714174,45,30,23,20,2021-01-05


In [109]:
decompose_date(candles_features).tail()

Unnamed: 0,table,date,symbol,open,close,low,high,tickqty,atr_7_ma,atr_14_ma,atr_28_ma,atr_7_ewm,atr_14_ewm,atr_28_ewm,atr_7_wws,atr_14_wws,atr_28_wws,macd_hist,macd_change_sign_pos_2,macd_change_sign_neg_2,macd_change_sign_pos_5,macd_change_sign_neg_5,macd_change_sign_pos_10,macd_change_sign_neg_10,above_ma_5,above_ma_200,ma_10_above_ma_100,ma_20_above_ma_50,below_ma_5,below_ma_200,ma_10_velow_ma_100,ma_20_below_ma_50,close_minus_ma_20,close_minus_ma_200,ma_50_ma_100,ma_50_ma_200,close_ma_20_change_sign_pos_2,close_ma_20_change_sign_neg_2,close_ma_200_change_sign_pos_2,close_ma_200_change_sign_neg_2,ma_50_ma_100_change_sign_pos_2,ma_50_ma_100_change_sign_neg_2,ma_50_ma_200_change_sign_pos_2,ma_50_ma_200_change_sign_neg_2,close_ma_20_change_sign_pos_5,close_ma_20_change_sign_neg_5,close_ma_200_change_sign_pos_5,close_ma_200_change_sign_neg_5,ma_50_ma_100_change_sign_pos_5,ma_50_ma_100_change_sign_neg_5,ma_50_ma_200_change_sign_pos_5,ma_50_ma_200_change_sign_neg_5,close_ma_20_change_sign_pos_10,close_ma_20_change_sign_neg_10,close_ma_200_change_sign_pos_10,close_ma_200_change_sign_neg_10,ma_50_ma_100_change_sign_pos_10,ma_50_ma_100_change_sign_neg_10,ma_50_ma_200_change_sign_pos_10,ma_50_ma_200_change_sign_neg_10,rsi,rsi_back_below_70_2,rsi_back_above_30_2,rsi_back_below_70_5,rsi_back_above_30_5,stoch,stoch_ma,stoch_hist,stoch_change_sign_pos_2,stoch_change_sign_neg_2,stoch_change_sign_pos_5,stoch_change_sign_neg_5,stoch_back_below_80_2,stoch_back_above_20_2,stoch_back_below_80_5,stoch_back_above_20_5,obv_pct,bb_up_minus_close,close_minus_bb_down,above_bb_up,below_bb_down,dm_plus_norm,dm_minus_norm,adx,15min,30min,1h,4h,date_only
1130961,candle,2021-01-05 23:40:00,EUR/USD,1.22987,1.22986,1.22979,1.22998,273,0.000271,0.000246,0.000248,0.00024,0.000248,0.000255,0.000247,0.000254,0.000287,1e-05,0,0,1,0,1,0,0,1,1,1,1,0,0,0,5.4e-05,0.001449,0.000275,0.001361,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,53.10996,0,0,0,0,66.666667,67.777778,-1.111111,0,1,0,1,0,0,1,0,-0.001088,0.000155,0.000263,0,0,19.064722,16.462522,12.257861,30,30,23,20,2021-01-05
1130962,candle,2021-01-05 23:45:00,EUR/USD,1.22986,1.22983,1.22973,1.22994,321,0.00028,0.000251,0.000248,0.000233,0.000243,0.000252,0.000242,0.000251,0.000284,7e-06,0,0,0,0,1,0,0,1,1,1,1,0,0,0,2.3e-05,0.001402,0.000242,0.001343,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,51.980822,0,0,0,0,61.666667,66.111111,-4.444444,0,0,0,1,0,0,0,0,-0.001281,0.000187,0.000232,0,0,17.925411,17.186152,11.532689,45,30,23,20,2021-01-05
1130963,candle,2021-01-05 23:50:00,EUR/USD,1.22983,1.22997,1.22983,1.23022,380,0.000274,0.000251,0.000253,0.000272,0.000262,0.000261,0.000263,0.000261,0.000288,1.4e-05,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0.000164,0.001525,0.000205,0.001323,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,56.616248,0,0,0,0,67.105263,65.146199,1.959064,1,0,1,0,0,0,0,0,0.001519,4.2e-05,0.000369,0,0,23.676533,15.351353,12.232595,45,30,23,20,2021-01-05
1130964,candle,2021-01-05 23:55:00,EUR/USD,1.22996,1.22993,1.22991,1.23006,249,0.000224,0.000258,0.000253,0.000242,0.000247,0.000254,0.000247,0.000253,0.000283,1.5e-05,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0.000126,0.00147,0.000171,0.001302,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,54.983118,0,0,0,0,61.842105,63.538012,-1.695906,0,1,0,1,0,0,0,0,-0.000994,7.4e-05,0.000325,0,0,22.673888,14.701259,12.882507,45,30,23,20,2021-01-05
1130965,candle,2021-01-06 00:00:00,EUR/USD,1.22994,1.2299,1.22977,1.23039,1165,0.000277,0.000298,0.00027,0.000336,0.000297,0.000279,0.0003,0.000279,0.000295,1.2e-05,0,0,0,0,1,0,1,1,1,1,0,0,0,0,9.5e-05,0.001424,0.00014,0.001284,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,53.731266,0,0,0,0,47.311828,58.753065,-11.441238,0,0,0,1,0,0,0,0,-0.004653,0.000107,0.000296,0,0,27.51955,12.369578,14.6752,0,0,0,0,2021-01-06


In [142]:
date_time_agg = {
    #'15m': ['date_only', '1h', '15min'],
    #'30m': ['date_only', '1h', '30min'],
    '1h': ['date_only', '1h'],
    '4h': ['date_only', '4h'],
    '1d': ['date_only']
}

date_time_level = ['15min', '30min', '1h', '4h', 'date_only']
date_time_level = ['1h', '4h', 'date_only']

def merge_features_one_time_level(candles5min, candles2, agg='1h'):
    candles2c = candles2.copy()
    candles5min = decompose_date(candles5min)
    # If we do not shifr data there will be data leak
    candles2c = pd.concat([candles2c[['date']], candles2c.shift(1)[[x for x in candles2c if x != 'date']]], axis=1)
    candles2c = decompose_date(candles2c)
    candles2c = candles2c[[x for x in candles2c.columns if x not in ['table', 'date', 'symbol', 'open', 'close', 'low', 'high', 'tickqty']]]
    candles2c.columns = [x + '_' + agg if x not in date_time_level else x for x in candles2c.columns]
    merge_candles = pd.merge(candles5min, candles2c, on=date_time_agg[agg], how='left')
    
    for col in date_time_level:
        del candles5min[col]
    
    for col in date_time_agg[agg]:
        del merge_candles[col]
    
    for col in [x for x in date_time_level if x not in date_time_agg[agg]]:
        del merge_candles[col + '_x']
        del merge_candles[col + '_y']
        
    return merge_candles

def compute_features_all_time_levels(candles, symbol='EUR/USD'):
    candles5m = candles[(candles['table'] == 'candle30m') & (candles['symbol'] == symbol)]
    candles5m = compute_features_one_time_level(candles5m)
    for agg in date_time_agg.keys():
        candles_one_time_level = candles[(candles['table'] == 'candle' + agg) & (candles['symbol'] == symbol)]
        candles_one_time_level = compute_features_one_time_level(candles_one_time_level)
        if candles_one_time_level.empty:
            print(f'No data for aggregation {agg}')
        candles5m = merge_features_one_time_level(candles5m, candles_one_time_level, agg)
    return candles5m

In [132]:
features = compute_features_all_time_levels(all_candles)
features = features.dropna()

In [147]:
features30m = compute_features_all_time_levels(all_candles)

In [148]:
features30m = features30m.dropna()

In [149]:
features30m.shape

(130357, 320)

In [150]:
features30m.head()

Unnamed: 0,table,date,symbol,open,close,low,high,tickqty,atr_7_ma,atr_14_ma,atr_28_ma,atr_7_ewm,atr_14_ewm,atr_28_ewm,atr_7_wws,atr_14_wws,atr_28_wws,macd_hist,macd_change_sign_pos_2,macd_change_sign_neg_2,macd_change_sign_pos_5,macd_change_sign_neg_5,macd_change_sign_pos_10,macd_change_sign_neg_10,above_ma_5,above_ma_200,ma_10_above_ma_100,ma_20_above_ma_50,below_ma_5,below_ma_200,ma_10_velow_ma_100,ma_20_below_ma_50,close_minus_ma_20,close_minus_ma_200,ma_50_ma_100,ma_50_ma_200,close_ma_20_change_sign_pos_2,close_ma_20_change_sign_neg_2,close_ma_200_change_sign_pos_2,close_ma_200_change_sign_neg_2,ma_50_ma_100_change_sign_pos_2,ma_50_ma_100_change_sign_neg_2,ma_50_ma_200_change_sign_pos_2,ma_50_ma_200_change_sign_neg_2,close_ma_20_change_sign_pos_5,close_ma_20_change_sign_neg_5,close_ma_200_change_sign_pos_5,close_ma_200_change_sign_neg_5,ma_50_ma_100_change_sign_pos_5,ma_50_ma_100_change_sign_neg_5,...,close_ma_20_change_sign_pos_2_1d,close_ma_20_change_sign_neg_2_1d,close_ma_200_change_sign_pos_2_1d,close_ma_200_change_sign_neg_2_1d,ma_50_ma_100_change_sign_pos_2_1d,ma_50_ma_100_change_sign_neg_2_1d,ma_50_ma_200_change_sign_pos_2_1d,ma_50_ma_200_change_sign_neg_2_1d,close_ma_20_change_sign_pos_5_1d,close_ma_20_change_sign_neg_5_1d,close_ma_200_change_sign_pos_5_1d,close_ma_200_change_sign_neg_5_1d,ma_50_ma_100_change_sign_pos_5_1d,ma_50_ma_100_change_sign_neg_5_1d,ma_50_ma_200_change_sign_pos_5_1d,ma_50_ma_200_change_sign_neg_5_1d,close_ma_20_change_sign_pos_10_1d,close_ma_20_change_sign_neg_10_1d,close_ma_200_change_sign_pos_10_1d,close_ma_200_change_sign_neg_10_1d,ma_50_ma_100_change_sign_pos_10_1d,ma_50_ma_100_change_sign_neg_10_1d,ma_50_ma_200_change_sign_pos_10_1d,ma_50_ma_200_change_sign_neg_10_1d,rsi_1d,rsi_back_below_70_2_1d,rsi_back_above_30_2_1d,rsi_back_below_70_5_1d,rsi_back_above_30_5_1d,stoch_1d,stoch_ma_1d,stoch_hist_1d,stoch_change_sign_pos_2_1d,stoch_change_sign_neg_2_1d,stoch_change_sign_pos_5_1d,stoch_change_sign_neg_5_1d,stoch_back_below_80_2_1d,stoch_back_above_20_2_1d,stoch_back_below_80_5_1d,stoch_back_above_20_5_1d,obv_pct_1d,bb_up_minus_close_1d,close_minus_bb_down_1d,above_bb_up_1d,below_bb_down_1d,dm_plus_norm_1d,dm_minus_norm_1d,adx_1d,15min_1d,30min_1d
7939,candle30m,2010-08-24 00:00:00,EUR/USD,1.2632,1.26453,1.26283,1.26456,1305,0.001216,0.001101,0.001656,0.001311,0.00131,0.001446,0.0013,0.001439,0.001555,-0.000171,0,0,0,0,0,0,1,0,0,0,0,1,1,1,-0.001429,-0.013764,-0.003025,-0.009415,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,35.321623,0.0,0.0,0.0,0.0,1.096812,4.289806,-3.192994,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.174807,0.078048,0.011717,0.0,0.0,17.185442,28.078177,25.701018,0,0
7940,candle30m,2010-08-24 00:30:00,EUR/USD,1.26453,1.26393,1.26334,1.26453,840,0.001321,0.001109,0.001644,0.001281,0.001294,0.001428,0.001284,0.001421,0.001542,-0.000121,0,0,0,0,0,0,1,0,0,0,0,1,1,1,-0.00194,-0.014244,-0.002971,-0.009416,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,35.321623,0.0,0.0,0.0,0.0,1.096812,4.289806,-3.192994,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.174807,0.078048,0.011717,0.0,0.0,17.185442,28.078177,25.701018,0,0
7941,candle30m,2010-08-24 01:00:00,EUR/USD,1.26393,1.26398,1.26366,1.26451,1323,0.001287,0.001081,0.001608,0.001173,0.001234,0.001388,0.001222,0.00138,0.001517,-6.8e-05,0,0,0,0,0,0,1,0,0,0,0,1,1,1,-0.001762,-0.014071,-0.002933,-0.009422,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,35.321623,0.0,0.0,0.0,0.0,1.096812,4.289806,-3.192994,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.174807,0.078048,0.011717,0.0,0.0,17.185442,28.078177,25.701018,0,0
7942,candle30m,2010-08-24 01:30:00,EUR/USD,1.26398,1.26338,1.26211,1.26398,1875,0.00141,0.001134,0.001571,0.001347,0.001319,0.001422,0.001315,0.001415,0.00153,-5.7e-05,0,0,0,0,0,0,0,0,0,0,1,1,1,1,-0.002216,-0.014548,-0.0029,-0.00944,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,35.321623,0.0,0.0,0.0,0.0,1.096812,4.289806,-3.192994,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.174807,0.078048,0.011717,0.0,0.0,17.185442,28.078177,25.701018,0,0
7943,candle30m,2010-08-24 02:00:00,EUR/USD,1.26338,1.26338,1.26252,1.26342,1082,0.00128,0.001117,0.001503,0.001235,0.001263,0.001386,0.001256,0.001378,0.001507,-3.4e-05,0,0,0,0,0,0,0,0,0,0,1,1,1,1,-0.002011,-0.014425,-0.002862,-0.009453,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,35.321623,0.0,0.0,0.0,0.0,1.096812,4.289806,-3.192994,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.174807,0.078048,0.011717,0.0,0.0,17.185442,28.078177,25.701018,0,0


In [151]:
import pickle
pickle.dump(features30m, open('data/features30m.pkl', 'wb'))

# ML model
## Compute y

In [248]:
stop_gain = 50 * 1e-4
stop_loss = 10 * 1e-4
span = 20
#candles[f'min_{period}'] = np.min(candles['open'].rolling(span, min_periods=span).min(),)

In [249]:
candles_desc = candles[['date', 'open', 'close', 'high', 'low']].sort_values('date', ascending=False)

In [250]:
candles_desc[f'low_{span}'] = candles_desc['low'].rolling(span, min_periods=span).min()
candles_desc[f'high_{span}'] = candles_desc['high'].rolling(span, min_periods=span).max()

In [251]:
mask_buy = (candles_desc[f'high_{span}'] > candles_desc['close'] + stop_gain) & (candles_desc[f'low_{span}'] > candles_desc['close'] - stop_loss)
candles_desc[mask_buy].shape

(12234, 7)

In [252]:
mask_sell = (candles_desc[f'low_{span}'] < candles_desc['close'] - stop_gain) & (candles_desc[f'high_{span}'] < candles_desc['close'] + stop_loss)
candles_desc[mask_sell].shape

(11690, 7)

In [253]:
classes = ['do_nothing', 'buy', 'sell']
candles_desc['y'] = classes.index('do_nothing')
candles_desc.loc[mask_buy, 'y'] = classes.index('buy')
candles_desc.loc[mask_sell, 'y'] = classes.index('sell')

In [254]:
candles['y'] = candles_desc[['date', 'y']].sort_values('date')['y']

## Compute y next up / down

In [485]:
classes = ['sell', 'buy']
close_shift = pd.concat([candles[['date', 'close']].shift(1), candles[['date', 'close']]], axis=1)
close_shift.columns = ['date', 'close', 'next_date', 'next_close']
close_shift['y'] = np.where(close_shift['close'] < close_shift['next_close'], 1, 0)
#del candles['y']
candles = pd.merge(candles, close_shift[['date', 'y']], on='date')
#candles['y'] = [np.NaN] + y[1:-1].tolist() + [np.NaN]

## Split train / test

In [514]:
candles.replace([np.inf, -np.inf], np.nan, inplace=True)
candles.dropna(inplace=True)

In [515]:
train = candles[(candles['date'] >= '2010-01-01') & (candles['date'] < '2016-01-01')]
test = candles[(candles['date'] >= '2016-01-01') & (candles['date'] < '2018-01-01')]

In [516]:
cols = [x for x in candles if x not in ['table', 'date', 'symbol', 'open', 'close', 'low', 'high', 'tickqty', 'y']]
indicators_to_remove = ['obv_pct']
indicators_to_remove = []
cols = [x for x in cols if x not in indicators_to_remove]
y_train, y_test = train['y'], test['y']
X_train, X_test = train[cols], test[cols]

## Train model

In [517]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler

In [524]:
rf = RandomForestClassifier(n_estimators=100)
rf.fit(X_train, y_train)

RandomForestClassifier()

In [518]:
mms = MinMaxScaler()
mms.fit(X_train)
X_train_n = mms.transform(X_train)
X_test_n = mms.transform(X_test)

In [519]:
lr = LogisticRegression(max_iter=1000, class_weight='balanced')
lr.fit(X_train_n, y_train)

LogisticRegression(class_weight='balanced', max_iter=1000)

## Model Performances

In [520]:
from sklearn.metrics import precision_recall_fscore_support

In [526]:
model = rf
X_train_n = X_train
X_test_n = X_test

In [527]:
y_pred = model.predict(X_train_n)
res = precision_recall_fscore_support(y_train, y_pred, average=None)
pd.DataFrame(res, columns=classes, index=['precision', 'recall', 'f1-score', 'avg'])

Unnamed: 0,sell,buy
precision,1.0,1.0
recall,1.0,1.0
f1-score,1.0,1.0
avg,229836.0,219547.0


In [528]:
y_pred = model.predict(X_test_n)
res = precision_recall_fscore_support(y_test, y_pred, average=None)
pd.DataFrame(res, columns=classes, index=['precision', 'recall', 'f1-score', 'avg'])

Unnamed: 0,sell,buy
precision,0.525031,0.504304
recall,0.596347,0.43221
f1-score,0.558421,0.465482
avg,77029.0,73189.0


In [383]:
feat_importance = pd.DataFrame(rf.feature_importances_, index = X_train.columns, columns=['value'])

In [384]:
feat_importance.sort_values('value', ascending=False)

Unnamed: 0,value
obv_pct,0.48018
stoch_hist,0.099432
stoch_change_sign_pos_5,0.064769
stoch_change_sign_neg_5,0.053147
stoch_change_sign_pos_2,0.029953
below_ma_5,0.028316
above_ma_5,0.025494
stoch_change_sign_neg_2,0.022236
stoch,0.015455
bb_up_minus_close,0.011772


In [299]:
feat_importance.sort_values('value', ascending=False)

Unnamed: 0,value
obv_pct,0.514871
stoch_hist,0.085798
stoch_change_sign_neg_10,0.062894
stoch_change_sign_pos_5,0.043913
stoch_change_sign_neg_2,0.036039
above_ma_5,0.03426
stoch_change_sign_neg_5,0.022059
stoch_change_sign_pos_2,0.020818
below_ma_5,0.018139
stoch,0.016927
