In [2]:
import pandas as pd
import datetime as dt
import numpy as np

import ta
#import talib

from oandapyV20 import API
from oandapyV20.contrib.factories import InstrumentsCandlesFactory
import oandapyV20.endpoints.forexlabs as labs
# https://media.readthedocs.org/pdf/oanda-api-v20/latest/oanda-api-v20.pdf
# https://financetrain.com/best-python-librariespackages-finance-financial-data-scientists/
# https://github.com/mrjbq7/ta-lib

from sklearn import tree
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import classification_report, confusion_matrix

In [3]:
client = API(access_token='7f736aabc877f3ea75bc844c79814d7c-998e41725285ea6d54b836e8b93fe4f6')

In [4]:
def hist(api, instrument, start_days, end_days, granularity):

    start_date = (dt.datetime.now()-dt.timedelta(days=start_days)).strftime('%Y-%m-%dT%H:%M:%SZ')
    end_date = (dt.datetime.now()-dt.timedelta(days=end_days, hours=2,minutes=4)).strftime('%Y-%m-%dT%H:%M:%SZ')

    params ={
                "from": start_date,
                "to": end_date,
                "granularity":granularity,
            }

    df_list = []
    for r in InstrumentsCandlesFactory(instrument=instrument,params=params):
        api.request(r)
        df = pd.DataFrame(r.response['candles'])
        if(df.empty==False):
            time = df['time']
            volume = pd.DataFrame(df['volume'].apply(pd.Series))
            df = pd.DataFrame(df['mid'].apply(pd.Series))
            df = pd.concat([df,time,volume], axis=1)
            df['time'] = pd.to_datetime(df['time'], format='%Y-%m-%dT%H:%M:%S.000000000Z')
            #df.set_index('time',inplace=True)
            df_list.append(df)
    
    final = pd.concat(df_list)
    
    names = {
        'o': 'open',
        'c': 'close',
        'h': 'high',
        'l': 'low',
        0: 'vol',
        'time': 'time',
    }
    new_names = []
    for column_name in final.columns:
        new_names.append(names[column_name])
    final.columns = new_names
    
    return final

In [5]:
def cal(client, instrument, perdiod):

    
    
    params = {
        "instrument": instrument,
        "period": perdiod
    }
    
    # PERIOD VALUES
    #3600 - 1 hour
    #43200 - 12 hours
    #86400 - 1 day
    #604800 - 1 week
    #2592000 - 1 month
    #7776000 - 3 months
    #15552000 - 6 months
    #31536000 - 1 year
    # http://developer.oanda.com/rest-live/forex-labs/

    r = labs.Calendar(params=params)
    client.request(r)
    
    df = pd.DataFrame.from_dict(r.response, orient='columns')
    
    df['timestamp'] = pd.to_datetime(df['timestamp']*1000000000)
    df = df[['impact', 'timestamp']]
    df.columns = ['impact', 'time']

    return df.groupby('time').sum().reset_index()

In [6]:
def merge(history, calendar):
    return pd.merge(history, calendar, left_on = 'time', right_on = 'time', how='outer')\
                                                                            .set_index('time')\
                                                                            .astype(float)\
                                                                            .fillna(0)

In [7]:
def add_ta(df):
    df = ta.add_all_ta_features(df, "open", "high", "low", "close", "vol", fillna=False)
    
    for feature in df.drop(["open", "high", "low", "close", "vol", "impact"], axis=1).columns.tolist():
        df[feature+"_change"] = (df[feature] - df[feature].shift(1)) / df[feature].shift(1)
        df = df.drop(feature, axis=1)
        
    df = df.replace([np.inf, -np.inf], np.nan)\
            .dropna(axis=1, thresh=len(df) - 50)\
            .iloc[100:]
    
    return df

In [8]:
calendar = cal(client, 'EUR_USD', 2592000)

In [9]:
history = hist(client, 'EUR_USD', 30, 0, 'M5')

In [10]:
merged = merge(history, calendar)

In [11]:
len(merged)

5704

In [12]:
len(merged[merged.index.duplicated(keep=False) == True])

26

In [13]:
merged = merged[merged.index.duplicated(keep='first') == False]

In [14]:
with_ta = add_ta(merged)

  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  aroon_up = close.rolling(n).apply(lambda x: float(np.argmax(x) + 1) / n * 100)
  aroon_down = close.rolling(n).apply(lambda x: float(np.argmin(x) + 1) / n * 100)


In [15]:
len(with_ta)

5591

In [16]:
# check ta indicators
with_ta.head()

Unnamed: 0_level_0,open,high,low,close,vol,impact,volume_adi_change,volume_obvm_change,volume_cmf_change,volume_em_change,...,trend_aroon_up_change,trend_aroon_down_change,trend_aroon_ind_change,momentum_rsi_change,momentum_mfi_change,momentum_tsi_change,momentum_uo_change,momentum_stoch_signal_change,momentum_ao_change,others_cr_change
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-12-31 06:20:00,1.1431,1.14321,1.14298,1.14308,33.0,0.0,-0.815443,-0.056,0.305595,-0.233196,...,0.0,-0.111111,-0.125,-0.024627,-0.12132,-0.061458,-0.079296,-0.024506,-0.520127,0.034483
2018-12-31 06:25:00,1.14308,1.14308,1.14284,1.14299,66.0,0.0,0.370968,-0.822034,0.676424,-0.14142,...,21.0,-0.125,-3.142857,-0.113375,-0.058996,-0.046976,0.002506,-0.08738,-1.00305,0.15
2018-12-31 06:30:00,1.14298,1.14302,1.14264,1.14278,82.0,0.0,-1.416456,-5.952381,0.018635,-0.31216,...,-0.045455,-0.142857,0.0,-0.233858,-0.145163,-0.01615,-0.086293,-0.177456,172.0,0.304348
2018-12-31 06:35:00,1.14278,1.1428,1.14249,1.1428,46.0,0.0,-5.80829,-0.221154,0.447562,-0.408953,...,-0.047619,-0.166667,0.0,0.03283,0.062304,-0.018283,0.155828,-0.148148,-0.88921,-0.022222
2018-12-31 06:40:00,1.14284,1.14306,1.14282,1.14302,37.0,0.0,1.893678,-0.160494,-0.114707,2.104264,...,-0.05,-0.2,0.0,0.298431,0.120451,-0.057914,0.045375,0.018634,1.052174,-0.25


In [17]:
with_ta.iloc[-1]

open                             1.143420
high                             1.143470
low                              1.143180
close                            1.143240
vol                             88.000000
impact                           0.000000
volume_adi_change               -0.113002
volume_obvm_change               0.735160
volume_cmf_change               -0.057695
volume_em_change                -6.981920
volume_vpt_change               -0.173998
volume_nvi_change                0.000000
volatility_atr_change           -0.019559
volatility_bbh_change           -0.000044
volatility_bbl_change            0.000079
volatility_bbm_change            0.000017
volatility_kcc_change           -0.000031
volatility_kch_change           -0.000032
volatility_kcl_change           -0.000031
volatility_dch_change            0.000000
volatility_dcl_change            0.000193
trend_macd_change               -0.191278
trend_macd_signal_change        -0.076326
trend_macd_diff_change           0

In [18]:
def broaden_impact(with_ta):
    for impact in with_ta[with_ta['impact'] != 0].index:
        date = impact.date()
        hour = impact.hour

        with_ta.loc[(with_ta.index.date == date) & ((with_ta.index.hour == hour) | (with_ta.index.hour == hour -1)), 'impact_broaden'] = 1
    with_ta.loc[with_ta['impact_broaden'].isna(), 'impact_broaden'] = 0
    
    return with_ta

In [19]:
with_ta_and_impact = broaden_impact(with_ta)

In [20]:
def add_sessions(df):
    df.loc[df.index.hour.isin([7,8,9,10,11,12,13,14,15,16]), 'eu_session'] = 1
    df.loc[df['eu_session'] != 1, 'eu_session'] = 0

    df.loc[df.index.hour.isin([21,22,23,24,1,2,3,4,5,6,7]), 'asia_session'] = 1
    df.loc[df['asia_session'] != 1, 'asia_session'] = 0

    df.loc[df.index.hour.isin([12,13,14,15,16,17,18,19,20]), 'us_session'] = 1
    df.loc[df['us_session'] != 1, 'us_session'] = 0
    
    return df

In [21]:
with_ta_impact_sessions = add_sessions(with_ta_and_impact)

In [22]:
def add_change(df):
    df['change_5'] = (df['close'].shift(-5) - df['close']) * 100/ df['close']
    df['change_10'] = (df['close'].shift(-10) - df['close']) * 100/ df['close']
    df['change_15'] = (df['close'].shift(-15) - df['close']) * 100/ df['close']
    
    return df

In [23]:
with_ta_impact_sessions_change = add_change(with_ta_impact_sessions)

In [121]:
def label(df):
    df.loc[df['change_5'] > 0.03,'label_5'] = 2
    df.loc[df['change_5'] < -0.03,'label_5'] = 1
    df.loc[df['label_5'].isna(),'label_5'] = 0

    df.loc[df['change_10'] > 0.03,'label_10'] = 2
    df.loc[df['change_10'] < -0.03,'label_10'] = 1
    df.loc[df['label_10'].isna(),'label_10'] = 0

    df.loc[df['change_15'] > 0.03,'label_15'] = 2
    df.loc[df['change_15'] < -0.03,'label_15'] = 1
    df.loc[df['label_15'].isna(),'label_15'] = 0
    
    df = df.drop(['open', 'change_5', 'change_10', 'change_15'], axis=1)
    
    return df

In [122]:
labeled = label(with_ta_impact_sessions_change)

In [123]:
len(labeled)/12 # hours

465.9166666666667

In [124]:
len(labeled)/12/24 # days

19.413194444444446

In [125]:
labeled.head()

Unnamed: 0_level_0,high,low,close,vol,impact,volume_adi_change,volume_obvm_change,volume_cmf_change,volume_em_change,volume_vpt_change,...,momentum_stoch_signal_change,momentum_ao_change,others_cr_change,impact_broaden,eu_session,asia_session,us_session,label_5,label_10,label_15
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-12-31 06:20:00,1.14321,1.14298,1.14308,33.0,0.0,-0.815443,-0.056,0.305595,-0.233196,-1.1234,...,-0.024506,-0.520127,0.034483,0.0,0.0,1.0,0.0,0.0,1.0,2.0
2018-12-31 06:25:00,1.14308,1.14284,1.14299,66.0,0.0,0.370968,-0.822034,0.676424,-0.14142,6.500135,...,-0.08738,-1.00305,0.15,0.0,0.0,1.0,0.0,1.0,0.0,2.0
2018-12-31 06:30:00,1.14302,1.14264,1.14278,82.0,0.0,-1.416456,-5.952381,0.018635,-0.31216,2.509302,...,-0.177456,172.0,0.304348,0.0,0.0,1.0,0.0,0.0,2.0,2.0
2018-12-31 06:35:00,1.1428,1.14249,1.1428,46.0,0.0,-5.80829,-0.221154,0.447562,-0.408953,-0.296193,...,-0.148148,-0.88921,-0.022222,0.0,0.0,1.0,0.0,0.0,2.0,2.0
2018-12-31 06:40:00,1.14306,1.14282,1.14302,37.0,0.0,1.893678,-0.160494,-0.114707,2.104264,-1.555927,...,0.018634,1.052174,-0.25,0.0,0.0,1.0,0.0,1.0,0.0,2.0


In [126]:
def filter_data(df):
    # friday evening
    df = df[~((df.index.dayofweek == 4) & (df.index.hour >= 16))]
    # calendar
    df = df[df['impact_broaden'] != 1]
    # monday's morning
    df = df[~((df.index.dayofweek == 0) &( df.index.hour < 7))]
    
    return df

In [127]:
filtered = filter_data(labeled)

In [128]:
def add_day_counter(df):
    for i, date in enumerate(sorted(list(set(df.index.date)))):
        df.loc[df.index.date == date ,'day_index'] = i
        
    df['weekday'] = df.index.dayofweek.values
    
    return df

In [169]:
final = add_day_counter(filtered)

In [170]:
final = final.astype('double')

In [171]:
final = final.dropna(axis=1, how='any')

In [281]:
stop_loss = 0.0030
take_profit = 0.0070
periods = 5
commision = 0.0002
number_of_models_to_test = 10
days_to_train = 5

In [287]:
df_list = []
for i in range(len(final['day_index'].unique()) - days_to_train):
    
    train_df = final[(final['day_index'] >= i) & (final['day_index'] < (i+days_to_train))]
    train_Y = train_df['label_'+str(periods)].values.tolist()
    train_X = train_df.drop(['close', 'high', 'low', 'vol', 'impact', 'label_5', 'label_10', 'label_15','day_index'], axis=1).values
    
    test_df = final[final['day_index'] == (i + days_to_train)]
    if not test_df.empty:
        test_Y = test_df['label_'+str(periods)].values.tolist()
        test_X = test_df.drop(['close', 'high', 'low', 'vol', 'impact', 'label_5', 'label_10', 'label_15','day_index'], axis=1).values


        clf = ExtraTreesClassifier(n_estimators=30)
        clf = clf.fit(train_X, train_Y)

        # pick only those features (technical indicators) which hase at least mean influence on y label
        model = SelectFromModel(clf, prefit=True, threshold="mean")

        train_X = train_X[:, model.get_support()]
        test_X = test_X[:, model.get_support()]

        for criterion in ['entropy','gini']:
            for max_depth in [4,6,8,10]:
                for min_samples_leaf in [10, 25, 50, 100]:
                    clf = tree.DecisionTreeClassifier(criterion=criterion,
                                         max_depth=max_depth,
                                         min_samples_leaf=min_samples_leaf)
                    clf.fit(train_X, train_Y)
                    score = clf.score(test_X, test_Y)
                    pred_Y = clf.predict(test_X)

                    row = []
                    row.append(i)
                    row.append(score)
                    row.append(criterion)
                    row.append(max_depth)
                    row.append(min_samples_leaf)
                    row.append(clf)
                    row.append(pred_Y)
                    df_list.append(row)
    
    #break

In [288]:
scores = pd.DataFrame(df_list, columns=['day_index','score','criterion','max_depth','min_samples_leaf','clf','pred_Y'])\
    .sort_values(by='score', ascending=False)

In [289]:
bests = scores[['day_index', 'criterion', 'max_depth', 'min_samples_leaf','score']]\
    .groupby(['criterion', 'max_depth', 'min_samples_leaf']).mean()\
    .sort_values('score', ascending=False)\
    [['score']]

In [285]:
df_list = []
for i in range(number_of_models_to_test):
    row = []
    bests.reset_index().iloc[i]
    
    # take predicted Y from scores df
    scores_extract = scores[
        (scores['criterion'] == bests.reset_index().iloc[i]['criterion']) &
        (scores['max_depth'] == bests.reset_index().iloc[i]['max_depth']) &
        (scores['min_samples_leaf'] == bests.reset_index().iloc[i]['min_samples_leaf'])
    ].sort_values('day_index')
    
    final_scores_extract = []
    for day_of_scores in scores_extract['pred_Y'].values:
        for value in day_of_scores:
            final_scores_extract.append(value)
            
    # take high and low prices from final df
    final_extract = final[final['day_index'] >= days_to_train][['low','high','close']]
    
    # merge predictions to df
    final_extract['pred'] = final_scores_extract
    final_extract['next_low'] = None
    final_extract['next_high'] = None
    final_extract['flag'] = 0
    final_extract['score'] = None
    
    # for each period
    for n in range(periods):
        # add price of next candle
        final_extract['next_low'] = final_extract['low'].shift(-1-n)
        final_extract['next_high'] = final_extract['high'].shift(-1-n)
        
        # check buy trades
        # check its stop loss first
        final_extract.loc[
            (final_extract['pred'] == 2) &
            (final_extract['next_low'] <= final_extract['close'] - stop_loss) &
            (final_extract['score'].isna())
        , 'score'] = -stop_loss - commision
        # and a take profit
        final_extract.loc[
            (final_extract['pred'] == 2) &
            (final_extract['next_high'] >= final_extract['close'] + take_profit) &
            (final_extract['score'].isna())
        , 'score'] = take_profit - commision
    
        # sell trades
        # its stop loss
        final_extract.loc[
            (final_extract['pred'] == 1) &
            (final_extract['next_high'] >= final_extract['close'] + stop_loss) &
            (final_extract['score'].isna())
        , 'score'] = -stop_loss - commision
        # and take profit
        final_extract.loc[
            (final_extract['pred'] == 1) &
            (final_extract['next_low'] <= final_extract['close'] - take_profit) &
            (final_extract['score'].isna())
        , 'score'] = take_profit - commision
        
    # if still open...
    final_extract.loc[
        (final_extract['score'].isna()) &
        (final_extract['pred'] != 0)
    ,'score'] = final_extract['close'].shift(-1-periods) - final_extract['close'] - commision

    sumarized_pips = round(final_extract['score'].fillna(0).sum(), 4)
    
    row.append(bests.reset_index().iloc[i])
    row.append(sumarized_pips)
    
    df_list.append(row)
    
summarized_pips_dataframe = pd.DataFrame(df_list, columns=['model', 'score']).sort_values('score', ascending=False)

In [286]:
summarized_pips_dataframe.head(5)

Unnamed: 0,model,score
8,criterion entropy max_depth ...,-0.1045
0,criterion entropy max_depth ...,-0.1214
1,criterion entropy max_depth ...,-0.1214
2,criterion entropy max_depth ...,-0.1214
3,criterion entropy max_depth ...,-0.1214


In [None]:
# TODO


# change code so I can be able to loop by more parameters
