In [2]:
from oandapyV20.contrib.factories import InstrumentsCandlesFactory as history
from oandapyV20.endpoints.accounts import AccountInstruments
from oandapyV20 import API
import datetime as dt
import pandas as pd
#from pytrends.request import TrendReq
import ta

from sklearn import tree
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import classification_report, confusion_matrix

### Methods and how-to

In [3]:
token = '7f736aabc877f3ea75bc844c79814d7c-998e41725285ea6d54b836e8b93fe4f6'
aid = "101-004-8182547-001"

In [4]:
def instrument_list(token, aid):
    api = API(access_token = token)
    request = AccountInstruments(accountID = aid)
    response = api.request(request)
    
    return pd.DataFrame().from_dict(response['instruments']).sort_values('displayName')

In [5]:
def hist(token, aid, instrument, start_days, end_days, granularity):
    
    api = API(access_token=token)

    start_date = (dt.datetime.now()-dt.timedelta(days=start_days)).strftime('%Y-%m-%dT%H:%M:%SZ')
    end_date = (dt.datetime.now()-dt.timedelta(days=end_days, hours=2,minutes=4)).strftime('%Y-%m-%dT%H:%M:%SZ')

    params ={
                "from": start_date,
                "to": end_date,
                "granularity":granularity,
            }

    df_list = []
    for r in history(instrument=instrument,params=params):
        api.request(r)
        df = pd.DataFrame(r.response['candles'])
        if(df.empty==False):
            time = df['time']
            volume = pd.DataFrame(df['volume'].apply(pd.Series))
            df = pd.DataFrame(df['mid'].apply(pd.Series))
            df = pd.concat([df,time,volume], axis=1)
            df['time'] = pd.to_datetime(df['time'], format='%Y-%m-%dT%H:%M:%S.000000000Z')
            df.set_index('time',inplace=True)
            df_list.append(df)
    
    final = pd.concat(df_list)
    final.columns = ['Close','High','Low','Open','Volume']
    return final.astype(float)

In [6]:
instrument_list(token, aid).head()

Unnamed: 0,displayName,displayPrecision,marginRate,maximumOrderUnits,maximumPositionSize,maximumTrailingStopDistance,minimumTradeSize,minimumTrailingStopDistance,name,pipLocation,tags,tradeUnitsPrecision,type
47,AUD/CAD,5,0.05,100000000,0,1.0,1,0.0005,AUD_CAD,-4,"[{'type': 'ASSET_CLASS', 'name': 'CURRENCY'}]",0,CURRENCY
106,AUD/CHF,5,0.05,100000000,0,1.0,1,0.0005,AUD_CHF,-4,"[{'type': 'ASSET_CLASS', 'name': 'CURRENCY'}]",0,CURRENCY
66,AUD/HKD,5,0.05,100000000,0,1.0,1,0.0005,AUD_HKD,-4,"[{'type': 'ASSET_CLASS', 'name': 'CURRENCY'}]",0,CURRENCY
22,AUD/JPY,3,0.05,100000000,0,100.0,1,0.05,AUD_JPY,-2,"[{'type': 'ASSET_CLASS', 'name': 'CURRENCY'}]",0,CURRENCY
30,AUD/NZD,5,0.05,100000000,0,1.0,1,0.0005,AUD_NZD,-4,"[{'type': 'ASSET_CLASS', 'name': 'CURRENCY'}]",0,CURRENCY


In [7]:
hist(token, aid, 'EUR_USD', 20, 0, 'H4').head()

Unnamed: 0_level_0,Close,High,Low,Open,Volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-01-07 14:00:00,1.14578,1.14728,1.14538,1.1469,7844.0
2019-01-07 18:00:00,1.14686,1.14827,1.14678,1.14748,3118.0
2019-01-07 22:00:00,1.14773,1.14848,1.14471,1.14485,2918.0
2019-01-08 02:00:00,1.14488,1.14535,1.14322,1.14438,2162.0
2019-01-08 06:00:00,1.14438,1.14612,1.14332,1.14521,5497.0


In [8]:
def add_features_and_labels(df):
    df = ta.add_all_ta_features(df, "Open", "High", "Low", "Close", "Volume", fillna=False).dropna()
    
    df['change'] = (df['Close'] - df['Open']) / df['Open'] * 100
    df['shifted_change'] = df['change'].shift(1)
    
    df = df.drop(['change','Open','Low','High','Close','Volume'], axis=1).dropna()
    
    df.loc[(df['shifted_change'] > -0.02)&(df['shifted_change'] < 0.02), 'y'] = 0
    df.loc[(df['shifted_change'] >= 0.02), 'y'] = 1
    df.loc[(df['shifted_change'] <= -0.02), 'y'] = 2
    
    change = df.iloc[:int(len(df)*0.2)]['shifted_change'].values.tolist()
    df = df.drop('shifted_change', axis=1)
    
    for feature in df.drop('y', axis=1).columns.tolist():
        df[feature+"_change"] = (df[feature] - df[feature].shift(1)) / df[feature].shift(1)
        df = df.drop(feature, axis=1)
    
    return df.iloc[2:], change[2:]

In [9]:
# trend_req = TrendReq(hl='en-US', tz=360)

# kw_list = ["Blockchain"] 
# trend_req.build_payload(kw_list, timeframe='today 1-y')

### Test specific data

In [10]:
df_list = []
for i in range(0, 100, 20):
    df_list.append(hist(token, aid, 'EUR_USD', (i+20), i, 'H1').astype(float))
df = pd.concat(df_list)

In [11]:
# how many days are there
len(df)/24

68.04166666666667

In [12]:
df, change = add_features_and_labels(df)

  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])
  aroon_up = close.rolling(n).apply(lambda x: float(np.argmax(x) + 1) / n * 100)
  aroon_down = close.rolling(n).apply(lambda x: float(np.argmin(x) + 1) / n * 100)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [13]:
df['ones'] = 1
df[['y','ones']].groupby('y').count().plot(kind='bar');
df = df.drop('ones', axis=1)
# Distribution of y label are more less the same, to there is no need to normalize them

In [14]:
df = df.dropna(axis=1)
df = df.dropna(axis=0)

In [15]:
len(df)

1553

#### Features selection

In [16]:
train_df = df.iloc[int(len(df)*0.2):]
test_df = df.iloc[:int(len(df)*0.2)]

train_Y = train_df['y'].values.tolist()
train_X = train_df.drop('y',axis=1).values.tolist()

test_Y = test_df['y'].values.tolist()
test_X = test_df.drop('y',axis=1).values.tolist()

In [17]:
columns_to_validate = df.drop('y', axis=1).columns

In [18]:
len(columns_to_validate)

53

In [19]:
clf = ExtraTreesClassifier(n_estimators=50)
clf = clf.fit(train_X, train_Y)

# pick only those features (technical indicators) which hase at least mean influence on y label
model = SelectFromModel(clf, prefit=True, threshold="mean")

  return umr_sum(a, axis, dtype, out, keepdims)


ValueError: Input contains NaN, infinity or a value too large for dtype('float32').

In [20]:
columns_validated = columns_to_validate[model.get_support()]

NameError: name 'model' is not defined

In [136]:
len(columns_validated)

12

In [137]:
train_df = train_df[columns_validated.tolist() + ['y']]
test_df = test_df[columns_validated.tolist() + ['y']]

In [138]:
train_Y = train_df['y'].values.tolist()
train_X = train_df.drop('y',axis=1).values.tolist()

test_Y = test_df['y'].values.tolist()
test_X = test_df.drop('y',axis=1).values.tolist()

#### Models

In [139]:
# try few parameters
df_list = []
for criterion in ['entropy','gini']:
    for max_depth in [4,6,8]:
        for min_samples_leaf in [10]:
            clf = tree.DecisionTreeClassifier(criterion=criterion,
                                 max_depth=max_depth,
                                 min_samples_leaf=min_samples_leaf)
            clf.fit(train_X, train_Y)
            score = clf.score(test_X, test_Y)
            
            row = []
            row.append(score)
            row.append(criterion)
            row.append(max_depth)
            row.append(min_samples_leaf)
            row.append(clf)
            df_list.append(row)

In [140]:
scores = pd.DataFrame(df_list, columns=['score','criterion','max_depth','min_samples_leaf','clf'])\
    .sort_values(by='score', ascending=False)

In [141]:
scores.head(10)

Unnamed: 0,score,criterion,max_depth,min_samples_leaf,clf
5,0.785489,gini,8,10,"DecisionTreeClassifier(class_weight=None, crit..."
4,0.77918,gini,6,10,"DecisionTreeClassifier(class_weight=None, crit..."
1,0.712934,entropy,6,10,"DecisionTreeClassifier(class_weight=None, crit..."
2,0.694006,entropy,8,10,"DecisionTreeClassifier(class_weight=None, crit..."
3,0.652997,gini,4,10,"DecisionTreeClassifier(class_weight=None, crit..."
0,0.646688,entropy,4,10,"DecisionTreeClassifier(class_weight=None, crit..."


In [142]:
# pick the best one (.iloc[0])
clf = scores.iloc[0]['clf']

pred_Y = clf.predict(test_X)

print(classification_report(test_Y, pred_Y, digits=3))
print(confusion_matrix(test_Y, pred_Y))

             precision    recall  f1-score   support

        0.0      0.663     0.704     0.683        81
        1.0      0.852     0.846     0.849       123
        2.0      0.807     0.779     0.793       113

avg / total      0.788     0.785     0.786       317

[[ 57  10  14]
 [ 12 104   7]
 [ 17   8  88]]


In [143]:
crosstab = pd.crosstab(pd.Series(test_Y), pd.Series(pred_Y), rownames=['True'], colnames=['Predicted'], margins=True)

In [144]:
crosstab

Predicted,0.0,1.0,2.0,All
True,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,57,10,14,81
1.0,12,104,7,123
2.0,17,8,88,113
All,86,122,109,317


In [145]:
final_scores = pd.concat([pd.Series(test_Y),pd.Series(pred_Y.tolist()),pd.Series(change)], axis=1)
final_scores.columns = ['real','pred','change']

In [146]:
# create new column which tells whether we gained during this hour or not

final_scores.loc[(final_scores['real'] == 0)&(final_scores['pred'] == 0) ,'gain_loss'] = 0
final_scores.loc[(final_scores['real'] == 0)&(final_scores['pred'] == 1)&(final_scores['change'] > 0) ,'gain_loss'] = 1
final_scores.loc[(final_scores['real'] == 0)&(final_scores['pred'] == 1)&(final_scores['change'] <= 0) ,'gain_loss'] = -1
final_scores.loc[(final_scores['real'] == 0)&(final_scores['pred'] == 2)&(final_scores['change'] >= 0) ,'gain_loss'] = -1
final_scores.loc[(final_scores['real'] == 0)&(final_scores['pred'] == 2)&(final_scores['change'] < 0) ,'gain_loss'] = 1

final_scores.loc[(final_scores['real'] == 1)&(final_scores['pred'] == 0) ,'gain_loss'] = 0
final_scores.loc[(final_scores['real'] == 1)&(final_scores['pred'] == 1) ,'gain_loss'] = 1
final_scores.loc[(final_scores['real'] == 1)&(final_scores['pred'] == 2) ,'gain_loss'] = -1

final_scores.loc[(final_scores['real'] == 2)&(final_scores['pred'] == 0) ,'gain_loss'] = 0
final_scores.loc[(final_scores['real'] == 2)&(final_scores['pred'] == 1) ,'gain_loss'] = -1
final_scores.loc[(final_scores['real'] == 2)&(final_scores['pred'] == 2) ,'gain_loss'] = 1

# gain_loss
# 0 - nothing happend
# 1 - gain
# -1 loss

In [147]:
final_scores['change_abs'] = abs(final_scores['change'])

In [148]:
final_scores.head()

Unnamed: 0,real,pred,change,gain_loss,change_abs
0,1.0,1.0,0.051191,1.0,0.051191
1,2.0,0.0,-0.044089,0.0,0.044089
2,0.0,0.0,0.015879,0.0,0.015879
3,2.0,2.0,-0.026461,1.0,0.026461
4,2.0,2.0,-0.034407,1.0,0.034407


In [149]:
# calculate final account balance
position_scores = final_scores['gain_loss'].values.tolist()
changes = final_scores['change_abs'].values.tolist()
money = 1000
position_size = 0.10

for i in range(len(final_scores)):
    if position_scores[i] == 0:
        continue
    elif position_scores[i] == 1:
        money = money + position_size*money*changes[i]
        continue
    elif position_scores[i] == -1:
        money = money - position_size*money*changes[i]
        continue

In [150]:
money

3326.259162830186

In [None]:
# draft

### Deploying on pythonanywhere

In [151]:
import oandapyV20.endpoints.trades as trades
import oandapyV20 as opy

# start a script
def main():
    token = '7ba356ef1f42fb5ce5641b394d258b10-dd8759083ea7207afcabaff70603ab28'
    aid = "101-004-8182547-005"
    api = opy.API(access_token=token)
    
    # run a bit before full hour
    
    # check time
    
    # get difference
    
    # wait the difference
    
    # for every hour do:

    # close current trades
    close_trades(aid, api)
    
    # get last data
    df = hist(token, aid, 'EUR_USD', 2, 0, 'H1').astype(float)
    
    # calculate indicators
    df = ta.add_all_ta_features(df, "Open", "High", "Low", "Close", "Volume", fillna=False).dropna()
    
    # pick important ones
    
    # pass values to model
    
    # based on model prediction open a trade
    
    
def close_trades(aid, api):
    trades_list = trades.TradesList(accountID=aid)
    trades_list = api.request(trades_list)

    for trade in trades_list['trades']:
        r = trades.TradeClose(aid, tradeID=trade['id'], data={'units': str(abs(int(trade['currentUnits'])))})
        api.request(r)
        
def hist(token, aid, instrument, start_days, end_days, granularity):
    
    api = API(access_token=token)

    start_date = (dt.datetime.now()-dt.timedelta(days=start_days)).strftime('%Y-%m-%dT%H:%M:%SZ')
    end_date = (dt.datetime.now()-dt.timedelta(days=end_days, hours=2,minutes=4)).strftime('%Y-%m-%dT%H:%M:%SZ')

    params ={
                "from": start_date,
                "to": end_date,
                "granularity":granularity,
            }

    df_list = []
    for r in history(instrument=instrument,params=params):
        api.request(r)
        df = pd.DataFrame(r.response['candles'])
        if(df.empty==False):
            time = df['time']
            volume = pd.DataFrame(df['volume'].apply(pd.Series))
            df = pd.DataFrame(df['mid'].apply(pd.Series))
            df = pd.concat([df,time,volume], axis=1)
            df['time'] = pd.to_datetime(df['time'], format='%Y-%m-%dT%H:%M:%S.000000000Z')
            df.set_index('time',inplace=True)
            df_list.append(df)
    
    final = pd.concat(df_list)
    final.columns = ['Close','High','Low','Open','Volume']
    return final
    
    
if __name__ == '__main__':
    main()

  dip[i] = 100 * (dip_mio[i]/trs[i])
  din[i] = 100 * (din_mio[i]/trs[i])


IndexError: index 14 is out of bounds for axis 0 with size 2

In [None]:
df.head()

In [152]:
df = pd.read_csv('datas.csv', sep=',')

FileNotFoundError: File b'datas.csv' does not exist