In [1]:
# Import libraries

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymssql
import talib as ta
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

%matplotlib notebook

In [2]:
# Configuration, data load and target construction.
market = 'Brent'  # one of 'Brent', 'TTF', 'NBP'

# Ticker (as stored in the 'Ticker' column of algo_EndOfDayPrices) for each
# supported market.
rics = {
    'Brent': 'CO1 Comdty',
    'TTF': 'TTFG1MON Index',
    'NBP': 'NBPG1MON Index',
}
if market not in rics:
    # Fail here instead of printing and hitting a NameError on `ric` below.
    raise ValueError('Market {!r} not found; expected one of {}'
                     .format(market, sorted(rics)))
ric = rics[market]

# Credentials are read from the environment so that no secret lives in the
# notebook. Set MSSQL_USER / MSSQL_PASSWORD before starting the kernel
# (a missing variable raises KeyError). MSSQL_SERVER / MSSQL_DATABASE are
# optional and default to the values this notebook has always used.
# NOTE(review): the password that used to be hardcoded here should be rotated.
user = os.environ['MSSQL_USER']
pwd = os.environ['MSSQL_PASSWORD']
server = os.environ.get('MSSQL_SERVER', 'ser-sviluppo')
db = os.environ.get('MSSQL_DATABASE', 'Middle')

# NOTE(review): this loads every ticker and filters in pandas; filtering in
# SQL would be cheaper but would change the contents of `df`.
query = "select * from algo_EndOfDayPrices"

with pymssql.connect(server, user, pwd, db) as conn:
    df = pd.read_sql(query, conn)

# One row per RefDay, one column per PriceType, restricted to the chosen
# ticker and to dates from 2014-01-01 onwards. Built as a single chain so no
# in-place operation is applied to a filtered slice of another frame.
df_1 = (
    df[df['Ticker'] == ric]
    .pivot(index='RefDay', columns='PriceType', values='Value')
    .loc[lambda prices: prices.index >= '2014-01-01']
    .rename(columns={'PX_SETTLE': 'close', 'PX_OPEN': 'open', 'PX_HIGH': 'high',
                     'PX_LOW': 'low', 'PX_VOLUME': 'volume'})
    .dropna()
)


def _next_day_direction(close):
    """Label each row +1 if the next row's close is higher, otherwise -1.

    A zero change is labelled -1. The last row has no successor and is NaN.
    """
    return close.diff().apply(lambda change: +1 if change > 0 else -1).shift(-1)


# Exponentially weighted mean of every price column (alpha = 0.1).
df_smothed = df_1.ewm(alpha=0.1).mean()
# Direction of the next move in the smoothed close ...
df_smothed['Smoothed_target'] = _next_day_direction(df_smothed['close'])
# ... and in the raw (unsmoothed) close, aligned on the same index.
df_smothed['target'] = _next_day_direction(df_1['close'])



In [3]:
# Indicators
#
# Each entry maps the output column name to the TA-Lib abstract call that
# fills it. The calls receive df_smothed itself, exactly as it stands when the
# entry is processed (i.e. including the columns added by earlier entries).
indicator_builders = {
    'RSI': lambda prices: ta.abstract.RSI(prices, timeperiod=14),
    'Stock': lambda prices: ta.abstract.STOCH(prices)['slowk'],
    'Will': lambda prices: ta.abstract.WILLR(prices),
    'MACD': lambda prices: ta.abstract.MACD(prices)['macd'],
    'POC': lambda prices: ta.abstract.ROC(prices),
    'Balance_volume': lambda prices: ta.abstract.OBV(prices),
}
for column_name, build_indicator in indicator_builders.items():
    df_smothed[column_name] = build_indicator(df_smothed)

# Drop every row that has a NaN in any column (indicators or targets).
df_smothed.dropna(inplace=True)

# Feature matrix plus the two alternative labels.
feature_columns = list(indicator_builders)
df_smothed_x = df_smothed[feature_columns]
df_smothed_y, df_y = df_smothed['Smoothed_target'], df_smothed['target']



In [None]:
# Random forest fitted on the indicators, predicting the smoothed target.
SEED = 42  # fixed so the split and the forest are identical on every re-run

# NOTE(review): train_test_split shuffles rows by default. The features come
# from an exponentially smoothed time series, so neighbouring rows may be
# strongly correlated and a shuffled split can overstate test accuracy.
# Consider shuffle=False (chronological hold-out) -- to be confirmed.
X_train, X_test, y_train, y_test = train_test_split(
    df_smothed_x, df_smothed_y, random_state=SEED
)
clf = RandomForestClassifier(
    n_jobs=-1, n_estimators=10, random_state=SEED
).fit(X_train, y_train)

predict_y = clf.predict(X_test)
print('Accuracy of RF classifier on training set: {:.2f}'
     .format(clf.score(X_train, y_train)))
print('Accuracy of RF classifier on test set: {:.2f}'
     .format(clf.score(X_test, y_test)))


In [16]:
# Random forest fitted on the same indicators, predicting the raw
# (unsmoothed) target.
SEED = 42  # fixed so the split and the forest are identical on every re-run

# NOTE(review): train_test_split shuffles rows by default; for time-series
# features a chronological hold-out (shuffle=False) may be more appropriate
# -- to be confirmed.
X_train, X_test, y_train, y_test = train_test_split(
    df_smothed_x, df_y, random_state=SEED
)

clf = RandomForestClassifier(
    n_jobs=-1, n_estimators=5, random_state=SEED
).fit(X_train, y_train)

predict_y = clf.predict(X_test)
print('Accuracy of RF classifier on training set: {:.2f}'
     .format(clf.score(X_train, y_train)))
print('Accuracy of RF classifier on test set: {:.2f}'
     .format(clf.score(X_test, y_test)))

Accuracy of RF classifier on training set: 0.93
Accuracy of RF classifier on test set: 0.47


In [None]:
# Confusion matrix for whichever model/split cell was executed last:
# y_test and predict_y are taken from the current kernel state.
confusion = confusion_matrix(y_test, predict_y)
# The label previously mentioned a "linear kernel, C=1", which does not apply
# to a random forest.
print('Random forest classifier confusion matrix\n', confusion)

In [None]:
# Sweep the EWM smoothing factor and record, for each value, the mean test
# accuracy of the random forest over several split/fit repetitions.
alphas = np.linspace(0.1, 1.0, 10)
N_REPEATS = 10  # repetitions averaged for each alpha


def _smoothed_features_and_target(prices, alpha):
    """Build the indicator matrix and smoothed target for one smoothing factor.

    Parameters
    ----------
    prices : pandas.DataFrame
        Price frame passed to ``ewm``; the TA-Lib abstract calls are applied
        to its smoothed version.
    alpha : float
        Smoothing factor forwarded to ``DataFrame.ewm``.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        Indicator columns and the matching 'Smoothed_target' labels, after
        dropping every row that contains a NaN.
    """
    smoothed = prices.ewm(alpha=alpha).mean()
    smoothed['Smoothed_target'] = (
        smoothed['close'].diff().apply(lambda x: +1 if x > 0 else -1).shift(-1)
    )
    smoothed['RSI'] = ta.abstract.RSI(smoothed, timeperiod=14)
    smoothed['Stock'] = ta.abstract.STOCH(smoothed)['slowk']
    smoothed['Will'] = ta.abstract.WILLR(smoothed)
    smoothed['MACD'] = ta.abstract.MACD(smoothed)['macd']
    smoothed['POC'] = ta.abstract.ROC(smoothed)
    smoothed['Balance_volume'] = ta.abstract.OBV(smoothed)
    smoothed = smoothed.dropna()
    features = smoothed[['RSI', 'Stock', 'Will', 'MACD', 'POC', 'Balance_volume']]
    return features, smoothed['Smoothed_target']


run_records = []   # one (alpha, repetition, score) row per fitted model
final_scores = []  # mean test accuracy per alpha, in the order of `alphas`

for al in alphas:
    # The dataset depends only on alpha, so build it once per alpha rather
    # than once per repetition.
    sweep_x, sweep_y = _smoothed_features_and_target(df_1, al)

    # Fresh list for every alpha: accumulating across alphas would turn each
    # entry of final_scores into a running mean over all previous alphas.
    alpha_scores = []
    for r in range(N_REPEATS):
        # Seeding with the repetition index gives N_REPEATS different but
        # reproducible splits/forests. Distinct variable names keep this loop
        # from overwriting X_test / y_test / clf used by other cells.
        sweep_X_train, sweep_X_test, sweep_y_train, sweep_y_test = train_test_split(
            sweep_x, sweep_y, random_state=r
        )
        sweep_clf = RandomForestClassifier(
            n_jobs=-1, n_estimators=10, random_state=r
        ).fit(sweep_X_train, sweep_y_train)
        score_r = sweep_clf.score(sweep_X_test, sweep_y_test)
        alpha_scores.append(score_r)
        run_records.append({'Alpha': al, 'R': r, 'Score': score_r})
    final_scores.append(np.mean(alpha_scores))

# Per-run results, built once from the collected records.
scores = pd.DataFrame(run_records, columns=['Alpha', 'R', 'Score'])



In [None]:
# Mean test accuracy as a function of the EWM smoothing factor.
# final_scores holds classifier scores as fractions, so they are scaled by 100
# to agree with the percent unit stated on the y-axis label.
fig, ax = plt.subplots()
ax.plot(alphas, np.asarray(final_scores) * 100)
ax.set_xlabel('Alpha for smoothing')
ax.set_ylabel('Average Accuracy on Test set (%)')
ax.set_title('Average accuracy for Random Forest')
fig.tight_layout()