# Financial Forecast Demo

In [1]:
import os
import sys
import uuid
import time
import json
from pathlib import Path

##### Import my module build from python file
module_path = str(Path.cwd() / "py")
debug_data_path = str(Path.cwd() / "debug_data")

if module_path not in sys.path:
    sys.path.append(module_path)
import build as build
import utils
from utils import get_module_version
log = utils.setup_logging(__name__)

##### Import common machine learning modules
import numpy as np
import pandas as pd
import pickle
log.debug('numpy version: %s' % np.__version__)

##### Import sklearn
import sklearn
from sklearn.linear_model import LinearRegression
from sklearn import preprocessing, svm 
from sklearn.model_selection import train_test_split
log.debug('sklearn version: %s' % sklearn.__version__)
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler


2021-03-29 23:29:01,357 - utils - DEBUG - test debug logging
2021-03-29 23:29:01,364 - __main__ - DEBUG - numpy version: 1.20.2
2021-03-29 23:29:01,441 - __main__ - DEBUG - sklearn version: 0.23.0


Model parameters.

In [2]:
MODEL_WINDOW_SIZE = 60
# Sklearn test_size
SKLEARN_TEST_SIZE = 0.20

All credentials for Watson Machine Learning, Watson OpenScale and its associated DataMart

In [3]:
model_path = build.get_model_base_path()
log.debug("Model Base Path: " + model_path)

2021-03-29 23:29:01,455 - __main__ - DEBUG - Model Base Path: /Users/jack/sources/pholas/forex-forecast/saved_model


In [4]:
#debug_data_csv = 'debug_data/EURUSD_M1_202103150000_202103251441.csv'

#debug_data_csv = 'debug_data/EURUSD_M1_202102250000_202103251443.csv'

debug_data_csv = 'debug_data/EURUSD_M1_202012150519_202103251444.csv'
def get_training_float_data_from_exported_csv() :
    history_dataset = pd.read_csv(debug_data_csv, sep='\t')
    #history_dataset['DATE_TIME'] = history_dataset['<DATE>'] + ' ' + history_dataset['<TIME>']
    values = history_dataset['<CLOSE>'].values
    log.debug(type(values.tolist()))
    return values.tolist()

In [5]:
def get_training_float_data_from_json() :
    debug_data_json_path = debug_data_path + '/input_data.json'
    with open(debug_data_json_path) as json_file:
        input_data = json.load(json_file)
    log.debug(type(input_data['Data']))
    float_data = [float(s) for s in input_data['Data']]

In [6]:
float_data = get_training_float_data_from_exported_csv()
X, y = build.get_feature_label_sklearn(float_data, default_window_size=MODEL_WINDOW_SIZE)

print("====" + str(len(X[-1])))
print("====")
# Splitting the data into training and testing data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = SKLEARN_TEST_SIZE, random_state=None, shuffle=False) 

log.debug(type(X_test[0]))
print(X_test[0])
print(pd.DataFrame(X_test))

2021-03-29 23:29:01,627 - __main__ - DEBUG - <class 'list'>
====60
====
2021-03-29 23:29:01,881 - __main__ - DEBUG - <class 'list'>
[1.193, 1.19308, 1.19325, 1.19337, 1.19352, 1.19359, 1.19352, 1.19366, 1.19365, 1.19353, 1.19372, 1.19374, 1.19378, 1.1938, 1.19383, 1.19359, 1.19342, 1.19344, 1.19362, 1.19372, 1.19356, 1.19348, 1.19359, 1.19341, 1.19384, 1.19389, 1.1941, 1.19411, 1.19402, 1.19383, 1.1938, 1.19359, 1.1933, 1.19329, 1.19319, 1.19301, 1.19307, 1.19342, 1.19342, 1.19321, 1.19317, 1.1931, 1.19302, 1.19312, 1.19302, 1.19302, 1.19322, 1.19343, 1.19341, 1.19334, 1.19338, 1.19327, 1.19327, 1.19332, 1.19307, 1.19287, 1.19271, 1.19277, 1.19271, 1.19271]
            0        1        2        3        4        5        6        7   \
0      1.19300  1.19308  1.19325  1.19337  1.19352  1.19359  1.19352  1.19366   
1      1.19308  1.19325  1.19337  1.19352  1.19359  1.19352  1.19366  1.19365   
2      1.19325  1.19337  1.19352  1.19359  1.19352  1.19366  1.19365  1.19353   
3      1.1

In [7]:
fields = ['f' + str(i) for i in range(MODEL_WINDOW_SIZE)]
log.debug(fields)

#print(len(X[-1]))

#X_np = np.array([np.array(xi) for xi in X])

training_data_df = pd.DataFrame(X, columns=fields)
log.debug(len(y))
training_data_df['l1'] = y

training_data_file_name="model_data.csv"

training_data_df.head(10)
training_data_df.to_csv(training_data_file_name, index = False, header=True)

2021-03-29 23:29:02,006 - __main__ - DEBUG - ['f0', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19', 'f20', 'f21', 'f22', 'f23', 'f24', 'f25', 'f26', 'f27', 'f28', 'f29', 'f30', 'f31', 'f32', 'f33', 'f34', 'f35', 'f36', 'f37', 'f38', 'f39', 'f40', 'f41', 'f42', 'f43', 'f44', 'f45', 'f46', 'f47', 'f48', 'f49', 'f50', 'f51', 'f52', 'f53', 'f54', 'f55', 'f56', 'f57', 'f58', 'f59']
2021-03-29 23:29:02,983 - __main__ - DEBUG - 100824


In [8]:
scaler = MinMaxScaler()

model = LinearRegression()

pipeline = Pipeline([('scaler', scaler), ('lr', model)])
type(pipeline)

pipeline.fit(X_train, y_train)

# model.fit(X_train, y_train)

log.debug("Linear regression score: %f" % model.score(X_test, y_test)) 
log.debug("Coeffience: ")
log.debug(model.coef_)
log.debug("Intercept: %f" % model.intercept_)

# Save model
sklearn_model_file_pickle = 'saved_model/ff_sklearn_pickle.pkl'
build.save_sklearn_model(model, sklearn_model_file_pickle)

# Load model
model = build.load_sklearn_model(sklearn_model_file_pickle)

# Predict X_test, all test samples
y#_pred = model.predict(X_test)
y_pred = pipeline.predict(X_test)

2021-03-29 23:29:08,139 - __main__ - DEBUG - Linear regression score: -157.457677
2021-03-29 23:29:08,140 - __main__ - DEBUG - Coeffience: 
2021-03-29 23:29:08,140 - __main__ - DEBUG - [ 1.25720177e-04 -1.70583874e-04 -2.84721157e-04  2.91931965e-04
 -7.75534475e-06  2.02355012e-04  1.95276989e-04 -2.57249601e-04
 -2.40465427e-04  3.58156977e-04 -3.49816649e-05 -2.83035792e-04
  3.82707178e-04 -2.39859639e-04 -1.67303870e-04  7.70265944e-05
  8.39032897e-05 -4.67857472e-06 -2.62710853e-04  1.09721925e-04
 -4.12379017e-05  2.59765891e-04  2.27252879e-04 -4.44769734e-04
  1.49281550e-04 -2.50624662e-04  2.07092495e-04 -1.87093523e-04
  1.72472167e-04  5.75223181e-05  6.12840659e-05 -6.49652582e-05
 -9.05531329e-05  1.94115934e-04 -1.03493373e-04 -3.25860005e-04
  2.69571634e-04 -2.01182127e-04  4.42036849e-04  2.16210444e-05
 -1.03019583e-04 -4.48232077e-04  4.37954711e-04 -1.10629003e-05
 -4.97216981e-04  9.48253250e-04 -8.61887080e-04 -2.19410375e-06
  6.68074750e-04 -3.27473925e-04  2

In [9]:
df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
df

Unnamed: 0,Actual,Predicted
0,1.19256,1.192718
1,1.19245,1.192565
2,1.19242,1.192461
3,1.19229,1.192422
4,1.19242,1.192297
...,...,...
20160,1.18031,1.180158
20161,1.17957,1.180312
20162,1.17990,1.179596
20163,1.17963,1.179909


In [10]:
from sklearn import metrics
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
print('R2 score:', metrics.r2_score(y_test, y_pred))

Mean Absolute Error: 8.40318366615625e-05
Mean Squared Error: 1.6048633406592367e-08
Root Mean Squared Error: 0.00012668320096442294
R2 score: 0.9990856998275862
