In [15]:
import sqlalchemy
import pandas as pd


In [16]:
db_connection_string = 'sqlite:///./Resources/products.db'
engine  = sqlalchemy.create_engine(db_connection_string)

inspector = sqlalchemy.inspect(engine)
table_names = inspector.get_table_names()
print(table_names)

['AAPL_1_Day_Candles', 'AAPL_1_Min_Candles', 'AAPL_Info', 'Indicators', 'MARA_1_Day_Candles', 'MARA_1_Min_Candles', 'MARA_Info', 'RIOT_1_Day_Candles', 'RIOT_1_Min_Candles', 'RIOT_Info', 'TSLA_1_Day_Candles', 'TSLA_1_Min_Candles', 'TSLA_Info', 'WHD_1_Day_Candles', 'WHD_1_Min_Candles', 'WHD_Info']


In [17]:
indicators_df = pd.read_sql_table('Indicators', con=engine, index_col='Datetime')

In [18]:
print(indicators_df.head())

                       Open    High     Low   Close  Volume  CDLDOJI  \
Datetime                                                               
2021-09-07 03:21:00  154.59  154.59  154.59  154.59     717      100   
2021-09-07 03:22:00  154.59  154.60  154.59  154.60    3021        0   
2021-09-07 03:24:00  154.60  154.63  154.60  154.60    2700      100   
2021-09-07 03:25:00  154.61  154.61  154.61  154.61     607      100   
2021-09-07 03:26:00  154.61  154.61  154.61  154.61     340      100   

                     Trailing Stop  Trade Signal  Pct Change   Stop Loss  \
Datetime                                                                   
2021-09-07 03:21:00     154.456071           0.0   -0.000065    0.000000   
2021-09-07 03:22:00     154.473852           0.0    0.000065  154.473852   
2021-09-07 03:24:00     154.477505           0.0    0.000000    0.000000   
2021-09-07 03:25:00     154.494469           0.0    0.000065  154.494469   
2021-09-07 03:26:00     154.502722     

In [19]:
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import balanced_accuracy_score
from imblearn.metrics import classification_report_imbalanced

In [20]:
#Segment features from the trade signal
df = indicators_df.copy()
# X = df.drop(columns=['Trade Signal', 'Open', 'High', 'Low', 'Trailing Stop', 'Pct Change'])
X = df.drop(columns=['Trade Signal'])
y = df["Trade Signal"]

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

In [22]:
# import StandardScaler
from sklearn.preprocessing import StandardScaler

# Scale the data
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [23]:
display(X_train_scaled, y_train)

array([[-0.93400999, -0.94297031, -0.92729185, ...,  0.00586607,
        -0.89491042,  0.03137472],
       [-0.31993049, -0.32076796, -0.31360535, ...,  0.13124981,
         1.09586276,  0.03137472],
       [ 0.18010567,  0.17853023,  0.17317955, ..., -0.84595021,
        -0.89491042,  0.03137472],
       ...,
       [-0.1817626 , -0.19018228, -0.17990936, ...,  0.75539024,
         1.10386225,  0.03137472],
       [-0.95045854, -0.94735975, -0.94372988, ...,  0.38960355,
         1.05472727,  0.03137472],
       [-0.26071568, -0.26699738, -0.26538713, ..., -0.11931145,
        -0.89491042,  0.03137472]])

Datetime
2021-09-20 18:01:00    0.0
2021-09-27 14:17:00    0.0
2021-09-16 12:30:00    0.0
2021-09-10 07:20:00    0.0
2021-09-16 10:37:00    0.0
                      ... 
2021-09-17 04:04:00    0.0
2021-09-17 12:08:00    0.0
2021-09-27 05:09:00    0.0
2021-09-28 14:12:00    0.0
2021-09-22 15:05:00    0.0
Name: Trade Signal, Length: 11376, dtype: float64

In [24]:
# MODEL
from sklearn.svm import SVC
# from sklearn.svm import SVR

# Instantiate a linear SVM model
model = SVC(kernel='linear')
# model = LogisticRegression()
model

SVC(kernel='linear')

In [25]:
# FIT
# Fit the data
model.fit(X_train_scaled, y_train)

SVC(kernel='linear')

In [26]:
# PREDICT
# Make predictions using the test data
y_pred = model.predict(X_test_scaled)

results = pd.DataFrame(
    {
        "Predictions": y_pred,
        "Actual":y_test
    }
)
results

Unnamed: 0_level_0,Predictions,Actual
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-09-23 09:13:00,0.0,0.0
2021-09-08 03:05:00,0.0,0.0
2021-09-14 07:55:00,0.0,0.0
2021-09-08 12:43:00,0.0,0.0
2021-09-23 13:32:00,0.0,0.0
...,...,...
2021-09-15 15:09:00,0.0,0.0
2021-09-08 13:19:00,0.0,0.0
2021-09-14 05:19:00,0.0,0.0
2021-09-07 06:18:00,0.0,0.0


In [27]:
# EVALUATE
# Generate confusion matrix
print(confusion_matrix(y_test, y_pred))

[[  85    0    0]
 [   0 3647    0]
 [   0    0   61]]


In [28]:
# Generate Classification Report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

        -1.0       1.00      1.00      1.00        85
         0.0       1.00      1.00      1.00      3647
         1.0       1.00      1.00      1.00        61

    accuracy                           1.00      3793
   macro avg       1.00      1.00      1.00      3793
weighted avg       1.00      1.00      1.00      3793

