In [1]:
import sqlalchemy
import pandas as pd


In [2]:
# Connect to the local SQLite file that holds the candle and indicator tables.
db_connection_string = 'sqlite:///./Resources/products.db'
engine = sqlalchemy.create_engine(db_connection_string)

# Use the inspector to list the tables available in the database.
inspector = sqlalchemy.inspect(engine)
table_names = inspector.get_table_names()
print(table_names)

['AAPL_1_Day_Candles', 'AAPL_1_Min_Candles', 'AAPL_Info', 'Indicators', 'MARA_1_Day_Candles', 'MARA_1_Min_Candles', 'MARA_Info', 'RIOT_1_Day_Candles', 'RIOT_1_Min_Candles', 'RIOT_Info', 'TSLA_1_Day_Candles', 'TSLA_1_Min_Candles', 'TSLA_Info', 'WHD_1_Day_Candles', 'WHD_1_Min_Candles', 'WHD_Info']


In [3]:
# Load the 'Indicators' table into a DataFrame indexed by its 'Datetime' column.
indicators_df = pd.read_sql_table(
    table_name='Indicators',
    con=engine,
    index_col='Datetime',
)

In [4]:
# Plain-text preview of the first five rows of the indicator data.
print(indicators_df.head(n=5))

                       Open    High     Low   Close  Volume  CDLDOJI  \
Datetime                                                               
2021-09-07 03:21:00  154.59  154.59  154.59  154.59     717      100   
2021-09-07 03:22:00  154.59  154.60  154.59  154.60    3021        0   
2021-09-07 03:24:00  154.60  154.63  154.60  154.60    2700      100   
2021-09-07 03:25:00  154.61  154.61  154.61  154.61     607      100   
2021-09-07 03:26:00  154.61  154.61  154.61  154.61     340      100   

                     Trailing Stop  Trade Signal  Pct Change   Stop Loss  \
Datetime                                                                   
2021-09-07 03:21:00     154.456071           0.0   -0.000065    0.000000   
2021-09-07 03:22:00     154.473852           0.0    0.000065  154.473852   
2021-09-07 03:24:00     154.477505           0.0    0.000000    0.000000   
2021-09-07 03:25:00     154.494469           0.0    0.000065  154.494469   
2021-09-07 03:26:00     154.502722     

In [5]:
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import balanced_accuracy_score
from imblearn.metrics import classification_report_imbalanced

In [6]:
# Separate the target ('Trade Signal') from the feature columns, working on a
# copy so the loaded indicators_df stays untouched.
df = indicators_df.copy()
y = df["Trade Signal"]

# Every remaining column is used as a feature. An earlier variant, kept here
# for reference, also dropped the raw price and stop columns:
# X = df.drop(columns=['Trade Signal', 'Open', 'High', 'Low', 'Trailing Stop', 'Pct Change'])
# NOTE(review): columns such as 'Trailing Stop' and 'Stop Loss' stay in X; if
# any of them is derived from the trade signal it would leak the label into
# the features -- confirm against how the Indicators table is built.
X = df.drop(columns=["Trade Signal"])

In [7]:
# Split into train/test sets, stratified on the target so each class keeps the
# same proportion in both splits; random_state pins the shuffle.
# NOTE(review): this split shuffles the timestamped rows (the y_train index
# shown further down is not chronological). For time-ordered data a
# chronological split may be more appropriate -- confirm the intent.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
    stratify=y,
)

In [8]:
# Standardiser used to bring the feature columns onto a common scale.
from sklearn.preprocessing import StandardScaler

# Fit on the training split only, then apply that same fitted transform to the
# training and test splits (in that order).
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [9]:
# Show the scaled training features, followed by the training labels.
display(X_train_scaled)
display(y_train)

array([[-1.13023823, -1.13941182, -1.12346273, ...,  0.14715638,
         1.06543265, -0.01147569],
       [-1.91663427, -1.92401522, -1.93115075, ..., -1.15643925,
        -0.8805222 , -0.01147569],
       [-0.93479759, -0.94161265, -0.9566576 , ..., -1.87910589,
        -0.8805222 , -0.01147569],
       ...,
       [-0.93699355, -0.94161265, -0.93470955, ...,  0.14635382,
         1.07499384, -0.01147569],
       [-0.10691982, -0.11525165, -0.1116579 , ..., -0.72032202,
        -0.8805222 , -0.01147569],
       [-0.81950957, -0.82732868, -0.82277453, ..., -0.48481379,
        -0.8805222 , -0.01147569]])

Datetime
2021-09-28 16:41:00    0.0
2021-10-04 11:46:00    0.0
2021-09-30 14:20:00    0.0
2021-09-07 14:56:00    0.0
2021-09-27 17:14:00    0.0
                      ... 
2021-09-28 03:35:00    0.0
2021-09-07 10:35:00    0.0
2021-09-29 12:48:00    0.0
2021-09-24 12:17:00    0.0
2021-09-21 12:43:00    0.0
Name: Trade Signal, Length: 11376, dtype: float64

In [10]:
# MODEL
# Support-vector classifier; the commented lines record alternatives that
# were considered but are not used.
from sklearn.svm import SVC
# from sklearn.svm import SVR

# Create the classifier with a linear kernel.
model = SVC(kernel='linear')
# model = LogisticRegression()

# Echo the estimator so its configuration shows in the cell output.
model

SVC(kernel='linear')

In [11]:
# FIT
# Train the classifier on the scaled training features and their labels.
model.fit(X=X_train_scaled, y=y_train)

SVC(kernel='linear')

In [12]:
# PREDICT
# Predict a trade signal for every row of the scaled test set.
y_pred = model.predict(X=X_test_scaled)

# Put predictions next to the true labels; the frame takes its Datetime index
# from y_test.
results = pd.DataFrame(dict(Predictions=y_pred, Actual=y_test))
results

Unnamed: 0_level_0,Predictions,Actual
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-09-27 05:56:00,0.0,0.0
2021-09-20 11:51:00,0.0,0.0
2021-09-21 09:03:00,0.0,0.0
2021-09-14 10:18:00,0.0,0.0
2021-09-24 13:03:00,0.0,0.0
...,...,...
2021-09-13 03:07:00,0.0,0.0
2021-09-23 15:09:00,0.0,0.0
2021-10-01 06:29:00,0.0,0.0
2021-09-16 14:51:00,0.0,0.0


In [13]:
# EVALUATE
# Confusion matrix: rows are the true classes, columns the predicted classes.
print(confusion_matrix(y_true=y_test, y_pred=y_pred))

[[  37    0    0]
 [   0 3713    0]
 [   0    0   43]]


In [14]:
# Per-class precision, recall and F1 on the test set.
# NOTE(review): the recorded report shows 1.00 for every class; scores this
# clean usually deserve a check for label leakage in the features or the split.
from sklearn.metrics import classification_report
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

        -1.0       1.00      1.00      1.00        37
         0.0       1.00      1.00      1.00      3713
         1.0       1.00      1.00      1.00        43

    accuracy                           1.00      3793
   macro avg       1.00      1.00      1.00      3793
weighted avg       1.00      1.00      1.00      3793

