In [43]:
import sqlalchemy
import pandas as pd


In [44]:
db_connection_string = 'sqlite:///./Resources/products.db'
engine  = sqlalchemy.create_engine(db_connection_string)

inspector = sqlalchemy.inspect(engine)
table_names = inspector.get_table_names()
print(table_names)

['AAPL_1_Day_Candles', 'AAPL_1_Min_Candles', 'AAPL_Info', 'CLOV_1_Day_Candles', 'CLOV_1_Min_Candles', 'CLOV_Info', 'Indicators', 'MARA_1_Day_Candles', 'MARA_1_Min_Candles', 'MARA_Info', 'RIOT_1_Day_Candles', 'RIOT_1_Min_Candles', 'RIOT_Info', 'TSLA_1_Day_Candles', 'TSLA_1_Min_Candles', 'TSLA_Info', 'WHD_1_Day_Candles', 'WHD_1_Min_Candles', 'WHD_Info']


In [45]:
indicators_df = pd.read_sql_table('Indicators', con=engine, index_col='Datetime')

In [46]:
print(indicators_df.head())

                     Open  High   Low  Close  Volume  CDLDOJI  Trailing Stop  \
Datetime                                                                       
2021-09-07 03:16:00  9.09  9.10  9.07   9.09    3340      100       9.003265   
2021-09-07 03:17:00  9.10  9.13  9.10   9.13    1720        0       9.042318   
2021-09-07 03:18:00  9.13  9.13  9.13   9.13     570      100       9.048581   
2021-09-07 03:19:00  9.13  9.13  9.13   9.13     412      100       9.054396   
2021-09-07 03:20:00  9.13  9.13  9.13   9.13     277      100       9.059797   

                     Trade Signal  Pct Change  Stop Loss  
Datetime                                                  
2021-09-07 03:16:00           1.0    0.003311   9.003265  
2021-09-07 03:17:00           0.0    0.004400   9.042318  
2021-09-07 03:18:00           1.0    0.000000   0.000000  
2021-09-07 03:19:00           1.0    0.000000   9.048581  
2021-09-07 03:20:00           1.0    0.000000   9.054396  


In [47]:
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import balanced_accuracy_score
from imblearn.metrics import classification_report_imbalanced

In [48]:
#Segment features from the trade signal
df = indicators_df.copy()
# X = df.drop(columns=['Trade Signal', 'Open', 'High', 'Low', 'Trailing Stop', 'Pct Change'])
X = df.drop(columns=['Trade Signal'])
y = df["Trade Signal"]

In [49]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

In [50]:
# import StandardScaler
from sklearn.preprocessing import StandardScaler

# Scale the data
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [51]:
display(X_train_scaled, y_train)

array([[-0.64931867, -0.65531826, -0.64308122, ..., -0.65566221,
        -1.09223895, -1.32695589],
       [-0.47225855, -0.47905315, -0.46516667, ..., -0.45404394,
         0.00640027, -1.32695589],
       [ 0.01322887,  0.00425439,  0.02266355, ...,  0.02929321,
         0.51958043,  0.74277269],
       ...,
       [-0.58077927, -0.57571466, -0.57421107, ..., -0.58816083,
         0.00640027, -1.32695589],
       [-0.72356968, -0.72923588, -0.71769055, ..., -0.71634568,
        -0.27110207, -1.32695589],
       [-0.5922025 , -0.5870866 , -0.58568943, ..., -0.55367353,
         0.00640027,  0.61914632]])

Datetime
2021-09-27 06:03:00    1.0
2021-09-23 11:32:00    1.0
2021-09-14 06:55:00    1.0
2021-09-09 13:16:00    0.0
2021-09-15 12:15:00    1.0
                      ... 
2021-09-09 05:24:00    0.0
2021-09-20 14:48:00    0.0
2021-09-28 14:52:00    1.0
2021-09-24 10:57:00    1.0
2021-09-24 07:55:00    0.0
Name: Trade Signal, Length: 9391, dtype: float64

In [52]:
# MODEL
from sklearn.svm import SVC
# from sklearn.svm import SVR

# Instantiate a linear SVM model
model = SVC(kernel='linear')
# model = LogisticRegression()
model

SVC(kernel='linear')

In [53]:
# FIT
# Fit the data
model.fit(X_train_scaled, y_train)

SVC(kernel='linear')

In [54]:
# PREDICT
# Make predictions using the test data
y_pred = model.predict(X_test_scaled)

results = pd.DataFrame(
    {
        "Predictions": y_pred,
        "Actual":y_test
    }
)
results

Unnamed: 0_level_0,Predictions,Actual
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-09-08 06:31:00,0.0,0.0
2021-09-08 09:32:00,0.0,0.0
2021-09-27 05:30:00,1.0,1.0
2021-09-29 12:08:00,0.0,0.0
2021-09-20 03:31:00,1.0,1.0
...,...,...
2021-09-29 18:46:00,1.0,1.0
2021-09-24 10:50:00,0.0,0.0
2021-09-13 11:20:00,1.0,1.0
2021-09-30 09:00:00,1.0,1.0


In [55]:
# EVALUATE
# Generate confusion matrix
print(confusion_matrix(y_test, y_pred))

[[1845    0]
 [   0 1286]]


In [56]:
# Generate Classification Report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      1845
         1.0       1.00      1.00      1.00      1286

    accuracy                           1.00      3131
   macro avg       1.00      1.00      1.00      3131
weighted avg       1.00      1.00      1.00      3131

