In [163]:
import sqlalchemy
import pandas as pd


In [164]:
db_connection_string = 'sqlite:///./Resources/products.db'
engine  = sqlalchemy.create_engine(db_connection_string)

inspector = sqlalchemy.inspect(engine)
table_names = inspector.get_table_names()
print(table_names)

['Indicators', 'MARA_1_Day_Candles', 'MARA_1_Min_Candles', 'MARA_Info', 'RIOT_1_Day_Candles', 'RIOT_1_Min_Candles', 'RIOT_Info', 'TSLA_1_Day_Candles', 'TSLA_1_Min_Candles', 'TSLA_Info', 'WHD_1_Day_Candles', 'WHD_1_Min_Candles', 'WHD_Info']


In [165]:
indicators_df = pd.read_sql_table('Indicators', con=engine, index_col='Datetime')

In [166]:
print(indicators_df.head())

                      Open   High    Low  Close  Volume  CDLDOJI  \
Datetime                                                           
2021-09-02 16:45:00  42.62  42.62  42.62  42.62     120      100   
2021-09-02 16:52:00  42.60  42.60  42.60  42.60     150      100   
2021-09-02 16:55:00  42.58  42.58  42.58  42.58     169      100   
2021-09-02 16:57:00  42.59  42.60  42.59  42.60     334        0   
2021-09-02 17:00:00  42.60  42.60  42.60  42.60     337      100   

                     Trailing Stop  Trade Signal  Pct Change  Stop Loss  
Datetime                                                                 
2021-09-02 16:45:00      42.528291           1.0    0.000939  42.528291  
2021-09-02 16:52:00      42.511270           1.0   -0.000469  42.528291  
2021-09-02 16:55:00      42.494036           1.0   -0.000469  42.511270  
2021-09-02 16:57:00      42.516605           0.0    0.000470  42.516605  
2021-09-02 17:00:00      42.522562           1.0    0.000000   0.000000  


In [167]:
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import balanced_accuracy_score
from imblearn.metrics import classification_report_imbalanced

In [186]:
#Segment features from the trade signal
df = indicators_df.copy()
# X = df.drop(columns=['Trade Signal', 'Open', 'High', 'Low', 'Trailing Stop', 'Pct Change'])
X = df.drop(columns=['Trade Signal'])
y = df["Trade Signal"]

In [187]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

In [188]:
# import StandardScaler
from sklearn.preprocessing import StandardScaler

# Scale the data
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [189]:
X_train

Unnamed: 0_level_0,Open,High,Low,Close,Volume,CDLDOJI,Trailing Stop,Pct Change,Stop Loss
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-09-28 14:43:00,32.965,33.00,32.900,32.91,15366,0,32.741181,-0.001820,0.000000
2021-09-15 13:12:00,36.330,36.38,36.320,36.38,7812,0,36.204083,0.002204,36.204083
2021-09-16 08:21:00,36.820,36.82,36.800,36.80,4793,0,36.693991,0.000000,0.000000
2021-09-30 12:58:00,31.630,31.63,31.600,31.62,10787,0,31.492420,-0.000316,31.498375
2021-09-13 09:03:00,35.260,35.29,34.770,34.83,162095,0,34.123845,-0.012195,0.000000
...,...,...,...,...,...,...,...,...,...
2021-09-08 03:09:00,38.250,38.25,38.200,38.20,3196,0,37.875909,-0.000262,37.870595
2021-09-24 08:19:00,34.950,34.95,34.950,34.95,534,100,34.836358,-0.001429,0.000000
2021-09-28 13:44:00,33.490,33.50,33.485,33.50,5733,0,33.349050,0.001195,33.349050
2021-09-13 11:32:00,35.610,35.62,35.560,35.61,9455,100,35.426963,-0.001122,0.000000


In [190]:
# MODEL
from sklearn.svm import SVC

# Instantiate a linear SVM model
svm_model = SVC(kernel='linear')
svm_model

SVC(kernel='linear')

In [191]:
# FIT
# Fit the data
svm_model.fit(X_train_scaled, y_train)

SVC(kernel='linear')

In [192]:
# PREDICT
# Make predictions using the test data
y_pred = svm_model.predict(X_test_scaled)

results = pd.DataFrame(
    {
        "Predictions": y_pred,
        "Actual":y_test
    }
)
results

Unnamed: 0_level_0,Predictions,Actual
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-09-29 10:36:00,0.0,0.0
2021-09-13 13:30:00,0.0,0.0
2021-09-29 08:21:00,0.0,0.0
2021-09-29 11:09:00,0.0,0.0
2021-09-27 18:23:00,1.0,1.0
...,...,...
2021-09-28 05:16:00,1.0,1.0
2021-09-20 10:47:00,0.0,0.0
2021-09-09 10:24:00,1.0,1.0
2021-09-10 15:47:00,1.0,1.0


In [193]:
# EVALUATE
# Generate confusion matrix
print(confusion_matrix(y_test, y_pred))

[[2157    0]
 [   0  751]]


In [194]:
# Generate Classification Report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      2157
         1.0       1.00      1.00      1.00       751

    accuracy                           1.00      2908
   macro avg       1.00      1.00      1.00      2908
weighted avg       1.00      1.00      1.00      2908

