In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn import svm
from sklearn.linear_model import SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score

In [3]:
import warnings
warnings.filterwarnings("ignore")

In [4]:
market_data = pd.read_csv("./data/Bitcoin Price Monthly 2022 to 2010.csv")
graph_parameters= pd.read_csv("./data/monthlyparameters.csv")

In [5]:
Y = []
threshold=5
for i in market_data["Change %"][1:]:
    if abs(i)>threshold:
        if i>0:
            Y.append(1)
        else:
            Y.append(-1)
    else:
        Y.append(0)

In [6]:
dataframes=[]
score_dfs=[]
for i in range(market_data.shape[0]-graph_parameters.shape[0]):
    dataframes.append(((pd.concat([market_data.iloc[i:i+graph_parameters.shape[0],1:].reset_index().drop("index",axis=1), graph_parameters], axis=1))\
                      ,Y[i:i+graph_parameters.shape[0]]))

In [7]:
for i in range(len(dataframes)):
    x, y = dataframes[i]
    date = market_data["Date "][i]
    scaler = StandardScaler()
    scaled_x = scaler.fit_transform(x)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)
    
    svm_model           = svm.SVC()
    stockhastic_model   = SGDClassifier(loss="hinge", penalty="l2", max_iter=5)
    knn_model           = KNeighborsClassifier(n_neighbors=3)
    decisiontree_model  = DecisionTreeClassifier()
    
    svm_model.fit(x_train, y_train)
    stockhastic_model.fit(x_train, y_train)
    knn_model.fit(x_train, y_train)
    decisiontree_model.fit(x_train, y_train)
    
    prediction_svm_model          = svm_model.predict(x_test)
    prediction_stockhastic_model  = stockhastic_model.predict(x_test)
    prediction_knn_model          = knn_model.predict(x_test)
    prediction_decisiontree_model = decisiontree_model.predict(x_test)
    
    f1_score_svm_model           = f1_score(y_test, prediction_svm_model,          average='weighted')
    f1_score_stockhastic_model   = f1_score(y_test, prediction_stockhastic_model,  average='weighted')
    f1_score_knn_model           = f1_score(y_test, prediction_knn_model,          average='weighted')
    f1_score_decisiontree_model  = f1_score(y_test, prediction_decisiontree_model, average='weighted')
    
    precision_score_svm_model          = precision_score(y_test, prediction_svm_model          , average='weighted')
    precision_score_stockhastic_model  = precision_score(y_test, prediction_stockhastic_model  , average='weighted')
    precision_score_knn_model          = precision_score(y_test, prediction_knn_model          , average='weighted')
    precision_score_decisiontree_model = precision_score(y_test, prediction_decisiontree_model , average='weighted')
    
    accuracy_score_svm_model           = accuracy_score(y_test, prediction_svm_model          )
    accuracy_score_stockhastic_model   = accuracy_score(y_test, prediction_stockhastic_model  )
    accuracy_score_knn_model           = accuracy_score(y_test, prediction_knn_model          )
    accuracy_score_decisiontree_model  = accuracy_score(y_test, prediction_decisiontree_model )
    
    scores= [[f1_score_svm_model, precision_score_svm_model, accuracy_score_svm_model]\
            ,[f1_score_stockhastic_model, precision_score_stockhastic_model, accuracy_score_stockhastic_model]\
            ,[f1_score_knn_model, precision_score_knn_model, accuracy_score_knn_model]\
            ,[f1_score_decisiontree_model, precision_score_decisiontree_model, accuracy_score_decisiontree_model]]
    
    temp_df = pd.DataFrame(scores)
    temp_df.columns = ["F1", "Precision", "Accuracy"]
    temp_df = temp_df.T
    temp_df.columns=["SVM_{}".format(date), "Stockhastic_{}".format(date), "KNN_{}".format(date), "DecisionTree_{}".format(date)]
    score_dfs.append(temp_df.T)

In [8]:
full_df = pd.concat(score_dfs)

In [9]:
full_df

Unnamed: 0,F1,Precision,Accuracy
SVM_Aug 2010,0.294425,0.215561,0.464286
Stockhastic_Aug 2010,0.294425,0.215561,0.464286
KNN_Aug 2010,0.377398,0.403571,0.392857
DecisionTree_Aug 2010,0.443713,0.563492,0.464286
SVM_Sep 2010,0.257143,0.183673,0.428571
...,...,...,...
DecisionTree_Mar 2015,0.224954,0.270238,0.250000
SVM_Apr 2015,0.333333,0.250000,0.500000
Stockhastic_Apr 2015,0.156371,0.103316,0.321429
KNN_Apr 2015,0.483295,0.440222,0.535714


In [10]:
full_df.describe()

Unnamed: 0,F1,Precision,Accuracy
count,228.0,228.0,228.0
mean,0.342246,0.325065,0.420426
std,0.141369,0.167889,0.125614
min,0.002463,0.001276,0.035714
25%,0.275454,0.215561,0.357143
50%,0.367178,0.331349,0.428571
75%,0.432984,0.44182,0.5
max,0.653216,0.722619,0.678571


In [11]:
full_df[full_df["F1"]>0.60]

Unnamed: 0,F1,Precision,Accuracy
DecisionTree_Oct 2012,0.601814,0.599206,0.607143
KNN_May 2014,0.653216,0.703297,0.642857


In [12]:
full_df.iloc[180:184][:]

Unnamed: 0,F1,Precision,Accuracy
SVM_May 2014,0.373754,0.28699,0.535714
Stockhastic_May 2014,0.156371,0.103316,0.321429
KNN_May 2014,0.653216,0.703297,0.642857
DecisionTree_May 2014,0.372565,0.359694,0.392857
