In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn import svm
from sklearn.linear_model import SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score

In [3]:
import warnings
warnings.filterwarnings("ignore")

In [4]:
market_data = pd.read_csv("./data/Bitcoin Price Monthly 2022 to 2010.csv")
graph_parameters= pd.read_csv("./data/monthlyparameters.csv")

In [5]:
Y = []
threshold=3
for i in market_data["Change %"][1:]:
    if abs(i)>threshold:
        if i>0:
            Y.append(1)
        else:
            Y.append(-1)
    else:
        Y.append(0)

In [6]:
dataframes_with_graphs=[]
score_dfs_with_graphs=[]
for i in range(market_data.shape[0]-graph_parameters.shape[0]):
    dataframes_with_graphs.append(((pd.concat([market_data.iloc[i:i+graph_parameters.shape[0],1:].reset_index()\
                                               .drop("index",axis=1), graph_parameters], axis=1))\
                      ,Y[i:i+graph_parameters.shape[0]]))
    
for i in range(len(dataframes_with_graphs)):
    x, y = dataframes_with_graphs[i]
    date = market_data["Date "][i]
    scaler = StandardScaler()
    scaled_x = scaler.fit_transform(x)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)
    
    svm_model           = svm.SVC()
    stockhastic_model   = SGDClassifier(loss="hinge", penalty="l2", max_iter=5)
    knn_model           = KNeighborsClassifier(n_neighbors=3)
    decisiontree_model  = DecisionTreeClassifier()
    
    svm_model.fit(x_train, y_train)
    stockhastic_model.fit(x_train, y_train)
    knn_model.fit(x_train, y_train)
    decisiontree_model.fit(x_train, y_train)
    
    prediction_svm_model          = svm_model.predict(x_test)
    prediction_stockhastic_model  = stockhastic_model.predict(x_test)
    prediction_knn_model          = knn_model.predict(x_test)
    prediction_decisiontree_model = decisiontree_model.predict(x_test)
    
    f1_score_svm_model           = f1_score(y_test, prediction_svm_model,          average='weighted')
    f1_score_stockhastic_model   = f1_score(y_test, prediction_stockhastic_model,  average='weighted')
    f1_score_knn_model           = f1_score(y_test, prediction_knn_model,          average='weighted')
    f1_score_decisiontree_model  = f1_score(y_test, prediction_decisiontree_model, average='weighted')
    
    precision_score_svm_model          = precision_score(y_test, prediction_svm_model          , average='weighted')
    precision_score_stockhastic_model  = precision_score(y_test, prediction_stockhastic_model  , average='weighted')
    precision_score_knn_model          = precision_score(y_test, prediction_knn_model          , average='weighted')
    precision_score_decisiontree_model = precision_score(y_test, prediction_decisiontree_model , average='weighted')
    
    accuracy_score_svm_model           = accuracy_score(y_test, prediction_svm_model          )
    accuracy_score_stockhastic_model   = accuracy_score(y_test, prediction_stockhastic_model  )
    accuracy_score_knn_model           = accuracy_score(y_test, prediction_knn_model          )
    accuracy_score_decisiontree_model  = accuracy_score(y_test, prediction_decisiontree_model )
    
    scores= [[f1_score_svm_model, precision_score_svm_model, accuracy_score_svm_model]\
            ,[f1_score_stockhastic_model, precision_score_stockhastic_model, accuracy_score_stockhastic_model]\
            ,[f1_score_knn_model, precision_score_knn_model, accuracy_score_knn_model]\
            ,[f1_score_decisiontree_model, precision_score_decisiontree_model, accuracy_score_decisiontree_model]]
    
    temp_df = pd.DataFrame(scores)
    temp_df.columns = ["F1", "Precision", "Accuracy"]
    temp_df = temp_df.T
    temp_df.columns=["SVM_{}".format(date), "Stockhastic_{}".format(date), "KNN_{}".format(date), "DecisionTree_{}".format(date)]
    score_dfs_with_graphs.append(temp_df.T)

In [7]:
dataframes_without_graphs=[]
score_dfs_without_graphs=[]
for i in range(market_data.shape[0]-graph_parameters.shape[0]):
    dataframes_without_graphs.append((market_data.iloc[i:i+graph_parameters.shape[0],1:].reset_index()\
                                      .drop("index",axis=1),\
                                      Y[i:i+graph_parameters.shape[0]]))
    
for i in range(len(dataframes_without_graphs)):
    x, y = dataframes_without_graphs[i]
    date = market_data["Date "][i]
    scaler = StandardScaler()
    scaled_x = scaler.fit_transform(x)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)
    
    svm_model           = svm.SVC()
    stockhastic_model   = SGDClassifier(loss="hinge", penalty="l2", max_iter=5)
    knn_model           = KNeighborsClassifier(n_neighbors=3)
    decisiontree_model  = DecisionTreeClassifier()
    
    svm_model.fit(x_train, y_train)
    stockhastic_model.fit(x_train, y_train)
    knn_model.fit(x_train, y_train)
    decisiontree_model.fit(x_train, y_train)
    
    prediction_svm_model          = svm_model.predict(x_test)
    prediction_stockhastic_model  = stockhastic_model.predict(x_test)
    prediction_knn_model          = knn_model.predict(x_test)
    prediction_decisiontree_model = decisiontree_model.predict(x_test)
    
    f1_score_svm_model           = f1_score(y_test, prediction_svm_model,          average='weighted')
    f1_score_stockhastic_model   = f1_score(y_test, prediction_stockhastic_model,  average='weighted')
    f1_score_knn_model           = f1_score(y_test, prediction_knn_model,          average='weighted')
    f1_score_decisiontree_model  = f1_score(y_test, prediction_decisiontree_model, average='weighted')
    
    precision_score_svm_model          = precision_score(y_test, prediction_svm_model          , average='weighted')
    precision_score_stockhastic_model  = precision_score(y_test, prediction_stockhastic_model  , average='weighted')
    precision_score_knn_model          = precision_score(y_test, prediction_knn_model          , average='weighted')
    precision_score_decisiontree_model = precision_score(y_test, prediction_decisiontree_model , average='weighted')
    
    accuracy_score_svm_model           = accuracy_score(y_test, prediction_svm_model          )
    accuracy_score_stockhastic_model   = accuracy_score(y_test, prediction_stockhastic_model  )
    accuracy_score_knn_model           = accuracy_score(y_test, prediction_knn_model          )
    accuracy_score_decisiontree_model  = accuracy_score(y_test, prediction_decisiontree_model )
    
    scores= [[f1_score_svm_model, precision_score_svm_model, accuracy_score_svm_model]\
            ,[f1_score_stockhastic_model, precision_score_stockhastic_model, accuracy_score_stockhastic_model]\
            ,[f1_score_knn_model, precision_score_knn_model, accuracy_score_knn_model]\
            ,[f1_score_decisiontree_model, precision_score_decisiontree_model, accuracy_score_decisiontree_model]]
    
    temp_df = pd.DataFrame(scores)
    temp_df.columns = ["F1", "Precision", "Accuracy"]
    temp_df = temp_df.T
    temp_df.columns=["SVM_{}".format(date), "Stockhastic_{}".format(date), "KNN_{}".format(date), "DecisionTree_{}".format(date)]
    score_dfs_without_graphs.append(temp_df.T)

In [8]:
full_df_with_graphs = pd.concat(score_dfs_with_graphs)

In [9]:
full_df_without_graphs = pd.concat(score_dfs_without_graphs)

In [10]:
full_df_with_graphs

Unnamed: 0,F1,Precision,Accuracy
SVM_Aug 2010,0.294425,0.215561,0.464286
Stockhastic_Aug 2010,0.221612,0.154337,0.392857
KNN_Aug 2010,0.426965,0.395833,0.464286
DecisionTree_Aug 2010,0.314853,0.321769,0.321429
SVM_Sep 2010,0.257143,0.183673,0.428571
...,...,...,...
DecisionTree_Mar 2015,0.232931,0.313283,0.250000
SVM_Apr 2015,0.333333,0.250000,0.500000
Stockhastic_Apr 2015,0.221612,0.154337,0.392857
KNN_Apr 2015,0.536866,0.508403,0.571429


In [11]:
full_df_without_graphs

Unnamed: 0,F1,Precision,Accuracy
SVM_Aug 2010,0.294425,0.215561,0.464286
Stockhastic_Aug 2010,0.294425,0.215561,0.464286
KNN_Aug 2010,0.426965,0.395833,0.464286
DecisionTree_Aug 2010,0.500683,0.502976,0.500000
SVM_Sep 2010,0.257143,0.183673,0.428571
...,...,...,...
DecisionTree_Mar 2015,0.616071,0.721805,0.642857
SVM_Apr 2015,0.333333,0.250000,0.500000
Stockhastic_Apr 2015,0.221612,0.154337,0.392857
KNN_Apr 2015,0.536866,0.508403,0.571429


In [12]:
full_df_with_graphs.describe()

Unnamed: 0,F1,Precision,Accuracy
count,228.0,228.0,228.0
mean,0.385344,0.375476,0.462563
std,0.131077,0.165401,0.105277
min,0.009524,0.005102,0.071429
25%,0.294425,0.25,0.392857
50%,0.400043,0.398564,0.464286
75%,0.479733,0.494505,0.535714
max,0.67581,0.731481,0.678571


In [13]:
full_df_without_graphs.describe()

Unnamed: 0,F1,Precision,Accuracy
count,228.0,228.0,228.0
mean,0.378721,0.367928,0.456297
std,0.126958,0.163137,0.096916
min,0.035714,0.020408,0.142857
25%,0.292383,0.25,0.392857
50%,0.389608,0.376129,0.464286
75%,0.472042,0.482796,0.535714
max,0.644147,0.738235,0.678571


In [15]:
difference = full_df_with_graphs-full_df_without_graphs

In [16]:
difference.describe()

Unnamed: 0,F1,Precision,Accuracy
count,228.0,228.0,228.0
mean,0.006623,0.007548,0.006266
std,0.097797,0.097281,0.102314
min,-0.383141,-0.408521,-0.464286
25%,0.0,0.0,0.0
50%,0.0,0.0,0.0
75%,0.0,0.0,0.0
max,0.421648,0.378827,0.392857


In [17]:
difference[difference["F1"]>0.3]

Unnamed: 0,F1,Precision,Accuracy
Stockhastic_Dec 2010,0.421648,0.378827,0.392857
Stockhastic_Oct 2012,0.30236,0.265306,0.285714
Stockhastic_Dec 2013,0.30236,0.265306,0.285714


In [31]:
full_df_with_graphs.iloc[16:20]

Unnamed: 0,F1,Precision,Accuracy
SVM_Dec 2010,0.548632,0.460459,0.678571
Stockhastic_Dec 2010,0.548632,0.460459,0.678571
KNN_Dec 2010,0.515152,0.566327,0.5
DecisionTree_Dec 2010,0.600649,0.688776,0.535714


In [32]:
full_df_without_graphs.iloc[16:20]

Unnamed: 0,F1,Precision,Accuracy
SVM_Dec 2010,0.548632,0.460459,0.678571
Stockhastic_Dec 2010,0.126984,0.081633,0.285714
KNN_Dec 2010,0.515152,0.566327,0.5
DecisionTree_Dec 2010,0.396429,0.465201,0.357143


In [33]:
full_df_with_graphs.to_csv("./data/embeddings_and_marketdata results.csv")
full_df_without_graphs.to_csv("./data/marketdata results.csv")