In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn import svm
from sklearn.linear_model import SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score

In [3]:
import warnings
warnings.filterwarnings("ignore")

In [4]:
market_data = pd.read_csv("./data/Bitcoin Price Monthly 2022 to 2010.csv")
graph_parameters= pd.read_csv("./data/monthlyparameters.csv")

In [5]:
Y = []
threshold=3
for i in market_data["Change %"][1:]:
    if abs(i)>threshold:
        if i>0:
            Y.append(1)
        else:
            Y.append(-1)
    else:
        Y.append(0)

In [6]:
dataframes_with_graphs=[]
score_dfs_with_graphs=[]
for i in range(market_data.shape[0]-graph_parameters.shape[0]):
    dataframes_with_graphs.append(((pd.concat([market_data.iloc[i:i+graph_parameters.shape[0],1:].reset_index()\
                                               .drop("index",axis=1), graph_parameters], axis=1))\
                      ,Y[i:i+graph_parameters.shape[0]]))
    
for i in range(len(dataframes_with_graphs)):
    x, y = dataframes_with_graphs[i]
    date = market_data["Date "][i]
    scaler = StandardScaler()
    scaled_x = scaler.fit_transform(x)
    x_train, x_test, y_train, y_test = train_test_split(scaled_x, y, test_size=0.33, random_state=42)
    
    svm_model           = svm.SVC()
    stockhastic_model   = SGDClassifier(loss="hinge", penalty="l2", max_iter=5)
    knn_model           = KNeighborsClassifier(n_neighbors=3)
    decisiontree_model  = DecisionTreeClassifier()
    random_forest_model = RandomForestClassifier(max_depth=15, random_state=0)
    
    svm_model.fit(x_train, y_train)
    stockhastic_model.fit(x_train, y_train)
    knn_model.fit(x_train, y_train)
    decisiontree_model.fit(x_train, y_train)
    random_forest_model.fit(x_train, y_train)
    
    prediction_svm_model          = svm_model.predict(x_test)
    prediction_stockhastic_model  = stockhastic_model.predict(x_test)
    prediction_knn_model          = knn_model.predict(x_test)
    prediction_decisiontree_model = decisiontree_model.predict(x_test)
    prediction_random_forest_model= random_forest_model.predict(x_test)
    
    f1_score_svm_model           = f1_score(y_test, prediction_svm_model,          average='weighted')
    f1_score_stockhastic_model   = f1_score(y_test, prediction_stockhastic_model,  average='weighted')
    f1_score_knn_model           = f1_score(y_test, prediction_knn_model,          average='weighted')
    f1_score_decisiontree_model  = f1_score(y_test, prediction_decisiontree_model, average='weighted')
    f1_score_random_forest_model = f1_score(y_test, prediction_random_forest_model,average="weighted")
    
    precision_score_svm_model           = precision_score(y_test, prediction_svm_model          , average='weighted')
    precision_score_stockhastic_model   = precision_score(y_test, prediction_stockhastic_model  , average='weighted')
    precision_score_knn_model           = precision_score(y_test, prediction_knn_model          , average='weighted')
    precision_score_decisiontree_model  = precision_score(y_test, prediction_decisiontree_model , average='weighted')
    precision_sscore_random_forest_model= precision_score(y_test, prediction_random_forest_model, average="weighted")
    
    accuracy_score_svm_model           = accuracy_score(y_test, prediction_svm_model          )
    accuracy_score_stockhastic_model   = accuracy_score(y_test, prediction_stockhastic_model  )
    accuracy_score_knn_model           = accuracy_score(y_test, prediction_knn_model          )
    accuracy_score_decisiontree_model  = accuracy_score(y_test, prediction_decisiontree_model )
    accuracy_score_random_forest_model = accuracy_score(y_test, prediction_random_forest_model)
    
    scores= [[f1_score_svm_model, precision_score_svm_model, accuracy_score_svm_model]\
            ,[f1_score_stockhastic_model, precision_score_stockhastic_model, accuracy_score_stockhastic_model]\
            ,[f1_score_knn_model, precision_score_knn_model, accuracy_score_knn_model]\
            ,[f1_score_decisiontree_model, precision_score_decisiontree_model, accuracy_score_decisiontree_model]\
            ,[f1_score_random_forest_model,precision_sscore_random_forest_model, accuracy_score_random_forest_model]]
    
    temp_df = pd.DataFrame(scores)
    temp_df.columns = ["F1", "Precision", "Accuracy"]
    temp_df = temp_df.T
    temp_df.columns=["SVM_{}".format(date), "Stockhastic_{}".format(date), "KNN_{}".format(date), "DecisionTree_{}".format(date), "RandomForest_{}".format(date)]
    score_dfs_with_graphs.append(temp_df.T)

In [7]:
dataframes_without_graphs=[]
score_dfs_without_graphs=[]
for i in range(market_data.shape[0]-graph_parameters.shape[0]):
    dataframes_without_graphs.append((market_data.iloc[i:i+graph_parameters.shape[0],1:].reset_index()\
                                      .drop("index",axis=1),\
                                      Y[i:i+graph_parameters.shape[0]]))
    
for i in range(len(dataframes_without_graphs)):
    x, y = dataframes_without_graphs[i]
    date = market_data["Date "][i]
    scaler = StandardScaler()
    scaled_x = scaler.fit_transform(x)
    x_train, x_test, y_train, y_test = train_test_split(scaled_x, y, test_size=0.33, random_state=42)
    
    svm_model           = svm.SVC()
    stockhastic_model   = SGDClassifier(loss="hinge", penalty="l2", max_iter=5)
    knn_model           = KNeighborsClassifier(n_neighbors=3)
    decisiontree_model  = DecisionTreeClassifier()
    random_forest_model = RandomForestClassifier(max_depth=15, random_state=0)
    
    svm_model.fit(x_train, y_train)
    stockhastic_model.fit(x_train, y_train)
    knn_model.fit(x_train, y_train)
    decisiontree_model.fit(x_train, y_train)
    random_forest_model.fit(x_train, y_train)
    
    prediction_svm_model          = svm_model.predict(x_test)
    prediction_stockhastic_model  = stockhastic_model.predict(x_test)
    prediction_knn_model          = knn_model.predict(x_test)
    prediction_decisiontree_model = decisiontree_model.predict(x_test)
    prediction_random_forest_model= random_forest_model.predict(x_test)
    
    f1_score_svm_model           = f1_score(y_test, prediction_svm_model,          average='weighted')
    f1_score_stockhastic_model   = f1_score(y_test, prediction_stockhastic_model,  average='weighted')
    f1_score_knn_model           = f1_score(y_test, prediction_knn_model,          average='weighted')
    f1_score_decisiontree_model  = f1_score(y_test, prediction_decisiontree_model, average='weighted')
    f1_score_random_forest_model = f1_score(y_test, prediction_random_forest_model,average="weighted")
    
    precision_score_svm_model           = precision_score(y_test, prediction_svm_model          , average='weighted')
    precision_score_stockhastic_model   = precision_score(y_test, prediction_stockhastic_model  , average='weighted')
    precision_score_knn_model           = precision_score(y_test, prediction_knn_model          , average='weighted')
    precision_score_decisiontree_model  = precision_score(y_test, prediction_decisiontree_model , average='weighted')
    precision_score_random_forest_model= precision_score(y_test, prediction_random_forest_model, average="weighted")
    
    accuracy_score_svm_model           = accuracy_score(y_test, prediction_svm_model          )
    accuracy_score_stockhastic_model   = accuracy_score(y_test, prediction_stockhastic_model  )
    accuracy_score_knn_model           = accuracy_score(y_test, prediction_knn_model          )
    accuracy_score_decisiontree_model  = accuracy_score(y_test, prediction_decisiontree_model )
    accuracy_score_random_forest_model = accuracy_score(y_test, prediction_random_forest_model)
    
    scores= [[f1_score_svm_model, precision_score_svm_model, accuracy_score_svm_model]\
            ,[f1_score_stockhastic_model, precision_score_stockhastic_model, accuracy_score_stockhastic_model]\
            ,[f1_score_knn_model, precision_score_knn_model, accuracy_score_knn_model]\
            ,[f1_score_decisiontree_model, precision_score_decisiontree_model, accuracy_score_decisiontree_model]\
            ,[f1_score_random_forest_model,precision_score_random_forest_model, accuracy_score_random_forest_model]]
    
    temp_df = pd.DataFrame(scores)
    temp_df.columns = ["F1", "Precision", "Accuracy"]
    temp_df = temp_df.T
    temp_df.columns=["SVM_{}".format(date), "Stockhastic_{}".format(date), "KNN_{}".format(date), "DecisionTree_{}".format(date), "RandomForest_{}".format(date)]
    score_dfs_without_graphs.append(temp_df.T)

In [8]:
full_df_with_graphs = pd.concat(score_dfs_with_graphs)

In [9]:
full_df_without_graphs = pd.concat(score_dfs_without_graphs)

In [10]:
full_df_with_graphs

Unnamed: 0,F1,Precision,Accuracy
SVM_Aug 2010,0.294425,0.215561,0.464286
Stockhastic_Aug 2010,0.476352,0.477679,0.500000
KNN_Aug 2010,0.523810,0.487395,0.571429
DecisionTree_Aug 2010,0.277956,0.276042,0.285714
RandomForest_Aug 2010,0.457023,0.449675,0.535714
...,...,...,...
SVM_Apr 2015,0.468894,0.443277,0.500000
Stockhastic_Apr 2015,0.535427,0.536859,0.535714
KNN_Apr 2015,0.504969,0.477679,0.535714
DecisionTree_Apr 2015,0.429762,0.437004,0.428571


In [11]:
full_df_without_graphs

Unnamed: 0,F1,Precision,Accuracy
SVM_Aug 2010,0.294425,0.215561,0.464286
Stockhastic_Aug 2010,0.334135,0.276107,0.428571
KNN_Aug 2010,0.475000,0.464052,0.500000
DecisionTree_Aug 2010,0.573696,0.578571,0.571429
RandomForest_Aug 2010,0.439076,0.439626,0.500000
...,...,...,...
SVM_Apr 2015,0.333333,0.250000,0.500000
Stockhastic_Apr 2015,0.420551,0.446429,0.500000
KNN_Apr 2015,0.471646,0.496584,0.535714
DecisionTree_Apr 2015,0.360504,0.327381,0.428571


In [12]:
full_df_with_graphs.describe()

Unnamed: 0,F1,Precision,Accuracy
count,285.0,285.0,285.0
mean,0.458248,0.467302,0.484962
std,0.0915,0.105405,0.086487
min,0.206767,0.145503,0.214286
25%,0.390909,0.399529,0.428571
50%,0.456583,0.46627,0.5
75%,0.518341,0.536859,0.535714
max,0.695238,0.742857,0.714286


In [13]:
full_df_without_graphs.describe()

Unnamed: 0,F1,Precision,Accuracy
count,285.0,285.0,285.0
mean,0.412101,0.429229,0.447995
std,0.09418,0.116613,0.093504
min,0.139881,0.123626,0.142857
25%,0.345738,0.349817,0.392857
50%,0.415966,0.431502,0.464286
75%,0.484274,0.503759,0.5
max,0.630542,0.761905,0.678571


In [14]:
difference = full_df_with_graphs-full_df_without_graphs

In [15]:
difference.describe()

Unnamed: 0,F1,Precision,Accuracy
count,285.0,285.0,285.0
mean,0.046148,0.038073,0.036967
std,0.114558,0.128811,0.115992
min,-0.366548,-0.516369,-0.357143
25%,-0.012085,-0.02521,-0.035714
50%,0.041016,0.038596,0.035714
75%,0.117658,0.110714,0.107143
max,0.41813,0.435348,0.357143


In [16]:
difference[difference["F1"]>0.3]

Unnamed: 0,F1,Precision,Accuracy
Stockhastic_Jun 2014,0.41813,0.435348,0.321429
Stockhastic_Jul 2014,0.362696,0.265306,0.357143


In [18]:
full_df_without_graphs.iloc[16:20]

Unnamed: 0,F1,Precision,Accuracy
Stockhastic_Nov 2010,0.362637,0.359048,0.464286
KNN_Nov 2010,0.356438,0.329365,0.392857
DecisionTree_Nov 2010,0.411866,0.413165,0.428571
RandomForest_Nov 2010,0.338761,0.309524,0.392857


In [17]:
full_df_with_graphs.iloc[16:20]

Unnamed: 0,F1,Precision,Accuracy
Stockhastic_Nov 2010,0.34127,0.364698,0.321429
KNN_Nov 2010,0.514286,0.495536,0.535714
DecisionTree_Nov 2010,0.400794,0.392857,0.464286
RandomForest_Nov 2010,0.46176,0.441729,0.5


In [19]:
full_df_with_graphs.to_csv("./data/embeddings_and_marketdata results.csv")
full_df_without_graphs.to_csv("./data/marketdata results.csv")