In [7]:
import requests
import datetime
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.neural_network import MLPClassifier, MLPRegressor
import pickle

In [8]:
def moving_average(x, w):
    """Return the trailing ``w``-sample mean of ``x``, lagged by one position.

    For ``len(x) >= w`` the result has ``len(x) + 1`` elements: ``w`` leading
    NaNs followed by the mean of every full window. Element ``i`` is therefore
    the mean of ``x[i - w:i]`` and never includes ``x[i]`` itself.
    """
    window_means = np.convolve(x, np.ones(w), mode='valid') / w
    lead_in = np.full(w, np.nan)
    return np.concatenate((lead_in, window_means))

def moving_avg_diff(short, long):
    """Return how far ``short`` sits above ``long``, as a fraction of ``long``.

    Works element-wise on any operands that support ``-`` and ``/``.
    """
    gap = short - long
    return gap / long


## Binary Directional Prediction

In [9]:
def get_X_y(df, binary_output=True, verbose=True):
    """Build moving-average-spread features and next-step return targets.

    Each feature is the relative gap between a shorter and a longer trailing
    average of ``df["Close"]``. ``moving_average`` lags its output by one row,
    so feature row ``i`` only uses closes up to row ``i - 1`` while ``y[i]`` is
    the change from row ``i - 1`` to row ``i``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``"Close"`` column. The window names assume one row per
        hour -- TODO confirm against the data source.
    binary_output : bool, default True
        If true, ``y`` is 1.0 where the close rose and 0.0 otherwise; if
        false, ``y`` is the fractional change itself.
    verbose : bool, default True
        Print the intermediate sanity checks (head of the returns, lengths,
        target type). Pass ``False`` to silence them.

    Returns
    -------
    (X, y) : tuple of numpy.ndarray
        ``X`` has one row per sample and 7 columns; ``y`` has one entry per
        sample. The first ``min_period`` rows are dropped from both.
    """
    close = df["Close"]

    # Window lengths in rows; the names treat one row as one hour.
    week, month = 24*7, 24*31
    windows = (1, 2, 3, 12, 24, 48, week, month)
    # Skip every row whose one-month average is still NaN, plus one more.
    min_period = month + 1

    previous_close = close.shift(1)
    percent_change = (close - previous_close) / previous_close
    if verbose:
        print(percent_change.iloc[:5])
        print(len(percent_change))

    if binary_output:
        # NaN > 0 is False, so the undefined first return is labelled 0.0
        # (that row is sliced away below anyway).
        y = np.where(percent_change > 0, 1.0, 0.0)
    else:
        y = percent_change.to_numpy()
    if verbose:
        print(type(y))

    averages = {w: moving_average(close, w) for w in windows}
    if verbose:
        print(len(averages[month]))

    # (short window, long window) for each feature column, in column order.
    spread_pairs = (
        (1, 2),
        (1, 3),
        (3, 12),
        (12, 24),
        (24, 48),
        (48, week),
        (week, month),
    )
    spreads = [
        moving_avg_diff(averages[short_w], averages[long_w])
        for short_w, long_w in spread_pairs
    ]
    if verbose:
        print(len(spreads[-1]))

    X = np.stack(spreads, axis=1)
    # The lagged averages are one element longer than the price series, so the
    # final feature row has no matching target and is dropped.
    X = X[min_period:len(X) - 1]
    y = y[min_period:]

    if verbose:
        print(len(X), len(y))

    return (X, y)

In [5]:
# Load the cleaned price history and derive up/down labels from it.
df = pd.read_csv("../data/clean/bitcoin_prices.csv")
X, y = get_X_y(df, binary_output=True)

# shuffle=False keeps row order, so the final 33% of rows become the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, shuffle=False
)

# Two hidden layers (5 then 2 units), trained with L-BFGS.
clf = MLPClassifier(
    solver='lbfgs',
    alpha=1e-5,
    hidden_layer_sizes=(5, 2),
    random_state=1,
    max_iter=10000,
)
clf.fit(X_train, y_train)

0         NaN
1    0.000504
2    0.006687
3    0.003179
4   -0.001887
Name: Close, dtype: float64
47698
<class 'numpy.ndarray'>
47699
47699
46953 46953


MLPClassifier(alpha=1e-05, hidden_layer_sizes=(5, 2), max_iter=10000,
              random_state=1, solver='lbfgs')

In [6]:
# Predict both splits first, then report plain classification accuracy.
y_train_pred = clf.predict(X_train)
y_test_pred = clf.predict(X_test)

print("Train Accuracy:", metrics.accuracy_score(y_train, y_train_pred))
print("Test Accuracy:", metrics.accuracy_score(y_test, y_test_pred))

Train Accuracy: 0.544249475491131
Test Accuracy: 0.5490158115521135


In [7]:
# Serialise the fitted classifier to disk.
# NOTE(review): the file name says "svm", but `clf` is built with MLPClassifier
# in this notebook -- confirm whether anything loads this path before renaming.
with open('./pickles/svm_binary_classifier', 'wb') as model_file:
    pickle.dump(clf, model_file)

In [52]:
# Same pipeline as the first classifier, but with a single wider hidden layer
# and weaker L2 regularisation.
df = pd.read_csv("../data/clean/bitcoin_prices.csv")
X, y = get_X_y(df, binary_output=True)

# shuffle=False keeps row order, so the final 33% of rows become the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, shuffle=False
)

# `(50)` is just the int 50; `(50,)` is the one-element tuple that
# hidden_layer_sizes documents (one hidden layer of 50 units).
clf = MLPClassifier(
    solver='lbfgs',
    alpha=1e-7,
    hidden_layer_sizes=(50,),
    random_state=1,
    max_iter=10000,
)
clf.fit(X_train, y_train)

0         NaN
1    0.000504
2    0.006687
3    0.003179
4   -0.001887
Name: Close, dtype: float64
47698
<class 'numpy.ndarray'>
47699
47699
46953 46953


MLPClassifier(alpha=1e-07, hidden_layer_sizes=50, max_iter=10000,
              random_state=1, solver='lbfgs')

In [53]:
# Predict both splits first, then report plain classification accuracy.
y_train_pred = clf.predict(X_train)
y_test_pred = clf.predict(X_test)

print("Train Accuracy:", metrics.accuracy_score(y_train, y_train_pred))
print("Test Accuracy:", metrics.accuracy_score(y_test, y_test_pred))

Train Accuracy: 0.5446945133193465
Test Accuracy: 0.5359148112294289


## Continuous Prediction Regression

In [63]:
# Rebuild the dataset with the raw fractional change as the target.
df = pd.read_csv("../data/clean/bitcoin_prices.csv")
X, y = get_X_y(df, binary_output=False)

# shuffle=False keeps row order, so the final 33% of rows become the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, shuffle=False
)

# Three hidden layers (10, 5, 2 units), trained with Adam.
regr = MLPRegressor(
    random_state=1,
    solver='adam',
    hidden_layer_sizes=(10, 5, 2),
    max_iter=10000,
)
# fit() returns the estimator; assigning it keeps the cell output empty.
regr = regr.fit(X_train, y_train)

0         NaN
1    0.000504
2    0.006687
3    0.003179
4   -0.001887
Name: Close, dtype: float64
47698
<class 'numpy.ndarray'>
47699
47699
46953 46953


In [64]:
# Direction-of-move accuracy: compare the sign of predicted and actual returns.
# Testing `> 0` directly (instead of rounding with np.ceil) keeps this a strict
# two-class comparison even if a value falls outside (-1, 1].
y_train_pred = regr.predict(X_train)
print("Train +- Accuracy:",metrics.accuracy_score(y_train > 0, y_train_pred > 0))

y_test_pred = regr.predict(X_test)
print("Test +- Accuracy:",metrics.accuracy_score(y_test > 0, y_test_pred > 0))

# Coefficient of determination on each split.
print("Train R^2", regr.score(X_train, y_train))
print("Test R^2", regr.score(X_test, y_test))

print(y_train)
print(y_train_pred)
# Mean absolute size of predicted vs. actual test returns, as percentages.
print(str(np.mean(np.absolute(y_test_pred)) * 100) + "%")
print(str(np.mean(np.absolute(y_test)) * 100) + "%")
print(y_test_pred)
print(y_test)

Train +- Accuracy: 0.5250174836289656
Test +- Accuracy: 0.5314617618586641
Train R^2 -0.007986770990023206
Test R^2 -0.03627920326857126
[-0.00033801 -0.0043956   0.00084904 ... -0.00054595 -0.00089112
  0.00242379]
[0.00052716 0.00060767 0.00084726 ... 0.00014781 0.00011936 0.00011124]
0.051127092138924024%
0.4447695029795137%
[-5.69697358e-05  1.37706646e-04  9.20661407e-05 ...  6.16420547e-04
  1.75036563e-03  1.00509895e-03]
[-0.00205599 -0.00010326  0.00024937 ... -0.01550605  0.00300411
  0.00142161]


In [None]:
# Serialise the fitted regressor. The original call passed the builtin `object`
# type, which would have written a pickle of that class rather than the model.
# NOTE(review): the file name says "svm", but `regr` is built with MLPRegressor
# in this notebook -- confirm whether anything loads this path before renaming.
with open('./pickles/svm_regressor', 'wb') as f:
    pickle.dump(regr, f)

In [65]:
## Best 52.2, 51.2

In [67]:
# Tally the test targets after rounding up: values in (-1, 0] collapse to zero
# and values in (0, 1] collapse to 1.0, giving the down/up class balance.
unique, counts = np.unique(np.ceil(y_test), return_counts=True)
{value: count for value, count in zip(unique, counts)}

{-0.0: 7555, 1.0: 7940}

In [39]:
# Share of test targets in the 1.0 bucket, using the counts shown by the
# np.unique tally of np.ceil(y_test) (7555 in the zero bucket, 7940 in 1.0).
# NOTE(review): the numbers are hard-coded and go stale if the data or split changes.
7940/(7555+7940)

0.5124233623749597