In [3]:
import requests
import datetime
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.neural_network import MLPClassifier, MLPRegressor
import pickle

In [4]:
def moving_average(x, w):
        moving_avg = np.convolve(x, np.ones(w), 'valid') / w
        padding = np.full_like(np.empty(w), np.nan)
        return np.insert(moving_avg, 0, padding)

def moving_avg_diff(short, long):
        return (short - long) / long


## Binary Directional Prediction

In [5]:
def get_X_y(df, binary_output=True):
    
    min_period = 30
    
    percent_change = (df["Close"] - df["Close"].shift(1)) / df["Close"].shift(1)
    print(percent_change[:5])

    y = percent_change.copy()
    
    print(len(y))
    
    ## TEMP
    if binary_output == True:
        y = np.zeros(percent_change.shape)
        y[np.where(percent_change > 0)] = 1
    else:
        y = y.to_numpy()
    
    moving_average_1_day = moving_average(df["Close"], 1)
    moving_average_2_day = moving_average(df["Close"], 2)
    moving_average_3_day = moving_average(df["Close"], 3)
    # moving_average_4_day = moving_average(df["Close"], 4)
    # moving_average_5_day = moving_average(df["Close"], 5)
    # moving_average_6_day = moving_average(df["Close"], 6)
    moving_average_7_day = moving_average(df["Close"], 7)
    moving_average_14_day = moving_average(df["Close"], 14)
    moving_average_30_day = moving_average(df["Close"], 30)
    
    mv_1d_2d = moving_avg_diff(moving_average_1_day, moving_average_2_day)
    mv_1d_3d = moving_avg_diff(moving_average_1_day, moving_average_3_day)
    # mv_2d_4d = moving_avg_diff(moving_average_1_day, moving_average_3_day)
    mv_3d_7d = moving_avg_diff(moving_average_3_day, moving_average_7_day)
    mv_7d_14d = moving_avg_diff(moving_average_7_day, moving_average_14_day)
    mv_7d_30d = moving_avg_diff(moving_average_7_day, moving_average_30_day)
        
    X = np.stack((mv_1d_2d, mv_1d_3d, mv_3d_7d, mv_7d_14d, mv_7d_30d), axis=1)
    X = X[min_period:len(X)-1]
    y = y[min_period:]
    
    print(len(X), len(y))
    
    return (X, y)

In [6]:
df = pd.read_csv("../data/clean/bitcoin_prices.csv")
df = df.iloc[::24, :]

X, y = get_X_y(df, binary_output=True)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, shuffle=False)
print(df.iloc[len(X_train)])

clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1, max_iter=10000)
clf.fit(X_train, y_train)

0          NaN
24    0.006691
48   -0.009862
72   -0.001444
96    0.045501
Name: Close, dtype: float64
1988
1958 1958
Unnamed: 0    3.146400e+04
Time          1.550696e+09
Low           3.914130e+03
High          3.939170e+03
Open          3.931390e+03
Close         3.914140e+03
Volume        5.787809e+02
Name: 31464, dtype: float64


MLPClassifier(alpha=1e-05, hidden_layer_sizes=(5, 2), max_iter=10000,
              random_state=1, solver='lbfgs')

In [7]:
y_train_pred = clf.predict(X_train)
print("Train Accuracy:",metrics.accuracy_score(y_train, y_train_pred))

y_test_pred = clf.predict(X_test)
print("Test Accuracy:",metrics.accuracy_score(y_test, y_test_pred))

Train Accuracy: 0.585812356979405
Test Accuracy: 0.5687789799072643


In [8]:
with open('./pickles/neural_net_v2_daily_price', 'wb') as f:
    pickle.dump(clf, f)

In [11]:
df = pd.read_csv("../data/clean/bitcoin_prices.csv")
df = df.iloc[::24, :]

X, y = get_X_y(df, binary_output=True)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, shuffle=False)

clf = MLPClassifier(solver='lbfgs', alpha=1e-7, hidden_layer_sizes=(50), random_state=1, max_iter=10000)
clf.fit(X_train, y_train)

0          NaN
24    0.006691
48   -0.009862
72   -0.001444
96    0.045501
Name: Close, dtype: float64
1988
1958 1958


MLPClassifier(alpha=1e-07, hidden_layer_sizes=50, max_iter=10000,
              random_state=1, solver='lbfgs')

In [10]:
y_train_pred = clf.predict(X_train)
print("Train Accuracy:",metrics.accuracy_score(y_train, y_train_pred))

y_test_pred = clf.predict(X_test)
print("Test Accuracy:",metrics.accuracy_score(y_test, y_test_pred))

Train Accuracy: 0.6071700991609459
Test Accuracy: 0.527047913446677
