In [1]:
import keras
from keras import layers
from keras.layers.core import Dense, Activation
from keras.models import Sequential
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

import pandas as pd

from sklearn.model_selection import train_test_split

import numpy as np

from sklearn.metrics import accuracy_score

from keras.layers import LSTM
from sklearn import preprocessing

from sklearn.ensemble import RandomForestClassifier

Using TensorFlow backend.


In [2]:
# Convert a Pandas dataframe to the x,y inputs that TensorFlow needs
def to_xy(df, target):
    result = []
    for x in df.columns:
        if x != target:
            result.append(x)
    # find out the type of the target column.  Is it really this hard? :(
    target_type = df[target].dtypes
    target_type = target_type[0] if hasattr(target_type, '__iter__') else target_type
    # Encode to int for classification, float otherwise. TensorFlow likes 32 bits.
    if target_type in (np.int64, np.int32):
        # Classification
        dummies = pd.get_dummies(df[target])
        return df.as_matrix(result).astype(np.float32), dummies.as_matrix().astype(np.float32)
    else:
        # Regression
        return df.as_matrix(result).astype(np.float32), df.as_matrix([target]).astype(np.float32)
    
# Encode text values to indexes(i.e. [1],[2],[3] for red,green,blue).
def encode_text_index(df, name):
    le = preprocessing.LabelEncoder()
    df[name] = le.fit_transform(df[name])
    return le.classes_

In [3]:
df = pd.read_csv('data/BTC-USD-edited.csv')
df.head()

Unnamed: 0,Date,Value USD,Max 7,Min 7,Mean 7,Change,Mean Change 7,Drop 7,Up 7,Predict,Actual
0,2010-07-22,0.06262,0.08584,0.04951,0.069031,0.00808,-0.000719,3.0,4.0,-1.0,0
1,2010-07-23,0.05454,0.08584,0.0505,0.06975,0.00404,0.005049,2.0,5.0,2.0,0
2,2010-07-24,0.0505,0.0808,0.0505,0.064701,-0.0055,0.003543,3.0,4.0,-1.0,1
3,2010-07-25,0.056,0.07921,0.0505,0.061159,-0.004,0.002106,4.0,3.0,-2.0,1
4,2010-07-26,0.06,0.07921,0.0505,0.059053,0.0011,0.002901,3.0,4.0,2.0,0


In [4]:
df.tail()

Unnamed: 0,Date,Value USD,Max 7,Min 7,Mean 7,Change,Mean Change 7,Drop 7,Up 7,Predict,Actual
2947,2018-08-16,6591.160156,6591.160156,6091.140137,6262.038714,185.450195,-44.938546,4.0,2.0,0.0,0
2948,2018-08-17,6405.709961,6591.160156,6091.140137,6306.97726,-96.470215,-58.720006,5.0,2.0,-2.0,1
2949,2018-08-18,6502.180176,6591.160156,6199.600098,6365.697266,232.280274,-0.957101,4.0,3.0,0.0,0
2950,2018-08-19,6269.899902,6591.160156,6199.600098,6366.654367,-221.209961,-41.644252,5.0,2.0,-2.0,1
2951,2018-08-20,6491.109863,6591.160156,6269.899902,6408.298619,124.97998,-13.129953,4.0,3.0,0.0,0


In [5]:
df2 = pd.read_csv('data/EOD-MSFT-edited.csv')

In [6]:
X = df[['Value USD', 'Max 7', 'Min 7', 'Change', 'Mean Change 7', 'Drop 7', 'Up 7']].values
y = df['Actual'].values

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [10]:
clf = RandomForestClassifier(max_depth=2, random_state=0)
clf.fit(X_train, y_train)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=2, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
            oob_score=False, random_state=0, verbose=0, warm_start=False)

In [11]:
clf.score(X_test, y_test)

0.934010152284264

In [12]:
from sklearn.externals import joblib

In [13]:
joblib.dump(clf, 'data/bitcoin-predictor.pkl')

['data/bitcoin-predictor.pkl']

In [14]:
clf = joblib.load('data/bitcoin-predictor.pkl')

In [15]:
[0.05050, 0.08584, 0.04951, -0.01212, -0.001873, 3.0, 3.0]

[0.0505, 0.08584, 0.04951, -0.01212, -0.001873, 3.0, 3.0]

In [16]:
clf.predict(np.array([0.05050, 0.08584, 0.04951, -0.01212, -0.001873, 3.0, 3.0]).reshape(1, -1))[0]

1

In [15]:
clf.score(X_test, y_test)

0.934010152284264

In [21]:
df.iloc[10]

Date             2010-08-01
Value USD              0.06
Max 7                0.0699
Min 7                0.0589
Mean 7            0.0629214
Change                    0
Mean Change 7             0
Drop 7                    2
Up 7                      4
Predict                  -1
Actual                    1
Name: 10, dtype: object

In [24]:
df[['Value USD', 'Max 7', 'Min 7', 'Change', 'Mean Change 7', 'Drop 7', 'Up 7']].iloc[10].to_list()

[0.06, 0.0699, 0.0589, 0.0, 0.0, 2.0, 4.0]

In [25]:
X2 = df2[['Value USD', 'Max 7', 'Min 7', 'Change', 'Mean Change 7', 'Drop 7', 'Up 7']].values
y2 = df2['Actual'].values

In [26]:
clf.score(X2, y2)

0.9616562538282494

In [27]:
df.drop(['Date'], inplace=True, axis=1)
df.head()

Unnamed: 0,Value USD,Max 7,Min 7,Mean 7,Change,Mean Change 7,Drop 7,Up 7,Predict,Actual
0,0.06262,0.08584,0.04951,0.069031,0.00808,-0.000719,3.0,4.0,-1.0,0
1,0.05454,0.08584,0.0505,0.06975,0.00404,0.005049,2.0,5.0,2.0,0
2,0.0505,0.0808,0.0505,0.064701,-0.0055,0.003543,3.0,4.0,-1.0,1
3,0.056,0.07921,0.0505,0.061159,-0.004,0.002106,4.0,3.0,-2.0,1
4,0.06,0.07921,0.0505,0.059053,0.0011,0.002901,3.0,4.0,2.0,0


In [48]:
X,y = to_xy(df,"Actual")

  


In [49]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [50]:
model = Sequential()
model.add(Dense(80, activation='relu', input_dim=X_train.shape[1]))
model.add(Dense(80, activation='relu'))
model.add(Dense(80, activation='relu'))
model.add(Dense(80, activation='relu'))
model.add(Dense(y_train.shape[1], activation='softmax'))

#model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
#model.fit(X_train, y_train, epochs=1000)

model.compile(loss='categorical_crossentropy', optimizer='adam')
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-2, patience=25, verbose=1, mode='auto')
checkpointer = ModelCheckpoint(filepath="best_weights.hdf5", verbose=0, save_best_only=True) # save best model

model.fit(X_train, y_train, validation_data=(X_test, y_test), callbacks=[monitor,checkpointer], verbose=2, epochs=1000)
model.load_weights('best_weights.hdf5') # load weights from best model

Train on 2361 samples, validate on 591 samples
Epoch 1/1000
 - 1s - loss: 5.2333 - val_loss: 6.2240
Epoch 2/1000
 - 0s - loss: 6.2714 - val_loss: 5.8287
Epoch 3/1000
 - 0s - loss: 5.4753 - val_loss: 5.1213
Epoch 4/1000
 - 0s - loss: 5.5735 - val_loss: 5.2229
Epoch 5/1000
 - 0s - loss: 5.4159 - val_loss: 5.8435
Epoch 6/1000
 - 0s - loss: 5.1097 - val_loss: 5.1993
Epoch 7/1000
 - 0s - loss: 5.1239 - val_loss: 6.1216
Epoch 8/1000
 - 0s - loss: 4.5427 - val_loss: 5.6184
Epoch 9/1000
 - 0s - loss: 5.7912 - val_loss: 5.3018
Epoch 10/1000
 - 0s - loss: 5.4892 - val_loss: 5.6501
Epoch 11/1000
 - 0s - loss: 5.5344 - val_loss: 5.0769
Epoch 12/1000
 - 0s - loss: 3.9198 - val_loss: 5.0415
Epoch 13/1000
 - 0s - loss: 5.3313 - val_loss: 7.0359
Epoch 14/1000
 - 1s - loss: 5.7116 - val_loss: 0.2911
Epoch 15/1000
 - 0s - loss: 4.6532 - val_loss: 6.1625
Epoch 16/1000
 - 0s - loss: 6.4403 - val_loss: 1.8754
Epoch 17/1000
 - 0s - loss: 4.4925 - val_loss: 2.2967
Epoch 18/1000
 - 0s - loss: 4.3440 - val_los

In [51]:
model.save('data/amzn-predictor-shuffled.hd5')

In [52]:
pred = model.predict(X_test)

In [53]:
print("Shape: {}".format(pred.shape))
print(pred)

Shape: (591, 2)
[[1.0000000e+00 7.3317275e-21]
 [1.0000000e+00 3.5655733e-21]
 [0.0000000e+00 1.0000000e+00]
 ...
 [0.0000000e+00 1.0000000e+00]
 [8.6648399e-01 1.3351606e-01]
 [2.6517687e-16 1.0000000e+00]]


In [54]:
predict_classes = np.argmax(pred,axis=1)
print("Predictions: {}".format(predict_classes))
print("Expected: {}".format(np.argmax(y_test, axis=1)))
y_test_arg = np.argmax(y_test, axis=1)

Predictions: [0 0 1 1 0 0 1 0 0 0 1 1 1 1 1 0 1 1 0 1 0 0 0 0 0 1 1 0 1 0 0 0 0 1 1 0 0
 0 1 1 1 0 0 0 0 1 1 0 0 0 0 1 0 0 1 0 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1
 0 1 0 1 1 1 0 0 1 1 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 1 1 0 0 0 1 1 1
 1 1 1 1 1 0 0 0 0 1 0 0 1 0 0 1 1 0 0 1 1 0 1 0 0 1 0 0 0 0 0 0 0 0 1 1 0
 1 0 0 0 0 1 1 1 1 0 0 0 1 1 0 1 1 1 1 0 0 0 1 0 0 1 0 1 1 1 1 0 1 0 0 0 1
 0 0 0 1 1 1 1 1 0 0 1 0 0 0 0 1 1 1 1 1 1 0 0 0 1 0 0 1 0 0 0 1 0 0 0 1 1
 0 1 0 0 0 0 0 1 0 0 0 0 0 1 1 1 0 0 1 1 1 0 1 0 0 0 1 1 1 1 1 1 0 1 1 0 1
 1 0 1 1 0 0 0 1 1 1 1 1 1 0 0 1 0 1 0 0 1 0 1 1 0 1 1 0 0 0 0 0 0 1 1 0 1
 0 1 0 1 1 1 1 0 1 1 1 0 1 1 1 1 0 1 1 0 0 1 1 0 1 1 1 0 1 1 0 0 1 1 1 0 0
 1 1 1 1 0 0 0 1 0 0 0 1 1 0 0 1 0 1 0 0 1 1 0 1 1 1 0 1 0 1 0 0 1 0 1 0 0
 0 1 0 1 1 1 1 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 0 1 1 1 1 0 0 1 0 1 0 1 1 0 1
 0 1 0 0 0 0 1 1 1 1 0 0 0 1 0 0 0 1 1 1 1 0 1 1 0 1 0 0 1 1 1 0 1 1 1 0 1
 1 1 0 0 1 0 0 0 1 1 0 1 1 0 1 1 0 1 0 1 1 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 1
 1 0 0 0 1 0

In [55]:
correct = accuracy_score(y_test_arg,predict_classes)
print("Accuracy: {}".format(correct))

Accuracy: 0.9289340101522843


In [58]:
X,y = to_xy(df2,"Actual")

  


In [59]:
pred = model.predict(X)

In [60]:
print("Shape: {}".format(pred.shape))
print(pred)

Shape: (8163, 2)
[[1.0000000e+00 0.0000000e+00]
 [1.0000000e+00 0.0000000e+00]
 [1.0000000e+00 0.0000000e+00]
 ...
 [6.6095157e-10 1.0000000e+00]
 [2.1864238e-10 1.0000000e+00]
 [1.2927927e-01 8.7072074e-01]]


In [61]:
predict_classes = np.argmax(pred,axis=1)
print("Predictions: {}".format(predict_classes))
print("Expected: {}".format(np.argmax(y, axis=1)))
y_arg = np.argmax(y, axis=1)

Predictions: [0 0 0 ... 1 1 1]
Expected: [0 0 1 ... 1 1 0]


In [62]:
correct = accuracy_score(y_arg,predict_classes)
print("Accuracy: {}".format(correct))

Accuracy: 0.5978194291314468


In [63]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

In [64]:
df3 = pd.read_csv('data/AMZN-edited.csv')
df3.head()

Unnamed: 0,Date,Value USD,Drop 7,Up 7,Min 7,Max 7,Mean 7,Mean Change 7,Change,Predict,Actual
0,05/23/1997,1.5,2.0,5.0,1.395833,1.958333,1.622024,0.053571,-0.083333,-1.0,1
1,05/27/1997,1.583333,2.0,5.0,1.395833,1.729167,1.568452,0.028274,0.052083,2.0,0
2,05/28/1997,1.53125,2.0,5.0,1.395833,1.708333,1.540178,0.029018,0.026042,2.0,0
3,05/29/1997,1.505208,2.0,5.0,1.395833,1.635417,1.511161,0.019345,0.005208,2.0,0
4,05/30/1997,1.5,3.0,4.0,1.395833,1.583333,1.491815,-0.011905,-0.010417,-2.0,1


In [65]:
df3.drop(['Date'], axis=1, inplace=True)

In [66]:
X,y = to_xy(df3,"Actual")

  


In [67]:
pred = model.predict(X)

In [68]:
print("Shape: {}".format(pred.shape))
print(pred)

Shape: (5337, 2)
[[7.1641695e-01 2.8358299e-01]
 [9.9999988e-01 1.5703834e-07]
 [9.9999988e-01 1.7221944e-07]
 ...
 [1.0000000e+00 0.0000000e+00]
 [1.0000000e+00 0.0000000e+00]
 [1.0000000e+00 0.0000000e+00]]


In [69]:
predict_classes = np.argmax(pred,axis=1)
print("Predictions: {}".format(predict_classes))
print("Expected: {}".format(np.argmax(y, axis=1)))
y_arg = np.argmax(y, axis=1)

Predictions: [0 0 0 ... 0 0 0]
Expected: [1 0 0 ... 1 1 1]


In [70]:
correct = accuracy_score(y_arg,predict_classes)
print("Accuracy: {}".format(correct))

Accuracy: 0.49278620948098184
