### Build a Neural Network Classifier to Predict Whether to Buy or Sell a Cryptocurrency

Scrape crypto data using Yfinance

In [2]:
import yfinance
import numpy as np
import pandas as pd

symbol = "BTC-USD"
start = "2014-01-01"
end = "2023-09-30"
df = yfinance.download(symbol, start, end)
df


[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,21056800
2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200
2014-09-19,424.102997,427.834991,384.532013,394.795990,394.795990,37919700
2014-09-20,394.673004,423.295990,389.882996,408.903992,408.903992,36863600
2014-09-21,408.084991,412.425995,393.181000,398.821014,398.821014,26580100
...,...,...,...,...,...,...
2023-09-25,26253.775391,26421.507812,26011.468750,26298.480469,26298.480469,11997833257
2023-09-26,26294.757812,26389.884766,26090.712891,26217.250000,26217.250000,9985498161
2023-09-27,26209.498047,26817.841797,26111.464844,26352.716797,26352.716797,11718380997
2023-09-28,26355.812500,27259.500000,26327.322266,27021.546875,27021.546875,14079002707


In [3]:
df.to_csv("btc_2014-23.csv")

### Create more Features

In [4]:
BUY = 1
SELL = 0

df["Buy or Sell (Adj Close)"] = np.where(df["Adj Close"].shift(-1) >df["Adj Close"],
                                         BUY,
                                         SELL)
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Buy or Sell (Adj Close)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,21056800,0
2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200,0
2014-09-19,424.102997,427.834991,384.532013,394.795990,394.795990,37919700,1
2014-09-20,394.673004,423.295990,389.882996,408.903992,408.903992,36863600,0
2014-09-21,408.084991,412.425995,393.181000,398.821014,398.821014,26580100,1
...,...,...,...,...,...,...,...
2023-09-25,26253.775391,26421.507812,26011.468750,26298.480469,26298.480469,11997833257,0
2023-09-26,26294.757812,26389.884766,26090.712891,26217.250000,26217.250000,9985498161,1
2023-09-27,26209.498047,26817.841797,26111.464844,26352.716797,26352.716797,11718380997,1
2023-09-28,26355.812500,27259.500000,26327.322266,27021.546875,27021.546875,14079002707,0


In [5]:
df["Buy or Sell (Open)"] = np.where(df["Open"].shift(-1) >df["Open"],
                                         BUY,
                                         SELL)
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Buy or Sell (Adj Close),Buy or Sell (Open)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,21056800,0,0
2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200,0,0
2014-09-19,424.102997,427.834991,384.532013,394.795990,394.795990,37919700,1,0
2014-09-20,394.673004,423.295990,389.882996,408.903992,408.903992,36863600,0,1
2014-09-21,408.084991,412.425995,393.181000,398.821014,398.821014,26580100,1,0
...,...,...,...,...,...,...,...,...
2023-09-25,26253.775391,26421.507812,26011.468750,26298.480469,26298.480469,11997833257,0,1
2023-09-26,26294.757812,26389.884766,26090.712891,26217.250000,26217.250000,9985498161,1,0
2023-09-27,26209.498047,26817.841797,26111.464844,26352.716797,26352.716797,11718380997,1,1
2023-09-28,26355.812500,27259.500000,26327.322266,27021.546875,27021.546875,14079002707,0,1


Calculate amount of Returns each day

In [6]:
#percentage change in Adj Close
df["Returns"] = df["Adj Close"].pct_change()
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Buy or Sell (Adj Close),Buy or Sell (Open),Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,21056800,0,0,
2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200,0,0,-0.071926
2014-09-19,424.102997,427.834991,384.532013,394.795990,394.795990,37919700,1,0,-0.069843
2014-09-20,394.673004,423.295990,389.882996,408.903992,408.903992,36863600,0,1,0.035735
2014-09-21,408.084991,412.425995,393.181000,398.821014,398.821014,26580100,1,0,-0.024659
...,...,...,...,...,...,...,...,...,...
2023-09-25,26253.775391,26421.507812,26011.468750,26298.480469,26298.480469,11997833257,0,1,0.001586
2023-09-26,26294.757812,26389.884766,26090.712891,26217.250000,26217.250000,9985498161,1,0,-0.003089
2023-09-27,26209.498047,26817.841797,26111.464844,26352.716797,26352.716797,11718380997,1,1,0.005167
2023-09-28,26355.812500,27259.500000,26327.322266,27021.546875,27021.546875,14079002707,0,1,0.025380


Calculate whether Volume Increases or Decreases

In [7]:
INCREASE = 1
DECREASE = 0

df["Volume Increase or Decrease"] = np.where(df["Volume"].shift(-1) > df["Volume"],
                                             INCREASE,
                                             DECREASE)
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Buy or Sell (Adj Close),Buy or Sell (Open),Returns,Volume Increase or Decrease
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,21056800,0,0,,1
2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200,0,0,-0.071926,1
2014-09-19,424.102997,427.834991,384.532013,394.795990,394.795990,37919700,1,0,-0.069843,0
2014-09-20,394.673004,423.295990,389.882996,408.903992,408.903992,36863600,0,1,0.035735,0
2014-09-21,408.084991,412.425995,393.181000,398.821014,398.821014,26580100,1,0,-0.024659,0
...,...,...,...,...,...,...,...,...,...,...
2023-09-25,26253.775391,26421.507812,26011.468750,26298.480469,26298.480469,11997833257,0,1,0.001586,0
2023-09-26,26294.757812,26389.884766,26090.712891,26217.250000,26217.250000,9985498161,1,0,-0.003089,1
2023-09-27,26209.498047,26817.841797,26111.464844,26352.716797,26352.716797,11718380997,1,1,0.005167,1
2023-09-28,26355.812500,27259.500000,26327.322266,27021.546875,27021.546875,14079002707,0,1,0.025380,0


### Drop NaN Values

In [8]:
df = df.dropna()
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Buy or Sell (Adj Close),Buy or Sell (Open),Returns,Volume Increase or Decrease
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200,0,0,-0.071926,1
2014-09-19,424.102997,427.834991,384.532013,394.795990,394.795990,37919700,1,0,-0.069843,0
2014-09-20,394.673004,423.295990,389.882996,408.903992,408.903992,36863600,0,1,0.035735,0
2014-09-21,408.084991,412.425995,393.181000,398.821014,398.821014,26580100,1,0,-0.024659,0
2014-09-22,399.100006,406.915985,397.130005,402.152008,402.152008,24127600,1,1,0.008352,1
...,...,...,...,...,...,...,...,...,...,...
2023-09-25,26253.775391,26421.507812,26011.468750,26298.480469,26298.480469,11997833257,0,1,0.001586,0
2023-09-26,26294.757812,26389.884766,26090.712891,26217.250000,26217.250000,9985498161,1,0,-0.003089,1
2023-09-27,26209.498047,26817.841797,26111.464844,26352.716797,26352.716797,11718380997,1,1,0.005167,1
2023-09-28,26355.812500,27259.500000,26327.322266,27021.546875,27021.546875,14079002707,0,1,0.025380,0


### Train-test Split and

In [10]:
y = df["Buy or Sell (Adj Close)"]
y

Date
2014-09-18    0
2014-09-19    1
2014-09-20    0
2014-09-21    1
2014-09-22    1
             ..
2023-09-25    0
2023-09-26    1
2023-09-27    1
2023-09-28    0
2023-09-29    0
Name: Buy or Sell (Adj Close), Length: 3299, dtype: int32

In [11]:
X = df.drop(["Buy or Sell (Adj Close)"], axis=1)
X

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Buy or Sell (Open),Returns,Volume Increase or Decrease
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200,0,-0.071926,1
2014-09-19,424.102997,427.834991,384.532013,394.795990,394.795990,37919700,0,-0.069843,0
2014-09-20,394.673004,423.295990,389.882996,408.903992,408.903992,36863600,1,0.035735,0
2014-09-21,408.084991,412.425995,393.181000,398.821014,398.821014,26580100,0,-0.024659,0
2014-09-22,399.100006,406.915985,397.130005,402.152008,402.152008,24127600,1,0.008352,1
...,...,...,...,...,...,...,...,...,...
2023-09-25,26253.775391,26421.507812,26011.468750,26298.480469,26298.480469,11997833257,1,0.001586,0
2023-09-26,26294.757812,26389.884766,26090.712891,26217.250000,26217.250000,9985498161,0,-0.003089,1
2023-09-27,26209.498047,26817.841797,26111.464844,26352.716797,26352.716797,11718380997,1,0.005167,1
2023-09-28,26355.812500,27259.500000,26327.322266,27021.546875,27021.546875,14079002707,1,0.025380,0


In [13]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y)
X_train.shape,y_train.shape

((2474, 9), (2474,))

In [14]:
X_test.shape, y_test.shape

((825, 9), (825,))

#### scaling the data

In [15]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [16]:
scaler.fit(X_train)

In [17]:
X_train = scaler.transform(X_train)
X_train

array([[ 1.25974111,  1.27398226,  1.28941782, ...,  0.94645942,
         0.55731103,  1.02621247],
       [-0.27632685, -0.28233945, -0.26456622, ...,  0.94645942,
         0.19308786, -0.97445707],
       [-0.39546398, -0.40284851, -0.3962838 , ..., -1.05656933,
        -0.63790152,  1.02621247],
       ...,
       [-0.31786822, -0.32048776, -0.30416552, ...,  0.94645942,
         0.33719306,  1.02621247],
       [-0.62068225, -0.61980266, -0.6156453 , ...,  0.94645942,
         0.63359386, -0.97445707],
       [ 0.12719267,  0.10529782,  0.02916455, ..., -1.05656933,
        -2.15126971,  1.02621247]])

In [18]:
X_test = scaler.transform(X_test)
X_test

array([[-0.84256748, -0.84261628, -0.8435731 , ..., -1.05656933,
        -0.11836124,  1.02621247],
       [-0.84902333, -0.84893512, -0.85147054, ..., -1.05656933,
        -1.71508897,  1.02621247],
       [-0.39929755, -0.40130397, -0.39427581, ...,  0.94645942,
         0.37182442, -0.97445707],
       ...,
       [ 0.54096626,  0.5167085 ,  0.57262047, ..., -1.05656933,
        -0.0627327 ,  1.02621247],
       [ 0.59154203,  0.58945888,  0.6015143 , ...,  0.94645942,
         0.51747095,  1.02621247],
       [ 0.06232911,  0.0898709 ,  0.0814001 , ...,  0.94645942,
         1.11701188,  1.02621247]])

#### Build a Neural Network Classifier 

Using Multi-layer Perceptron

In [98]:
from sklearn.neural_network import MLPClassifier

NUMBER_OF_NODES = 10
NUMBER_OF_LOOPS = 500

model = MLPClassifier(hidden_layer_sizes= (NUMBER_OF_NODES,
                                           NUMBER_OF_NODES,
                                           NUMBER_OF_NODES),
                                           activation="relu",
                                           random_state=0,
                                           learning_rate="adaptive",
                                           max_iter=NUMBER_OF_LOOPS)
model

In [99]:
model.fit(X_train, y_train)

In [100]:
y_pred = model.predict(X_test)
y_pred

array([1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0,
       1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1,
       1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1,
       0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0,
       1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1,
       1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1,

### Calculate Neural Network Accuracy From Confusion Matrix

In [101]:
from sklearn.metrics import confusion_matrix
conf_matrix = confusion_matrix(y_test, y_pred)
conf_matrix

array([[148, 253],
       [135, 289]], dtype=int64)

In [102]:
conf_matrix.trace()

437

In [103]:
conf_matrix.sum()

825

In [104]:
def calculate_accuracy_score_from_matrix(matrix):
    diagonal_sum = matrix.trace()
    sum_of_all_values = matrix.sum()
    accuracy = diagonal_sum/sum_of_all_values
    return accuracy

In [105]:
calculate_accuracy_score_from_matrix(conf_matrix)

0.5296969696969697

In [106]:
from sklearn.metrics import accuracy_score
accu_score = accuracy_score(y_test, y_pred)
accu_score

0.5296969696969697

In [107]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.52      0.37      0.43       401
           1       0.53      0.68      0.60       424

    accuracy                           0.53       825
   macro avg       0.53      0.53      0.52       825
weighted avg       0.53      0.53      0.52       825

