In [None]:
#imports
import os
import random
from statistics import mean

import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import Input
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten 
from tensorflow.keras.layers import BatchNormalization, AveragePooling2D 
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD


In [None]:
# Load the raw CSV into `df` and remember how many rows it has.
# `num_rows` is captured here, before any later cell removes rows, and is
# what the split cell uses to compute its boundaries.
data_path = '/content/data_v2.csv'
df = pd.read_csv(data_path)
num_rows, _ = df.shape
print(df)


     Date(UTC)  UnixTimeStamp  Difficulty  BlockSize  TxFeeUSD   PriceBtcUsd  \
0       8/7/15     1438905600       1.471        633      0.05    278.740997   
1       8/8/15     1438992000       1.586        668      0.02    279.742004   
2       8/9/15     1439078400       1.709        618      0.01    261.115997   
3      8/10/15     1439164800       1.838        631      0.01    265.477997   
4      8/11/15     1439251200       2.036        692      0.00    264.342010   
...        ...            ...         ...        ...       ...           ...   
2456   4/28/22     1651104000   13348.834      90735      2.33  39241.429690   
2457   4/29/22     1651190400   13317.627      88751      1.94  39768.617190   
2458   4/30/22     1651276800   13622.983      85276      1.46  38605.859380   
2459    5/1/22     1651363200   13826.536      87744     24.54  37713.265630   
2460    5/2/22     1651449600   13770.496      86346      1.80  38472.187500   

      ERC20Transfer  ERC20Addr  Gtrends

In [None]:
# Add, for every row, the price found in the following row and a binary
# label saying whether that following price is strictly higher.
#
# The price is read positionally from the LAST column of `df` as it is when
# this cell runs, so the cell must run exactly once on the freshly loaded
# frame (re-running it would treat the newly added label column as the price).
num_cols = len(df.columns) - 1            # positional index of the price column
eth_price = df.iloc[:, num_cols]

# shift(-1) aligns each row with the value one row below it; the final row
# has no successor and gets NaN.
next_day_price = eth_price.shift(-1)

df['PriceEth_next_day'] = next_day_price  # appended as second-to-last column
# NaN comparisons are False, so the final row is labelled 0 here -- it is
# removed below and never used.
df['Increased'] = (next_day_price > eth_price).astype(int)  # appended as last column

# Drop the last row of the csv: there is no following row to compare against.
df = df.iloc[:-1].copy()


In [None]:
# Show the frame after the label columns were added and the last row was dropped.
print(df)

     Date(UTC)  UnixTimeStamp  Difficulty  BlockSize  TxFeeUSD   PriceBtcUsd  \
0       8/7/15     1438905600       1.471        633      0.05    278.740997   
1       8/8/15     1438992000       1.586        668      0.02    279.742004   
2       8/9/15     1439078400       1.709        618      0.01    261.115997   
3      8/10/15     1439164800       1.838        631      0.01    265.477997   
4      8/11/15     1439251200       2.036        692      0.00    264.342010   
...        ...            ...         ...        ...       ...           ...   
2455   4/27/22     1651017600   13204.400      97534      2.31  38120.300780   
2456   4/28/22     1651104000   13348.834      90735      2.33  39241.429690   
2457   4/29/22     1651190400   13317.627      88751      1.94  39768.617190   
2458   4/30/22     1651276800   13622.983      85276      1.46  38605.859380   
2459    5/1/22     1651363200   13826.536      87744     24.54  37713.265630   

      ERC20Transfer  ERC20Addr  Gtrends

In [None]:
#Global Variables

m = 10            # window length: number of consecutive rows in one input sequence
batch_size = 64   # passed to model.fit
epochs = 100      # passed to model.fit

# Fix the random state so that weight initialisation, batch shuffling and the
# `random`-driven simulations further down give the same numbers after
# Restart & Run All. keras.utils.set_random_seed seeds Python's `random`,
# NumPy and TensorFlow in one call.
# NOTE(review): this helper is believed to require TensorFlow >= 2.7 -- confirm
# against the runtime's version.
SEED = 42
keras.utils.set_random_seed(SEED)


In [None]:
#split into test, train 

#reminder, do not split randomly !! Split based on date

# Positional boundaries of the split. Rows before the first boundary and rows
# from the second boundary onwards are used for training; the slice in
# between is used for testing.
# `num_rows` was captured when the CSV was loaded, i.e. before the last row
# was dropped, so it is one larger than the current len(df).
index_of_last_train_example = int(num_rows * 0.6)
index_of_first_train_example2 = int(num_rows * 0.8)


# Drop next_day price (it was only kept in df in case a regression is run)
# together with the two date columns, which are not used as features.
# `columns=` is used because passing the axis positionally is deprecated in
# pandas 1.x and rejected by pandas 2.x.
df = df.drop(columns=['PriceEth_next_day', 'Date(UTC)', 'UnixTimeStamp'])

# Normalize: express every remaining column as its relative change versus
# the first row, (value / first_value) - 1. The label and the columns listed
# below are left untouched.
# NOTE(review): the listed feature columns appear to start at 0 in the data,
# which would make the division undefined -- presumably why they are excluded.
# This cell is not idempotent: after one run the first row of every scaled
# column is 0, so a second run would divide by zero.
unscaled_columns = ['Increased', 'ERC20Transfer', 'ERC20Addr', 'GtrendsEth']
for column in df.columns:
    if column not in unscaled_columns:
        first_value = df[column].iloc[0]
        df[column] = (df[column] / first_value) - 1


  # Remove the CWD from sys.path while we load stuff.
  # This is added back by InteractiveShellApp.init_path()
  if sys.path[0] == '':


In [None]:
# Show the frame after the helper columns were dropped and the features rescaled.
print(df)

       Difficulty   BlockSize  TxFeeUSD  PriceBtcUsd  ERC20Transfer  \
0        0.000000    0.000000       0.0     0.000000              0   
1        0.078178    0.055292      -0.6     0.003591              0   
2        0.161795   -0.023697      -0.8    -0.063231              0   
3        0.249490   -0.003160      -0.8    -0.047582              0   
4        0.384092    0.093207      -1.0    -0.051657              0   
...           ...         ...       ...          ...            ...   
2455  8975.478586  153.082148      45.2   135.758859         596398   
2456  9073.666213  142.341232      45.6   139.780976         639161   
2457  9052.451394  139.206951      37.8   141.672293         704970   
2458  9260.035350  133.717220      28.2   137.500830         702661   
2459  9398.412644  137.616114     489.8   134.298596         530086   

      ERC20Addr  GtrendsEth      Gold     SP500  TickerRIOT  TickerMSTR  \
0             0           0  0.000000  0.000000    0.000000    0.000000 

In [None]:
# Count the rows per label value to check the class balance.
print(df.groupby(['Increased']).count())

# The dataset is relatively balanced: 1216 rows with label 0 vs 1244 with label 1.

           Difficulty  BlockSize  TxFeeUSD  PriceBtcUsd  ERC20Transfer  \
Increased                                                                
0                1216       1216      1216         1216           1216   
1                1244       1244      1244         1244           1244   

           ERC20Addr  GtrendsEth  Gold  SP500  TickerRIOT  TickerMSTR  \
Increased                                                               
0               1216        1216  1216   1216        1216        1216   
1               1244        1244  1244   1244        1244        1244   

           PriceEthUsd  
Increased               
0                 1216  
1                 1244  


In [None]:
#Create sequences, convert to numpy (training)

def _make_windows(feature_matrix, labels, start, stop, window):
  """Build sliding windows of `window` consecutive rows.

  For every i in range(start, stop) the input is rows i .. i+window-1 of
  `feature_matrix` and the target is labels[i + window].

  NOTE(review): the target is taken from the row *after* the window, so that
  row's own features are never part of the input -- confirm this offset is
  intended.

  Returns a pair (list of 2-D arrays, list of int labels).
  """
  xs = [feature_matrix[i:i + window] for i in range(start, stop)]
  ys = [int(labels[i + window]) for i in range(start, stop)]
  return xs, ys


# Convert once instead of slicing/dropping the DataFrame on every iteration.
# `columns=` replaces the deprecated positional axis argument of drop().
train_feature_matrix = df.drop(columns=['Increased']).to_numpy()
train_label_vector = df['Increased'].to_numpy()

train_values_arr = []
train_values_y = []

#create sequences of size m
# Training data comes from two row ranges: everything before
# index_of_last_train_example and everything from
# index_of_first_train_example2 to the end of the frame. Each range stops
# m rows early so that both the window and its label stay inside the range.
train_ranges = (
  (0, index_of_last_train_example - m),
  (index_of_first_train_example2, len(df.index) - m),
)
for range_start, range_stop in train_ranges:
  window_xs, window_ys = _make_windows(
    train_feature_matrix, train_label_vector, range_start, range_stop, m)
  train_values_arr.extend(window_xs)
  train_values_y.extend(window_ys)

train_x_np = np.array(train_values_arr)
train_y_np = np.array(train_values_y)

# Number of columns in df, *including* the 'Increased' label column.
num_features = len(df.columns)

  # This is added back by InteractiveShellApp.init_path()


In [None]:
# Create sequences, convert to numpy (test)

# Convert once instead of slicing/dropping the DataFrame on every iteration.
# `columns=` replaces the deprecated positional axis argument of drop().
test_feature_matrix = df.drop(columns=['Increased']).to_numpy()
test_label_vector = df['Increased'].to_numpy()

test_values_arr = []
test_values_y = []

#create sequences of size m
# The test slice is the rows between the two training ranges. The loop stops
# m rows before index_of_first_train_example2 so that neither a window
# (rows i .. i+m-1) nor its label (row i+m) reaches into the rows from
# index_of_first_train_example2 onwards, which the training cell uses.
# Without the "- m" the last m test windows overlap the training data.
for i in range(index_of_last_train_example, index_of_first_train_example2 - m):
  test_values_arr.append(test_feature_matrix[i:i + m])

  #Get y value for this sequence
  test_values_y.append(int(test_label_vector[i + m]))

# Number of test windows whose label is 1.
test_buy = sum(1 for label in test_values_y if label == 1)

test_x_np = np.array(test_values_arr)
test_y_np = np.array(test_values_y)

# Number of columns in df, *including* the 'Increased' label column.
num_features = len(df.columns)

  if __name__ == '__main__':


In [None]:
#Part 2 

#DNN:
# Fully connected binary classifier: the (window, features) input is flattened
# and fed through two ReLU layers of 8*m units into a single sigmoid output.
#
# The input shape is taken from the training array itself rather than from
# (m, num_features - 1, 1): the data has no trailing channel axis, and
# `num_features` is reassigned by later cells, so re-running this cell after
# them would otherwise declare the wrong shape.
model = Sequential()
model.add(Input(shape=train_x_np.shape[1:]))
model.add(Flatten())
model.add(Dense(8*m, activation='relu'))
model.add(Dense(8*m, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])



In [None]:
# Train the network. validation_data is passed as an (x, y) tuple, which is
# the form the Keras documentation specifies.
# NOTE(review): the validation set here is the test set, so the per-epoch
# val_accuracy is not an independent estimate if it is used to pick a model.
hist = model.fit(
  train_x_np,
  train_y_np,
  epochs=epochs,
  batch_size=batch_size,
  validation_data=(test_x_np, test_y_np),
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# Write the per-epoch validation and training accuracy curves to CSV files
# whose names carry the window length m.
for history_key, file_prefix in (('val_accuracy', "dnn_val_acc_"), ('accuracy', "dnn_acc_")):
  np.savetxt(file_prefix + str(m) + ".csv", hist.history[history_key], delimiter=',')


In [None]:
# Evaluate the model on the test data using `evaluate`.
# The model was compiled with metrics=['accuracy'], so `results` is the list
# [loss, accuracy]. These are the same arrays that were passed as
# validation_data during fit.
print("Evaluate on test data")
results = model.evaluate(test_x_np, test_y_np)
print("test loss, test acc:", results)

Evaluate on test data
test loss, test acc: [153.2213592529297, 0.5284552574157715]


In [None]:
#Create profitability bot: 
# Simulates `num_tests` independent runs. Each run starts with 10000, draws
# as many test windows as the test set contains (uniformly at random, with
# replacement) and stakes 5% of the current balance on the model's
# prediction: a correct prediction adds the stake, a wrong one removes it.
tested_vals = []
num_tests = 5

# Predict the whole test set once instead of calling model.predict for every
# sampled window. The shape is not unpacked into `num_features` here, so the
# notebook-level variable of that name is left intact.
rows = len(test_x_np)
test_probabilities = model.predict(test_x_np).reshape(-1)
test_predictions = (test_probabilities > 0.5).astype(int)

for j in range(num_tests):
  val = 10000
  correct = 0
  incorrect = 0
  predicted_buy = 0   # how many sampled windows the model labelled 1
  print('rows =' + str(rows))
  for _ in range(rows):
    sample_index = random.randint(0, rows - 1)
    bet = val * .05
    true_y = test_y_np[sample_index]
    predicted_binary = int(test_predictions[sample_index])
    if predicted_binary == 1:
      predicted_buy = predicted_buy + 1
    if predicted_binary == true_y:
      correct = correct + 1
      val = val + bet
    else:
      incorrect = incorrect + 1
      val = val - bet
  print(predicted_buy)
  tested_vals.append(val)



rows =492
438
rows =492
416
rows =492
427
rows =492
424
rows =492
430


In [None]:
# Size of the test set, followed by the hit rate of the simulation.
# `correct` and `incorrect` are reset at the start of every simulated run, so
# this ratio describes only the last run of whichever simulation cell was
# executed most recently.
print(len(test_x_np))
print(correct / (correct + incorrect))

492
0.5467479674796748


In [None]:
# Report the ending balance of every simulated run and their average.
# `mean` is statistics.mean, imported with the other imports at the top of
# the notebook.
print(tested_vals)
print("average ending investment: ")
print(mean(tested_vals))

[32730.825619736643, 65950.30627898283, 9848.467020926602, 179421.19901406486, 53986.531897307774]
average ending investment: 
68387.46596620374


In [None]:
#baseline profitability bot: 
# Same betting scheme as the model-driven simulation (start with 10000, stake
# 5% of the balance per step), but the prediction is a fair coin flip.
# NOTE(review): this loop walks the test labels in order, whereas the
# model-driven simulation samples them at random with replacement -- confirm
# the two are meant to be compared directly.
# This cell overwrites tested_vals / correct / incorrect from the
# model-driven simulation.
tested_vals = []
num_tests = 5

# Only the number of test samples is needed; the array shape is not unpacked
# into `num_features`, so the notebook-level variable of that name is left
# intact.
rows = len(test_y_np)

for j in range(num_tests):
  val = 10000
  correct = 0
  incorrect = 0
  predicted_buy = 0   # how many coin flips came up 1
  for i in range(rows):
    bet = val * .05
    true_y = test_y_np[i]
    predicted_binary = random.randint(0, 1)
    if predicted_binary == 1:
      predicted_buy = predicted_buy + 1
    if predicted_binary == true_y:
      correct = correct + 1
      val = val + bet
    else:
      incorrect = incorrect + 1
      val = val - bet
  tested_vals.append(val)




In [None]:
# Report the ending balance of every baseline run and their average.
# `tested_vals` holds whatever the most recently executed simulation cell
# produced.
print(tested_vals)
print("average ending investment: ")
print(mean(tested_vals))

[9848.467020926615, 5402.247438693628, 4422.247903329702, 12030.95278733695, 3275.261674650525]
average ending investment: 
6995.835364987484


In [None]:
# Number of test windows whose label is 1, counted while the test windows were built.
print(test_buy)