In [6]:
import pandas as pd
import datetime as dt
import numpy as np

from csv import writer

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GRU
from keras.models import load_model

from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

In [7]:
# Read the BTC price CSV (path is relative to the notebook's working directory)
# and print the resulting frame as plain text.
df = pd.read_csv(filepath_or_buffer='../../data/btc.csv')
print(df)

     formatted_date          high           low          open         close  \
0        2014-09-17    468.174011    452.421997    465.864014    457.334015   
1        2014-09-18    456.859985    413.104004    456.859985    424.440002   
2        2014-09-19    427.834991    384.532013    424.102997    394.795990   
3        2014-09-20    423.295990    389.882996    394.673004    408.903992   
4        2014-09-21    412.425995    393.181000    408.084991    398.821014   
...             ...           ...           ...           ...           ...   
3017     2022-12-21  16916.800781  16755.912109  16904.527344  16817.535156   
3018     2022-12-22  16866.673828  16592.408203  16818.380859  16830.341797   
3019     2022-12-23  16905.218750  16794.458984  16829.644531  16796.953125   
3020     2022-12-24  16864.703125  16793.527344  16796.976562  16847.755859   
3021     2022-12-25  16859.472656  16824.105469  16847.066406  16830.982422   

           volume      adjclose  
0        21056800

In [8]:
# Feature engineering on the price table loaded above.

# Intraday range and open-to-close move.
df['H-L'] = df['high'].sub(df['low'])
df['O-C'] = df['open'].sub(df['close'])

# Window lengths (in rows) for the rolling statistics of `high`.
ma_1, ma_2, ma_3 = 7, 14, 21

# Simple moving average of `high`, one column per window length.
for span in (ma_1, ma_2, ma_3):
    df[f'SMA_{span}'] = df['high'].rolling(window=span).mean()

# Rolling standard deviation of `high`; only the shortest and longest windows are used.
for span in (ma_1, ma_3):
    df[f'SD_{span}'] = df['high'].rolling(window=span).std()

# The rolling columns are NaN until their window is full; drop every row that
# still contains a NaN (this mutates `df` in place).
df.dropna(inplace=True)

# Persist the processed table, then show it as the cell result.
df.to_csv("../../data/BOOSTING/bitcoin_processed_high.csv")
df

Unnamed: 0,formatted_date,high,low,open,close,volume,adjclose,H-L,O-C,SMA_7,SMA_14,SMA_21,SD_7,SD_21
20,2014-10-07,339.247009,320.481995,330.584015,336.187012,49199900,336.187012,18.765015,-5.602997,363.605717,385.974141,401.938187,21.832183,35.781061
21,2014-10-08,354.364014,327.187988,336.115997,352.940002,54736300,352.940002,27.176025,-16.824005,358.317862,380.134999,396.518664,18.157595,33.811986
22,2014-10-09,382.726013,347.687012,352.747986,365.026001,83641104,365.026001,35.039001,-12.278015,357.922006,377.221143,392.988475,17.484013,30.945487
23,2014-10-10,375.066986,352.963013,364.687012,361.562012,43665700,361.562012,22.103973,3.125000,357.546574,374.373215,390.475712,17.010470,30.105456
24,2014-10-11,367.191010,355.950989,361.362000,362.299011,13345200,362.299011,11.240021,-0.937012,357.932861,371.556645,387.804047,17.223712,29.531240
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3017,2022-12-21,16916.800781,16755.912109,16904.527344,16817.535156,14882945045,16817.535156,160.888672,86.992188,17100.794085,17298.404576,17253.191592,412.188939,380.756888
3018,2022-12-22,16866.673828,16592.408203,16818.380859,16830.341797,16441573050,16830.341797,274.265625,-11.960938,16960.784040,17269.744420,17237.438058,251.843023,389.910164
3019,2022-12-23,16905.218750,16794.458984,16829.644531,16796.953125,15329265213,16796.953125,110.759766,32.691406,16875.025949,17242.935268,17228.702753,76.825191,395.425790
3020,2022-12-24,16864.703125,16793.527344,16796.976562,16847.755859,9744636213,16847.755859,71.175781,-50.779297,16884.184989,17217.783622,17216.734282,69.989608,402.742228


In [9]:
# Number of consecutive past rows that make up one input sequence.
pre_day = 30

# Input feature columns and the single target column.
cols_x = ['close','low','open','H-L', 'O-C', f'SMA_{ma_1}', f'SMA_{ma_2}', f'SMA_{ma_3}', f'SD_{ma_1}', f'SD_{ma_3}']
cols_y = ['high']

# Features and target get separate min-max scalers so that the target scaler
# can later be inverted on its own. Both are fitted on the full table.
scala_x = MinMaxScaler(feature_range=(0,1))
scala_y = MinMaxScaler(feature_range=(0,1))
scaled_data_x = scala_x.fit_transform(df[cols_x].to_numpy())
scaled_data_y = scala_y.fit_transform(df[cols_y].to_numpy())

# Sliding windows: the `pre_day` rows before position `end` form one input
# sequence, and the scaled target at `end` is its label.
x_total = [scaled_data_x[end - pre_day:end] for end in range(pre_day, len(df))]
y_total = [scaled_data_y[end] for end in range(pre_day, len(df))]

# Every window is used for training.
x_train = np.array(x_total)
y_train = np.array(y_total)

# Hold-out split, currently disabled:
# test_size = (int)(len(scaled_data_y) * 0.2)
# print(test_size)
# x_test = np.array(x_total[len(x_total)-test_size:])
# y_test = np.array(y_total[len(y_total)-test_size:])
# print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

In [10]:
# Build model
# Five stacked GRU layers of 60 units, each followed by 20% dropout. The first
# four return the full sequence so the next GRU can consume it; the last one
# returns only its final state, which feeds a dense layer with one output per
# target column.
model = Sequential()

model.add(GRU(units=60, return_sequences=True, input_shape=(x_train.shape[1], x_train.shape[2])))
model.add(Dropout(0.2))
model.add(GRU(units=60, return_sequences=True))
model.add(Dropout(0.2))
model.add(GRU(units=60, return_sequences=True))
model.add(Dropout(0.2))
model.add(GRU(units=60, return_sequences=True))
model.add(Dropout(0.2))
model.add(GRU(units=60, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=len(cols_y)))

model.compile(optimizer='adam', loss='mean_squared_error')

# `use_multiprocessing` was dropped from this call: it only applies to
# generator / keras.utils.Sequence inputs, so it had no effect on in-memory
# arrays, and newer Keras releases reject it as an unknown argument.
# NOTE(review): steps_per_epoch=40 limits each epoch to 40 batches rather than
# one full pass over x_train — confirm this is intended.
model.fit(x_train, y_train, epochs=120, steps_per_epoch=40)

# Save the trained network in HDF5 format.
model.save("../../model/BOOSTING/GRU_boosting_high.h5")

Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78

In [11]:
# Testing
# In-sample check: predict on the training windows themselves and map both the
# predictions and the labels back to price units with the target scaler.
predict_price = scala_y.inverse_transform(model.predict(x_train))
y_train_price = scala_y.inverse_transform(y_train)

# Residual table: one row per window, signed error = prediction - actual.
predicted_frame = pd.DataFrame(predict_price, columns=['predict_price'])
actual_frame = pd.DataFrame(y_train_price, columns=['y_train_price'])
error = predicted_frame.join(actual_frame)
error['error_price'] = error['predict_price'].sub(error['y_train_price'])
print(error)

      predict_price  y_train_price  error_price
0        -86.959343     352.966003  -439.925346
1        -76.026138     352.731995  -428.758133
2        -81.272522     347.032013  -428.304535
3        -81.503471     363.626007  -445.129478
4        -63.549881     374.816010  -438.365891
...             ...            ...          ...
2967   16590.785156   16916.800781  -326.015625
2968   16555.621094   16866.673828  -311.052734
2969   16522.302734   16905.218750  -382.916016
2970   16495.802734   16864.703125  -368.900391
2971   16577.634766   16859.472656  -281.837891

[2972 rows x 3 columns]


In [12]:
import pickle
from sklearn.ensemble import GradientBoostingRegressor

# Gradient-boosted regressor configured with 1000 trees of depth 8, a 0.07
# learning rate and subsample=0.3.
# NOTE(review): no random_state is passed, so with subsample < 1 repeated fits
# are not expected to be bit-identical — confirm whether that matters here.
gradient = GradientBoostingRegressor(
    n_estimators=1000,
    learning_rate=0.07,
    max_depth=8,
    subsample=0.3,
)

In [13]:
# Fit the booster on a single feature, the de-scaled GRU prediction, with the
# signed residual `error_price` (prediction minus actual) as the target.
gradient.fit(error['predict_price'].values.reshape(-1, 1), error['error_price'].values)

# Persist the fitted booster. The context manager closes the file handle even
# if pickling raises; the previous bare open() call never closed it.
# Note: the file is a pickle despite its ".h5" suffix.
with open("../../model/BOOSTING/booting_gradient_high.h5", "wb") as model_file:
    pickle.dump(gradient, model_file)

In [14]:
# Spot check: residual predicted for one hand-picked GRU output value,
# passed as a 1x1 array (one sample, one feature).
gradient.predict(np.array([[50762.023438]]))


array([-5000.79333655])

TEST

In [15]:
# Disabled hold-out prediction. It needs `x_test` / `y_test`, which are only
# created by the (also commented-out) split in the windowing cell, so both
# must be re-enabled together.
# predict_price_test = model.predict(x_test)
# predict_price_test = scala_y.inverse_transform(predict_price_test)
# y_test = scala_y.inverse_transform(y_test)



In [16]:
# Disabled: apply the booster's predicted residual to each hold-out prediction.
# NOTE(review): the booster is trained on error_price = predict_price - y_train_price,
# so adding its output to the prediction (as below) looks like it moves the
# estimate away from the actual value; subtracting may be what was intended —
# confirm before re-enabling.
# NOTE(review): `to_csv('result.csv')` returns None when given a path, so the
# final print would output "None".
# validate = []
# for predict in predict_price_test:
#     predict= gradient.predict(np.array([predict]).reshape(-1,1))[0]
#     validate.append(predict)
# df_predict = pd.concat([pd.DataFrame(predict_price_test,columns=['predict_price_test'],index=None), pd.DataFrame(y_test,columns=['y_test'],index=None)], axis=1)
# df_predict['final_predict'] = df_predict['predict_price_test'] + validate
# print(df_predict.to_csv('result.csv'))