In [15]:
import datetime

import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras import Model, Sequential

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import MeanAbsoluteError

from tensorflow.keras.layers import Dense, Conv1D, LSTM, Lambda, Reshape, RNN, LSTMCell

import warnings
warnings.filterwarnings('ignore')

In [16]:
plt.rcParams['figure.figsize'] = (10, 7.5)
plt.rcParams['axes.grid'] = False

In [17]:
print(tf.__version__)

2.11.0


In [18]:
tf.random.set_seed(42)
np.random.seed(42)

In [19]:
df = pd.read_csv('BTC-Indicators.csv')
df=df.drop('Unnamed: 0',axis=1)
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,SMA_7,SMA_14,EMA_7,EMA_14,RSI,Stoch_RSI,Upper_BB,Lower_BB,MACD
0,2014/10/28,353.214996,359.984009,352.678986,357.618011,357.618011,7845880.0,358.928859,373.1355,360.189252,365.64048,26.59727,0.0,407.000618,341.261085,-4.591695
1,2014/10/29,357.088989,357.833008,335.342987,335.591003,335.591003,18192700.0,352.133575,368.908214,354.038225,361.569705,22.686407,0.0,409.625952,335.692252,-6.585445
2,2014/10/30,335.709015,350.912994,335.071991,345.304993,345.304993,30177900.0,350.260433,366.247428,351.854527,359.371035,32.401375,0.170521,410.516219,333.176283,-7.297547
3,2014/10/31,345.009003,348.045013,337.141998,338.321014,338.321014,12545400.0,347.399863,363.001929,348.470696,356.530731,29.646025,0.122158,411.960112,329.33459,-8.329426
4,2014/11/1,338.649994,340.528992,321.054993,325.748993,325.748993,16677200.0,344.325291,358.309572,342.789699,352.383942,21.806794,0.0,413.709058,322.305642,-10.045852


In [20]:
df.shape

(3046, 16)

In [21]:
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Open,3046.0,13195.43,16123.75,176.897,751.8445,7361.77,18353.98,67549.73
High,3046.0,13525.35,16534.89,211.731,764.4685,7528.68,18843.02,68789.62
Low,3046.0,12829.73,15649.43,171.51,739.0708,7224.057,17731.79,66382.06
Close,3046.0,13201.46,16119.62,178.103,752.7688,7364.94,18368.53,67566.83
Adj Close,3046.0,13201.46,16119.62,178.103,752.7688,7364.94,18368.53,67566.83
Volume,3046.0,16711920000.0,19878200000.0,6491650.0,126893200.0,9037149000.0,28218660000.0,350968000000.0
SMA_7,3046.0,13178.72,16099.29,204.5456,741.1929,7330.519,18193.92,65510.88
SMA_14,3046.0,13151.54,16076.68,222.8731,737.7346,7321.023,18053.89,63983.06
EMA_7,3046.0,13178.59,16087.19,215.8651,739.9243,7357.175,18082.45,64733.56
EMA_14,3046.0,13151.93,16051.5,225.2705,734.3472,7358.198,17885.75,63945.64


In [22]:
#將時間轉換為數值，以方便後續可以運算
timestamp_s = pd.to_datetime(df['Date']).map(datetime.datetime.timestamp)
df['Coin_timestamp'] = timestamp_s

In [23]:
df = df.drop(['Date'], axis=1)
df.head()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,SMA_7,SMA_14,EMA_7,EMA_14,RSI,Stoch_RSI,Upper_BB,Lower_BB,MACD,Coin_timestamp
0,353.214996,359.984009,352.678986,357.618011,357.618011,7845880.0,358.928859,373.1355,360.189252,365.64048,26.59727,0.0,407.000618,341.261085,-4.591695,1414426000.0
1,357.088989,357.833008,335.342987,335.591003,335.591003,18192700.0,352.133575,368.908214,354.038225,361.569705,22.686407,0.0,409.625952,335.692252,-6.585445,1414512000.0
2,335.709015,350.912994,335.071991,345.304993,345.304993,30177900.0,350.260433,366.247428,351.854527,359.371035,32.401375,0.170521,410.516219,333.176283,-7.297547,1414598000.0
3,345.009003,348.045013,337.141998,338.321014,338.321014,12545400.0,347.399863,363.001929,348.470696,356.530731,29.646025,0.122158,411.960112,329.33459,-8.329426,1414685000.0
4,338.649994,340.528992,321.054993,325.748993,325.748993,16677200.0,344.325291,358.309572,342.789699,352.383942,21.806794,0.0,413.709058,322.305642,-10.045852,1414771000.0


In [24]:
#二值化收盤價，昨天比今天高就是跌，就為False
# UpDown=[True]
# for i in range(1,len(df['Close'])):
#     if df['Close'][i]>df['Close'][i-1]:
#         UpDown.append(True)
#     else:
#         UpDown.append(False)
# df['UpDown'] = UpDown

In [25]:
#將資料集分為70%訓練、20%驗證、10%測試
n = len(df)
train_df = df[0:int(n*0.7)]
val_df = df[int(n*0.7):int(n*0.9)]
test_df = df[int(n*0.9):]

train_df.shape, val_df.shape, test_df.shape

((2132, 16), (609, 16), (305, 16))

In [26]:
#將值縮限在0~1之間，加速訓練時間
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaler.fit(train_df)

train_df[train_df.columns] = scaler.transform(train_df[train_df.columns])
val_df[val_df.columns] = scaler.transform(val_df[val_df.columns])
test_df[test_df.columns] = scaler.transform(test_df[test_df.columns])

In [27]:
train_df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Open,2132.0,0.21952,0.212924,0.0,0.01385,0.180106,0.401374,1.0
High,2132.0,0.217044,0.212814,0.0,0.012002,0.176346,0.398726,1.0
Low,2132.0,0.219254,0.21114,0.0,0.014215,0.183112,0.400347,1.0
Close,2132.0,0.219493,0.212802,0.0,0.013792,0.180044,0.400975,1.0
Adj Close,2132.0,0.219493,0.212802,0.0,0.013792,0.180044,0.400975,1.0
Volume,2132.0,0.106368,0.156193,0.0,0.000746,0.028229,0.174537,1.0
SMA_7,2132.0,0.235121,0.229267,0.0,0.013405,0.193733,0.432332,1.0
SMA_14,2132.0,0.243595,0.238488,0.0,0.01265,0.203215,0.446403,1.0
EMA_7,2132.0,0.234503,0.228941,0.0,0.012773,0.194779,0.429802,1.0
EMA_14,2132.0,0.252283,0.246344,0.0,0.012853,0.212647,0.458585,1.0


In [29]:
train_df.to_csv('data/train.csv')
val_df.to_csv('data/val.csv')
test_df.to_csv('data/test.csv')