In [1]:
import datetime

import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras import Model, Sequential

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import MeanAbsoluteError

from tensorflow.keras.layers import Dense, Conv1D, LSTM, Lambda, Reshape, RNN, LSTMCell

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Notebook-wide plotting defaults: figure size and no grid on the axes.
plt.rcParams.update({
    'figure.figsize': (10, 7.5),
    'axes.grid': False,
})

In [3]:
# Print the version string of the imported TensorFlow package.
tf_version = tf.__version__
print(tf_version)

2.12.0


In [4]:
# Seed TensorFlow's and NumPy's global random generators with the same value.
SEED = 42
tf.random.set_seed(SEED)
np.random.seed(SEED)

In [7]:
# Load the BTC price/score table and discard its redundant 'index' column.
# Forward slash: portable across operating systems and avoids the invalid '\B'
# escape sequence that a backslash separator would create in a normal string.
df = pd.read_csv('data/BTC_final.csv').drop(columns='index')
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,ScorePositive,ScoreNegative,ScoreNeutral
0,2022/1/1,46311.74609,47827.3125,46288.48438,47686.8125,24582670000.0,0.035623,0.955734,0.008644
1,2022/1/2,47680.92578,47881.40625,46856.9375,47345.21875,27951570000.0,0.035266,0.946176,0.008557
2,2022/1/3,47343.54297,47510.72656,45835.96484,46458.11719,33071630000.0,0.163539,0.575673,0.260787
3,2022/1/4,46458.85156,47406.54688,45752.46484,45897.57422,42494680000.0,0.26076,0.27059,0.468649
4,2022/1/5,45899.35938,46929.04688,42798.22266,43569.00391,36851080000.0,0.321027,0.456918,0.222055


In [8]:
# (row count, column count) of the loaded frame.
len(df.index), len(df.columns)

(365, 9)

In [9]:
# Summary statistics of the numeric columns, one row per column.
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Open,365.0,28278.69,10223.22,15782.3,19550.47,23179.53,39148.45,47680.93
High,365.0,28817.81,10435.34,16253.05,20041.09,23563.83,39845.93,48086.84
Low,365.0,27617.1,9970.727,15599.05,19173.33,22722.27,38113.66,47100.44
Close,365.0,28197.75,10198.04,15787.28,19546.85,23164.63,39105.15,47686.81
Volume,365.0,30013260000.0,12606570000.0,9744636000.0,22417210000.0,28148220000.0,34421560000.0,118992000000.0
ScorePositive,365.0,0.1715962,0.1186073,0.01046412,0.07798219,0.1626459,0.2383426,0.9247442
ScoreNegative,365.0,0.4536759,0.1919956,0.009944398,0.3399361,0.4564207,0.5682628,0.9645384
ScoreNeutral,365.0,0.3737416,0.1700731,0.008557184,0.2736989,0.3621676,0.4463143,0.9228845


In [10]:
# Convert the dates to numeric values (seconds since the Unix epoch) so they can be
# used in later computations.
# The value is computed arithmetically from the parsed dates, i.e. midnight is read as
# UTC, so the result does not depend on the time zone of the machine running the kernel.
# Assumes 'Date' parses to timezone-naive values, as in the rows displayed above.
timestamp_s = (pd.to_datetime(df['Date']) - pd.Timestamp('1970-01-01')) / pd.Timedelta(seconds=1)
df['Coin_timestamp'] = timestamp_s

In [13]:
# Write the frame, now carrying the Coin_timestamp column, to disk.
# NOTE(review): 'Date' is still a string column at this point; it has to be excluded
# before the frame is handed to anything that requires purely numeric input.
df.to_csv('TimestampBTC.csv')

# Preview last so the cell actually renders it.
df.head()

In [120]:
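# Binarize the closing price: if yesterday's close is higher than today's, the price fell, so the label is False (experiment currently disabled)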
# UpDown=[True]
# for i in range(1,len(df['Close'])):
#     if df['Close'][i]>df['Close'][i-1]:
#         UpDown.append(True)
#     else:
#         UpDown.append(False)
# df['UpDown'] = UpDown

In [13]:
# Split the dataset chronologically into 70% training, 20% validation and 10% test.
# The raw 'Date' string column is left out of the splits: it is already encoded
# numerically as Coin_timestamp, and the MinMaxScaler fitted on these splits only
# accepts numeric columns. errors='ignore' keeps the cell valid if 'Date' is already gone.
features_df = df.drop(columns=['Date'], errors='ignore')

n = len(features_df)
train_end = int(n * 0.7)
val_end = int(n * 0.9)

# .copy() gives each split its own data, so later column assignments never write
# into a slice of the parent frame.
train_df = features_df.iloc[:train_end].copy()
val_df = features_df.iloc[train_end:val_end].copy()
test_df = features_df.iloc[val_end:].copy()

train_df.shape, val_df.shape, test_df.shape

((255, 9), (73, 9), (37, 9))

In [14]:
# Rescale the features to the 0-1 range to speed up training.
from sklearn.preprocessing import MinMaxScaler

# The per-column min/max are learned from the training split only; validation and
# test are transformed with those same statistics (so they may fall outside 0-1).
scaler = MinMaxScaler()
scaler.fit(train_df)

# Build fresh DataFrames rather than assigning into the existing splits: those are
# slices of df, and writing into them is chained assignment (SettingWithCopyWarning).
# Column labels and row index of each split are carried over unchanged.
train_df, val_df, test_df = (
    pd.DataFrame(scaler.transform(split), columns=split.columns, index=split.index)
    for split in (train_df, val_df, test_df)
)

In [15]:
# Summary statistics of the scaled training split, one row per column.
train_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Open,255.0,0.47587,0.324374,0.0,0.131029,0.561992,0.77359,1.0
High,255.0,0.482538,0.330615,0.0,0.130131,0.561767,0.790308,1.0
Low,255.0,0.477884,0.312671,0.0,0.14067,0.526522,0.757242,1.0
Close,255.0,0.472531,0.323701,0.0,0.130102,0.527627,0.772061,1.0
Volume,255.0,0.223899,0.14021,0.0,0.139343,0.20976,0.277317,1.0
ScorePositive,255.0,0.185835,0.139091,0.0,0.074279,0.177265,0.269369,1.0
ScoreNegative,255.0,0.460104,0.205251,0.0,0.342141,0.452662,0.583187,1.0
ScoreNeutral,255.0,0.39587,0.18551,0.0,0.296977,0.378257,0.473426,1.0
Coin_timestamp,255.0,0.5,0.290379,0.0,0.25,0.5,0.75,1.0


In [16]:
# Write each scaled split to its own CSV file under data/.
for split_df, csv_path in (
    (train_df, 'data/train.csv'),
    (val_df, 'data/val.csv'),
    (test_df, 'data/test.csv'),
):
    split_df.to_csv(csv_path)

In [17]:
# Display the scaled training split (long frames are truncated in the rendered output).
train_df

Unnamed: 0,Open,High,Low,Close,Volume,ScorePositive,ScoreNegative,ScoreNeutral,Coin_timestamp
0,0.952530,0.990962,0.972375,1.000000,0.153933,0.027517,0.990777,0.000095,0.000000
1,1.000000,0.992846,0.991715,0.988159,0.201746,0.027128,0.980765,0.000000,0.003937
2,0.988303,0.979937,0.956979,0.957410,0.274412,0.167427,0.592638,0.276612,0.007874
3,0.957630,0.976309,0.954138,0.937980,0.408148,0.273763,0.273044,0.504568,0.011811
4,0.938233,0.959680,0.853625,0.857264,0.328052,0.339681,0.468234,0.234136,0.015748
...,...,...,...,...,...,...,...,...,...
250,0.015680,0.001588,0.046547,0.017060,0.261963,0.298499,0.259918,0.493507,0.984252
251,0.017004,0.072006,0.054517,0.088165,0.492946,0.434904,0.260025,0.356627,0.988189
252,0.088035,0.083180,0.117723,0.098543,0.328941,0.430441,0.257320,0.352967,0.992126
253,0.098493,0.083538,0.125828,0.101618,0.294598,0.033924,0.146048,0.877993,0.996063
