In [9]:
import datetime

import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras import Model, Sequential

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import MeanAbsoluteError

from tensorflow.keras.layers import Dense, Conv1D, LSTM, Lambda, Reshape, RNN, LSTMCell

import warnings
warnings.filterwarnings('ignore')

In [10]:
plt.rcParams['figure.figsize'] = (10, 7.5)
plt.rcParams['axes.grid'] = False

In [11]:
print(tf.__version__)

2.11.0


In [12]:
tf.random.set_seed(42)
np.random.seed(42)

In [13]:
df = pd.read_csv('FinBERT/Data_final/BTC_final.csv')
df=df.drop('Unnamed: 0',axis=1)
df.head()

Unnamed: 0,CionScore,CoinDate,Open,High,Low,Close,Volume
0,-0.224409,2014-10-01,387.427002,391.378998,380.779999,383.61499,26229400.0
1,0.162439,2014-10-02,383.988007,385.497009,372.946014,375.071991,21777700.0
2,-0.136249,2014-10-03,375.181,377.695007,357.859009,359.511993,30901200.0
3,-0.134887,2014-10-04,359.891998,364.487,325.885986,328.865997,47236500.0
4,-0.134887,2014-10-05,328.915985,341.800995,289.29599,320.51001,83308096.0


In [14]:
df.shape

(3073, 7)

In [15]:
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
CionScore,3073.0,-0.1495545,0.2982597,-0.9745899,-0.3160612,-0.09489896,0.0,0.9281687
Open,3073.0,13082.73,16097.32,176.897,715.555,7296.165,17813.64,67549.73
High,3073.0,13409.83,16507.74,211.731,736.452,7447.272,18393.95,68789.62
Low,3073.0,12720.14,15623.94,171.51,705.26,7153.306,17382.55,66382.06
Close,3073.0,13088.7,16093.27,178.103,716.411,7302.089,17899.7,67566.83
Volume,3073.0,16565350000.0,19851800000.0,5914570.0,112354000.0,8660880000.0,28066360000.0,350968000000.0


In [16]:
#將時間轉換為數值，以方便後續可以運算
timestamp_s = pd.to_datetime(df['CoinDate']).map(datetime.datetime.timestamp)
df['Coin_timestamp'] = timestamp_s

In [4]:
df = df.drop(['CoinDate'], axis=1)
df.head()

Unnamed: 0,CionScore,Open,High,Low,Close,Volume,Coin_timestamp
0,-0.224409,387.427002,391.378998,380.779999,383.61499,26229400.0,1412093000.0
1,0.162439,383.988007,385.497009,372.946014,375.071991,21777700.0,1412179000.0
2,-0.136249,375.181,377.695007,357.859009,359.511993,30901200.0,1412266000.0
3,-0.134887,359.891998,364.487,325.885986,328.865997,47236500.0,1412352000.0
4,-0.134887,328.915985,341.800995,289.29599,320.51001,83308096.0,1412438000.0


In [18]:
df['CoinScore']=df['CionScore']
df

Unnamed: 0,CionScore,CoinDate,Open,High,Low,Close,Volume,Coin_timestamp,CoinScore
0,-0.224409,2014-10-01,387.427002,391.378998,380.779999,383.614990,2.622940e+07,1.412093e+09,-0.224409
1,0.162439,2014-10-02,383.988007,385.497009,372.946014,375.071991,2.177770e+07,1.412179e+09,0.162439
2,-0.136249,2014-10-03,375.181000,377.695007,357.859009,359.511993,3.090120e+07,1.412266e+09,-0.136249
3,-0.134887,2014-10-04,359.891998,364.487000,325.885986,328.865997,4.723650e+07,1.412352e+09,-0.134887
4,-0.134887,2014-10-05,328.915985,341.800995,289.295990,320.510010,8.330810e+07,1.412438e+09,-0.134887
...,...,...,...,...,...,...,...,...,...
3068,-0.530137,2023-02-24,23946.007810,24103.705080,23007.072270,23198.126950,2.681174e+10,1.677168e+09,-0.530137
3069,-0.524836,2023-02-25,23200.125000,23210.210940,22861.558590,23175.375000,1.610072e+10,1.677254e+09,-0.524836
3070,-0.524836,2023-02-26,23174.150390,23654.367190,23084.220700,23561.212890,1.664453e+10,1.677341e+09,-0.524836
3071,-0.122506,2023-02-27,23561.451170,23857.890630,23205.878910,23522.871090,2.266076e+10,1.677427e+09,-0.122506


In [17]:
df

Unnamed: 0,CionScore,CoinDate,Open,High,Low,Close,Volume,Coin_timestamp
0,-0.224409,2014-10-01,387.427002,391.378998,380.779999,383.614990,2.622940e+07,1.412093e+09
1,0.162439,2014-10-02,383.988007,385.497009,372.946014,375.071991,2.177770e+07,1.412179e+09
2,-0.136249,2014-10-03,375.181000,377.695007,357.859009,359.511993,3.090120e+07,1.412266e+09
3,-0.134887,2014-10-04,359.891998,364.487000,325.885986,328.865997,4.723650e+07,1.412352e+09
4,-0.134887,2014-10-05,328.915985,341.800995,289.295990,320.510010,8.330810e+07,1.412438e+09
...,...,...,...,...,...,...,...,...
3068,-0.530137,2023-02-24,23946.007810,24103.705080,23007.072270,23198.126950,2.681174e+10,1.677168e+09
3069,-0.524836,2023-02-25,23200.125000,23210.210940,22861.558590,23175.375000,1.610072e+10,1.677254e+09
3070,-0.524836,2023-02-26,23174.150390,23654.367190,23084.220700,23561.212890,1.664453e+10,1.677341e+09
3071,-0.122506,2023-02-27,23561.451170,23857.890630,23205.878910,23522.871090,2.266076e+10,1.677427e+09


In [20]:
# df=df.drop('CionScore',axis=1)
df[['CoinDate','CoinScore']][3063:3073]

Unnamed: 0,CoinDate,CoinScore
3063,2023-02-19,-0.874362
3064,2023-02-20,0.058467
3065,2023-02-21,-0.200266
3066,2023-02-22,-0.138639
3067,2023-02-23,-0.73681
3068,2023-02-24,-0.530137
3069,2023-02-25,-0.524836
3070,2023-02-26,-0.524836
3071,2023-02-27,-0.122506
3072,2023-02-28,-0.736306


In [120]:
#二值化收盤價，昨天比今天高就是跌，就為False
# UpDown=[True]
# for i in range(1,len(df['Close'])):
#     if df['Close'][i]>df['Close'][i-1]:
#         UpDown.append(True)
#     else:
#         UpDown.append(False)
# df['UpDown'] = UpDown

In [121]:
#將資料集分為70%訓練、20%驗證、10%測試
n = len(df)
train_df = df[0:int(n*0.7)]
val_df = df[int(n*0.7):int(n*0.9)]
test_df = df[int(n*0.9):]

train_df.shape, val_df.shape, test_df.shape

((2151, 7), (614, 7), (308, 7))

In [122]:
#將值縮限在0~1之間，加速訓練時間
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaler.fit(train_df)

train_df[train_df.columns] = scaler.transform(train_df[train_df.columns])
val_df[val_df.columns] = scaler.transform(val_df[val_df.columns])
test_df[test_df.columns] = scaler.transform(test_df[test_df.columns])

In [123]:
train_df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
CionScore,2151.0,0.453232,0.127425,0.0,0.377352,0.498296,0.512198,1.0
Open,2151.0,0.215505,0.212028,0.0,0.013279,0.177392,0.395715,1.0
High,2151.0,0.213083,0.211984,0.0,0.011472,0.173223,0.394385,1.0
Low,2151.0,0.21522,0.210212,0.0,0.01361,0.179136,0.394656,1.0
Close,2151.0,0.215487,0.211927,0.0,0.013218,0.177176,0.39572,1.0
Volume,2151.0,0.104343,0.155503,0.0,0.000735,0.025612,0.165412,1.0
Coin_timestamp,2151.0,0.5,0.288877,0.0,0.25,0.5,0.75,1.0


In [124]:
train_df.to_csv('data/train.csv')
val_df.to_csv('data/val.csv')
test_df.to_csv('data/test.csv')

In [125]:
train_df

Unnamed: 0,CionScore,Open,High,Low,Close,Volume,Coin_timestamp
0,0.394260,0.010909,0.009038,0.011130,0.010638,0.000274,0.000000
1,0.597569,0.010731,0.008742,0.010713,0.010195,0.000214,0.000465
2,0.440592,0.010274,0.008349,0.009911,0.009390,0.000337,0.000930
3,0.441308,0.009482,0.007685,0.008210,0.007804,0.000557,0.001395
4,0.441308,0.007877,0.006544,0.006264,0.007371,0.001044,0.001860
...,...,...,...,...,...,...,...
2146,0.512198,0.605723,0.589778,0.615111,0.606373,0.277508,0.998140
2147,0.602648,0.607224,0.611116,0.618808,0.625090,0.380599,0.998605
2148,0.420753,0.625683,0.609942,0.626670,0.611468,0.351140,0.999070
2149,0.355525,0.612158,0.594508,0.612459,0.599410,0.330366,0.999535
