In [1]:
import datetime

import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras import Model, Sequential

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import MeanAbsoluteError

from tensorflow.keras.layers import Dense, Conv1D, LSTM, Lambda, Reshape, RNN, LSTMCell

import warnings
warnings.filterwarnings('ignore')

In [2]:
plt.rcParams['figure.figsize'] = (10, 7.5)
plt.rcParams['axes.grid'] = False

In [3]:
print(tf.__version__)

2.11.0


In [4]:
tf.random.set_seed(42)
np.random.seed(42)

In [9]:
df = pd.read_csv('../FinBERT/Data_final/BTC_final.csv')
df=df.drop('Unnamed: 0',axis=1)
df.head()

Unnamed: 0,Open,High,Low,Close,Volume,CionScore,CoinDate
0,387.427002,391.378998,380.779999,383.61499,26229400.0,-0.224409,2014-10-01
1,383.988007,385.497009,372.946014,375.071991,21777700.0,0.162439,2014-10-02
2,375.181,377.695007,357.859009,359.511993,30901200.0,-0.136249,2014-10-03
3,359.891998,364.487,325.885986,328.865997,47236500.0,-0.136249,2014-10-04
4,328.915985,341.800995,289.29599,320.51001,83308096.0,-0.136249,2014-10-05


In [16]:
df.tail()

Unnamed: 0,Open,High,Low,Close,Volume,CionScore,CoinDate,Coin_timestamp
3068,23946.00781,24103.70508,23007.07227,23198.12695,26811740000.0,-0.530137,2023-02-24,1677168000.0
3069,23200.125,23210.21094,22861.55859,23175.375,16100720000.0,-0.530137,2023-02-25,1677254000.0
3070,23174.15039,23654.36719,23084.2207,23561.21289,16644530000.0,-0.530137,2023-02-26,1677341000.0
3071,23561.45117,23857.89063,23205.87891,23522.87109,22660760000.0,-0.122506,2023-02-27,1677427000.0
3072,23512.17773,23521.54688,23400.39648,23433.81641,22190760000.0,-0.736306,2023-02-28,1677514000.0


In [11]:
df.shape

(3073, 7)

In [12]:
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Open,3073.0,13082.73,16097.32,176.897,715.555,7296.165,17813.64,67549.73
High,3073.0,13409.83,16507.74,211.731,736.452,7447.272,18393.95,68789.62
Low,3073.0,12720.14,15623.94,171.51,705.26,7153.306,17382.55,66382.06
Close,3073.0,13088.7,16093.27,178.103,716.411,7302.089,17899.7,67566.83
Volume,3073.0,16565350000.0,19851800000.0,5914570.0,112354000.0,8660880000.0,28066360000.0,350968000000.0
CionScore,3073.0,-0.1497819,0.2987241,-0.9745899,-0.3167566,-0.09489896,0.0,0.9281687


In [15]:
timestamp_s = pd.to_datetime(df['CoinDate']).map(datetime.datetime.timestamp)
df['Coin_timestamp'] = timestamp_s

In [29]:
df = df.drop(['CionScore','Coin_timestamp'], axis=1)

df.head()

Unnamed: 0,Open,High,Low,Close,Volume
0,387.427002,391.378998,380.779999,383.61499,26229400.0
1,383.988007,385.497009,372.946014,375.071991,21777700.0
2,375.181,377.695007,357.859009,359.511993,30901200.0
3,359.891998,364.487,325.885986,328.865997,47236500.0
4,328.915985,341.800995,289.29599,320.51001,83308096.0


In [30]:
n = len(df)
# Split 70:20:10 (train:validation:test)
train_df = df[0:int(n*0.7)]
val_df = df[int(n*0.7):int(n*0.9)]
test_df = df[int(n*0.9):]

train_df.shape, val_df.shape, test_df.shape

((2151, 5), (614, 5), (308, 5))

In [31]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaler.fit(train_df)

train_df[train_df.columns] = scaler.transform(train_df[train_df.columns])
val_df[val_df.columns] = scaler.transform(val_df[val_df.columns])
test_df[test_df.columns] = scaler.transform(test_df[test_df.columns])

In [32]:
test_df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Open,308.0,1.131966,0.260999,0.808616,0.982067,1.066137,1.199885,2.051501
High,308.0,1.117706,0.259351,0.807018,0.96459,1.055239,1.185224,2.015253
Low,308.0,1.136987,0.256418,0.820501,0.986888,1.071118,1.21233,2.061945
Close,308.0,1.128223,0.256376,0.807958,0.981101,1.06499,1.198531,2.049543
Volume,308.0,0.404271,0.176793,0.103962,0.30473,0.374984,0.46584,1.604649


In [34]:
train_df.to_csv('./NScore/data_s/train.csv')
val_df.to_csv('./NScore/data_s/val.csv')
test_df.to_csv('./NScore/data_s/test.csv')

In [26]:
train_df

Unnamed: 0,Open,High,Low,Close,Volume,CionScore,Coin_timestamp
0,0.010909,0.009038,0.011130,0.010638,0.000274,0.394260,0.000000
1,0.010731,0.008742,0.010713,0.010195,0.000214,0.597569,0.000465
2,0.010274,0.008349,0.009911,0.009390,0.000337,0.440592,0.000930
3,0.009482,0.007685,0.008210,0.007804,0.000557,0.440592,0.001395
4,0.007877,0.006544,0.006264,0.007371,0.001044,0.440592,0.001860
...,...,...,...,...,...,...,...
2146,0.605723,0.589778,0.615111,0.606373,0.277508,0.512198,0.998140
2147,0.607224,0.611116,0.618808,0.625090,0.380599,0.602648,0.998605
2148,0.625683,0.609942,0.626670,0.611468,0.351140,0.420753,0.999070
2149,0.612158,0.594508,0.612459,0.599410,0.330366,0.355525,0.999535
