In [38]:
import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import time
from tqdm import tqdm
import warnings

In [39]:
df = pd.read_csv("./data/AAME.csv")
df 

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,1980-03-17,0.00,4.05,3.85,3.85,3.213167,15000
1,1980-03-18,0.00,4.00,3.80,3.80,3.171437,10200
2,1980-03-19,0.00,4.05,3.85,3.85,3.213167,33500
3,1980-03-20,0.00,4.00,3.80,3.80,3.171437,8700
4,1980-03-21,0.00,3.95,3.75,3.75,3.129709,12700
...,...,...,...,...,...,...,...
10093,2020-03-26,2.29,2.29,2.08,2.28,2.280000,1600
10094,2020-03-27,2.25,2.25,2.21,2.25,2.250000,500
10095,2020-03-30,2.25,2.25,2.13,2.13,2.130000,400
10096,2020-03-31,2.11,2.11,2.11,2.11,2.110000,300


In [40]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Open,10098.0,2.479037,1.792743,0.0,1.5,2.41,3.375,13.8
High,10098.0,3.638755,2.392151,0.4375,2.125,3.01,4.17,15.8
Low,10098.0,3.512713,2.349184,0.375,2.0,2.9375,4.05,15.4
Close,10098.0,3.576238,2.361792,0.4375,2.0625,3.0,4.1,15.8
Adj Close,10098.0,3.245732,2.028279,0.404299,1.91291,2.772334,3.843033,13.881437
Volume,10098.0,7977.757972,16239.129044,0.0,1000.0,3300.0,9000.0,581500.0


In [41]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10098 entries, 0 to 10097
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       10098 non-null  object 
 1   Open       10098 non-null  float64
 2   High       10098 non-null  float64
 3   Low        10098 non-null  float64
 4   Close      10098 non-null  float64
 5   Adj Close  10098 non-null  float64
 6   Volume     10098 non-null  int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 552.4+ KB


In [42]:
col = ['Open', 'High', 'Low', 'Close']
dfs = df[col]
dfs

Unnamed: 0,Open,High,Low,Close
0,0.00,4.05,3.85,3.85
1,0.00,4.00,3.80,3.80
2,0.00,4.05,3.85,3.85
3,0.00,4.00,3.80,3.80
4,0.00,3.95,3.75,3.75
...,...,...,...,...
10093,2.29,2.29,2.08,2.28
10094,2.25,2.25,2.21,2.25
10095,2.25,2.25,2.13,2.13
10096,2.11,2.11,2.11,2.11


In [43]:
dfs.isnull().sum()

Open     0
High     0
Low      0
Close    0
dtype: int64

In [44]:
from sklearn.preprocessing import MinMaxScaler

MMS = MinMaxScaler()
dfs_scaled = MMS.fit_transform(dfs)
dfs

Unnamed: 0,Open,High,Low,Close
0,0.00,4.05,3.85,3.85
1,0.00,4.00,3.80,3.80
2,0.00,4.05,3.85,3.85
3,0.00,4.00,3.80,3.80
4,0.00,3.95,3.75,3.75
...,...,...,...,...
10093,2.29,2.29,2.08,2.28
10094,2.25,2.25,2.21,2.25
10095,2.25,2.25,2.13,2.13
10096,2.11,2.11,2.11,2.11


In [45]:
# X = dfs[['Open', 'High', 'Low']]
# y = dfs['Close']

# split_point = int(0.8 * len(X))

# X_train = X.iloc[:split_point]
# X_test = X.iloc[split_point:]
# y_train = y.iloc[:split_point]
# y_test = y.iloc[split_point:] 

# print(X_train.shape, y_train.shape)
# print(X_test.shape, y_test.shape)

In [46]:
def split_sequences(sequences, n_steps):
	X, y = list(), list()
	for i in range(len(sequences)):
    # find the end of this pattern
		end_ix = i + n_steps
		# check if we are beyond the dataset
		if end_ix > len(sequences):
			break
		# gather input and output parts of the pattern
		seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1, -1]
		X.append(seq_x)
		y.append(seq_y)
	return np.array(X), np.array(y)

In [47]:
X_seq, y_seq = split_sequences(dfs.to_numpy(), 10)

print (X_train_seq.shape, y_train_seq.shape)

(10089, 10, 3) (10089,)


In [48]:
split_point = int(0.8 * len(dfs))

X_train = X_seq[:split_point]
X_test = X_seq[split_point:]
y_train = y_seq[:split_point]
y_test = y_seq[split_point:] 

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(8078, 10, 3) (8078,)
(2011, 10, 3) (2011,)


-------

In [49]:
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense

model = Sequential()
model.add(SimpleRNN(units=32, input_shape=(10, 3), activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1))

model.compile(loss='mse', optimizer=tf.optimizers.Adam(learning_rate=0.001), metrics=['mse'])
model.summary()

  super().__init__(**kwargs)


In [50]:
model.fit(X_train_seq, y_train_seq,
          epochs=50)

Epoch 1/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.0529 - mse: 0.0529
Epoch 2/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0134 - mse: 0.0134
Epoch 3/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0085 - mse: 0.0085
Epoch 4/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0080 - mse: 0.0080
Epoch 5/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0073 - mse: 0.0073
Epoch 6/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0070 - mse: 0.0070
Epoch 7/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0062 - mse: 0.0062
Epoch 8/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0067 - mse: 0.0067
Epoch 9/50
[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms

<keras.src.callbacks.history.History at 0x1779b0710>

In [52]:
print (model.evaluate(X_train_seq, y_train_seq))
# model.evaluate(X_test_seq, y_train_seq)

[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0055 - mse: 0.0055
[0.004958098754286766, 0.004958098754286766]
