In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential, load_model

In [None]:
# Load the dataset and keep only the columns the model can consume.
#
# Order matters here: columns are filtered *before* rows with missing values
# are dropped, so a row is only discarded when one of the retained columns is
# NaN (not because of a gap in a text column that is thrown away anyway).
#
# Note: exclude=['object'] keeps every non-object dtype, i.e. integers *and*
# floats (plus any other non-object columns the file may contain), not just
# integer features.
data = (
    pd.read_csv('FINPROJ_rev.csv')
    .select_dtypes(exclude=['object'])
    .dropna()
)

In [None]:
# Fraction of the chronologically last samples held out as the test set.
TEST_FRACTION = 0.2

# Next-step forecasting pairs: row t is the input, row t + 1 is its target.
X = data.iloc[:-1, :].values
y = data.iloc[1:, :].values

# Chronological split point. An explicit index is used instead of
# [:-test_size] because [:-0] is an empty slice, which would silently produce
# an empty training set whenever test_size rounds down to zero.
n_samples = len(X)
test_size = max(1, int(n_samples * TEST_FRACTION))
split_index = n_samples - test_size
assert split_index > 0, 'Need at least two input rows to form a train/test split'

# Fit the scaler on the training inputs only, so the minima/maxima of the
# held-out rows do not leak into training. Rows outside the training range
# may therefore be scaled slightly outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(X[:split_index])
X_scaled = scaler.transform(X)
y_scaled = scaler.transform(y)

# LSTM layers expect [samples, time steps, features]; every sample here is a
# single time step. The targets are already [samples, features].
X_reshaped = X_scaled[:, np.newaxis, :]
y_reshaped = y_scaled

# Split the data into training and test sets (no shuffling: order is time).
X_train, X_test = X_reshaped[:split_index], X_reshaped[split_index:]
y_train, y_test = y_reshaped[:split_index], y_reshaped[split_index:]
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

In [None]:
# Network layout, in order:
#   1. LSTM(32) returning the full sequence,
#   2. three Dense layers (64 -> 128 -> 64),
#   3. LSTM(32) returning only its final output,
#   4. a Dense output layer with one unit per target column.
# NOTE(review): the intermediate Dense layers use the default (linear)
# activation, so stacked together they compose to a single linear map;
# consider adding non-linear activations if extra capacity is the goal.
model = Sequential([
    LSTM(32, return_sequences=True, input_shape=X_train.shape[1:]),
    Dense(64),
    Dense(128),
    Dense(64),
    LSTM(32),
    Dense(y_train.shape[-1]),
])

# Mean squared error on the scaled targets, optimised with Adam.
model.compile(loss='mse', optimizer='adam')

In [None]:
# Train for 10 passes over the training set, updating after every sample.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=1,
    epochs=10,
)

In [None]:
# Score the trained model on the held-out split; with a single compiled loss
# and no extra metrics, evaluate() returns one scalar (MSE on scaled values).
loss = model.evaluate(x=X_test, y=y_test)
print(f'Test Loss: {loss:.4f}')

In [None]:
# Predict on the held-out test window. Previously the predictions were made
# on X_train while being stored under "test" names, so the plots showed the
# fit to the training data rather than performance on unseen rows.
y_pred = model.predict(X_test)

# Undo the MinMax scaling so both series are plotted in the original units.
y_pred_actual = scaler.inverse_transform(y_pred)
y_test_actual = scaler.inverse_transform(y_test)

# One figure per feature column: true next-step values vs. model output.
for feature_index, feature_name in enumerate(data.columns):
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(y_test_actual[:, feature_index], label='Original Data')
    ax.plot(y_pred_actual[:, feature_index], label='Predicted Data')
    ax.set_xlabel('Time Step')
    ax.set_ylabel('Value')
    ax.set_title(f'Predicted vs Original Data for Feature {feature_name}')
    ax.legend()
    ax.grid(True)
    plt.show()
    # Release the figure so looping over many features does not pile up
    # open figures in memory.
    plt.close(fig)

In [None]:
# Round-trip the trained model through disk. Saving *before* loading keeps
# Restart & Run All working when tmp1.h5 does not exist yet, and prevents a
# stale checkpoint from an earlier session from replacing the model that was
# just trained above.
model.save('tmp1.h5')
model = load_model('tmp1.h5')

In [None]:
# Write the current `model` to tmp1.h5 in the working directory.
model.save(filepath='tmp1.h5')