<a href="https://colab.research.google.com/github/thomasino2000/CSIT883_first-project/blob/main/LSTM_new_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.layers import LSTM, Dense
from keras.models import Sequential
from keras.utils import set_random_seed
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler

# Read the combined price export into a DataFrame. The file name is relative
# to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer='XFJ_combined_output.csv')
# Leave the frame as the last expression so the notebook renders it.
data

In [None]:
# Load the raw price table (re-read here so this cell runs on its own).
data = pd.read_csv('XFJ_combined_output.csv')

# Lower-cased column names, used to match the wanted features case-insensitively.
actual_columns = [col.lower() for col in data.columns]

# Columns the model needs, in this exact order: create_sequences treats the
# LAST column as the prediction target, so 'last' must stay at the end.
required_features = ['open', 'high', 'low', 'last']

# Fail loudly instead of silently dropping a column: if e.g. 'last' were
# missing, the target would otherwise quietly become a different column.
missing_features = [f for f in required_features if f not in actual_columns]
if missing_features:
    raise KeyError(
        f"Required column(s) {missing_features} not found in CSV; "
        f"available columns: {list(data.columns)}"
    )

# Map each wanted feature back to the column name exactly as spelled in the file.
features = [data.columns[actual_columns.index(f)] for f in required_features]

# Fit the scaler on the leading (training) portion only, so the min/max of the
# held-out rows do not leak into preprocessing. Keep this fraction equal to
# `split_ratio` in the train/test split cell.
train_fraction = 0.8
n_fit_rows = int(len(data) * train_fraction)
scaler = MinMaxScaler()
scaler.fit(data[features].iloc[:n_fit_rows])

# Transform every row with the training-fitted scaler; held-out values may
# therefore fall slightly outside [0, 1].
data_scaled = scaler.transform(data[features])
data_scaled[:5]

In [None]:
# Prepare training and test datasets
def create_sequences(data, seq_length):
    """Slice a 2-D feature table into overlapping windows for a sequence model.

    For each start row ``i`` the input window is rows ``i .. i + seq_length - 1``
    of every column except the last, and the target is the last column of row
    ``i + seq_length``.

    Args:
        data: 2-D array-like of shape (n_rows, n_columns); the final column is
            used as the target.
        seq_length: number of consecutive rows per window; must be >= 1.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape
        ``(n_windows, seq_length, n_columns - 1)`` and ``y`` has shape
        ``(n_windows,)``, with ``n_windows = max(n_rows - seq_length, 0)``.
        ``X`` stays 3-D even when there are no windows.

    Raises:
        ValueError: if ``data`` is not 2-D or ``seq_length`` is smaller than 1.
    """
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError(f"data must be 2-D, got {data.ndim} dimension(s)")
    if seq_length < 1:
        raise ValueError(f"seq_length must be >= 1, got {seq_length}")

    n_windows = max(len(data) - seq_length, 0)
    # Row i of row_idx holds the row numbers i, i+1, ..., i+seq_length-1.
    row_idx = np.arange(n_windows)[:, None] + np.arange(seq_length)
    X = data[row_idx, :-1]                # every column except the target
    y = data[seq_length:, -1].copy()      # target one step after each window
    return X, y

sequence_length = 50  # Number of time steps fed to the model per sample
X, y = create_sequences(data_scaled, sequence_length)
# Report shapes rather than dumping the arrays themselves.
print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

In [None]:
# Chronological split: the first `split_ratio` of the windows is used for
# training and the remainder for testing (no shuffling).
split_ratio = 0.8
train_size = int(len(X) * split_ratio)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Catch a degenerate split early instead of failing later inside the model.
assert len(X_train) > 0 and len(X_test) > 0, "train/test split produced an empty set"
assert len(X_train) + len(X_test) == len(X)

# Report shapes rather than dumping the arrays themselves.
print(f"X_train shape: {X_train.shape}, X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}, y_test shape: {y_test.shape}")

In [None]:
# Seed Python, NumPy and the Keras backend so weight initialisation (and the
# subsequent training run) is repeatable across Restart & Run All.
set_random_seed(42)

# Two stacked LSTM layers followed by a small dense head with one output.
model = Sequential([
    # Input is (time steps, features per step), taken from the training windows.
    LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    LSTM(50, return_sequences=False),
    Dense(25, activation='relu'),
    Dense(1)  # Predicting 'last'
])

model.compile(optimizer='adam', loss='mean_squared_error')
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

In [None]:
# Train the model
epochs = 20
batch_size = 32
# NOTE: the test split is passed as validation data, so the reported val_loss
# is measured on the same samples that are evaluated later.
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=epochs, batch_size=batch_size, verbose=1)

# The History object's own repr is uninformative; show the per-epoch losses
# it recorded (last few epochs) as a table instead.
pd.DataFrame(history.history).tail()

In [None]:
# Convert scaled 'last' values back to prices and print them.
def _scaled_last_to_price(scaled_values):
    """Undo the scaling for values that belong to the final feature column.

    The scaler was fitted on all feature columns, so the values are placed in
    the last column of a zero-padded matrix, inverse-transformed, and only
    that last column is returned.
    """
    column = np.asarray(scaled_values).reshape(-1, 1)
    padding = np.zeros((column.shape[0], len(features) - 1))
    restored = scaler.inverse_transform(np.concatenate((padding, column), axis=1))
    return restored[:, -1]


predictions = _scaled_last_to_price(model.predict(X_test))
y_test_actual = _scaled_last_to_price(y_test)
print(predictions)
print(y_test_actual)

In [None]:
# Score the de-scaled predictions against the actual prices.
# The percentage error divides by the actual values, so MAPE is only
# meaningful when none of them is zero.
abs_pct_error = np.abs((y_test_actual - predictions) / y_test_actual)

mae = mean_absolute_error(y_test_actual, predictions)
mse = mean_squared_error(y_test_actual, predictions)
mape = np.mean(abs_pct_error) * 100
r2 = r2_score(y_test_actual, predictions)

for report_line in (
    f"MAE: {mae}",
    f"MSE: {mse}",
    f"MAPE: {mape}%",
    f"R2 Score: {r2}",
):
    print(report_line)

In [None]:
# Draw actual and predicted prices over the test period on one set of axes.
time_range = range(len(y_test_actual))
fig, ax = plt.subplots(figsize=(12, 6))
for series, colour, legend_label in (
    (y_test_actual, 'green', 'Actual Price'),
    (predictions, 'red', 'Predicted Price'),
):
    ax.plot(time_range, series, color=colour, label=legend_label)
ax.set_title('Stock Price Prediction')
ax.set_xlabel('Time')
ax.set_ylabel('Price')
ax.legend()
plt.show()