In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

# === Load the spreadsheet and restrict it to the study window ===
data = pd.read_excel("C:\\Users\\A B Siddik\\Desktop\\ARIMA\\data.xlsx")
data['Date'] = pd.to_datetime(data['Date'], dayfirst=True)
in_study_window = data['Date'].between('2021-01-04', '2021-09-18')  # inclusive on both ends
data = data.loc[in_study_window].reset_index(drop=True)

# Keep only the forecast target together with its date.
target_col = 'New COVID-19 Cases'
target_series = data.loc[:, [target_col, 'Date']].copy()

# === Date-based train / test split (same cut-off dates as the ARIMA notebook) ===
is_train = target_series['Date'] <= '2021-08-10'
is_test = target_series['Date'].between('2021-08-11', '2021-09-18')
train_data = target_series.loc[is_train]
test_data = target_series.loc[is_test]

# === Min-max scaling: the range is learned from the training rows only,
#     then the same mapping is applied to the test rows ===
scaler = MinMaxScaler().fit(train_data[[target_col]])
train_scaled = scaler.transform(train_data[[target_col]])
test_scaled = scaler.transform(test_data[[target_col]])

# === Create sequences ===
def create_sequences(data, seq_len):
    """Slice a series into sliding look-back windows and their next-step targets.

    Window ``k`` holds ``data[k : k + seq_len]`` and its target is
    ``data[k + seq_len]``.

    Parameters
    ----------
    data : array-like, shape (n_steps, ...)
        Observations ordered along the first axis.
    seq_len : int
        Number of consecutive steps in each input window; must be >= 1.

    Returns
    -------
    X : np.ndarray, shape (n_windows, seq_len, ...)
        The input windows.
    y : np.ndarray, shape (n_windows, ...)
        The value that immediately follows each window.
        ``n_windows`` is ``max(len(data) - seq_len, 0)``.

    Raises
    ------
    ValueError
        If ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be a positive integer, got {seq_len}")

    values = np.asarray(data)
    n_windows = len(values) - seq_len

    if n_windows <= 0:
        # Not enough rows for a single window: return empty arrays that still
        # carry the window/feature dimensions, so downstream shape checks and
        # model calls see a consistent rank instead of a bare (0,) array.
        feature_shape = values.shape[1:]
        empty_X = np.empty((0, seq_len) + feature_shape, dtype=values.dtype)
        empty_y = np.empty((0,) + feature_shape, dtype=values.dtype)
        return empty_X, empty_y

    X = np.stack([values[start:start + seq_len] for start in range(n_windows)])
    # Copy so the targets never alias the caller's array.
    y = values[seq_len:].copy()
    return X, y

# Number of past days fed to the model for each one-step prediction.
seq_length = 10
x_train, y_train = create_sequences(train_scaled, seq_length)

# Prepend the last `seq_length` training observations as look-back context so
# that the FIRST test day already has a full input window. Building windows
# from `test_scaled` alone would drop the first `seq_length` test days from the
# evaluation, making the scored period shorter than the declared test split.
# Only the inputs reach back into the training period; every target in
# `y_test` is still a test-period observation.
test_with_context = np.vstack((train_scaled[-seq_length:], test_scaled))
x_test, y_test = create_sequences(test_with_context, seq_length)

# === Build and train LSTM model ===
# Fix the Python, NumPy and TensorFlow random seeds before any weights are
# created, so weight initialisation and batch shuffling are repeatable and the
# reported metrics do not change on every Restart & Run All.
SEED = 42
set_random_seed(SEED)

# Single LSTM layer reading (seq_length, 1) windows, followed by a linear
# one-unit head that outputs the next (scaled) value.
model = Sequential()
model.add(LSTM(64, input_shape=(seq_length, 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')

model.fit(x_train, y_train, epochs=200, batch_size=16, verbose=1)

# === One-step-ahead predictions for every test window ===
y_pred = model.predict(x_test)

# === Map predictions and targets back to the original scale ===
y_pred_rescaled, y_test_rescaled = (
    scaler.inverse_transform(scaled) for scaled in (y_pred, y_test)
)

# === Error metrics on the original scale ===
abs_pct_error = np.abs((y_test_rescaled - y_pred_rescaled) / y_test_rescaled)

mse = mean_squared_error(y_test_rescaled, y_pred_rescaled)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test_rescaled, y_pred_rescaled)
mape = np.mean(abs_pct_error) * 100
rrmse = rmse / np.ptp(y_test_rescaled)   # RMSE divided by the range (max - min) of the actuals
nrmse = rmse / np.mean(y_test_rescaled)  # RMSE divided by the mean of the actuals
r2 = r2_score(y_test_rescaled, y_pred_rescaled)

# === Report ===
print("\n📊 Final LSTM Forecast Evaluation:")
for report_line in (
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"Mean Absolute Error (MAE): {mae:.2f}",
    f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%",
    f"Root Mean Squared Error (RMSE): {rmse:.2f}",
    f"Relative RMSE (RRMSE): {rrmse:.4f}",
    f"Normalized RMSE (NRMSE): {nrmse:.4f}",
    f"R-squared (R²): {r2:.4f}",
):
    print(report_line)


In [None]:
# === Define forecast horizon (same as your ARIMA example) ===
forecast_horizon = pd.date_range(start='2021-09-12', end='2021-09-18')

# === Seed window: the `seq_length` observations immediately BEFORE the horizon ===
# Seeding with the last rows of the test set would feed the model the very
# values it is asked to forecast (the test set ends on the last horizon day)
# and would actually produce a forecast for the days AFTER the horizon.
# Assumes rows are in chronological order, as the rest of the notebook does.
history = target_series.loc[target_series['Date'] < forecast_horizon[0], [target_col]]
if len(history) < seq_length:
    raise ValueError(
        f"Need at least {seq_length} observations before {forecast_horizon[0].date()} "
        f"to seed the forecast, found {len(history)}"
    )
last_sequence = scaler.transform(history.tail(seq_length))  # shape (seq_length, 1)

forecast_scaled = []

# Recursive multi-step forecast: each prediction is appended to the window and
# becomes an input for the next step.
for _ in range(len(forecast_horizon)):
    # Reshape to (1, seq_len, 1) for LSTM input
    input_seq = last_sequence.reshape((1, seq_length, 1))

    # Predict next value
    next_scaled = model.predict(input_seq, verbose=0)[0][0]

    # Append prediction
    forecast_scaled.append([next_scaled])

    # Update sequence: drop the oldest step, append the prediction
    last_sequence = np.vstack((last_sequence[1:], [[next_scaled]]))

# Inverse transform forecasted values back to the original scale
forecast_scaled = np.array(forecast_scaled)
forecast_rescaled = scaler.inverse_transform(forecast_scaled)

# Convert to Pandas Series with date index
lstm_forecast_series = pd.Series(forecast_rescaled.flatten(), index=forecast_horizon)

print("\n📈 LSTM Rolling Forecast:")
print(lstm_forecast_series)


In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Define the dates
dates = pd.date_range(start='2021-09-12', end='2021-09-18')

# Define the vectors
# NOTE(review): both vectors are hard-coded literals, presumably pasted from the
# data file and from an earlier forecast run — verify they still match the
# current data and model output before relying on this figure.
new_cases_vector_actual_data = [19550, 19198, 16073, 15669, 19495, 18815, 17577]
forecast_vector_lstm_no_exo = [18444.546875, 19154.939453, 19460.316406, 19636.972656, 19800.009766, 19999.65625,  20213.628906]

# Fail fast if a pasted vector does not cover every date on the x-axis.
assert len(new_cases_vector_actual_data) == len(dates) == len(forecast_vector_lstm_no_exo)

# Plot the data
fig, ax = plt.subplots(figsize=(14, 8))
ax.plot(dates, new_cases_vector_actual_data, label='Actual New Cases', marker='o')
ax.plot(dates, forecast_vector_lstm_no_exo, label='LSTM prediction', marker='o')

# Add title and labels (both series are NEW daily cases, not active cases)
ax.set_title('New Cases: Actual vs. Forecasted')
ax.set_xlabel('Date')
ax.set_ylabel('Number of Cases')
ax.legend()
ax.grid(True)
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()

# Show plot
plt.show()