In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from statsmodels.tsa.arima.model import ARIMA
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
import matplotlib.pyplot as plt

# --- Read the workbook and restrict it to the study window (both ends inclusive) ---
data = pd.read_excel("C:\\Users\\A B Siddik\\Desktop\\ARIMA\\data.xlsx")
data['Date'] = pd.to_datetime(data['Date'])
in_study_window = data['Date'].between('2021-01-04', '2021-09-18')
data = data.loc[in_study_window].reset_index(drop=True)

# --- Target column as a raw array, plus the matching date column ---
target_col = 'New COVID-19 Cases'
target_series = data[target_col].values
dates = data['Date']

# --- Chronological train/test split for ARIMA ---
train_end = '2021-08-10'
test_start = '2021-08-11'
test_end = '2021-09-18'
is_train = data['Date'] <= train_end
is_test = (data['Date'] >= test_start) & (data['Date'] <= test_end)
train_arima = data.loc[is_train, target_col]
test_arima = data.loc[is_test, target_col]

# --- ARIMA with a fixed (p, d, q) order, fitted on the training slice only ---
order = (6, 1, 6)
arima_model = ARIMA(train_arima, order=order).fit()
n_test_steps = len(test_arima)
arima_forecast = arima_model.forecast(steps=n_test_steps)
# Re-label the forecast with the test rows' index so the two series line up.
arima_forecast.index = test_arima.index

# --- In-sample residuals (actual - fitted) ---
# Position 0 is skipped on both sides: the prediction range starts at 1.
last_train_pos = len(train_arima) - 1
arima_pred_train = arima_model.predict(start=1, end=last_train_pos)
actual_train = train_arima.iloc[1:]
residuals = actual_train.values - arima_pred_train.values

# --- Min-max scale the residuals; the scaler is fitted on training residuals only ---
scaler = MinMaxScaler()
residuals_scaled = scaler.fit_transform(residuals[:, np.newaxis])

# === Create LSTM sequences ===
def create_sequences(data, seq_len):
    """Slice a series into overlapping windows and their next-step targets.

    Parameters
    ----------
    data : sequence or array
        Anything supporting ``len()``, integer indexing and slicing.
    seq_len : int
        Number of consecutive elements in each input window.

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        ``X[k]`` is ``data[k:k + seq_len]`` and ``y[k]`` is
        ``data[k + seq_len]``. Both arrays are empty when
        ``len(data) <= seq_len``.
    """
    # Every position that has a full window of seq_len elements before it.
    target_positions = range(seq_len, len(data))
    windows = [data[pos - seq_len:pos] for pos in target_positions]
    targets = [data[pos] for pos in target_positions]
    return np.array(windows), np.array(targets)

# --- Turn the scaled residuals into (window, next value) training pairs ---
seq_length = 15
x_lstm, y_lstm = create_sequences(residuals_scaled, seq_length)

# --- Two stacked bidirectional LSTM layers with dropout, one linear output unit ---
model = Sequential([
    Bidirectional(LSTM(64, return_sequences=True), input_shape=(seq_length, 1)),
    Dropout(0.2),
    Bidirectional(LSTM(32)),
    Dropout(0.2),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_lstm, y_lstm, batch_size=16, epochs=200, verbose=1)

# --- Recursive residual forecast over the test horizon ---
# Start from the last seq_length scaled training residuals; every prediction is
# appended to the history so it becomes part of the next input window.
resid_history = residuals_scaled[-seq_length:].flatten().tolist()
lstm_forecast_scaled = []

for _ in range(len(test_arima)):
    latest_window = resid_history[-seq_length:]
    input_seq = np.array(latest_window).reshape(1, seq_length, 1)
    pred = model.predict(input_seq, verbose=0)[0, 0]
    lstm_forecast_scaled.append(pred)
    resid_history.append(pred)

# Undo the min-max scaling to get residuals back in the units of the target.
scaled_column = np.array(lstm_forecast_scaled).reshape(-1, 1)
lstm_forecast_resid = scaler.inverse_transform(scaled_column).flatten()

# --- Hybrid forecast = ARIMA forecast + LSTM-predicted residual ---
actual = test_arima.values
hybrid_forecast = arima_forecast.values + lstm_forecast_resid

# --- Error metrics on the test window ---
mse = mean_squared_error(actual, hybrid_forecast)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual, hybrid_forecast)
mape = np.abs((actual - hybrid_forecast) / actual).mean() * 100
rrmse = rmse / (actual.max() - actual.min())  # RMSE divided by the range of the actuals
nrmse = rmse / actual.mean()                  # RMSE divided by the mean of the actuals
r2 = r2_score(actual, hybrid_forecast)

print("\n\U0001F4CA Final Hybrid ARIMA + LSTM Forecast Evaluation:")
print(f"ARIMA Order Used: {order}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Relative RMSE (RRMSE): {rrmse:.4f}")
print(f"Normalized RMSE (NRMSE): {nrmse:.4f}")
print(f"R-squared (R²): {r2:.4f}")


In [None]:
# === Step 7: Rolling Forecast with Hybrid Model (Sept 12–18) ===
# Recursive multi-step forecast. Each step refits ARIMA on the running history,
# predicts one step ahead, adds the LSTM-predicted residual, and appends the
# ARIMA value to the history so the next step can build on it.
forecast_start_date = '2021-09-12'
forecast_end_date = '2021-09-18'
forecast_dates = pd.date_range(start=forecast_start_date, end=forecast_end_date)

# Seed the history with every observation BEFORE the forecast window.
# Seeding from train_arima (which stops at train_end) would make the first
# one-step forecast land on the day after train_end, not on forecast_start_date,
# so the values would be attached to the wrong dates.
# NOTE(review): assumes `data` has one row per day up to the day before
# forecast_start_date — TODO confirm there are no gaps.
history_cases = data.loc[data['Date'] < forecast_dates[0], target_col].tolist()
if len(history_cases) <= seq_length:
    raise ValueError(
        f"Need more than {seq_length} observations before {forecast_start_date} "
        f"to seed the residual window; got {len(history_cases)}."
    )

rolling_forecast = []
resid_history = None  # filled on the first iteration from the first ARIMA fit

for _ in forecast_dates:
    # --- ARIMA Forecast ---
    temp_arima = ARIMA(history_cases, order=order)
    temp_fit = temp_arima.fit()

    if resid_history is None:
        # Build the LSTM input window from the most recent in-sample residuals
        # (actual - fitted, skipping position 0 as in the training residuals).
        # The scaler is only applied here, never refitted, so the residuals
        # stay on the scale the LSTM was trained on.
        fitted_values = np.asarray(temp_fit.predict(start=1, end=len(history_cases) - 1))
        recent_resid = (np.asarray(history_cases[1:], dtype=float) - fitted_values)[-seq_length:]
        resid_history = scaler.transform(recent_resid.reshape(-1, 1)).flatten().tolist()

    arima_next = np.asarray(temp_fit.forecast(steps=1))[0]
    history_cases.append(arima_next)

    # --- Residual Prediction with LSTM ---
    input_seq = np.array(resid_history[-seq_length:]).reshape(1, seq_length, 1)
    lstm_scaled_next = model.predict(input_seq, verbose=0)
    lstm_resid = scaler.inverse_transform(lstm_scaled_next)[0][0]
    resid_history.append(lstm_scaled_next[0][0])

    # --- Combine ARIMA + LSTM ---
    final_forecast = arima_next + lstm_resid
    rolling_forecast.append(final_forecast)

# === Step 8: Output Forecast DataFrame ===
# Forecasts are rounded to whole cases and clipped at zero before display.
hybrid_forecast_df = pd.DataFrame({
    'Date': forecast_dates,
    'Forecast': np.clip(np.round(rolling_forecast), a_min=0, a_max=None).astype(int)
}).set_index('Date')

# === Display Forecast ===
print("\n📅 Hybrid Forecast (2021-09-12 to 2021-09-18):")
print(hybrid_forecast_df)



In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Dates on the x-axis: the one-week forecast window.
# Note: this rebinds `dates`, which the first cell set to data['Date'].
dates = pd.date_range(start='2021-09-12', end='2021-09-18')

# Observed new cases and the hybrid forecast for the same seven days.
# NOTE(review): both vectors are hard-coded here rather than read from `data`
# or `hybrid_forecast_df`; presumably copied from an earlier run — confirm they
# still match the current model output.
new_cases_vector_actual_data = [19550, 19198, 16073, 15669, 19495, 18815, 17577]
forecast_vector_combined_no_exo = [17806, 18832, 19763, 20292, 20106, 19318, 18672]

# Plot both series on one set of axes.
fig, ax = plt.subplots(figsize=(14, 8))
ax.plot(dates, new_cases_vector_actual_data, label='Actual New Cases', marker='o')
ax.plot(dates, forecast_vector_combined_no_exo, label='Hybrid ARIMA-LSTM prediction', marker='o')

# Title and labels. The series are new cases (see the legend labels and vector
# names), so the title says "New Cases" rather than "Active Cases".
ax.set_title('New Cases: Actual vs. Forecasted')
ax.set_xlabel('Date')
ax.set_ylabel('Number of Cases')
ax.legend()
ax.grid(True)
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()

# Show plot
plt.show()