In [1]:
import os
import json
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from statsmodels.tsa.arima.model import ARIMA

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# ============================================
# PATH CONFIG
# ============================================

# Folder that receives every artifact this script writes (models, CSV,
# JSON, figures). Created up front so later saves do not fail on a
# missing directory.
OUTPUT_DIR = r"C:\Users\NXTWAVE\Downloads\Pandemic Impact Analysis Model"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Input CSV; it sits in the same folder as the outputs.
DATA_PATH = r"C:\Users\NXTWAVE\Downloads\Pandemic Impact Analysis Model\IC_2.csv"

# ============================================
# LOAD & TRANSFORM DATA
# ============================================

df = pd.read_csv(DATA_PATH)

# Wide -> long: one row per (SN, State, District, Year) carrying that
# year's value.
year_columns = ["2017", "2018", "2019", "2020", "2021", "2022"]
df_melted = pd.melt(
    df,
    id_vars=["SN", "State", "District"],
    value_vars=year_columns,
    var_name="Year",
    value_name="Value",
)

# The year labels originate from string column headers; cast them to int so
# they can be compared numerically and used as a regression feature.
df_melted = df_melted.astype({"Year": int})

# Mean value over all rows, per year.
yearly = df_melted.groupby("Year", as_index=False)["Value"].mean()

# Chronological hold-out: years up to and including 2020 train the models,
# the later years are kept back for evaluation.
in_train_period = yearly["Year"] <= 2020
train = yearly[in_train_period]
test = yearly[~in_train_period]

# Feature frame (Year) and target series (Value) for each split.
X_train, y_train = train[["Year"]], train["Value"]
X_test, y_test = test[["Year"]], test["Value"]

# ============================================
# 1. LINEAR REGRESSION
# ============================================

# Straight-line trend of the yearly mean against the year, fitted on the
# training years only. fit() returns the estimator, so the call is chained.
reg = LinearRegression().fit(X_train, y_train)
reg_pred = reg.predict(X_test)

# Hold-out scores on the test years.
reg_mse = mean_squared_error(y_test, reg_pred)
reg_rmse = np.sqrt(reg_mse)
reg_r2 = r2_score(y_test, reg_pred)

# Persist the fitted regressor alongside the other outputs.
reg_model_path = os.path.join(OUTPUT_DIR, "pandemic_model.pkl")
joblib.dump(reg, reg_model_path)

# ============================================
# 2. ARIMA MODEL
# ============================================

# NOTE(review): the training series holds one point per training year, which
# is very short for an ARIMA(1,1,1); statsmodels warns when there are too few
# observations to estimate starting parameters. Treat these scores with care.
arima_order = (1, 1, 1)
arima_model = ARIMA(y_train, order=arima_order).fit()

# Multi-step forecast covering exactly the held-out years.
arima_pred = arima_model.forecast(steps=len(y_test))

# Hold-out scores on the test years.
arima_mse = mean_squared_error(y_test, arima_pred)
arima_rmse = np.sqrt(arima_mse)
arima_r2 = r2_score(y_test, arima_pred)

# ============================================
# 3. LSTM MODEL
# ============================================

# Fit the scaler on the training years only, then apply it to the whole
# series. Fitting on the full series would let the min/max of the held-out
# years leak into the inputs the network is trained on.
scaler = MinMaxScaler()
scaler.fit(train[["Value"]])
scaled = scaler.transform(yearly[["Value"]])

# One-step-ahead supervised pairs: the value of year t is the input and the
# value of year t+1 is the target.
X_lstm = scaled[:-1]
y_lstm = scaled[1:]

# Keras recurrent layers take (samples, timesteps, features).
X_lstm = X_lstm.reshape((X_lstm.shape[0], 1, 1))

# The trailing len(test) pairs are the ones whose targets are the test
# years; derive the split from the data rather than hard-coding its size.
n_train_pairs = len(X_lstm) - len(test)
if n_train_pairs < 1:
    raise ValueError("Not enough training years to build LSTM training pairs.")

model = Sequential([
    LSTM(50, activation='relu', input_shape=(1, 1)),
    Dense(1)
])

# NOTE(review): no random seed is set, so the fitted weights (and therefore
# the scores below) presumably vary from run to run.
model.compile(optimizer='adam', loss='mse')
model.fit(X_lstm[:n_train_pairs], y_lstm[:n_train_pairs], epochs=50, verbose=0)

# Each test prediction is made from the previous year's observed value, so
# this is a one-step-ahead evaluation. Results come back in scaled units and
# are mapped back to the original scale before scoring.
lstm_pred_scaled = model.predict(X_lstm[n_train_pairs:])
lstm_pred = scaler.inverse_transform(lstm_pred_scaled)

# Hold-out scores on the test years.
lstm_rmse = np.sqrt(mean_squared_error(y_test, lstm_pred))
lstm_r2 = r2_score(y_test, lstm_pred)

model.save(os.path.join(OUTPUT_DIR, "pandemic_model.keras"))

# ============================================
# SAVE RESULTS CSV
# ============================================

# One (model name, RMSE, R2) row per model, in the order the models were fit.
metric_rows = [
    ("Linear Regression", reg_rmse, reg_r2),
    ("ARIMA", arima_rmse, arima_r2),
    ("LSTM", lstm_rmse, lstm_r2),
]
results_df = pd.DataFrame(metric_rows, columns=["Model", "RMSE", "R2"])

# Written without the row index so the file has exactly the three columns.
results_csv_path = os.path.join(OUTPUT_DIR, "pandemic_results.csv")
results_df.to_csv(results_csv_path, index=False)

# ============================================
# SAVE PREDICTIONS JSON
# ============================================

# Plain Python lists so the json module can serialise them; the LSTM output
# is 2-D (one column), so it is flattened to a 1-D list first.
prediction_dict = {
    "Linear Regression": reg_pred.tolist(),
    "ARIMA": arima_pred.tolist(),
    "LSTM": lstm_pred.ravel().tolist(),
}

predictions_path = os.path.join(OUTPUT_DIR, "pandemic_predictions.json")
predictions_json = json.dumps(prediction_dict, indent=4)
with open(predictions_path, "w") as out_file:
    out_file.write(predictions_json)

# ============================================
# ACCURACY GRAPH
# ============================================

# Bar chart of the hold-out RMSE of each model.
fig, ax = plt.subplots()
ax.bar(results_df["Model"], results_df["RMSE"])
ax.set_title("Model RMSE Comparison")
fig.savefig(os.path.join(OUTPUT_DIR, "accuracy_graph.png"))
plt.close(fig)  # release the figure once it is on disk

# ============================================
# HEATMAP
# ============================================

# Year-by-year correlation across districts. The pivot is keyed by
# (State, District) rather than District alone: pivot_table averages rows
# that share an index key, so keying by name only would silently merge
# same-named districts from different states into a single row.
corr = df_melted.pivot_table(
    index=["State", "District"],
    columns="Year",
    values="Value"
).corr()

plt.figure()
sns.heatmap(corr, annot=True, cmap="coolwarm")
plt.title("Year Correlation Heatmap")
plt.savefig(os.path.join(OUTPUT_DIR, "heatmap.png"))
plt.close()  # release the figure once it is on disk

# ============================================
# MODEL COMPARISON GRAPH
# ============================================

# Line chart of the hold-out R2 of each model.
fig, ax = plt.subplots()
ax.plot(results_df["Model"], results_df["R2"], marker='o')
ax.set_title("Model R2 Comparison")
fig.savefig(os.path.join(OUTPUT_DIR, "model_comparison.png"))
plt.close(fig)  # release the figure once it is on disk

# ============================================
# PREDICTION GRAPH
# ============================================

# Actual test-year values overlaid with each model's predictions, drawn in
# a fixed order so line colours and legend entries stay stable.
test_years = test["Year"]
labelled_series = (
    (y_test, "Actual"),
    (reg_pred, "Regression"),
    (arima_pred, "ARIMA"),
    (lstm_pred, "LSTM"),
)

fig, ax = plt.subplots()
for series, label in labelled_series:
    ax.plot(test_years, series, label=label)
ax.legend()
ax.set_title("Prediction Comparison")
fig.savefig(os.path.join(OUTPUT_DIR, "prediction_graph.png"))
plt.close(fig)  # release the figure once it is on disk

# ============================================
# RESULT TREND GRAPH
# ============================================

# Yearly mean over the full period, with a vertical marker at 2020 (the
# last training year) to highlight the break point.
fig, ax = plt.subplots()
ax.plot(yearly["Year"], yearly["Value"], marker='o')
ax.axvline(x=2020)
ax.set_title("Pandemic Structural Break Trend")
fig.savefig(os.path.join(OUTPUT_DIR, "result_trend_graph.png"))
plt.close(fig)  # release the figure once it is on disk

print("‚úÖ All Results Generated Successfully!")





  warn('Too few observations to estimate starting parameters%s.'





‚úÖ All Results Generated Successfully!
