In [2]:
import os
import json
import yaml
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import IsolationForest
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import tensorflow as tf

# ============================================
# PATHS
# ============================================

# The defaults below are the original author's locations. Either one can be
# overridden through an environment variable so the script can run on another
# machine without editing the source; an unset or empty variable falls back to
# the default.
DATA_PATH = os.environ.get("PANDEMIC_DATA_PATH") or (
    r"C:\Users\NXTWAVE\Downloads\Pandemic Impact Analysis Model\IC_2.csv"
)
OUTPUT_DIR = os.environ.get("PANDEMIC_OUTPUT_DIR") or (
    r"C:\Users\NXTWAVE\Downloads\Pandemic Impact Analysis Model"
)

# Every artifact produced below is written into OUTPUT_DIR, so make sure it
# exists before anything else runs.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ============================================
# LOAD DATA
# ============================================

# Read the wide table: identifier columns plus one column per year.
df = pd.read_csv(DATA_PATH)

id_columns = ["SN", "State", "District"]
year_columns = ["2017", "2018", "2019", "2020", "2021", "2022"]

# Reshape to long form: one row per (identifier columns, Year) with the
# measurement in "Value".
df_melted = pd.melt(
    df,
    id_vars=id_columns,
    value_vars=year_columns,
    var_name="Year",
    value_name="Value",
)

# The year labels come from column headers (strings); the comparisons further
# down need them as integers.
df_melted = df_melted.astype({"Year": int})

# ============================================
# PRE vs PANDEMIC ANALYSIS
# ============================================

# Split the long table at the pandemic boundary: years up to 2019 versus
# 2020 onward.
pre = df_melted[df_melted["Year"] <= 2019]
post = df_melted[df_melted["Year"] >= 2020]

# Mean value per district on each side of the boundary.
pre_avg = pre.groupby("District")["Value"].mean()
post_avg = post.groupby("District")["Value"].mean()

impact_df = pd.DataFrame({
    "Pre_Pandemic_Avg": pre_avg,
    "Pandemic_Avg": post_avg
})

# A relative change is undefined when the baseline is zero. Dividing by zero
# would put +/-inf into the CSV and the bar chart, so a zero baseline is
# treated as missing and the percentage comes out as NaN instead.
baseline = impact_df["Pre_Pandemic_Avg"].replace(0, np.nan)

impact_df["Percent_Change"] = (
    (impact_df["Pandemic_Avg"] - impact_df["Pre_Pandemic_Avg"]) / baseline
) * 100

impact_df.to_csv(os.path.join(OUTPUT_DIR, "pandemic_structural_change.csv"))

# ============================================
# STRUCTURAL BREAK REGRESSION
# ============================================

# Step indicator for the break: 0 before 2020, 1 from 2020 onward.
df_melted["Pandemic_Dummy"] = df_melted["Year"].ge(2020).astype(int)

# Regress the value on a linear time trend plus the pandemic step.
X = df_melted.loc[:, ["Year", "Pandemic_Dummy"]]
y = df_melted.loc[:, "Value"]

reg_model = LinearRegression().fit(X, y)

# In-sample goodness of fit: the score is computed on the same rows the
# model was fitted to.
fitted_values = reg_model.predict(X)
r2 = r2_score(y, fitted_values)

# Persist the fitted regression next to the other outputs.
regression_path = os.path.join(OUTPUT_DIR, "pandemic_model.pkl")
joblib.dump(reg_model, regression_path)

# ============================================
# ANOMALY DETECTION
# ============================================

# Score every observation on its value alone. contamination=0.1 sets the
# expected share of outliers; the fixed random_state keeps the forest
# reproducible between runs.
anomaly_features = df_melted[["Value"]]
iso = IsolationForest(contamination=0.1, random_state=42)

# fit_predict labels each row: -1 for an outlier, 1 for an inlier.
df_melted["Anomaly"] = iso.fit_predict(anomaly_features)

# ============================================
# LSTM MODEL
# ============================================

# Seed TensorFlow so weight initialisation and training are repeatable,
# in line with the fixed random_state used for the isolation forest.
tf.random.set_seed(42)

# Scale all values into [0, 1]; the same scaler is used later to map the
# predictions back to the original units.
scaler = MinMaxScaler()
values_scaled = scaler.fit_transform(df_melted[["Value"]])

# Build one-step-ahead pairs (value at year t -> value at year t+1) WITHIN
# each district. The melted frame is ordered year by year (all districts for
# the first year, then all districts for the next, ...), so pairing
# consecutive rows directly would pair one district with a different
# district of the same year rather than with its own following year.
ordered = df_melted.assign(_scaled=values_scaled.ravel()).sort_values(
    ["State", "District", "Year"], kind="stable"
)

X_lstm = []
y_lstm = []

for _, district_rows in ordered.groupby(
    ["State", "District"], sort=False, dropna=False
):
    series = district_rows["_scaled"].to_numpy()
    # A district with a single observation contributes no pairs.
    X_lstm.extend(series[:-1])
    y_lstm.extend(series[1:])

# Keras LSTM layers expect input shaped (samples, timesteps, features);
# here each sample is a single timestep with a single feature.
X_lstm = np.array(X_lstm, dtype=float).reshape((-1, 1, 1))
y_lstm = np.array(y_lstm, dtype=float).reshape((-1, 1))

model = Sequential([
    LSTM(50, activation='relu', input_shape=(1, 1)),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse')

model.fit(X_lstm, y_lstm, epochs=50, verbose=0)

# ============================================
# SAVE MODELS (MODERN WAY)
# ============================================

# 1) Whole model in the native single-file Keras format.
keras_path = os.path.join(OUTPUT_DIR, "pandemic_model.keras")
model.save(keras_path)

# 2) Architecture only, as the JSON string produced by Keras.
model_json = model.to_json()
json_path = os.path.join(OUTPUT_DIR, "pandemic_model.json")
with open(json_path, "w") as json_file:
    json_file.write(model_json)

# 3) Hand-written YAML summary of the network. This is a plain dictionary
#    describing the layers, not something serialized from the model object.
lstm_layer = {"type": "LSTM", "units": 50, "activation": "relu"}
dense_layer = {"type": "Dense", "units": 1}

yaml_data = {
    "model_type": "LSTM",
    "layers": [lstm_layer, dense_layer],
    "optimizer": "adam",
    "loss": "mse",
}

yaml_path = os.path.join(OUTPUT_DIR, "pandemic_model.yaml")
with open(yaml_path, "w") as f:
    yaml.dump(yaml_data, f)

# ============================================
# SAVE PREDICTIONS
# ============================================

# Predict on the training inputs and map the results from the scaled range
# back to the original units.
pred = model.predict(X_lstm)
pred_rescaled = scaler.inverse_transform(pred)

# Keep only the first ten predictions as a flat list of plain floats so the
# result is JSON-serializable.
sample_predictions = pred_rescaled[:10].ravel().tolist()

prediction_output = {
    "R2_score_regression": float(r2),
    "Sample_predictions": sample_predictions,
}

predictions_path = os.path.join(OUTPUT_DIR, "pandemic_predictions.json")
with open(predictions_path, "w") as f:
    json.dump(prediction_output, f, indent=4)

# ============================================
# VISUALIZATIONS
# ============================================

# Plot the mean value per year across all districts. Drawing a line through
# every row of the melted frame would join unrelated districts into one
# zig-zag, which hides the trend the figure is meant to show.
yearly_mean = df_melted.groupby("Year")["Value"].mean()

plt.figure()
plt.plot(yearly_mean.index, yearly_mean.values, marker='o')
# Vertical marker at the first pandemic year.
plt.axvline(x=2020)
# Show one tick per year instead of fractional positions.
plt.xticks(yearly_mean.index)
plt.xlabel("Year")
plt.ylabel("Mean value across districts")
plt.title("Trend with Pandemic Structural Break")
plt.savefig(os.path.join(OUTPUT_DIR, "trend_analysis.png"))
plt.close()

# One bar per district showing the percentage change between the
# pre-pandemic and pandemic averages.
district_labels = impact_df.index
percent_change = impact_df["Percent_Change"]
district_plot_path = os.path.join(OUTPUT_DIR, "district_impact.png")

plt.figure()
plt.bar(district_labels, percent_change)
# District names are rotated to vertical so they do not overlap.
plt.xticks(rotation=90)
plt.title("District-wise Pandemic Impact (%)")
# Recompute margins so the rotated labels are not clipped in the saved file.
plt.tight_layout()
plt.savefig(district_plot_path)
plt.close()

print("✅ Model Saved Successfully (No YAML Error, Modern Format Used)")


✅ Model Saved Successfully (No YAML Error, Modern Format Used)
