In [None]:
# Required Libraries
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import STL
from scipy.stats import ttest_ind

# Step 1: Load and Clean Data
df = pd.read_csv("unemployment.csv")  # Make sure this file is in your working directory

# Clean column names: lower-case, trim, and replace spaces with underscores
df.columns = df.columns.str.lower().str.strip().str.replace(" ", "_", regex=False)

# Filter for India as a whole (ignoring stray whitespace around the label)
df["date"] = pd.to_datetime(df["date"])
india = df[df["region"].astype(str).str.strip() == "India"].copy()
if india.empty:
    # Fail here with a clear message instead of deep inside asfreq()/STL later.
    raise ValueError('No rows with region "India" found in unemployment.csv')

# asfreq("MS") reindexes onto month-start timestamps, so a row whose date is
# not the 1st of its month would be replaced by NaN. Snap every date to the
# start of its month first. Sorting (stably) before the snap means that, when
# several rows land in the same month, drop_duplicates keeps the earliest one
# and ties on the exact same date are still resolved by file order.
india = india.sort_values("date", kind="stable")
india["date"] = india["date"].dt.to_period("M").dt.to_timestamp()
india = india.drop_duplicates("date").set_index("date")
india = india.asfreq("MS")  # Ensure consistent monthly data

# Fill the gaps introduced by asfreq() with linear interpolation, restricted
# to numeric columns: text columns such as "region" cannot be interpolated,
# and recent pandas releases deprecate/reject interpolating object columns.
numeric_cols = india.select_dtypes(include="number").columns
india[numeric_cols] = india[numeric_cols].interpolate()

# Step 2: Line Plot of Unemployment Rate
# Create the 10x4-inch canvas up front and draw on its axes explicitly.
rate_fig, rate_ax = plt.subplots(figsize=(10, 4))
india["estimated_unemployment_rate_%"].plot(ax=rate_ax, color='orange')

# Title, axis labels and grid
rate_ax.set_title("Monthly Unemployment Rate in India")
rate_ax.set_xlabel("Date")
rate_ax.set_ylabel("Unemployment Rate (%)")
rate_ax.grid(True)

# Tidy the margins and render
rate_fig.tight_layout()
plt.show()

# Step 3: STL Decomposition (Trend, Seasonality, Residual)
# `series` and `stl` are reused by the later steps, so their names are kept.
series = india["estimated_unemployment_rate_%"]

# Configure the decomposition (12-month period, robust fitting), then fit it.
stl_model = STL(series, period=12, robust=True)
stl = stl_model.fit()

# plot() hands back the figure it drew on; title and lay out that figure.
decomposition_fig = stl.plot()
decomposition_fig.suptitle("STL Decomposition of Unemployment Rate", fontsize=14)
decomposition_fig.tight_layout()
plt.show()

# Step 4: Quantifying Covid-19 Impact
# Split the series into three windows. Slices are used for all three (rather
# than a bare "2020" label) so that a window with no data comes back empty
# instead of raising KeyError. Missing values are dropped so they cannot turn
# the t-test result into NaN.
pre_covid = series.loc["2018-01":"2019-12"].dropna()
covid_year = series.loc["2020-01":"2020-12"].dropna()
post_covid = series.loc["2021-01":"2024-12"].dropna()

# Year-wise average comparison
print("\n--- Average Unemployment Rate ---")
print(f"Pre-Covid (2018–2019): {pre_covid.mean():.2f}%")
print(f"Covid Year (2020): {covid_year.mean():.2f}%")
print(f"Post-Covid (2021–2024): {post_covid.mean():.2f}%")

# T-test between Pre-Covid and Covid Year
# equal_var=False selects Welch's t-test (no equal-variance assumption).
# The test is two-sided, so the sign of the t-statistic is what tells an
# increase (covid_year mean > pre_covid mean) apart from a decrease.
print("\n--- T-Test Results ---")
t_stat = p_val = float("nan")
if len(covid_year) < 2 or len(pre_covid) < 2:
    # With fewer than two points in a sample the statistic is undefined;
    # say so explicitly rather than reporting "no difference".
    print("➡ Not enough observations in one of the periods to run the t-test.")
else:
    t_stat, p_val = ttest_ind(covid_year, pre_covid, equal_var=False)
    print(f"T-statistic: {t_stat:.2f}")
    print(f"P-value: {p_val:.4f}")
    if p_val < 0.05 and t_stat > 0:
        print("➡ Significant increase in unemployment due to Covid-19.")
    elif p_val < 0.05:
        print("➡ Significant decrease in unemployment during the Covid-19 year.")
    else:
        print("➡ No statistically significant difference.")

# Step 5: Optional – Save Decomposition Components to CSV
# Each output column is named after the fitted-result attribute it comes from.
component_names = ("trend", "seasonal", "resid")
stl_df = pd.DataFrame(
    {name: getattr(stl, name) for name in component_names},
    index=series.index,
)
stl_df.to_csv("unemployment_decomposition.csv")

print("\n✅ Analysis Complete.")
