# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import plotly.express as px

# Load the dataset; stop with download instructions when the CSV is missing.
print(" Loading the human stories behind the numbers...")
try:
    unemployment_data = pd.read_csv('Unemployment_in_India.csv')
except FileNotFoundError:
    print(" Oops! Couldn't find the dataset. Please download from:")
    print(" https://www.kaggle.com/datasets/gokulrajkmv/unemployment-in-india")
    # `exit()` is a convenience injected by the `site` module for interactive
    # shells; raising SystemExit works everywhere and reports a failure status.
    raise SystemExit(1)
else:
    print(" Successfully loaded employment stories of {} individuals".format(len(unemployment_data)))

print("\n Sanitizing the data...")
# NOTE(review): the referenced Kaggle CSV appears to pad its header names with
# spaces (e.g. " Date"). Stripping them lets the plain column lookups below
# succeed and is a no-op when the headers are already clean - confirm against
# the downloaded file.
unemployment_data.columns = unemployment_data.columns.str.strip()

# NOTE(review): dates in that file look day-first (DD-MM-YYYY); `dayfirst=True`
# makes the intent explicit instead of relying on inference - confirm.
unemployment_data['Date'] = pd.to_datetime(unemployment_data['Date'], dayfirst=True)

# Drop every row with at least one missing value and report how many went.
original_size = len(unemployment_data)
clean_data = unemployment_data.dropna().copy()
cleaned_count = original_size - len(clean_data)
# Guard the ratio so a header-only file reports 0% instead of dividing by zero.
cleaned_share = cleaned_count / original_size if original_size else 0.0
print(f" Cleaned {cleaned_count} incomplete records ({cleaned_share:.1%} of total)")

# Derived columns used by the grouping and plotting code further down.
clean_data['Year'] = clean_data['Date'].dt.year
clean_data['Month'] = clean_data['Date'].dt.month_name()
clean_data['Region'] = clean_data['Region'].str.title()  # Proper capitalization

#  Visualizing the Human Impact
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and later releases dropped the old names, so use whichever one is installed.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
plt.rcParams['font.family'] = 'DejaVu Sans'

print("\n Painting the picture of India's employment landscape...")

def _severity_color(rate):
    """Return the bar colour for an average rate: green < 10, orange < 15, else red."""
    if rate < 10:
        return '#2ecc71'
    if rate < 15:
        return '#f39c12'
    return '#e74c3c'


def _plot_yearly_trend(data):
    """Draw the per-year average unemployment rate as a line chart."""
    yearly_avg = data.groupby('Year')['Estimated Unemployment Rate (%)'].mean()

    fig, ax = plt.subplots(figsize=(14, 7))
    yearly_avg.plot(
        kind='line',
        marker='o',
        color='crimson',
        linewidth=2.5,
        ax=ax
    )

    # Highlight COVID period
    ax.axvspan(2020, 2021, color='salmon', alpha=0.3, label='COVID-19 Period')

    # Years are discrete, so pin the ticks to the years actually present
    # instead of letting matplotlib pick fractional positions.
    ax.set_xticks(list(yearly_avg.index))

    # Build the title from the data rather than hard-coding a year range that
    # may not match the file being analysed.
    first_year = int(yearly_avg.index.min())
    last_year = int(yearly_avg.index.max())
    year_span = str(first_year) if first_year == last_year else f"{first_year}-{last_year}"
    ax.set_title(f" India's Unemployment Journey: {year_span}", pad=20, fontsize=16)
    ax.set_xlabel('Year', labelpad=10)
    ax.set_ylabel('Unemployment Rate (%)', labelpad=10)
    ax.legend()
    plt.tight_layout()
    plt.show()


def _plot_regional_divide(data):
    """Draw a horizontal bar chart of each region's average unemployment rate."""
    regional_data = data.groupby('Region')['Estimated Unemployment Rate (%)'].mean().sort_values()
    colors = [_severity_color(rate) for rate in regional_data]

    plt.figure(figsize=(14, 8))
    bars = plt.barh(
        regional_data.index,
        regional_data.values,
        color=colors,
        alpha=0.7
    )

    # Add value labels just past the end of each bar
    for bar in bars:
        width = bar.get_width()
        plt.text(
            width + 0.5,
            bar.get_y() + bar.get_height() / 2,
            f'{width:.1f}%',
            va='center'
        )

    plt.title('The Regional Divide: Unemployment Across States', pad=20, fontsize=16)
    plt.xlabel('Average Unemployment Rate (%)', labelpad=10)
    plt.grid(axis='x', alpha=0.3)
    plt.tight_layout()
    plt.show()


def _plot_seasonal_cycle(data):
    """Draw one line per year of the monthly average unemployment rate."""
    months_order = ['January', 'February', 'March', 'April', 'May', 'June',
                    'July', 'August', 'September', 'October', 'November', 'December']

    seasonal_pattern = (
        data.groupby(['Month', 'Year'])['Estimated Unemployment Rate (%)']
        .mean()
        .reset_index()
    )

    # Grouping by month *name* orders the rows alphabetically (April, August,
    # ...). Plot against the calendar position instead, so every point lands
    # under the tick label that actually names its month.
    month_position = {name: position for position, name in enumerate(months_order)}
    seasonal_pattern['MonthPosition'] = seasonal_pattern['Month'].map(month_position)
    seasonal_pattern = seasonal_pattern.sort_values(['Year', 'MonthPosition'])

    plt.figure(figsize=(14, 7))
    sns.lineplot(
        data=seasonal_pattern,
        x='MonthPosition',
        y='Estimated Unemployment Rate (%)',
        hue='Year',
        palette='viridis',
        marker='o',
        markersize=8,
        linewidth=2.5
    )

    plt.title('The Seasonal Employment Cycle', pad=20, fontsize=16)
    # Positions 0..11 match the values stored in 'MonthPosition'.
    plt.xticks(range(12), months_order, rotation=45)
    plt.xlabel('')
    plt.ylabel('Unemployment Rate (%)', labelpad=10)
    plt.legend(title='Year', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.show()


def create_humanized_plots(data):
    """Visualize the human impact of unemployment.

    Shows three figures in turn: the yearly average rate, the average rate per
    region, and the month-by-month rate for each year.

    Args:
        data: DataFrame providing the columns 'Year', 'Month' (English month
            names), 'Region' and 'Estimated Unemployment Rate (%)'.
    """
    if data.empty:
        # Nothing to aggregate; the plotting calls below would fail on empty input.
        print(" No records available to plot.")
        return

    # Fig 1: The Big Picture
    _plot_yearly_trend(data)

    # Fig 2: Regional Impact
    _plot_regional_divide(data)

    # Fig 3: The Seasonal Struggle
    _plot_seasonal_cycle(data)

# Render the overview figures (yearly trend, regional averages, monthly
# pattern) from the cleaned dataset.
create_humanized_plots(clean_data)

# COVID-19 shock analysis: announce the section before the comparison runs.
print("\n Analyzing the pandemic's devastating impact on livelihoods...")

def covid_impact_analysis(data, pre_year=2019, covid_year=2020):
    """Show the before/after COVID comparison.

    Draws side-by-side box plots of the unemployment rate for the two years
    and prints the change in the average rate between them.

    Args:
        data: DataFrame providing the columns 'Year' and
            'Estimated Unemployment Rate (%)'.
        pre_year: Year used as the pre-pandemic baseline.
        covid_year: Year used as the pandemic period.
    """
    rate_col = 'Estimated Unemployment Rate (%)'
    pre_covid = data[data['Year'] == pre_year]
    covid_period = data[data['Year'] == covid_year]

    # Without rows for both years there is nothing to compare: the means
    # would be NaN and the box plots would have no data to draw.
    if pre_covid.empty or covid_period.empty:
        print(f" Not enough data to compare {pre_year} with {covid_year}; skipping COVID analysis.")
        return

    # A shared y-axis keeps the two boxes on the same scale, so their heights
    # can be compared directly.
    fig, axes = plt.subplots(1, 2, figsize=(18, 7), sharey=True)
    panels = (
        (axes[0], pre_covid, 'lightblue', f'The Calm Before the Storm ({pre_year})'),
        (axes[1], covid_period, 'lightcoral', f'The COVID Catastrophe ({covid_year})'),
    )
    for ax, subset, box_color, panel_title in panels:
        sns.boxplot(
            y=rate_col,
            data=subset,
            color=box_color,
            ax=ax
        )
        ax.set_title(panel_title, pad=15)

    plt.suptitle('Employment Before and After COVID-19 Pandemic', y=1.02, fontsize=16)
    plt.tight_layout()
    plt.show()

    # Calculate the human cost
    avg_unemployment_before = pre_covid[rate_col].mean()
    avg_unemployment_during = covid_period[rate_col].mean()
    increase = avg_unemployment_during - avg_unemployment_before

    if increase > 0:
        print(f"\n The human cost: Unemployment rose from {avg_unemployment_before:.1f}% to {avg_unemployment_during:.1f}%")
        print(f" That's a staggering {increase:.1f} percentage point increase during COVID-19")
    else:
        # Do not describe a flat or falling rate as an increase.
        print(f"\n Unemployment moved from {avg_unemployment_before:.1f}% to {avg_unemployment_during:.1f}%")
        print(f" That's a {abs(increase):.1f} percentage point decrease between {pre_year} and {covid_year}")

# Compare 2019 against 2020 using the cleaned dataset.
covid_impact_analysis(clean_data)

# Policy insights: announce the section before the summary is printed.
print("\n🔍 Discovering insights for a better tomorrow...")

def generate_insights(data, top_regions=3, peak_months=2):
    """Generate human-centered policy recommendations.

    Prints the regions with the highest average unemployment rate, the months
    with the highest average rate, and a fixed list of policy recommendations.

    Args:
        data: DataFrame providing the columns 'Region', 'Month' and
            'Estimated Unemployment Rate (%)'.
        top_regions: Maximum number of worst-affected regions to list.
        peak_months: Maximum number of peak-unemployment months to list.
    """
    rate_col = 'Estimated Unemployment Rate (%)'

    if data.empty:
        # No groups to rank; indexing into the rankings would raise IndexError.
        print("\n No records available to derive insights from.")
        return

    # 1. Identify worst-affected regions
    worst_regions = (
        data.groupby('Region')[rate_col]
        .mean()
        .sort_values(ascending=False)
        .head(top_regions)
    )

    # 2. Find seasonal patterns
    seasonal_peaks = (
        data.groupby('Month')[rate_col]
        .mean()
        .sort_values(ascending=False)
        .head(peak_months)
    )

    # Report each region with its own average so the top region's figure is
    # not attributed to all of them, and join however many months exist
    # instead of assuming there are always two.
    region_summary = ', '.join(
        f"{region} ({rate:.1f}%)" for region, rate in worst_regions.items()
    )
    month_summary = ' and '.join(str(month) for month in seasonal_peaks.index)

    print("\n Key Insights:")
    print(f"1. The most affected regions are {region_summary} on average")
    print(f"2. Employment dips sharply in {month_summary}")

    print("\n Policy Recommendations:")
    print("-  Targeted skill development programs in most affected regions")
    print("-  Seasonal employment guarantee schemes during peak unemployment months")
    print("-  SME support packages to boost local employment")
    print("-  Agricultural sector support during lean seasons")
    print("-  Urban employment guarantee for informal sector workers")

generate_insights(clean_data)

# Interactive Exploration
print("\n Launching interactive exploration...")
try:
    # One value per region and year: the raw rows would otherwise put several
    # values for the same region into each animation frame.
    map_data = clean_data.groupby(
        ['Year', 'Region'], as_index=False
    )['Estimated Unemployment Rate (%)'].mean()

    fig = px.choropleth(
        map_data,
        geojson="https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/india_states.geojson",
        locations='Region',
        featureidkey='properties.ST_NM',
        color='Estimated Unemployment Rate (%)',
        animation_frame='Year',
        color_continuous_scale='reds',
        range_color=(0, 30),
        title='The Changing Face of Unemployment Across India'
    )
    fig.update_geos(fitbounds="locations", visible=False)
    fig.show()
except Exception as exc:
    # The map is an optional extra (the GeoJSON is referenced by URL), so a
    # failure here is reported and the script carries on to its summary line.
    print(f" Interactive map unavailable ({exc}); skipping.")

print("\n Analysis complete! Each percentage point represents thousands of Indian lives.")
