In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Input CSV files, resolved relative to the current working directory.
# NOTE(review): 'ndia.csv' looks like it may be a truncated file name -- confirm.
file_path_1 = 'ndia.csv'
file_path_2 = 'Unemployment_Rate_upto_11_2020.csv'
data1, data2 = (pd.read_csv(csv_path) for csv_path in (file_path_1, file_path_2))

for _frame in (data1, data2):
    # errors='coerce' turns unparseable date strings into NaT instead of raising.
    # NOTE(review): the date format is inferred by pandas; if the files use
    # day-first dates this may need an explicit format -- confirm against the data.
    _frame['Date'] = pd.to_datetime(_frame['Date'], errors='coerce')
    # Remove, in place, every row that has a missing value in any column
    # (this includes rows whose date was coerced to NaT above).
    _frame.dropna(inplace=True)

# Overlay the unemployment rate of both datasets on one time axis.
plt.figure(figsize=(10, 6))
for _label, _frame in (('Dataset 1', data1), ('Dataset 2', data2)):
    # When several rows share a Date, seaborn's lineplot aggregates them
    # (mean by default) and shades a confidence band around the line.
    sns.lineplot(data=_frame, x='Date', y='Unemployment Rate', label=_label)
plt.gca().set(
    title='Unemployment Rate Over Time',
    xlabel='Date',
    ylabel='Unemployment Rate (%)',
)
plt.legend()
plt.grid()
plt.show()

# Mean unemployment rate per Region in data2, sorted ascending.
statewise_unemployment = (
    data2.groupby('Region')['Unemployment Rate']
    .mean()
    .sort_values()
)

# Horizontal bars: rate on the x axis, one bar per Region on the y axis.
# NOTE(review): seaborn 0.13+ emits a FutureWarning when `palette` is passed
# without `hue`; confirm the target seaborn version before changing this call.
plt.figure(figsize=(12, 8))
sns.barplot(
    x=statewise_unemployment.values,
    y=statewise_unemployment.index,
    palette='viridis',
)
plt.gca().set(
    title='Average Unemployment Rate by Region',
    xlabel='Unemployment Rate (%)',
    ylabel='Region',
)
plt.show()

# Pairwise correlation between the three numeric indicators of data2
# (DataFrame.corr uses the Pearson coefficient by default).
_corr_columns = [
    'Unemployment Rate',
    'Estimated Employed',
    'Estimated Labour Participation Rate',
]
correlation_matrix = data2[_corr_columns].corr()

plt.figure(figsize=(8, 6))
# annot/fmt print each coefficient in its cell with two decimals.
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f')
plt.gca().set_title('Correlation Matrix')
plt.show()

# Collapse every date to its calendar month and average the rate per month
# (across all rows of data2 that fall in that month).
_month_of_row = data2['Date'].dt.to_period('M')
monthly_avg_unemployment = data2.groupby(_month_of_row)['Unemployment Rate'].mean()

plt.figure(figsize=(10, 6))
# Series.plot draws onto the figure opened just above.
monthly_avg_unemployment.plot.line(marker='o', color='b')
plt.gca().set(
    title='Monthly Average Unemployment Rate',
    xlabel='Month',
    ylabel='Unemployment Rate (%)',
)
plt.grid()
plt.show()

# Summary statistics (count, mean, std, min, quartiles, max) for the two
# employment columns of data2, printed under a heading line.
_employment_columns = ['Estimated Employed', 'Estimated Labour Participation Rate']
employment_stats = data2[_employment_columns].describe()
print("Employment Statistics:", employment_stats, sep="\n")

# Mean unemployment rate per (Region, calendar month). Region is group level 0,
# so unstack(0) yields one column per Region with the months as the index.
region_trends = data2.groupby(['Region', data2['Date'].dt.to_period('M')])['Unemployment Rate'].mean().unstack(0)

# DataFrame.plot opens its own figure when no `ax` is passed, so no separate
# plt.figure() call is made here: it would only leave an empty extra figure
# behind that plt.show() then displays as a blank plot.
region_trends.plot(figsize=(14, 8))
plt.title('Unemployment Trends by Region Over Time')
plt.xlabel('Month')
plt.ylabel('Unemployment Rate (%)')
# Anchor the legend just outside the right edge of the axes so the many
# region entries do not cover the lines.
plt.legend(title='Region', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.grid()
plt.show()

# Add a calendar-year column to data2 (in place) and build a Year x Region
# table of mean unemployment rates: unstack() moves Region to the columns.
data2['Year'] = data2['Date'].dt.year
yearly_region_unemployment = (
    data2.groupby(['Year', 'Region'])['Unemployment Rate']
    .mean()
    .unstack()
)

plt.figure(figsize=(12, 8))
# Each cell is annotated with its value rounded to one decimal.
sns.heatmap(
    yearly_region_unemployment,
    cmap='coolwarm',
    annot=True,
    fmt='.1f',
    linewidths=0.5,
)
plt.gca().set(
    title='Yearly Average Unemployment Rate by Region',
    xlabel='Region',
    ylabel='Year',
)
plt.show()

# Spread of the unemployment rate within each Region, one box per Region.
boxplot_data = data2[['Region', 'Unemployment Rate']]

plt.figure(figsize=(14, 8))
# NOTE(review): seaborn 0.13+ emits a FutureWarning when `palette` is passed
# without `hue`; confirm the target seaborn version before changing this call.
sns.boxplot(
    data=boxplot_data,
    x='Region',
    y='Unemployment Rate',
    palette='Set2',
)
plt.gca().set(
    title='Distribution of Unemployment Rate by Region',
    xlabel='Region',
    ylabel='Unemployment Rate (%)',
)
# Tilt the region names so neighbouring tick labels do not overlap.
plt.xticks(rotation=45)
plt.show()
