# Historical AQI Data Analysis

This notebook demonstrates how to download, analyze, and visualize historical Air Quality Index (AQI) data for Indian cities.

In [None]:
from cpcbfetch import AQIClient
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide plot defaults: seaborn "whitegrid" theme and a 12x6 default figure size
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (12, 6)})

## 1. Downloading Historical Data

Let's download AQI data for a city (e.g., Delhi) for analysis.

In [None]:
# Initialize AQI client (AQIClient is imported from the cpcbfetch package).
# The commented-out download call below is invoked on this object.
client = AQIClient()

# Download city-level data for Delhi in 2024
# Note: Uncomment the following lines to actually download data
# Later cells reference `city`, `year`, and `output_file` (file loading, plot
# titles, JSON export), so these assignments must be enabled before any of the
# analysis cells are uncommented.

# `year` is passed as a string; the CSV name is derived from the lower-cased
# city and the year, e.g. "delhi_aqi_2024.csv".
# city = "Delhi"
# year = "2024"
# output_file = f"{city.lower()}_aqi_{year}.csv"

# client.download_past_year_AQI_data_cityLevel(city, year, output_file)
# print(f"Data downloaded to {output_file}")

## 2. Loading and Exploring the Data

Load the downloaded CSV file and explore its structure.

In [None]:
# Load the data
# Requires `output_file` from the download cell, so enable that cell first.
# df = pd.read_csv(output_file)

# NOTE: this notebook does not generate any sample data. Until the read_csv
# line above is uncommented, `df` does not exist and this cell prints only
# the header line below.

print("Dataset Information:")
# Once `df` is loaded, uncomment these to show its dimensions, column names,
# and per-column dtypes.
# print(f"Shape: {df.shape}")
# print(f"\nColumns: {df.columns.tolist()}")
# print(f"\nData types:\n{df.dtypes}")
# print(f"\nFirst few rows:")
# Keep df.head() as the last expression of the cell so the notebook renders
# the first rows as a table.
# df.head()

## 3. Data Preprocessing

Clean and prepare the data for analysis.

In [None]:
# Preprocessing template: every step below operates on `df`, so the data must
# be loaded before these lines are uncommented.
# NOTE(review): the column names 'Date' and 'AQI' are assumptions about the
# downloaded CSV layout — confirm against df.columns before enabling.

# Convert date columns to datetime
# df['Date'] = pd.to_datetime(df['Date'])

# Sort by date
# df = df.sort_values('Date')

# Handle missing values
# Reports the number of missing entries per column.
# print(f"\nMissing values:\n{df.isnull().sum()}")

# Fill or drop missing values as appropriate
# Rebinding `df` here discards the rows permanently for all later cells;
# re-run the load cell to get the unfiltered data back.
# df = df.dropna(subset=['AQI'])  # Drop rows where AQI is missing

## 4. Statistical Summary

Get basic statistics about the AQI data.

In [None]:
# Statistical summary
# describe() reports count, mean, std, min, quartiles, and max of the AQI column.
# print("AQI Statistics:")
# print(df['AQI'].describe())

# Number of rows falling into each AQI category.
# NOTE(review): assumes the CSV provides an 'AQI_Category' column — confirm
# against df.columns before enabling.
# print("\nAQI Category Distribution:")
# print(df['AQI_Category'].value_counts())

## 5. Time Series Visualization

Plot AQI trends over time.

In [None]:
# Plot AQI over time
# Prerequisites: `df` with 'Date' and 'AQI' columns, plus `city` and `year`
# from the download cell (both are interpolated into the title).
# The explicit figsize overrides the notebook-wide default for this figure only.
# plt.figure(figsize=(14, 6))
# plt.plot(df['Date'], df['AQI'], linewidth=1, alpha=0.7)
# plt.xlabel('Date')
# plt.ylabel('AQI')
# plt.title(f'Air Quality Index Trend for {city} ({year})')
# Rotate the date tick labels so they do not overlap.
# plt.xticks(rotation=45)
# plt.grid(True, alpha=0.3)
# plt.tight_layout()
# plt.show()

## 6. Monthly Analysis

Analyze AQI patterns by month.

In [None]:
# Extract month from date
# The .dt accessor requires 'Date' to be a datetime column, i.e. the
# pd.to_datetime conversion in the preprocessing section must have run.
# This adds a 'Month' column to `df` holding the month's name.
# df['Month'] = df['Date'].dt.month_name()

# Calculate monthly average AQI
# sort_values(ascending=False) orders the months from highest to lowest
# average AQI, so both the printout and the bar chart below are ranked by
# pollution level rather than shown in calendar order.
# monthly_avg = df.groupby('Month')['AQI'].mean().sort_values(ascending=False)

# print("Average AQI by Month:")
# print(monthly_avg)

# Visualize monthly averages
# `city` and `year` from the download cell are interpolated into the title.
# plt.figure(figsize=(12, 6))
# monthly_avg.plot(kind='bar', color='steelblue')
# plt.xlabel('Month')
# plt.ylabel('Average AQI')
# plt.title(f'Average AQI by Month - {city} ({year})')
# plt.xticks(rotation=45)
# plt.tight_layout()
# plt.show()

## 7. Pollutant Analysis

Analyze individual pollutant levels if available in the data.

In [None]:
# Pollutants to analyze
# Candidate column names; only those actually present in `df` are plotted.
# pollutants = ['PM2.5', 'PM10', 'NO2', 'SO2', 'CO', 'O3']

# # Check which pollutants are available
# available_pollutants = [p for p in pollutants if p in df.columns]

# If none of the candidate columns exist, the block below is skipped and the
# cell produces no output.
# One subplot row per available pollutant; figure height grows by 4 per row.
# if available_pollutants:
#     # Plot pollutant trends
#     fig, axes = plt.subplots(len(available_pollutants), 1, figsize=(14, 4*len(available_pollutants)))
#     
#     for idx, pollutant in enumerate(available_pollutants):
#         # plt.subplots returns a single Axes (not an array) when only one
#         # subplot is requested, hence the special case for one pollutant.
#         ax = axes[idx] if len(available_pollutants) > 1 else axes
#         ax.plot(df['Date'], df[pollutant], linewidth=1, alpha=0.7)
#         ax.set_xlabel('Date')
#         # NOTE(review): the label applies μg/m³ to every pollutant; verify the
#         # units in the source data (CO in particular is often reported in
#         # mg/m³) — TODO confirm.
#         ax.set_ylabel(f'{pollutant} (μg/m³)')
#         ax.set_title(f'{pollutant} Levels Over Time')
#         ax.grid(True, alpha=0.3)
#     
#     plt.tight_layout()
#     plt.show()

## 8. AQI Category Distribution

Visualize the distribution of AQI categories.

In [None]:
# Create a pie chart of AQI categories
# Slice sizes are the number of rows per category; labels are the category names.
# NOTE(review): assumes the CSV provides an 'AQI_Category' column — confirm
# against df.columns before enabling.
# category_counts = df['AQI_Category'].value_counts()

# plt.figure(figsize=(10, 8))
# autopct prints each slice's share as a percentage with one decimal place.
# plt.pie(category_counts.values, labels=category_counts.index, autopct='%1.1f%%', startangle=90)
# `city` and `year` from the download cell are interpolated into the title.
# plt.title(f'AQI Category Distribution - {city} ({year})')
# Equal axis scaling keeps the pie circular instead of elliptical.
# plt.axis('equal')
# plt.show()

## 9. Identifying Worst Days

Find the days with the worst air quality and, for comparison, the days with the best.

In [None]:
# Get top 10 worst AQI days
# nlargest picks the 10 rows with the highest AQI values; only the date, the
# AQI value, and its category are kept for display.
# worst_days = df.nlargest(10, 'AQI')[['Date', 'AQI', 'AQI_Category']]
# print("Top 10 Worst Air Quality Days:")
# print(worst_days)

# Get top 10 best AQI days
# nsmallest picks the 10 rows with the lowest AQI values.
# best_days = df.nsmallest(10, 'AQI')[['Date', 'AQI', 'AQI_Category']]
# print("\nTop 10 Best Air Quality Days:")
# print(best_days)

## 10. Export Analysis Results

Save the analysis results for future reference.

In [None]:
# Create summary statistics
# Requires `df` plus `city` and `year` from the download cell.
# NOTE(review): 'Total Days' is the row count of `df`; it equals the number of
# days only if the data holds exactly one row per day — confirm.
# NOTE(review): if the 'AQI' column loads with an integer dtype, .max()/.min()
# return numpy integers, which json.dump below cannot serialize (TypeError).
# Wrapping the values in float()/int() avoids this — confirm the dtype first.
# summary = {
#     'City': city,
#     'Year': year,
#     'Total Days': len(df),
#     'Average AQI': df['AQI'].mean(),
#     'Max AQI': df['AQI'].max(),
#     'Min AQI': df['AQI'].min(),
#     'Std Dev': df['AQI'].std()
# }

# print("\nAnalysis Summary:")
# for key, value in summary.items():
#     print(f"{key}: {value}")

# Save to JSON
# The output file is named after the lower-cased city and the year and is
# written to the current working directory.
# NOTE(review): when enabling this cell, consider moving `import json` to the
# imports cell at the top so all dependencies are declared in one place.
# import json
# with open(f'{city.lower()}_analysis_{year}.json', 'w') as f:
#     json.dump(summary, f, indent=2)
# print(f"\nAnalysis saved to {city.lower()}_analysis_{year}.json")

## Conclusion

This notebook demonstrated how to:
- Download historical AQI data
- Perform basic data analysis
- Visualize trends and patterns
- Identify critical pollution days
- Export analysis results

For more examples, check out the other notebooks in this series!