In [1]:
import pandas as pd
import numpy as np
from scipy.stats import linregress

In [33]:
# Load the raw AQI observations from disk and preview the first rows
df = pd.read_csv(filepath_or_buffer="F:/Air_purifier_PMF/Data/aqi.csv")
df.head(n=5)


Unnamed: 0,date,state,area,number_of_monitoring_stations,prominent_pollutants,aqi_value,air_quality_status,unit,note
0,2025-04-30,Maharashtra,Amravati,2,PM10,78,Satisfactory,number_of_monitoring_stations in Absolute Numb...,
1,2025-04-30,Bihar,Purnia,1,CO,56,Satisfactory,number_of_monitoring_stations in Absolute Numb...,
2,2025-04-30,Madhya Pradesh,Katni,1,O3,98,Satisfactory,number_of_monitoring_stations in Absolute Numb...,
3,2025-04-30,Chhattisgarh,Tumidih,1,PM10,103,Moderate,number_of_monitoring_stations in Absolute Numb...,
4,2025-04-30,Assam,Byrnihat,1,PM2.5,61,Satisfactory,number_of_monitoring_stations in Absolute Numb...,


In [35]:
# Parse the raw `date` strings; anything unparseable becomes NaT instead of raising
df['Date'] = pd.to_datetime(df['date'], errors='coerce')

# NaT rows yield NaN year/month values, and NaN cannot be cast to int below.
# Drop them explicitly (and say so) rather than crashing on the cast.
n_bad_dates = int(df['Date'].isna().sum())
if n_bad_dates:
    print(f"Dropping {n_bad_dates} row(s) with unparseable dates")
    df = df.dropna(subset=['Date']).copy()

# Extract year and month as plain integers
df['Year'] = df['Date'].dt.year.astype(int)
df['Month'] = df['Date'].dt.month.astype(int)

# Create a continuous month index for regression:
# 1-12 for the first year present in the data, 13-24 for the next, and so on
df['MonthNumber'] = (df['Year'] - df['Year'].min()) * 12 + df['Month']

In [40]:
# 2. Calculate Baseline AQI (2022)
# Mean AQI per area, restricted to observations dated in 2022
baseline = (
    df.loc[df['Year'].eq(2022)]
      .groupby('area')['aqi_value']
      .mean()
)

In [42]:
# 3. Calculate Latest AQI (last 12 months)
# Rolling window: the 12 calendar months ending at the most recent month in the
# data. Derived from MonthNumber so the window stays 12 months long whichever
# month the data ends in (a fixed `Month >= 5` cut is only right for data
# ending in April).
last_month_number = df['MonthNumber'].max()
latest_period = df[df['MonthNumber'] > last_month_number - 12]

# Mean AQI per area over that window
latest = latest_period.groupby('area')['aqi_value'].mean()

In [57]:
# 4. Detect Irreversible Degradation
#
# For every area that has both a baseline mean and a latest-period mean, compute:
#   - % change of the latest mean over the baseline mean
#   - slope / p-value of a linear fit of aqi_value against MonthNumber
#   - a "No Recovery" indicator (lowest post-baseline reading still above baseline)
# An area is flagged when the % change meets the threshold and the fitted
# trend is positive and significant.

threshold_pct = 15    # % increase over baseline
baseline_year = 2022  # must match the year used to build `baseline`
alpha = 0.05          # significance level for the trend p-value
results = []

# A single groupby pass replaces re-filtering the whole frame once per city;
# sort=False keeps areas in first-appearance order, like df['area'].unique().
for city, city_rows in df.groupby('area', sort=False):
    # Skip cities without full baseline
    if city not in baseline.index or city not in latest.index:
        continue

    city_data = city_rows.sort_values('MonthNumber')

    base_aqi = baseline[city]
    latest_aqi = latest[city]
    pct_change = ((latest_aqi - base_aqi) / base_aqi) * 100

    # Trend slope via regression. Missing AQI readings are dropped first,
    # otherwise they turn the slope and p-value into NaN.
    trend_data = city_data.dropna(subset=['aqi_value'])
    if trend_data['MonthNumber'].nunique() >= 2:
        fit = linregress(trend_data['MonthNumber'], trend_data['aqi_value'])
        slope, p_value = fit.slope, fit.pvalue
    else:
        # linregress raises when all x values are identical; report "no trend"
        slope, p_value = np.nan, np.nan

    # No recovery check: min AQI after baseline
    after_baseline = city_data.loc[city_data['Year'] > baseline_year, 'aqi_value'].min()
    no_recovery = (after_baseline - base_aqi) > 0

    # NOTE(review): `no_recovery` is reported below but is not part of this
    # flag -- confirm whether "irreversible" is meant to require it.
    irreversible = (pct_change >= threshold_pct) and (slope > 0) and (p_value < alpha)

    results.append({
        "State": city_data['state'].iloc[0],
        "City": city,
        "Baseline AQI (2022)": round(base_aqi, 2),
        "Latest AQI": round(latest_aqi, 2),
        "% Change": round(pct_change, 2),
        "Slope": round(slope, 3),
        "p-value": round(p_value, 4),
        "No Recovery": no_recovery,
        "Irreversible Degradation": irreversible
    })

In [58]:
# 5. Create Results Table

# Assemble the per-city records and pull out the flagged subset
final_df = pd.DataFrame(results)
priority_cities = final_df.loc[final_df["Irreversible Degradation"].eq(True)]

# Save outputs: the full table first, then the flagged cities only
for frame, out_name in (
    (final_df, "aqi_trend_analysis.csv"),
    (priority_cities, "priority_cities_irreversible_aqi.csv"),
):
    frame.to_csv(out_name, index=False)

# Short console summary of the flagged cities
summary_cols = ['City', 'Baseline AQI (2022)', 'Latest AQI', '% Change']
print("✅ Analysis complete!")
print("Total priority cities found:", len(priority_cities))
print(priority_cities[summary_cols])

✅ Analysis complete!
Total priority cities found: 29
                   City  Baseline AQI (2022)  Latest AQI  % Change
3                Imphal                86.10      109.51     27.19
20               Yadgir                56.67       69.82     23.20
30                 Pali               109.00      140.21     28.63
41               Aizawl                34.89       47.59     36.41
47           Davanagere                49.59       58.27     17.52
59             Guwahati                78.59      112.30     42.88
66               Howrah               100.65      121.50     20.71
70               Ratlam                88.17      111.60     26.57
76             Shillong                36.92       63.49     71.96
77               Haldia                69.12       97.66     41.29
78              Talcher                98.47      136.58     38.70
90    Rajamahendravaram                61.11       72.73     19.02
98            Gorakhpur                77.42      112.23     44.96
105      