In [4]:
!pip install schedule


Defaulting to user installation because normal site-packages is not writeable
Collecting schedule
  Downloading schedule-1.2.2-py3-none-any.whl.metadata (3.8 kB)
Downloading schedule-1.2.2-py3-none-any.whl (12 kB)
Installing collected packages: schedule
Successfully installed schedule-1.2.2


In [1]:
# ==============================================================
# MULTI-CITY AIR QUALITY FORECASTING SYSTEM (GitHub Version)
# Delhi | Bangalore | Hualien
# Runs daily via GitHub Actions (Free Cloud Automation)
# ==============================================================

import os
import requests
import pandas as pd
import datetime
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# ==============================================================
# 1Ô∏è CONFIGURATION
# ==============================================================

CITIES = ["delhi", "bangalore", "hualien"]
TOKEN = "639fc5d65c4d0869e99011f0082b1322ed951f76"  # your AQICN token

BASE_PATH = os.path.dirname(os.path.abspath(__file__))
KAGGLE_PATH = os.path.join(BASE_PATH, "data_date.csv")
LIVE_PATH = os.path.join(BASE_PATH, "air_quality_live_multi.csv")
FORECAST_PATH = os.path.join(BASE_PATH, "kaggle_forecast.csv")

# ==============================================================
# 2Ô∏è LOAD KAGGLE DATA
# ==============================================================

def load_kaggle_data():
    try:
        df = pd.read_csv(KAGGLE_PATH)
        df.rename(columns={
            "Date": "timestamp",
            "Country": "country",
            "Status": "status",
            "AQI Value": "aqi"
        }, inplace=True)
        df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
        df = df.dropna(subset=["aqi"])
        df = df.sort_values("timestamp")
        print(f" Loaded Kaggle dataset with {len(df)} records.")
        return df
    except FileNotFoundError:
        print(f" Kaggle dataset not found at {KAGGLE_PATH}")
        return pd.DataFrame()

# ==============================================================
# 3Ô∏è FETCH LIVE AQI (for all cities)
# ==============================================================

def get_live_aqi(city):
    url = f"http://api.waqi.info/feed/{city}/?token={TOKEN}"
    r = requests.get(url)
    data = r.json()
    if data["status"] != "ok":
        print(f" API error for {city}: ", data)
        return None

    d = data["data"]
    iaqi = d["iaqi"]
    record = {
        "city": city.capitalize(),
        "timestamp": d["time"]["s"],
        "aqi": d["aqi"],
        "pm25": iaqi.get("pm25", {}).get("v", None),
        "pm10": iaqi.get("pm10", {}).get("v", None),
        "no2": iaqi.get("no2", {}).get("v", None),
        "so2": iaqi.get("so2", {}).get("v", None),
        "co": iaqi.get("co", {}).get("v", None),
        "o3": iaqi.get("o3", {}).get("v", None)
    }
    print(f" Live AQI fetched for {city.capitalize()}: AQI={record['aqi']}")
    return record

def append_live_data():
    all_records = []
    for city in CITIES:
        rec = get_live_aqi(city)
        if rec:
            all_records.append(rec)
    if not all_records:
        print(" No live data collected.")
        return

    df = pd.DataFrame(all_records)
    try:
        existing = pd.read_csv(LIVE_PATH)
        updated = pd.concat([existing, df], ignore_index=True)
    except FileNotFoundError:
        updated = df

    updated.drop_duplicates(subset=["timestamp", "city"], keep="last", inplace=True)
    updated.to_csv(LIVE_PATH, index=False)
    print(f" Live data updated for {len(CITIES)} cities.")
    print(f" Saved to: {LIVE_PATH}")

# ==============================================================
# 4Ô∏è‚É£ KAGGLE-BASED FORECAST (General Trend)
# ==============================================================

def train_kaggle_regression(df):
    df = df.dropna(subset=["aqi", "timestamp"])
    df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
    df = df.sort_values("timestamp")
    df["day_number"] = (df["timestamp"] - df["timestamp"].min()).dt.days
    if len(df) < 10:
        print(" Not enough Kaggle data to forecast.")
        return

    model = LinearRegression()
    model.fit(df[["day_number"]], df["aqi"])

    future_days = np.arange(df["day_number"].max() + 1, df["day_number"].max() + 8).reshape(-1, 1)
    future_preds = model.predict(future_days)
    future_dates = [df["timestamp"].max() + datetime.timedelta(days=i) for i in range(1, 8)]

    forecast_df = pd.DataFrame({
        "timestamp": future_dates,
        "Predicted_AQI": future_preds
    })
    forecast_df.to_csv(FORECAST_PATH, index=False)
    print("\nüìÖ Next 7-Day Forecast (from Kaggle data):")
    print(forecast_df.head())

# ==============================================================
# 5Ô∏è‚É£ PER-CITY MINI FORECASTS (based on live data)
# ==============================================================

def per_city_forecast():
    try:
        df = pd.read_csv(LIVE_PATH)
    except FileNotFoundError:
        print(f" Live data file not found at {LIVE_PATH}")
        return

    df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")

    print("\n Per-City 7-Day Forecasts (based on live AQI trends):\n")
    all_forecasts = []

    for city in CITIES:
        subset = df[df["city"].str.lower() == city].sort_values("timestamp")
        if len(subset) < 3:
            print(f" Not enough data for {city.capitalize()}. Need ‚â•3 records.")
            continue

        subset["day_number"] = (subset["timestamp"] - subset["timestamp"].min()).dt.days
        X = subset[["day_number"]]
        y = subset["aqi"]
        model = LinearRegression().fit(X, y)

        # Predict next 7 days
        future_days = np.arange(subset["day_number"].max() + 1, subset["day_number"].max() + 8).reshape(-1, 1)
        preds = model.predict(future_days)
        dates = [subset["timestamp"].max() + datetime.timedelta(days=i) for i in range(1, 8)]

        forecast_df = pd.DataFrame({"City": city.capitalize(),
                                    "Date": dates,
                                    "Predicted_AQI": preds})
        all_forecasts.append(forecast_df)

        print(f"\n {city.capitalize()} Forecast:")
        print(forecast_df.to_string(index=False, formatters={'Predicted_AQI': '{:.1f}'.format}))

    if all_forecasts:
        result = pd.concat(all_forecasts, ignore_index=True)
        result.to_csv(os.path.join(BASE_PATH, "per_city_forecast.csv"), index=False)
        print("\n All per-city forecasts saved successfully!")

# ==============================================================
# üöÄ MAIN EXECUTION
# ==============================================================

if __name__ == "__main__":
    kaggle_df = load_kaggle_data()
    append_live_data()
    train_kaggle_regression(kaggle_df)
    per_city_forecast()
    print("\n GitHub daily AQI update completed successfully.")


NameError: name '__file__' is not defined