In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Air Quality Analysis with UCI Dataset\n",
    "\n",
    "This notebook demonstrates how to:\n",
    "- Load and clean the UCI Air Quality dataset\n",
    "- Perform regression to predict NO2 levels\n",
    "- Classify air quality into risk categories\n",
    "- Visualize the data and results\n",
    "- Train deep learning models (when TensorFlow is available)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 1: Set up your notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor\n",
    "from sklearn.metrics import mean_squared_error, r2_score, classification_report, confusion_matrix\n",
    "from sklearn.preprocessing import StandardScaler, LabelEncoder\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# Optional dependency: TensorFlow/Keras is only needed by the deep learning cells.\n",
    "# TENSORFLOW_AVAILABLE is the flag those cells check before building a model.\n",
    "try:\n",
    "    from tensorflow.keras.models import Sequential\n",
    "    from tensorflow.keras.layers import Dense\n",
    "    TENSORFLOW_AVAILABLE = True\n",
    "    print(\"✅ TensorFlow/Keras available for deep learning models\")\n",
    "except ImportError:\n",
    "    TENSORFLOW_AVAILABLE = False\n",
    "    print(\"⚠️ TensorFlow not available. Deep learning models will be skipped.\")\n",
    "    # The supported Python range depends on the TensorFlow release, so no fixed\n",
    "    # version bound is quoted here.\n",
    "    print(\"   Install with: pip install tensorflow (use a Python version supported by your TensorFlow release)\")\n",
    "\n",
    "# Set style for better plots\n",
    "# 'seaborn-v0_8' only exists in newer matplotlib releases; older ones ship the same\n",
    "# style under the name 'seaborn', so pick whichever is registered.\n",
    "plot_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'\n",
    "plt.style.use(plot_style)\n",
    "sns.set_palette(\"husl\")\n",
    "\n",
    "print(\"✅ All libraries imported successfully!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 2: Load the dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the dataset (semicolon-separated fields, comma as the decimal mark)\n",
    "df = pd.read_csv('air+quality/AirQualityUCI.csv', sep=';', decimal=',', engine='python')\n",
    "\n",
    "# Drop the empty placeholder columns that pandas names 'Unnamed: N'.\n",
    "# Selecting them by prefix avoids a KeyError when the file carries a different\n",
    "# number of trailing separators than exactly 'Unnamed: 15' and 'Unnamed: 16'.\n",
    "unnamed_cols = [col for col in df.columns if str(col).startswith('Unnamed')]\n",
    "df = df.drop(columns=unnamed_cols)\n",
    "\n",
    "print(f\"Dataset shape: {df.shape}\")\n",
    "print(f\"Columns: {list(df.columns)}\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 3: Clean the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# -200 marks a missing reading: turn it into NaN, then discard incomplete rows.\n",
    "# NOTE(review): dropna() removes a row if ANY column is NaN; if one sensor column is\n",
    "# sparsely populated this discards most of the data - consider dropna(subset=[...]).\n",
    "df = df.replace(-200, np.nan)\n",
    "print(f\"Rows before dropping NaN: {len(df)}\")\n",
    "\n",
    "df = df.dropna()\n",
    "print(f\"Rows after dropping NaN: {len(df)}\")\n",
    "\n",
    "# Build a single datetime index out of the separate Date and Time columns.\n",
    "# The Time strings use periods as separators, so swap them for colons before parsing.\n",
    "time_text = df['Time'].str.replace('.', ':', regex=False)\n",
    "timestamps = pd.to_datetime(df['Date'] + ' ' + time_text, dayfirst=True)\n",
    "df = (\n",
    "    df.drop(columns=['Date', 'Time'])\n",
    "      .assign(Datetime=timestamps)\n",
    "      .set_index('Datetime')\n",
    ")\n",
    "\n",
    "print(\"\\n✅ Data cleaning completed!\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 4: Data Exploration and Visualization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Basic statistics\n",
    "print(\"Dataset Info:\")\n",
    "# DataFrame.info() writes its report to stdout itself and returns None, so it is\n",
    "# called directly; wrapping it in print() would append a stray 'None' line.\n",
    "df.info()\n",
    "\n",
    "print(\"\\nBasic Statistics:\")\n",
    "print(df.describe())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot time series of key pollutants\n",
    "fig, axes = plt.subplots(2, 2, figsize=(15, 10))\n",
    "fig.suptitle('Air Quality Time Series', fontsize=16)\n",
    "\n",
    "# One entry per panel: (column, panel title, y-axis label, extra line styling).\n",
    "# The NO2 panel passes no colour, so it keeps the default from the active palette.\n",
    "panel_specs = [\n",
    "    ('NO2(GT)', 'NO2 Levels Over Time', 'NO2 (µg/m³)', {}),\n",
    "    ('T', 'Temperature Over Time', 'Temperature (°C)', {'color': 'orange'}),\n",
    "    ('RH', 'Relative Humidity Over Time', 'Relative Humidity (%)', {'color': 'green'}),\n",
    "    ('AH', 'Absolute Humidity Over Time', 'Absolute Humidity (g/m³)', {'color': 'purple'}),\n",
    "]\n",
    "\n",
    "# axes.flat walks the grid row by row: top-left, top-right, bottom-left, bottom-right\n",
    "for ax, (column, panel_title, y_label, line_style) in zip(axes.flat, panel_specs):\n",
    "    ax.plot(df.index, df[column], alpha=0.7, **line_style)\n",
    "    ax.set_title(panel_title)\n",
    "    ax.set_ylabel(y_label)\n",
    "    ax.tick_params(axis='x', rotation=45)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Correlation heatmap\n",
    "plt.figure(figsize=(12, 8))\n",
    "# Correlate numeric columns only: once the text column 'AQI_Label' has been added\n",
    "# further down, re-running this cell on the whole frame fails on recent pandas.\n",
    "correlation_matrix = df.select_dtypes(include='number').corr()\n",
    "sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0, fmt='.2f')\n",
    "plt.title('Correlation Matrix of Air Quality Variables')\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 5: Choose target + features and run regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Regression setup: predict NO2(GT) (Nitrogen Dioxide in microg/m³) from weather readings\n",
    "target = 'NO2(GT)'\n",
    "features = ['T', 'RH', 'AH']  # temperature, relative humidity, absolute humidity\n",
    "\n",
    "X, y = df[features], df[target]\n",
    "\n",
    "# Echo the chosen columns and the resulting array shapes\n",
    "for caption, value in (('Features', features), ('Target', target),\n",
    "                       ('X shape', X.shape), ('y shape', y.shape)):\n",
    "    print(f\"{caption}: {value}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
    "\n",
    "# Fit a linear model on the training rows and predict the held-out rows\n",
    "lr_model = LinearRegression().fit(X_train, y_train)\n",
    "y_pred_lr = lr_model.predict(X_test)\n",
    "\n",
    "lr_rmse = np.sqrt(mean_squared_error(y_test, y_pred_lr))\n",
    "lr_r2 = r2_score(y_test, y_pred_lr)\n",
    "\n",
    "print(\"=== Linear Regression Results ===\")\n",
    "print(f\"RMSE: {lr_rmse:.2f}\")\n",
    "print(f\"R² Score: {lr_r2:.3f}\")\n",
    "print(f\"\\nFeature coefficients:\")\n",
    "# coef_ is ordered like the feature columns, so pair them up by position\n",
    "for column_name, weight in zip(features, lr_model.coef_):\n",
    "    print(f\"  {column_name}: {weight:.3f}\")\n",
    "print(f\"Intercept: {lr_model.intercept_:.3f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Random Forest regressor on the same split, to compare against the linear model\n",
    "rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)\n",
    "y_pred_rf = rf_model.predict(X_test)\n",
    "\n",
    "rf_rmse = np.sqrt(mean_squared_error(y_test, y_pred_rf))\n",
    "rf_r2 = r2_score(y_test, y_pred_rf)\n",
    "print(\"=== Random Forest Regression Results ===\")\n",
    "print(f\"RMSE: {rf_rmse:.2f}\")\n",
    "print(f\"R² Score: {rf_r2:.3f}\")\n",
    "\n",
    "# Rank the inputs by the forest's feature_importances_, largest first\n",
    "importance_table = pd.DataFrame(\n",
    "    {'feature': features, 'importance': rf_model.feature_importances_}\n",
    ")\n",
    "feature_importance = importance_table.sort_values('importance', ascending=False)\n",
    "\n",
    "print(f\"\\nFeature importance:\")\n",
    "print(feature_importance)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 6: Deep Learning Regression Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if TENSORFLOW_AVAILABLE:\n",
    "    print(\"🧠 Running deep learning regression...\")\n",
    "    \n",
    "    # Split first, then fit the scaler on the training rows only, so the mean and\n",
    "    # variance used for normalisation never see the held-out test rows.\n",
    "    X_train_raw, X_test_raw, y_train_dl, y_test_dl = train_test_split(\n",
    "        X, y, test_size=0.2, random_state=42\n",
    "    )\n",
    "    scaler = StandardScaler()\n",
    "    X_train_dl = scaler.fit_transform(X_train_raw)\n",
    "    X_test_dl = scaler.transform(X_test_raw)\n",
    "    \n",
    "    # The deep learning classification cell splits X_scaled again with the same\n",
    "    # test_size and random_state, so keep a full-length copy that is scaled with\n",
    "    # the training statistics.\n",
    "    X_scaled = scaler.transform(X)\n",
    "    \n",
    "    # Define deep learning model: two hidden ReLU layers and one linear output unit\n",
    "    dl_model = Sequential([\n",
    "        Dense(64, activation='relu', input_shape=(X_train_dl.shape[1],)),\n",
    "        Dense(32, activation='relu'),\n",
    "        Dense(1)\n",
    "    ])\n",
    "    \n",
    "    # Compile model\n",
    "    dl_model.compile(optimizer='adam', loss='mse', metrics=['mae'])\n",
    "    \n",
    "    # Train model\n",
    "    print(\"Training deep learning model...\")\n",
    "    history = dl_model.fit(X_train_dl, y_train_dl, epochs=50, batch_size=32, verbose=1)\n",
    "    \n",
    "    # Predict; flatten() turns the model output into a 1-D array for the metrics below\n",
    "    y_pred_dl = dl_model.predict(X_test_dl).flatten()\n",
    "    \n",
    "    # Evaluate\n",
    "    print(\"\\n=== Deep Learning Regression Results ===\")\n",
    "    print(f\"R² Score: {r2_score(y_test_dl, y_pred_dl):.3f}\")\n",
    "    print(f\"RMSE: {np.sqrt(mean_squared_error(y_test_dl, y_pred_dl)):.2f}\")\n",
    "    \n",
    "    # Plot training history\n",
    "    plt.figure(figsize=(12, 4))\n",
    "    \n",
    "    plt.subplot(1, 2, 1)\n",
    "    plt.plot(history.history['loss'], label='Training Loss')\n",
    "    plt.title('Model Loss')\n",
    "    plt.xlabel('Epoch')\n",
    "    plt.ylabel('Loss')\n",
    "    plt.legend()\n",
    "    \n",
    "    plt.subplot(1, 2, 2)\n",
    "    plt.plot(history.history['mae'], label='Training MAE')\n",
    "    plt.title('Model MAE')\n",
    "    plt.xlabel('Epoch')\n",
    "    plt.ylabel('MAE')\n",
    "    plt.legend()\n",
    "    \n",
    "    plt.tight_layout()\n",
    "    plt.show()\n",
    "    \n",
    "else:\n",
    "    print(\"Deep Learning: Skipped (TensorFlow not available)\")\n",
    "    # Later cells test this value to decide whether deep learning results exist\n",
    "    y_pred_dl = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Visualize regression results\n",
    "fig, axes = plt.subplots(1, 2, figsize=(15, 6))\n",
    "\n",
    "# The dashed red diagonal marks where predicted equals actual\n",
    "diagonal = [y_test.min(), y_test.max()]\n",
    "panel_models = [('Linear Regression', y_pred_lr), ('Random Forest', y_pred_rf)]\n",
    "for ax, (model_name, predictions) in zip(axes, panel_models):\n",
    "    ax.scatter(y_test, predictions, alpha=0.6)\n",
    "    ax.plot(diagonal, diagonal, 'r--', lw=2)\n",
    "    ax.set_xlabel('Actual NO2')\n",
    "    ax.set_ylabel('Predicted NO2')\n",
    "    ax.set_title(f'{model_name}\\nR² = {r2_score(y_test, predictions):.3f}')\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "# Add deep learning comparison if available (y_pred_dl is None when it was skipped)\n",
    "if y_pred_dl is not None:\n",
    "    dl_diagonal = [y_test_dl.min(), y_test_dl.max()]\n",
    "    plt.figure(figsize=(8, 6))\n",
    "    plt.scatter(y_test_dl, y_pred_dl, alpha=0.6)\n",
    "    plt.plot(dl_diagonal, dl_diagonal, 'r--', lw=2)\n",
    "    plt.xlabel('Actual NO2')\n",
    "    plt.ylabel('Predicted NO2')\n",
    "    plt.title(f'Deep Learning\\nR² = {r2_score(y_test_dl, y_pred_dl):.3f}')\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 7: Classification - Air Quality Risk Categories"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make AQI-style labels\n",
    "def label_aqi(no2):\n",
    "    \"\"\"Return the air-quality category name for one NO2 reading.\n",
    "\n",
    "    Upper bounds are inclusive: up to 50 is 'Good', up to 100 is 'Moderate',\n",
    "    up to 150 is 'Unhealthy for Sensitive'; anything else is 'Unhealthy'.\n",
    "    \"\"\"\n",
    "    # Walk the bands from lowest to highest and stop at the first one that fits\n",
    "    bands = ((50, 'Good'), (100, 'Moderate'), (150, 'Unhealthy for Sensitive'))\n",
    "    for upper_bound, category in bands:\n",
    "        if no2 <= upper_bound:\n",
    "            return category\n",
    "    return 'Unhealthy'\n",
    "\n",
    "# Attach a category to every row based on its NO2 reading\n",
    "df['AQI_Label'] = df['NO2(GT)'].map(label_aqi)\n",
    "\n",
    "# Show how the rows spread over the categories, as raw counts and as percentages\n",
    "label_counts = df['AQI_Label'].value_counts()\n",
    "label_share = df['AQI_Label'].value_counts(normalize=True) * 100\n",
    "print(\"AQI Label Distribution:\")\n",
    "print(label_counts)\n",
    "print(f\"\\nPercentage distribution:\")\n",
    "print(label_share)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Visualize AQI distribution\n",
    "plt.figure(figsize=(10, 6))\n",
    "aqi_counts = df['AQI_Label'].value_counts()\n",
    "\n",
    "# Tie each colour to its category. value_counts() sorts by frequency, so handing\n",
    "# out colours by position would paint whichever category is most common green.\n",
    "category_colors = {\n",
    "    'Good': 'green',\n",
    "    'Moderate': 'yellow',\n",
    "    'Unhealthy for Sensitive': 'orange',\n",
    "    'Unhealthy': 'red',\n",
    "}\n",
    "colors = [category_colors[label] for label in aqi_counts.index]\n",
    "bars = plt.bar(aqi_counts.index, aqi_counts.values, color=colors)\n",
    "plt.title('Distribution of Air Quality Categories')\n",
    "plt.ylabel('Number of Observations')\n",
    "plt.xlabel('Air Quality Category')\n",
    "\n",
    "# Add count labels on bars, centred horizontally and placed just above each bar\n",
    "for bar, count in zip(bars, aqi_counts.values):\n",
    "    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 10, \n",
    "             str(count), ha='center', va='bottom')\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Classification with Random Forest: same weather features, AQI category as the label\n",
    "X_class, y_class = df[features], df['AQI_Label']\n",
    "\n",
    "X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(\n",
    "    X_class, y_class, test_size=0.2, random_state=42\n",
    ")\n",
    "\n",
    "clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train_c, y_train_c)\n",
    "y_pred_c = clf.predict(X_test_c)\n",
    "\n",
    "print(\"=== Classification Results ===\")\n",
    "print(classification_report(y_test_c, y_pred_c))\n",
    "\n",
    "# Rank the inputs by the classifier's feature_importances_, largest first\n",
    "importance_columns = {'feature': features, 'importance': clf.feature_importances_}\n",
    "class_feature_importance = pd.DataFrame(importance_columns).sort_values('importance', ascending=False)\n",
    "\n",
    "print(f\"\\nFeature importance for classification:\")\n",
    "print(class_feature_importance)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 8: Deep Learning Classification Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if TENSORFLOW_AVAILABLE:\n",
    "    print(\"🧠 Running deep learning classification...\")\n",
    "    \n",
    "    # Prepare data for classification: encode the category names as integer ids\n",
    "    le = LabelEncoder()\n",
    "    y_class_encoded = le.fit_transform(y_class)\n",
    "    \n",
    "    # X_scaled comes from the deep learning regression cell, which must run first\n",
    "    X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(\n",
    "        X_scaled, y_class_encoded, test_size=0.2, random_state=42\n",
    "    )\n",
    "    \n",
    "    # Define classification model: one softmax output unit per encoded class\n",
    "    dl_clf_model = Sequential([\n",
    "        Dense(64, activation='relu', input_shape=(X_train_clf.shape[1],)),\n",
    "        Dense(32, activation='relu'),\n",
    "        Dense(len(le.classes_), activation='softmax')\n",
    "    ])\n",
    "    \n",
    "    # Compile model\n",
    "    dl_clf_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])\n",
    "    \n",
    "    # Train model\n",
    "    print(\"Training deep learning classification model...\")\n",
    "    clf_history = dl_clf_model.fit(X_train_clf, y_train_clf, epochs=50, batch_size=32, verbose=1)\n",
    "    \n",
    "    # Predict: take the most probable class per row, then map ids back to names\n",
    "    y_pred_clf_dl = dl_clf_model.predict(X_test_clf)\n",
    "    y_pred_clf_dl_classes = np.argmax(y_pred_clf_dl, axis=1)\n",
    "    y_pred_clf_dl_labels = le.inverse_transform(y_pred_clf_dl_classes)\n",
    "    \n",
    "    # Evaluate\n",
    "    # Pass the full list of class ids explicitly: without it the report raises when\n",
    "    # a class is missing from both the test rows and the predictions, because the\n",
    "    # number of target_names then no longer matches the labels that were found.\n",
    "    # zero_division=0 reports 0 for classes that have no test or predicted samples.\n",
    "    print(\"\\n=== Deep Learning Classification Results ===\")\n",
    "    print(classification_report(\n",
    "        y_test_clf,\n",
    "        y_pred_clf_dl_classes,\n",
    "        labels=np.arange(len(le.classes_)),\n",
    "        target_names=le.classes_,\n",
    "        zero_division=0,\n",
    "    ))\n",
    "    \n",
    "    # Plot training history\n",
    "    plt.figure(figsize=(12, 4))\n",
    "    \n",
    "    plt.subplot(1, 2, 1)\n",
    "    plt.plot(clf_history.history['loss'], label='Training Loss')\n",
    "    plt.title('Classification Model Loss')\n",
    "    plt.xlabel('Epoch')\n",
    "    plt.ylabel('Loss')\n",
    "    plt.legend()\n",
    "    \n",
    "    plt.subplot(1, 2, 2)\n",
    "    plt.plot(clf_history.history['accuracy'], label='Training Accuracy')\n",
    "    plt.title('Classification Model Accuracy')\n",
    "    plt.xlabel('Epoch')\n",
    "    plt.ylabel('Accuracy')\n",
    "    plt.legend()\n",
    "    \n",
    "    plt.tight_layout()\n",
    "    plt.show()\n",
    "    \n",
    "else:\n",
    "    print(\"Deep Learning Classification: Skipped (TensorFlow not available)\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Confusion matrix for the Random Forest classifier (rows: true label, columns: predicted)\n",
    "class_names = clf.classes_\n",
    "cm = confusion_matrix(y_test_c, y_pred_c, labels=class_names)\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(8, 6))\n",
    "sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',\n",
    "            xticklabels=class_names, yticklabels=class_names, ax=ax)\n",
    "ax.set_title('Confusion Matrix - Air Quality Classification')\n",
    "ax.set_ylabel('True Label')\n",
    "ax.set_xlabel('Predicted Label')\n",
    "fig.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 9: Advanced Analysis - Seasonal Patterns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Extract seasonal information from the datetime index\n",
    "df['Month'] = df.index.month\n",
    "df['Hour'] = df.index.hour\n",
    "df['DayOfWeek'] = df.index.dayofweek\n",
    "\n",
    "# Display names, indexed by month number - 1 and by day-of-week number (Monday = 0)\n",
    "month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',\n",
    "               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']\n",
    "day_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']\n",
    "\n",
    "# Monthly patterns\n",
    "monthly_avg = df.groupby('Month')['NO2(GT)'].mean()\n",
    "\n",
    "plt.figure(figsize=(12, 4))\n",
    "\n",
    "plt.subplot(1, 3, 1)\n",
    "monthly_avg.plot(kind='bar', color='skyblue')\n",
    "plt.title('Average NO2 by Month')\n",
    "plt.xlabel('Month')\n",
    "plt.ylabel('Average NO2 (µg/m³)')\n",
    "# Bars sit at positions 0..n-1 for the months that actually occur in the data, so\n",
    "# name each tick from its group key; a fixed Jan..Dec list would mislabel the bars\n",
    "# whenever some months are missing after the rows with NaN were dropped.\n",
    "plt.xticks(range(len(monthly_avg)), [month_names[month - 1] for month in monthly_avg.index])\n",
    "\n",
    "# Hourly patterns\n",
    "hourly_avg = df.groupby('Hour')['NO2(GT)'].mean()\n",
    "plt.subplot(1, 3, 2)\n",
    "hourly_avg.plot(kind='line', marker='o', color='orange')\n",
    "plt.title('Average NO2 by Hour')\n",
    "plt.xlabel('Hour of Day')\n",
    "plt.ylabel('Average NO2 (µg/m³)')\n",
    "\n",
    "# Day of week patterns\n",
    "dow_avg = df.groupby('DayOfWeek')['NO2(GT)'].mean()\n",
    "plt.subplot(1, 3, 3)\n",
    "dow_avg.plot(kind='bar', color='lightgreen')\n",
    "plt.title('Average NO2 by Day of Week')\n",
    "plt.xlabel('Day of Week')\n",
    "plt.ylabel('Average NO2 (µg/m³)')\n",
    "# Same reasoning as for the months: label only the weekdays that are present\n",
    "plt.xticks(range(len(dow_avg)), [day_names[day] for day in dow_avg.index])\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 10: Model Comparison and Summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary of results\n",
    "print(\"=== AIR QUALITY ANALYSIS SUMMARY ===\\n\")\n",
    "\n",
    "print(\"📊 Dataset Information:\")\n",
    "print(f\"   • Total observations: {len(df):,}\")\n",
    "print(f\"   • Time period: {df.index.min().strftime('%Y-%m-%d')} to {df.index.max().strftime('%Y-%m-%d')}\")\n",
    "print(f\"   • Features used: {', '.join(features)}\")\n",
    "print(f\"   • Target variable: {target}\")\n",
    "\n",
    "# Score every regression model once and reuse the numbers for both the listing\n",
    "# and the ranking; deep learning is included only when that cell actually ran.\n",
    "models = [\n",
    "    ('Linear Regression', r2_score(y_test, y_pred_lr)),\n",
    "    ('Random Forest', r2_score(y_test, y_pred_rf))\n",
    "]\n",
    "if y_pred_dl is not None:\n",
    "    models.append(('Deep Learning', r2_score(y_test_dl, y_pred_dl)))\n",
    "\n",
    "print(\"\\n🎯 Regression Results:\")\n",
    "for model_name, model_r2 in models:\n",
    "    print(f\"   • {model_name} R²: {model_r2:.3f}\")\n",
    "\n",
    "# Determine best model: the one with the highest R² on the test rows\n",
    "best_model = max(models, key=lambda x: x[1])\n",
    "print(f\"   • Best model: {best_model[0]} (R² = {best_model[1]:.3f})\")\n",
    "\n",
    "print(\"\\n🏷️ Classification Results:\")\n",
    "print(f\"   • Air quality categories: {', '.join(clf.classes_)}\")\n",
    "print(f\"   • Most common category: {df['AQI_Label'].mode().iloc[0]}\")\n",
    "\n",
    "print(\"\\n🔍 Key Insights:\")\n",
    "# feature_importance is sorted in descending order, so row 0 is the top feature\n",
    "print(f\"   • Most important feature for prediction: {feature_importance.iloc[0]['feature']}\")\n",
    "print(f\"   • Average NO2 level: {df['NO2(GT)'].mean():.1f} µg/m³\")\n",
    "print(f\"   • NO2 range: {df['NO2(GT)'].min():.1f} - {df['NO2(GT)'].max():.1f} µg/m³\")\n",
    "\n",
    "if not TENSORFLOW_AVAILABLE:\n",
    "    print(\"\\n💡 To enable deep learning models:\")\n",
    "    print(\"   • Install TensorFlow: pip install tensorflow\")\n",
    "    # The supported Python range depends on the TensorFlow release, so no fixed\n",
    "    # version bound is quoted here.\n",
    "    print(\"   • Note: Use a Python version supported by your TensorFlow release\")\n",
    "\n",
    "print(\"\\n✅ Analysis completed successfully!\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
