In [None]:
# prompt: predict air quality levels using artificial intelligence

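# Install the required libraries if they are not already installed.
# NOTE: the leading "!" is IPython/Colab shell syntax, so the next line only runs inside a notebook.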
!pip install pandas scikit-learn matplotlib

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# --- Load your data ---
# Assuming your data is in a CSV file named 'air_quality_data.csv'
# You can upload it to your Colab environment or mount Google Drive.
# If using Google Drive:
# from google.colab import drive
# drive.mount('/content/drive')
# data = pd.read_csv('/content/drive/My Drive/air_quality_data.csv')
# If uploading directly:
# from google.colab import files
# uploaded = files.upload()
# file_name = list(uploaded.keys())[0]
# data = pd.read_csv(file_name)

# For demonstration, build a small synthetic hourly dataset.
# Every numeric column is independent uniform noise, so the features carry no
# real signal about pm25; swap this block for one of the CSV loaders above
# when real measurements are available.
RANDOM_SEED = 42  # fixed seed so every run produces the same sample data
N_SAMPLES = 100   # number of hourly rows to generate

rng = np.random.default_rng(RANDOM_SEED)
data = pd.DataFrame({
    # date_range already returns datetimes, so no to_datetime() wrapper is
    # needed; a Timedelta step avoids the deprecated 'H' frequency alias.
    'timestamp': pd.date_range(
        start='2023-01-01',
        periods=N_SAMPLES,
        freq=pd.Timedelta(hours=1),
    ),
    'temperature': rng.random(N_SAMPLES) * 20 + 10,  # uniform in [10, 30)
    'humidity': rng.random(N_SAMPLES) * 40 + 30,     # uniform in [30, 70)
    'wind_speed': rng.random(N_SAMPLES) * 10,        # uniform in [0, 10)
    'pm25': rng.random(N_SAMPLES) * 50 + 10,         # Target variable, uniform in [10, 60)
})

print("Sample Data Head:")
print(data.head())

# --- Data Preprocessing ---
# Predictors are the three weather readings; the target is the pm25 column.
feature_columns = ['temperature', 'humidity', 'wind_speed']
X = data[feature_columns]
y = data['pm25']

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# row assignment identical from run to run.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

print("\nTraining Data Shape:", X_train.shape)
print("Testing Data Shape:", X_test.shape)

# --- Model Selection and Training (Simple Linear Regression) ---
# fit() hands back the estimator itself, so construction and training chain.
model = LinearRegression().fit(X_train, y_train)

# --- Model Evaluation ---
# Score the fitted model on the held-out rows only.
y_pred = model.predict(X_test)

mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)  # square root of the MSE computed above

print(f"\nMean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"R-squared (R2): {r2:.2f}")

# --- Visualize Predictions (Simple Plot) ---
# Scatter of held-out targets against the model's predictions, drawn through
# the Axes object rather than pyplot's implicit current-figure state.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred)
ax.set_xlabel("Actual PM2.5")
ax.set_ylabel("Predicted PM2.5")
ax.set_title("Actual vs. Predicted PM2.5")

# Identity line (predicted == actual), spanning the full range of the target.
target_range = [y.min(), y.max()]
ax.plot(target_range, target_range, 'k--', lw=2)
plt.show()

# --- Example Prediction ---
# Score a single hand-written observation. The columns use the same names and
# order as the training features so the model receives a matching frame.
new_data = pd.DataFrame({
    'temperature': [25],
    'humidity': [60],
    'wind_speed': [5],
})
predicted_pm25 = model.predict(new_data)
print(f"\nPrediction for new data (Temp=25, Humid=60, Wind=5): {predicted_pm25[0]:.2f}")
