In [None]:
# Simplified Machine Learning Model (Random Forest) for Air Quality Prediction
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib

# Load data
df = pd.read_csv('D:/DSMLAI(insaid)/ML/DATA SETS/air_quality_health_dataset.csv')

# Feature engineering
df['date'] = pd.to_datetime(df['date'])
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day

# Prepare features/target
X = df.drop(['hospital_admissions', 'city', 'date'], axis=1)
y = df['hospital_admissions']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocessing
numeric_features = X.select_dtypes(include=['int64', 'float64']).columns
categorical_features = ['population_density']

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Simplified model with fixed parameters
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(
        n_estimators=50,  # Reduced from default 100
        max_depth=10,     # Limit tree depth
        random_state=42,
        n_jobs=-1))       # Use all cores
])

# Train model
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print(f"MSE: {mean_squared_error(y_test, y_pred):.2f}")
print(f"R2: {r2_score(y_test, y_pred):.2f}")

# Save model
joblib.dump(model, 'fast_air_quality_model.pkl')
print("Model saved successfully!")

MSE: 11.55
R2: 0.16
Model saved successfully!


In [36]:
import joblib
import pandas as pd

# 1. Load the model
model = joblib.load('D:/DSMLAI(insaid)/ML/practice/fast_air_quality_model.pkl')

# 2. Prepare new data (adjust values as needed)
new_data = pd.DataFrame({
    'aqi': [150],
    'pm2_5': [45.0],
    'pm10': [60.0],
    'no2': [25.0],
    'o3': [45.0],
    'temperature': [22.5],
    'humidity': [65],
    'population_density': ['Urban'],
    'hospital_capacity': [1200],
    'year': [2023],
    'month': [6],
    'day': [15]
})

# 3. Make prediction
prediction = model.predict(new_data)
print(f"Predicted Hospital Admissions: {prediction[0]:.2f}")

Predicted Hospital Admissions: 9.08
