In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, Lasso
from sklearn.svm import SVR
from sklearn.metrics import r2_score
from sklearn.impute import SimpleImputer

# Load the dataset
file_path = 'AirQualityUCI.xlsx'  # Update the path as per your file location
air_quality_data = pd.read_excel(file_path)

# Drop the "Date" and "Time" columns (timestamps, not used as numeric features)
air_quality_data_cleaned = air_quality_data.drop(columns=['Date', 'Time'])

# The UCI Air Quality dataset tags missing readings with the sentinel -200
# instead of NaN. Convert the sentinel to NaN so the imputer actually sees the
# gaps; otherwise -200 is treated as a real measurement in features and target.
air_quality_data_cleaned = air_quality_data_cleaned.replace(-200, np.nan)

# Rows with an unknown target cannot be used for supervised training or
# scoring; drop them rather than inventing a label by imputation.
air_quality_data_cleaned = air_quality_data_cleaned.dropna(subset=['CO(GT)'])

# Defining the independent variables (X) and dependent variable (y)
X = air_quality_data_cleaned.drop(columns=['CO(GT)'])  # Independent variables
y = air_quality_data_cleaned['CO(GT)']  # Dependent variable

# Split the data into training (80%) and testing sets (20%) BEFORE fitting any
# preprocessing, so test-set statistics never leak into the imputer or scaler.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Handle missing feature values using mean imputation (means learned on train only)
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train_raw)
X_test_imputed = imputer.transform(X_test_raw)

# Scale the numerical columns using StandardScaler (fitted on train only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_imputed)
X_test = scaler.transform(X_test_imputed)

# Full preprocessed feature matrix, kept under its original name in case later
# cells reference it. It is not used for fitting or scoring below.
X_scaled = scaler.transform(imputer.transform(X))

# Build the Ridge regression model with alpha=12
ridge_model = Ridge(alpha=12)
ridge_model.fit(X_train, y_train)

# Predict and evaluate Ridge model on the test set
y_pred_ridge = ridge_model.predict(X_test)
ridge_r2_score = r2_score(y_test, y_pred_ridge)
print(f'R2 score of Ridge regression: {ridge_r2_score}')

# Build the Lasso regression model with alpha=12
# NOTE(review): with the -200 sentinels removed the target has a much smaller
# scale, so alpha=12 is a very strong L1 penalty and may shrink every
# coefficient to zero (R2 near 0). Confirm whether alpha should be tuned.
lasso_model = Lasso(alpha=12)
lasso_model.fit(X_train, y_train)

# Predict and evaluate Lasso model on the test set
y_pred_lasso = lasso_model.predict(X_test)
lasso_r2_score = r2_score(y_test, y_pred_lasso)
print(f'R2 score of Lasso regression: {lasso_r2_score}')

# Build the SVR model
svr_model = SVR(kernel='linear')  # You can try different kernels like 'rbf' as well
svr_model.fit(X_train, y_train)

# Predict and evaluate SVR model on the test set
y_pred_svr = svr_model.predict(X_test)
svr_r2_score = r2_score(y_test, y_pred_svr)
print(f'R2 score of SVR: {svr_r2_score}')

# Compare the results of Ridge, Lasso, and SVR models
print(f'Comparison - Ridge R2: {ridge_r2_score}, Lasso R2: {lasso_r2_score}, SVR R2: {svr_r2_score}')


R2 score of Ridge regression: 0.5153759100843001
R2 score of Lasso regression: 0.4577715309137489
R2 score of SVR: 0.4810361392237197
Comparison - Ridge R2: 0.5153759100843001, Lasso R2: 0.4577715309137489, SVR R2: 0.4810361392237197
