In [2]:
import pandas as pd
import numpy as np
import cvxpy as cp
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.metrics import mean_absolute_error

# Load training and test data
train_data = pd.read_csv("../Dataset/train.csv", parse_dates=["Time"])
test_data = pd.read_csv("../Dataset/dummy_test.csv", parse_dates=["Time"])
columns_to_drop = ['temperature', 'humidity', 'Time']  
train_data = train_data.drop(columns=columns_to_drop, errors='ignore')
test_data = test_data.drop(columns=columns_to_drop, errors='ignore')
print("5 FIRST ROWS OF TRAINING DATASET")
print(train_data.head())
print("./")
print("5 FIRST ROWS OF TEST DATASET")
print(test_data.head())
# Features and targets for training
X_train = train_data[["no2op1", "no2op2", "o3op1", "o3op2"]]
y_train_ozone = train_data["OZONE"]
y_train_no2 = train_data["NO2"]

# Features and targets for test
X_test = test_data[["no2op1", "no2op2", "o3op1", "o3op2"]]
y_test_ozone = test_data["OZONE"]
y_test_no2 = test_data["NO2"]

# Convert to numpy arrays for cvxpy
X_train_np = X_train.values
X_test_np = X_test.values
y_train_ozone_np = y_train_ozone.values
y_train_no2_np = y_train_no2.values
y_test_ozone_np = y_test_ozone.values
y_test_no2_np = y_test_no2.values

5 FIRST ROWS OF TRAINING DATASET
   OZONE     NO2  temp  no2op1  no2op2  o3op1  o3op2
0  77.59   6.881  36.2   199.0   200.0  240.0  197.0
1  78.71  11.057  36.3   196.0   200.0  237.0  196.0
2  78.85   8.596  36.7   195.0   199.0  235.0  196.0
3  79.27   7.248  37.0   193.0   198.0  233.0  195.0
4  80.01   8.638  36.8   191.0   198.0  231.0  195.0
./
5 FIRST ROWS OF TEST DATASET
    OZONE    NO2  temp  no2op1  no2op2  o3op1  o3op2
0  71.327  8.801  41.2   179.0   194.0  220.0  192.0
1  72.317  5.536  41.1   181.0   196.0  222.0  192.0
2  74.440  4.574  41.0   181.0   195.0  222.0  192.0
3  74.033  6.426  40.9   179.0   194.0  220.0  192.0
4  73.080  5.825  40.9   181.0   195.0  222.0  193.0


In [36]:
def training(model, X_train, y_train, X_test, y_test, model_name, target_name):
    model.fit(X_train, y_train)
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)
    mae_train = mean_absolute_error(y_train, y_train_pred)
    print(f"{model_name} - {target_name}: MAE (Train) = {mae_train:.4f}")
    print(f"{model_name} - {target_name} Coefficients:")
    print(f"p ({target_name.lower()}) (o3op1) = {model.coef_[2]:.4f}")
    print(f"q ({target_name.lower()}) (o3op2) = {model.coef_[3]:.4f}")
    print(f"r ({target_name.lower()}) (no2op1) = {model.coef_[0]:.4f}")
    print(f"s ({target_name.lower()}) (no2op2) = {model.coef_[1]:.4f}")
    print(f"t ({target_name.lower()}) (intercept) = {model.intercept_:.4f}\n")
    return mae_train

LEAST SQUARE METHOD WITHOUT REGULARIZATION(OZONE) is the best model for predicting OZONE

In [37]:
BEST_OZONE_MODEL = LinearRegression()
name1 = "least square"
training(BEST_OZONE_MODEL, X_train, y_train_ozone, X_test, y_test_ozone, name1, "OZONE")

least square - OZONE: MAE (Train) = 5.6259
least square - OZONE Coefficients:
p (ozone) (o3op1) = 1.5643
q (ozone) (o3op2) = -0.9241
r (ozone) (no2op1) = -1.7559
s (ozone) (no2op2) = 1.1150
t (ozone) (intercept) = 10.9386



5.6259377355149365

LEAST SQUARE METHOD WITH RIDGE (OZONE)

In [38]:
ridge_OZONE_Model = Ridge(alpha=1.0)
name2 = "Ridge regression"
training(ridge_OZONE_Model, X_train, y_train_ozone, X_test, y_test_ozone, name2, "OZONE")

Ridge regression - OZONE: MAE (Train) = 5.6259
Ridge regression - OZONE Coefficients:
p (ozone) (o3op1) = 1.5643
q (ozone) (o3op2) = -0.9240
r (ozone) (no2op1) = -1.7559
s (ozone) (no2op2) = 1.1149
t (ozone) (intercept) = 10.9399



5.625938089364324

LEAST SQUARE METHOD WITH LASSO (OZONE)

In [39]:
lasso_OZONE_Model = Lasso(alpha=1.0)
name3 = "Lasso regression"
training(lasso_OZONE_Model, X_train, y_train_ozone, X_test, y_test_ozone, name3, "OZONE")

Lasso regression - OZONE: MAE (Train) = 5.6804
Lasso regression - OZONE Coefficients:
p (ozone) (o3op1) = 1.5286
q (ozone) (o3op2) = 0.0000
r (ozone) (no2op1) = -1.6888
s (ozone) (no2op2) = 0.1164
t (ozone) (intercept) = 22.8060



5.680444323222144

LEAST SQUARE METHOD WITHOUT REGULARIZATION(NO2) is the best model for predicting NO2

In [40]:
BEST_NO2_MODEL = LinearRegression()
name4 = "least square"
training(BEST_NO2_MODEL, X_train, y_train_ozone, X_test, y_test_ozone, name4, "NO2")

least square - NO2: MAE (Train) = 5.6259
least square - NO2 Coefficients:
p (no2) (o3op1) = 1.5643
q (no2) (o3op2) = -0.9241
r (no2) (no2op1) = -1.7559
s (no2) (no2op2) = 1.1150
t (no2) (intercept) = 10.9386



5.6259377355149365

LEAST SQUARE METHOD WITH RIDGE (NO2)

In [41]:
ridge_NO2_Model = Ridge(alpha=1.0)
name5 = "Ridge regression"
training(ridge_NO2_Model, X_train, y_train_ozone, X_test, y_test_ozone, name5, "OZONE")

Ridge regression - OZONE: MAE (Train) = 5.6259
Ridge regression - OZONE Coefficients:
p (ozone) (o3op1) = 1.5643
q (ozone) (o3op2) = -0.9240
r (ozone) (no2op1) = -1.7559
s (ozone) (no2op2) = 1.1149
t (ozone) (intercept) = 10.9399



5.625938089364324

LEAST SQUARE METHOD WITH LASSO (OZONE)

In [42]:
lasso_NO2_Model = Lasso(alpha=1.0)
name6 = "Lasso regression"
training(lasso_NO2_Model, X_train, y_train_ozone, X_test, y_test_ozone, name6, "OZONE")

Lasso regression - OZONE: MAE (Train) = 5.6804
Lasso regression - OZONE Coefficients:
p (ozone) (o3op1) = 1.5286
q (ozone) (o3op2) = 0.0000
r (ozone) (no2op1) = -1.6888
s (ozone) (no2op2) = 0.1164
t (ozone) (intercept) = 22.8060



5.680444323222144