# Using scikit-learn's estimator-selection roadmap to choose an ElasticNet model

In [1]:
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_absolute_error, r2_score, root_mean_squared_log_error, mean_squared_error, root_mean_squared_error
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV

In [2]:
# Load the preprocessed AQI / weather table from the processed-data folder.
# NOTE(review): the file carries an "Unnamed: 0" column (looks like a saved
# index); it is never selected as a feature further down.
data = pd.read_csv(filepath_or_buffer="../data/processed/processed.csv")

In [3]:
# Preview the first five rows to sanity-check the columns and values.
data.head(n=5)

Unnamed: 0,Date,AQI,Year,Month,Day,temperature_2m_max,temperature_2m_min,precipitation_sum,weather_code,sunshine_duration,cloud_cover_mean,wind_speed_10m_mean,winddirection_10m_dominant
0,2018-01-01,75.0,2018,1,1,28.253,14.503,0.0,0.0,36062.707,0.0,5.679092,270.6138
1,2018-01-02,76.0,2018,1,2,28.453,12.903,0.0,1.0,36040.188,7.166666,4.744073,278.32556
2,2018-01-03,79.0,2018,1,3,28.903,15.303,0.0,0.0,36101.605,0.083333,5.611268,285.8839
3,2018-01-04,84.0,2018,1,4,28.353,15.303,0.0,0.0,36083.652,1.541667,5.784296,326.56494
4,2018-01-05,97.0,2018,1,5,28.453,14.003,0.0,3.0,36066.5,46.125,5.50617,287.77652


In [4]:
# Baseline ElasticNet regressor.
# alpha sets the overall penalty strength and l1_ratio=0.5 weights the L1 and
# L2 terms equally; random_state is pinned to a fixed seed.
model = ElasticNet(
    l1_ratio=0.5,
    alpha=1.0,
    random_state=42,
)

In [5]:
# Feature selection and train/test split.
# Predictors are the calendar fields (Year, Month) plus the weather
# measurements; "Date", "Day" and "Unnamed: 0" are left out.
# NOTE(review): the features are on very different scales (compare
# sunshine_duration with cloud_cover_mean in the preview) and are passed to
# the penalised model unscaled - confirm that this is intended.
features = [
    "Year",
    "Month",
    "temperature_2m_max",
    "temperature_2m_min",
    "precipitation_sum",
    "weather_code",
    "sunshine_duration",
    "cloud_cover_mean",
    "wind_speed_10m_mean",
    "winddirection_10m_dominant",
]

X = data.loc[:, features]
y = data.loc[:, "AQI"]

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Fit the regressor on the training split; the fitted estimator is the cell's
# displayed value.
model.fit(X=X_train, y=y_train)

0,1,2
,alpha,1.0
,l1_ratio,0.5
,fit_intercept,True
,precompute,False
,max_iter,1000
,copy_X,True
,tol,0.0001
,warm_start,False
,positive,False
,random_state,42


In [7]:
# Predict AQI for the held-out rows.
y_preds = model.predict(X=X_test)

In [8]:
# Echo the test-set predictions for a quick visual check.
# NOTE(review): this displays the entire array; a slice such as y_preds[:10]
# would keep the saved notebook output short.
y_preds

array([147.96624653,  96.75060458,  74.95137341, 148.40325959,
       149.80353794, 100.45491445,  59.64602646, 126.43647942,
        62.60803638, 134.15891204,  57.36722993,  68.98683145,
        68.52001807, 103.95393099, 112.92502243,  66.14692083,
       134.11648121,  76.03132363,  81.86673129, 121.9743208 ,
       146.82889141, 132.21118237,  67.71916485, 147.63022336,
        73.53175691,  60.56534375,  76.99225588, 127.22026066,
       125.97738998,  50.05957409,  73.10396794, 139.11503425,
       101.51572795,  67.8294556 , 106.97144131,  78.8318596 ,
        69.74379174,  74.66519183,  77.4772528 ,  97.3565251 ,
       120.18908515, 112.37510801,  82.74531617,  82.4040459 ,
        52.06269652, 107.01528499,  85.76290666, 149.66015428,
        69.53051333, 101.17267208,  80.45390587, 113.68648715,
       100.58797238, 150.51945812, 139.17218995, 117.2991994 ,
        96.02874733,  70.22888069, 115.35154703,  99.26433039,
       130.06999083, 117.417788  ,  80.10473732, 106.44

In [9]:
# Report the hold-out metrics, one per line, as "<NAME>: <value>".
# NOTE(review): RMSLE presumably rejects negative inputs, so the last line
# would fail if the linear model ever predicted a negative AQI - confirm.
for label, metric in (
    ("R2", r2_score),
    ("MAE", mean_absolute_error),
    ("MSE", mean_squared_error),
    ("RMSE", root_mean_squared_error),
    ("RMSLE", root_mean_squared_log_error),
):
    print(f"{label}:", metric(y_test, y_preds))

R2: 0.41018734082034014
MAE: 22.64889099934768
MSE: 1063.454688069001
RMSE: 32.61065298440068
RMSLE: 0.30544844999912885


In [11]:
# Save the model
# Persist the fitted estimator with pickle (pickle and Path come from the
# notebook's top import cell). The target directory is created first so that
# a checkout without ../models/ does not fail with FileNotFoundError when the
# file is opened for writing.
# NOTE(review): pickle files should only be loaded from trusted sources, and
# ideally with the same scikit-learn version that produced them.
model_path = Path("../models/ElasticModel-model.pkl")
model_path.parent.mkdir(parents=True, exist_ok=True)

with model_path.open("wb") as f:
    pickle.dump(model, f)