# Models 

In this notebook, we develop the models for the transformed dataset. 

In [62]:
import pandas as pd
import numpy as np
import os
import tqdm

# plotting
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

# models 
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from sklearn.svm import SVR

# others
from sklearn.metrics import mean_squared_error, r2_score

## Definitions for the notebook

In [2]:
# For local folder
IMAGES_FOLDER = "../notes/images/"
# For Google colab
#IMAGES_FOLDER = "drive/MyDrive/Arquivos Acadêmicos/Disciplinas FGV/Machine Learning/images/"

sns.set()

pd.set_option('precision', 3)
pd.options.mode.chained_assignment = None

%matplotlib inline

## Importing the dataset

In [82]:
# Base folder of the input data; switch the active assignment when on Colab.
# For local folder
location = "../data/"
# For Google colab
#location = "drive/MyDrive/Arquivos Acadêmicos/Disciplinas FGV/Machine Learning/"

# The first CSV column is used as the row index.
air_data = pd.read_csv(
    location + "RiodeJaneiro_MonitorAr_hourly_p3.csv",
    index_col=0,
)
# Store the weekend flag as an integer column.
air_data["weekend"] = air_data["weekend"].astype(int)
air_data.head()

Unnamed: 0,year,month,day,CodNum,Lat,Lon,Chuva,Pres,RS,Temp,...,CO_lag24,CO_MA24,O3_lag1,O3_lag2,O3_lag24,O3_MA24,PM10_lag1,PM10_lag2,PM10_lag24,PM10_MA24
163253,2011,1,2,1,-22.965,-43.18,3.617,-1.53,-1.128,-0.14,...,-0.763,-0.936,-0.738,-0.543,-1.19,-0.365,0.101,0.078,0.087,0.082
326745,2011,1,2,7,-22.898,-43.222,-0.272,-1.513,-0.565,0.56,...,-1.678,-1.232,-0.612,-0.997,-0.724,-0.187,-0.307,-0.967,-1.858,-1.136
103103,2011,1,2,3,-22.908,-43.178,3.617,-1.557,-0.72,-0.47,...,-0.634,-0.166,-0.211,-0.717,-0.169,-0.027,-0.677,-0.677,-1.617,-1.141
387082,2011,1,2,8,-22.925,-43.233,3.617,-1.909,-0.194,-1.068,...,-0.897,-1.283,-0.267,-0.184,-1.028,0.274,-0.773,-0.583,0.495,-1.061
326746,2011,1,2,7,-22.898,-43.222,3.617,-1.658,-0.567,0.548,...,-1.591,-1.206,-1.3,-0.612,-1.099,-0.204,-0.484,-0.307,-0.967,-1.136


In [83]:
# Split rows using the `train` column as a boolean mask (assumes it is a
# boolean flag -- TODO confirm), then drop the flag from both partitions.
df_train = air_data.loc[air_data["train"]].drop(columns="train")
df_test = air_data.loc[~air_data["train"]].drop(columns="train")

# Feature matrices: remove the O3/CO/PM10/aiq columns and the Lat/Lon
# coordinates, keeping every other column as a regressor.
x_train = df_train.drop(
    columns=["O3", "CO", "PM10", "aiq", "Lat", "Lon"]
)
x_test = df_test.drop(
    columns=["O3", "CO", "PM10", "aiq", "Lat", "Lon"]
)

## Linear regression

We start with ozone (O3) as the target variable.

In [84]:
# Regression target for this section: the O3 column of each partition.
y_train, y_test = df_train["O3"], df_test["O3"]

In [98]:
# `x_train_sm` must be defined in this cell: it was previously only present
# as a commented-out line, so the cell failed with NameError on a fresh
# kernel (Restart & Run All).
x_train_sm = sm.add_constant(x_train)

# Baseline OLS of O3 on its 1- and 2-step lags. Selecting only the two lag
# columns leaves out any constant column, so the model is fitted without an
# intercept (statsmodels then reports the uncentered R-squared).
reg = sm.OLS(y_train, x_train_sm[['O3_lag1', 'O3_lag2']])
res = reg.fit()
print(res.summary())

                                 OLS Regression Results                                
Dep. Variable:                     O3   R-squared (uncentered):                   0.852
Model:                            OLS   Adj. R-squared (uncentered):              0.852
Method:                 Least Squares   F-statistic:                          1.154e+06
Date:                Wed, 16 Jun 2021   Prob (F-statistic):                        0.00
Time:                        01:13:08   Log-Likelihood:                     -2.0075e+05
No. Observations:              399541   AIC:                                  4.015e+05
Df Residuals:                  399539   BIC:                                  4.015e+05
Df Model:                           2                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [99]:
# Show the first training row to inspect the feature columns and their values.
x_train.iloc[0]

year          2011.000
month            1.000
day              2.000
CodNum           1.000
Chuva            3.617
Pres            -1.530
RS              -1.128
Temp            -0.140
UR               0.866
Dir_Vento       -0.512
Vel_Vento       -0.603
weekend          1.000
season           0.000
hour_sin         0.000
hour_cos         1.000
CO_lag1         -0.282
CO_lag2         -0.572
CO_lag24        -0.763
CO_MA24         -0.936
O3_lag1         -0.738
O3_lag2         -0.543
O3_lag24        -1.190
O3_MA24         -0.365
PM10_lag1        0.101
PM10_lag2        0.078
PM10_lag24       0.087
PM10_MA24        0.082
Name: 163253, dtype: float64