<a href="https://colab.research.google.com/github/thiagobmartins/MO432/blob/main/Exerc2/E2_Model_Temperature_Forecast.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Grupo:
* Isaque Elcio de Souza — RA: 225310
* Matheus Vinicius Correa — RA: 22524
* Thiago Bruschi Martins — RA: 120212


In [None]:
import io
import random
import requests
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.svm import SVR
from sklearn.svm import LinearSVR
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from scipy.stats import loguniform, uniform
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import RandomizedSearchCV


In [None]:
import warnings
warnings.filterwarnings("ignore")

url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00514/Bias_correction_ucl.csv"
s = requests.get(url).content
df = pd.read_csv(io.StringIO(s.decode('utf-8')))


# Preprocessamento

In [None]:
random.seed(42)

pre_df = df.dropna()
X = pre_df.drop(['Next_Tmin','Date','Next_Tmax'], axis=1)
y = pre_df['Next_Tmax']

scaler = StandardScaler()
X = scaler.fit_transform(X)

print('X:', X.shape)
print('y:', y.shape)

X: (7588, 22)
y: (7588,)


# Funções

In [None]:
def random_search(estimator, param_distributions):
  rscv = RandomizedSearchCV(estimator, param_distributions=param_distributions, scoring='neg_root_mean_squared_error', n_iter=10, n_jobs=-1)
  r = rscv.fit(X,y)
  return -r.best_score_, r.best_params_ 

def cross_validation(estimator):
  r = cross_val_score(estimator, X, y, cv=5, scoring='neg_root_mean_squared_error')
  return np.min(-r)

def list_dict(d):
    for x in d:
        print (f'\t{x}: {round(d[x],5)}')     

def print_dict(d):
    for x in d:
        print (x)
        for y in d[x]:
            print ('\t',y,':', round(d[x][y], 4))

def model_test(estimator, params, name):
  best_score, best_params = random_search(estimator, params)
  default_score =  cross_validation(estimator)

  results[name] = {}
  results[name]['best_score'] = best_score
  results[name]['default_score'] = default_score
  return best_params

def model_report(name, best_params):
  print(name)
  list_dict(best_params)
  list_dict(results[name])

results = {}

# Modelo Linear

In [None]:
results['LR'] = {}
validation = cross_val_score(LinearRegression(), X, y, scoring='neg_root_mean_squared_error')
results['LR']['default_score'] = np.round(np.min(-validation),3) # melhor resultado
results['LR']['best_score'] = results['LR']['default_score']

print('Linear Regression:', results['LR']['default_score'])

Linear Regression: 1.454


# L1

In [None]:
lasso_param = {
    'alpha':loguniform(10e-3, 10e3)
}

best_params = model_test(Lasso(), lasso_param, 'L1')
model_report('L1', best_params)

L1
	alpha: 0.02765
	best_score: 1.56746
	default_score: 1.73604


# L2

In [None]:

ridge_param = {
    'alpha':loguniform(10e-3, 10e3)
}

best_params = model_test(Ridge(), ridge_param, 'L2')
model_report('L2', best_params)

L2
	alpha: 301.93183
	best_score: 1.57219
	default_score: 1.45418


# SVM Linear

In [None]:
svm_param = {
    'epsilon':[0.1, 0.3],
    'C': loguniform(2e-15,2e15)
    }

best_params = model_test(LinearSVR(), svm_param, 'SVM_Linear')
model_report('SVM_Linear', best_params)

SVM_Linear
	C: 989026.14153
	epsilon: 0.3
	best_score: 1.9227
	default_score: 1.42582


# SVM com kernel RBF

In [None]:
svm_param = {
    'epsilon':[0.1, 0.3],
    'C':loguniform(2e-15,2e15),
    'gamma':loguniform(2e-9,2e3)
    }
 
best_params = model_test(SVR(), svm_param, 'SVM_RBF')
model_report('SVM_RBF', best_params)

# KNN

In [16]:
knn_params = {
    'n_neighbors':np.random.randint(1, 1000, 10)
    }

best_params = model_test(KNeighborsRegressor(), knn_params, 'KNN')
model_report('KNN', best_params)

KNN
	n_neighbors: 15
	best_score: 1.84637
	default_score: 1.63376
KNN
	n_neighbors: 26
	best_score: 1.83765
	default_score: 1.63376


# MLP

In [11]:
MLP_params = {
    'hidden_layer_sizes':(5,8,11,14,17,20,23)
    }

best_params = model_test(MLPRegressor(), MLP_params, 'MLP')
model_report('MLP', best_params)

MLP
	hidden_layer_sizes: 23
	best_score: 2.24748
	default_score: 1.86414


# Decision Tree

In [12]:
dtree_params = {
    'ccp_alpha':[random.uniform(0.0, 0.4) for i in range(10)]
    }

best_params = model_test(DecisionTreeRegressor(), dtree_params, 'D-Tree')
model_report('D-Tree', best_params)

D-Tree
	ccp_alpha: 0.03478
	best_score: 1.85151
	default_score: 2.1466


# Random Forest

In [13]:
rf_params = {
    'n_estimators':[10, 100, 1000],
    'max_features':[5, 10, 22]
    }

best_params = model_test(RandomForestRegressor(), rf_params, 'RF')
model_report('RF', best_params)

RF
	n_estimators: 100
	max_features: 10
	best_score: 1.62319
	default_score: 1.42919


# GBM

In [14]:
gbm_params = {
    'n_estimators': np.random.randint(5, 100, 10),
    'max_features':[random.uniform(0.01, 0.3) for i in range(10)],
    'max_depth':[2, 3]
    }

best_params = model_test(GradientBoostingRegressor(), gbm_params, 'GBM')
model_report('GBM', best_params)

GBM
	n_estimators: 87
	max_features: 0.18089
	max_depth: 3
	best_score: 1.63073
	default_score: 1.43869


# Final Report

In [15]:
pd.DataFrame.from_dict(results, orient='index')

Unnamed: 0,default_score,best_score
LR,1.454,1.454
L1,1.736037,1.567464
L2,1.454183,1.572187
SVM_Linear,1.425823,1.922697
SVM_RBF,1.502184,2.644222
KNN,1.633757,1.846374
MLP,1.864144,2.247483
D-Tree,2.146603,1.851509
RF,1.42919,1.623186
GBM,1.438692,1.630729
