# Aprendizado Supervisionado

### Classificação & Regressão

#### 1 - Carregando bibliotecas

In [None]:
%matplotlib notebook
import numpy as np
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt


from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import neighbors

from matplotlib.colors import ListedColormap

# Four-colour palettes shared by the plots in this notebook:
# the pale one fills decision regions, the saturated one colours data points.
cmap_light = ListedColormap(
    colors=['#FFFFAA', '#AAFFAA', '#AAAAFF', '#EFEFEF'],
)
cmap_bold = ListedColormap(
    colors=['#FFFF00', '#00FF00', '#0000FF', '#000000'],
)


#### 2 - Conjunto de dados sintético para classificação binária

In [None]:
from sklearn.datasets import make_classification

# Synthetic binary classification problem: 100 samples, two informative
# features, one cluster per class, fixed seed so the data is reproducible.
X_C2, y_C2 = make_classification(
    n_samples=100,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    flip_y=0.2,
    class_sep=0.5,
    random_state=0,
)

# Feature 0 against feature 1, coloured by class label.
plt.figure()
plt.scatter(X_C2[:, 0], X_C2[:, 1], c=y_C2, marker='o', cmap=cmap_bold)
plt.show()

#### 3 - KNN binário

In [None]:
def plot_two_class_knn(X, y, n_neighbors, X_test, y_test, mesh_step_size=.01):
    """Fit a k-NN classifier on two-feature data, plot its decision regions and print its scores.

    Parameters
    ----------
    X : numpy array with two columns
        Training features. Column 0 is drawn on the horizontal axis and
        column 1 on the vertical axis.
    y : array-like
        Training labels; also used to colour the training points.
    n_neighbors : int
        Number of neighbours passed to ``KNeighborsClassifier``.
    X_test, y_test : array-like
        Held-out data, used only for the printed test score.
    mesh_step_size : float, default .01
        Spacing of the grid on which the classifier is evaluated to paint the
        background. Larger values evaluate fewer grid points.

    Returns
    -------
    None
        The function opens a new figure, prints the training and test scores
        and calls ``plt.show()``.
    """
    clf = neighbors.KNeighborsClassifier(n_neighbors)
    clf.fit(X, y)

    # Grid covering the training data plus a margin of 1 unit on every side.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, mesh_step_size),
                         np.arange(y_min, y_max, mesh_step_size))

    # Predict a class for every grid node, then restore the grid shape so the
    # predictions can be drawn as a coloured background.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
    plt.scatter(X[:, 0], X[:, 1], s=50, c=y, cmap=cmap_bold, edgecolor='black')
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    # Label the figure with k so that successive calls can be told apart.
    plt.title('KNN (k = {})'.format(n_neighbors))

    print('Escore treinamento:', clf.score(X, y))
    print('Escore teste:',clf.score(X_test, y_test))

    plt.show()

# Hold out part of the binary dataset (default split proportions, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X_C2, y_C2, random_state=0)

# Same training data, three neighbourhood sizes: each call draws one figure
# and prints one pair of scores.
for k_value in (1, 11, 30):
    plot_two_class_knn(X_train, y_train, k_value, X_test, y_test)

#### 4 - Conjunto de dados sintético para regressão simples

In [None]:
from sklearn.datasets import make_regression

# Synthetic regression problem: 100 samples with a single informative
# feature, a constant offset (bias) of 150 and Gaussian noise; the seed is
# fixed so the data is reproducible.
X_R1, y_R1 = make_regression(n_samples=100, n_features=1,
                             n_informative=1, bias=150.0,
                             noise=30, random_state=0)

# Target against the single feature.
plt.figure()
plt.scatter(X_R1, y_R1, marker='o', s=50)
plt.show()

#### 5 - Regressão com KNN

In [None]:
from sklearn.neighbors import KNeighborsRegressor

X_train, X_test, y_train, y_test = train_test_split(
    X_R1, y_R1, random_state=0)

# 5-nearest-neighbours regressor fitted on the training split.
knnreg = KNeighborsRegressor(n_neighbors=5)
knnreg.fit(X_train, y_train)

# Predictions for the held-out samples, followed by the model's score on them.
print(knnreg.predict(X_test))
print(knnreg.score(X_test, y_test))

#### 6 - Variando o parâmetro K na Regressão

In [None]:
# Keep every 5th sample of the regression data, then split it.
X_train, X_test, y_train, y_test = train_test_split(
    X_R1[::5], y_R1[::5], random_state=0)

# 50 evenly spaced query points in [-3, 3], shaped as a single-column matrix.
X_predict_input = np.linspace(-3, 3, 50).reshape(-1, 1)

knnreg = KNeighborsRegressor(n_neighbors=8)
knnreg.fit(X_train, y_train)
y_predict_output = knnreg.predict(X_predict_input)

plt.figure()
plt.plot(X_predict_input, y_predict_output, '^')  # model predictions
plt.plot(X_train, y_train, 'o')                   # training samples

# Score on the training split, then on the held-out split.
print(knnreg.score(X_train, y_train))
print(knnreg.score(X_test, y_test))

plt.show()

#### 7 - $R^2$ escore


In [None]:
X_train, X_test, y_train, y_test = train_test_split(
    X_R1, y_R1, random_state=0)

# 500 evenly spaced query points in [-3, 3], used to draw the fitted curve.
X_predict_input = np.linspace(-3, 3, 500).reshape(-1, 1)

# k values listed for this experiment: 1, 3, 7, 15, 55 (15 is the one used here)
knnreg = KNeighborsRegressor(n_neighbors=15)
knnreg.fit(X_train, y_train)
y_predict_output = knnreg.predict(X_predict_input)

plt.figure()
plt.plot(X_predict_input, y_predict_output)  # fitted curve
plt.plot(X_train, y_train, 'o')              # training samples
plt.plot(X_test, y_test, '^')                # held-out samples

print('Escore do treinamento:', knnreg.score(X_train, y_train))
print('Escore do teste:', knnreg.score(X_test, y_test))

plt.show()


## Regressão com modelos lineares

Vetor de características: $x=\left(x_{0}, x_{1}, \ldots, x_{n}\right)$

Saída prevista: $\hat{y}=\widehat{w_{0}} x_{0}+\widehat{w_{1}} x_{1}+\cdots+\widehat{w_{n}} x_{n}+\hat{b}$

Parâmetros a serem estimados:

1. $\widehat{\boldsymbol{w}}=(\widehat{w_{0}}, \cdots, \widehat{w_{n}})$ : coeficientes do modelo (pesos das características)

2. $\hat{b}$ : viés (bias) constante

#### 8 - Regressão linear em dados sintéticos

In [None]:
from sklearn.linear_model import LinearRegression

X_train, X_test, y_train, y_test = train_test_split(
    X_R1, y_R1, random_state=0)

# Linear regression fitted on the training split.
linreg = LinearRegression()
linreg.fit(X_train, y_train)

# Learned parameters.
print('Coeff (w):', linreg.coef_)
print('Intercept (b):', linreg.intercept_)

# Score on each split.
score_train = linreg.score(X_train, y_train)
score_test = linreg.score(X_test, y_test)
print('R-2 score (treinamento):', score_train)
print('R-2 score (teste):', score_test)



#### 9 - Mínimos Quadrados

Minimizar $R S S(\boldsymbol{w}, b)=\sum_{i=1}^{N}\left(y_{i}-\left(\boldsymbol{w} \cdot \boldsymbol{x}_{i}+b\right)\right)^{2}$

$\hat{y}=w_{0} x_{0}+b$

$w_{0}$ = linreg.coef_

b = linreg.intercept_

In [None]:
plt.figure(figsize=(5, 4))

# All samples of the synthetic regression data.
plt.scatter(X_R1, y_R1, marker='o')

# The fitted line w0 * x + b evaluated at the sample positions, drawn in red.
y_line = linreg.coef_ * X_R1 + linreg.intercept_
plt.plot(X_R1, y_line, 'r-')

plt.show()

#### 10 - Carregando a base de dados sobre crimes nos EUA

In [None]:
# Positions of the columns to keep (original note: drops features with low
# relevance and with inconsistencies). 89 positions in total.
columns_to_keep = [5, 6] + list(range(11, 26)) + list(range(32, 103)) + [145]

# Read the comma-separated file treating '?' as missing, keep the selected
# columns and drop every row that still has a missing value.
crime = (
    pd.read_table('./CSV/CommViolPredUnnormalizedData.txt', sep=',', na_values='?')
    .iloc[:, columns_to_keep]
    .dropna()
)

# The first 88 kept columns are the features; ViolentCrimesPerPop is the target (y).
X_crime = crime.iloc[:, range(88)]
y_crime = crime['ViolentCrimesPerPop']

crime.head()

#### 11 - Regressão linear na base de crimes

In [None]:
X_train, X_test, y_train, y_test = train_test_split(
    X_crime, y_crime, random_state=0)

# Linear regression on the unscaled crime features.
linreg = LinearRegression()
linreg.fit(X_train, y_train)

# Learned parameters.
print('Coeff (w):', linreg.coef_)
print('Intercept (b):', linreg.intercept_)

# Score on each split.
score_train = linreg.score(X_train, y_train)
score_test = linreg.score(X_test, y_test)
print('R-2 score (treinamento):', score_train)
print('R-2 score (teste):', score_test)

#### 12 - Regressão de Cume

$R S S_{R I D G E}(\boldsymbol{w}, b)=\sum_{i=1}^{N}\left(y_{i}-\left(\boldsymbol{w} \cdot \boldsymbol{x}_{i}+b\right)\right)^{2}+\alpha \sum_{j=1}^{p} w_{j}^{2}$

In [None]:
from sklearn.linear_model import Ridge
X_train, X_test, y_train, y_test = train_test_split(
    X_crime, y_crime, random_state=0)

# Ridge regression on the unscaled features, with a small penalty (alpha = 1e-3).
linridge = Ridge(alpha=1e-3)
linridge.fit(X_train, y_train)

# Learned parameters.
print('Coeff (w):', linridge.coef_)
print('Intercept (b):', linridge.intercept_)

# Score on each split.
score_train = linridge.score(X_train, y_train)
score_test = linridge.score(X_test, y_test)
print('R-2 score (treinamento):', score_train)
print('R-2 score (teste):', score_test)


#### 13 - Regressão de Cume com normalização de característica

In [None]:
from sklearn.preprocessing import MinMaxScaler
X_train, X_test, y_train, y_test = train_test_split(
    X_crime, y_crime, random_state=0)

# Fit the scaler on the training split only, then apply the same scaling to
# both splits.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Ridge regression on the scaled features.
linridge = Ridge(alpha=20.0)
linridge.fit(X_train_scaled, y_train)

# Learned parameters.
print('Coeff (w):', linridge.coef_)
print('Intercept (b):', linridge.intercept_)

# Score on each split (scaled inputs).
score_train = linridge.score(X_train_scaled, y_train)
score_test = linridge.score(X_test_scaled, y_test)
print('R-2 score (treinamento):', score_train)
print('R-2 score (teste):', score_test)

#### 14 - Regressão de Cume, normalização e o parâmetro alpha

In [None]:
# Refit Ridge on the scaled crime data for a range of penalties and report,
# for each one, how many coefficients exceed 1.0 in magnitude and the score
# on both splits. Uses X_train_scaled / X_test_scaled from the previous cell.
ridge_report = ('Alpha = {:.2f}\ncoeff > 1.0: {}, '
                'R-2 treinamento: {:.2f}, R-2 teste: {:.2f}\n')

for this_alpha in [0, 1, 10, 20, 50, 100, 1000]:
    linridge = Ridge(alpha=this_alpha)
    linridge.fit(X_train_scaled, y_train)

    r2_train = linridge.score(X_train_scaled, y_train)
    r2_test = linridge.score(X_test_scaled, y_test)
    num_coeff_bigger = np.sum(np.abs(linridge.coef_) > 1.0)

    print(ridge_report.format(this_alpha, num_coeff_bigger, r2_train, r2_test))

#### 15 - Regressão Lasso com normalização

$R S S_{L A S S O}(\boldsymbol{w}, b)=\sum_{i=1}^{N}\left(y_{i}-\left(\boldsymbol{w} \cdot \boldsymbol{x}_{i}+b\right)\right)^{2}+\alpha \sum_{j=1}^{p}\left|w_{j}\right|$

In [None]:
from sklearn.linear_model import Lasso
from sklearn.preprocessing import MinMaxScaler
X_train, X_test, y_train, y_test = train_test_split(
    X_crime, y_crime, random_state=0)

# Fit the scaler on the training split only, then apply the same scaling to
# both splits.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Lasso regression on the scaled features.
linlasso = Lasso(alpha=2.0, max_iter=10000)
linlasso.fit(X_train_scaled, y_train)

print('Lasso intercept: {}'.format(linlasso.intercept_))
print('Lasso coeff:\n{}'.format(linlasso.coef_))

# Number of coefficients that are not exactly zero.
n_nonzero = np.sum(linlasso.coef_ != 0)
print('Características não zeradas: {}'.format(n_nonzero))

score_train = linlasso.score(X_train_scaled, y_train)
score_test = linlasso.score(X_test_scaled, y_test)
print('R-2 score (treinamento): {:.3f}'.format(score_train))
print('R-2 score (teste): {:.3f}\n'.format(score_test))

print('Características com peso diferente de zero:')

# Pair every column name with its coefficient, keep the non-zero ones and
# list them from largest to smallest absolute weight.
nonzero_weights = [
    (column, weight)
    for column, weight in zip(list(X_crime), linlasso.coef_)
    if weight != 0
]
for column, weight in sorted(nonzero_weights, key=lambda pair: -abs(pair[1])):
    print('\t{}, {:.3f}'.format(column, weight))

#### 16 - Regressão Lasso com normalização e o parâmetro alpha

In [None]:
# Refit Lasso on the scaled crime data for a range of penalties and report,
# for each one, the number of non-zero coefficients and the score on both
# splits. Uses X_train_scaled / X_test_scaled from the previous cell.
lasso_report = ('Alpha = {:.2f}\nCaracterísticas: {}, R-2 treinamento: {:.2f}, '
                'R-2 teste: {:.2f}\n')

for alpha in [0.5, 1, 2, 3, 5, 10, 20, 50]:
    linlasso = Lasso(alpha=alpha, max_iter=10000)
    linlasso.fit(X_train_scaled, y_train)

    r2_train = linlasso.score(X_train_scaled, y_train)
    r2_test = linlasso.score(X_test_scaled, y_test)
    n_nonzero = np.sum(linlasso.coef_ != 0)

    print(lasso_report.format(alpha, n_nonzero, r2_train, r2_test))

#### 17 - Base de dados para regressões mais complexas

In [None]:
from sklearn.datasets import make_friedman1
# Friedman #1 synthetic regression data: 100 samples, 7 features, fixed seed.
X_F1, y_F1 = make_friedman1(
    n_samples=100,
    n_features=7,
    random_state=0,
)

# Target against the feature stored in column 2.
plt.figure()
plt.scatter(X_F1[:, 2], y_F1, marker='o', s=50)
plt.show()

#### 18 - Regressão polinomial

$x=\left(x_{0}, x_{1}\right)$ $\longrightarrow x^{\prime}=\left(x_{0}, x_{1}, x_{0}^{2}, x_{0} x_{1}, x_{1}^{2}\right)$

$\hat{y}=\widehat{w}_{0} x_{0}+\widehat{w}_{1} x_{1}+\widehat{w}_{00} x_{0}^{2}+\widehat{w}_{01} x_{0} x_{1}+\widehat{w}_{11} x_{1}^{2}+b$


In [None]:
from sklearn.preprocessing import PolynomialFeatures


# --- 1) Linear regression on the original features ------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X_F1, y_F1, random_state=0)
linreg = LinearRegression()
linreg.fit(X_train, y_train)

score_train = linreg.score(X_train, y_train)
score_test = linreg.score(X_test, y_test)
print('Modelo linear coeff (w): {}'.format(linreg.coef_))
print('Modelo linear intercept (b): {:.3f}'.format(linreg.intercept_))
print('R-squared score (treinamento): {:.3f}'.format(score_train))
print('R-squared score (teste): {:.3f}'.format(score_test))

# --- 2) Linear regression on degree-2 polynomial features -----------------
print('\nTransformação polinomial quadrática\n')
poly = PolynomialFeatures(degree=2)
X_F1_poly = poly.fit_transform(X_F1)

X_train, X_test, y_train, y_test = train_test_split(
    X_F1_poly, y_F1, random_state=0)
linreg = LinearRegression()
linreg.fit(X_train, y_train)

score_train = linreg.score(X_train, y_train)
score_test = linreg.score(X_test, y_test)
print('Polinomial coeff (w):\n{}'.format(linreg.coef_))
print('Polinomial intercept (b): {:.3f}'.format(linreg.intercept_))
print('Polinomial R-2 score (treinamento): {:.3f}'.format(score_train))
print('Polinomial R-2 score (teste): {:.3f}\n'.format(score_test))

# --- 3) Ridge regression (default alpha) on the same polynomial features --
X_train, X_test, y_train, y_test = train_test_split(
    X_F1_poly, y_F1, random_state=0)
linreg = Ridge()
linreg.fit(X_train, y_train)

score_train = linreg.score(X_train, y_train)
score_test = linreg.score(X_test, y_test)
print('Polinomial + Cume coeff (w):\n{}'.format(linreg.coef_))
print('Polinomial + Cume intercept (b): {:.3f}'.format(linreg.intercept_))
print('Polinomial + Cume R-2 score (treinamento): {:.3f}'.format(score_train))
print('Polinomial + Cume R-2 score (teste): {:.3f}'.format(score_test))