# Autor: Natan Nobre Chaves
## Atividade 06

## 1. Implemente diferentes funções em Python, usando o NumPy, para calcular:
    a) Acurácia
    b) Precisão
    c) Recall
    d) F1-Measure
    e) MAE
    f) RMSE
---
Observações:

* Cada item acima deve ter uma função própria para calculá-lo.
* Todas as funções recebem como parâmetros de entrada y_true e y_pred
* As funções para cálculo da Precisão, Recall e F1-Measure devem retornar um único valor já com a métrica calculada baseada na média ponderada das classes.
* As funções podem gerar e usar a matriz de confusão usando o scikit-learn, mas não podem usar as métricas já implementadas por ele.

In [429]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn import metrics

In [430]:
def calc_tn_fp_fn_tp(y_true, y_pred):
    """Derive one-vs-rest confusion counts for every class.

    The confusion matrix is built with scikit-learn (rows are true labels,
    columns are predicted labels) and the per-class counts are read off it.

    Args:
        y_true: Ground-truth labels, as accepted by
            ``metrics.confusion_matrix``.
        y_pred: Predicted labels, same length as ``y_true``.

    Returns:
        A tuple ``(tn, fp, fn, tp, size_each_label)`` of 1-D NumPy arrays
        with one entry per class, in the row order of the confusion matrix.
        ``size_each_label`` is the number of true samples of each class
        (the row sums), used as the weight for weighted averages.
    """
    cm = metrics.confusion_matrix(y_true, y_pred)

    # Copy so the returned array does not alias the matrix diagonal.
    tp = np.diag(cm).copy()
    size_each_label = cm.sum(axis=1)
    fp = cm.sum(axis=0) - tp
    fn = size_each_label - tp
    # TN is everything outside the class row and the class column. Written as
    # total - tp - fp - fn so the diagonal cell is removed exactly once
    # (total - row_sum - col_sum would subtract it twice).
    tn = cm.sum() - tp - fp - fn

    return tn, fp, fn, tp, size_each_label
    

## a) Função da Acurácia

In [431]:
def calc_accuracy(y_true, y_pred):
    """Return the fraction of predictions equal to the true labels.

    Args:
        y_true: Array-like of ground-truth labels.
        y_pred: Array-like of predicted labels, same shape as ``y_true``.

    Returns:
        Number of matching positions divided by the number of samples
        (the first dimension of ``y_true``).
    """
    # Coerce first: plain lists have no ``.shape`` and ``list == list``
    # collapses to a single bool instead of an element-wise comparison.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.sum(y_true == y_pred) / y_true.shape[0]

## b) Função de Precisão

In [432]:
def calc_precision(y_true, y_pred):
    """Return the precision averaged over classes, weighted by support.

    Per-class precision is ``tp / (tp + fp)``; a class that was never
    predicted (``tp + fp == 0``) contributes a precision of 0.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, same length as ``y_true``.

    Returns:
        The weighted mean of the per-class precisions, where each class is
        weighted by its number of true samples.
    """
    _, fp, _, tp, size = calc_tn_fp_fn_tp(y_true, y_pred)

    # ``np.float`` was removed from NumPy; cast with ``astype(float)`` instead.
    predicted_positive = (tp + fp).astype(float)
    precision = np.divide(
        tp.astype(float),
        predicted_positive,
        out=np.zeros_like(predicted_positive),
        where=predicted_positive != 0,
    )
    return np.sum(precision * size) / np.sum(size)

## c) Função de Recall

In [433]:
def calc_recall(y_true, y_pred):
    """Return the recall averaged over classes, weighted by support.

    Per-class recall is ``tp / (tp + fn)``; a class with no true samples
    (``tp + fn == 0``, i.e. a label that only shows up in ``y_pred``)
    contributes a recall of 0 instead of NaN.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, same length as ``y_true``.

    Returns:
        The weighted mean of the per-class recalls, where each class is
        weighted by its number of true samples.
    """
    _, _, fn, tp, size = calc_tn_fp_fn_tp(y_true, y_pred)

    actual_positive = (tp + fn).astype(float)
    # Guarded division: 0/0 would yield NaN, and NaN * 0 is still NaN, which
    # would turn the whole weighted sum into NaN.
    recall = np.divide(
        tp.astype(float),
        actual_positive,
        out=np.zeros_like(actual_positive),
        where=actual_positive != 0,
    )
    return np.sum(recall * size) / np.sum(size)

## d) Função F1-Measure

In [434]:
def calc_f1_measure(y_true, y_pred):
    """Return the F1 score averaged over classes, weighted by support.

    Per-class F1 is the harmonic mean ``2 * p * r / (p + r)`` of precision
    and recall. Every division is guarded, so a class with an undefined
    precision, recall or F1 contributes 0.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, same length as ``y_true``.

    Returns:
        The weighted mean of the per-class F1 scores, where each class is
        weighted by its number of true samples.
    """
    _, fp, fn, tp, size = calc_tn_fp_fn_tp(y_true, y_pred)

    # ``np.float`` was removed from NumPy; cast with ``astype(float)`` instead.
    tp_float = tp.astype(float)
    predicted_positive = tp_float + fp
    actual_positive = tp_float + fn

    precision = np.divide(
        tp_float,
        predicted_positive,
        out=np.zeros_like(tp_float),
        where=predicted_positive != 0,
    )
    # Guarded as well: a label seen only in y_pred has tp + fn == 0.
    recall = np.divide(
        tp_float,
        actual_positive,
        out=np.zeros_like(tp_float),
        where=actual_positive != 0,
    )

    harmonic_den = precision + recall
    f1_measure = np.divide(
        2 * precision * recall,
        harmonic_den,
        out=np.zeros_like(harmonic_den),
        where=harmonic_den != 0,
    )
    return np.sum(f1_measure * size) / np.sum(size)

## e) MAE

In [435]:
def calc_mae(y_true, y_pred):
    """Return the mean absolute error between targets and predictions.

    Args:
        y_true: Array-like of ground-truth values.
        y_pred: Array-like of predicted values, same shape as ``y_true``.

    Returns:
        Sum of ``|y_true - y_pred|`` divided by the number of samples
        (the first dimension of ``y_true``).
    """
    # Work in float: lists cannot be subtracted, and unsigned integer arrays
    # would wrap around on negative differences.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.sum(np.abs(y_true - y_pred)) / y_true.shape[0]

## f) RMSE

In [436]:
def calc_rmse(y_true, y_pred):
    """Return the root mean squared error between targets and predictions.

    Args:
        y_true: Array-like of ground-truth values.
        y_pred: Array-like of predicted values, same shape as ``y_true``.

    Returns:
        Square root of the sum of squared differences divided by the number
        of samples (the first dimension of ``y_true``).
    """
    # Work in float: lists cannot be subtracted, and narrow integer dtypes
    # overflow when the difference is squared.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    mean_squared = np.sum((y_pred - y_true) ** 2) / y_true.shape[0]
    return mean_squared ** 0.5

## 2. Calcule Acurácia, Precisão, Recall e F1-Measure para sua solução da questão 2 da Lista 04. Caso não tenha feito a questão 2 da Lista 04 terá que fazê-la agora.

In [437]:
# Load the white-wine quality table; the file uses ';' as field separator.
wine = pd.read_csv("dataset/winequality-white.csv", sep=';')

In [438]:
# Target vector: the "quality" column.
y = wine['quality'].values
# Feature matrix: every other column. drop() returns a new frame, so `wine`
# keeps its "quality" column and this cell can be re-run without a KeyError
# (the previous `X = wine; del X['quality']` mutated `wine` through the alias).
X = wine.drop(columns='quality').values

In [439]:
# Randomly split the data 70% / 30%, stratified on the target.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    random_state=42,
    stratify=y,
)

In [440]:
# Create the kNN models: one distance-weighted classifier per k = 1..8.
quantidade_de_modelos = 8
modelokNN = [
    KNeighborsClassifier(n_neighbors=n_vizinhos, weights='distance')
    for n_vizinhos in range(1, quantidade_de_modelos + 1)
]

In [441]:
# Train every model on the training split.
for idx in range(quantidade_de_modelos):
    classificador = modelokNN[idx]
    classificador.fit(X_train, y_train)

In [442]:
# Test-split predictions of each model, in the same order as modelokNN.
y_pred = [
    np.array(modelokNN[idx].predict(X_test))
    for idx in range(quantidade_de_modelos)
]

## Acurácia

In [443]:
# Accuracy from the hand-written function, one line per model.
for idx in range(quantidade_de_modelos):
    acuracia = calc_accuracy(y_test, y_pred[idx])
    print(acuracia)

0.5544217687074829
0.5544217687074829
0.5653061224489796
0.5687074829931973
0.5727891156462585
0.5727891156462585
0.5870748299319728
0.5918367346938775


In [444]:
# Reference accuracy from scikit-learn, one line per model.
for idx in range(quantidade_de_modelos):
    acuracia_sklearn = metrics.accuracy_score(y_test, y_pred[idx])
    print(acuracia_sklearn)

0.5544217687074829
0.5544217687074829
0.5653061224489796
0.5687074829931973
0.5727891156462585
0.5727891156462585
0.5870748299319728
0.5918367346938775


## Precisão

In [445]:
# Weighted precision from the hand-written function, one line per model.
for idx in range(quantidade_de_modelos):
    precisao = calc_precision(y_test, y_pred[idx])
    print(precisao)

0.551372409099045
0.551372409099045
0.5588608045587584
0.5601667444678968
0.5656872841104581
0.5655225087609819
0.5823927123481587
0.5885700374762742


In [446]:
# Reference weighted precision from scikit-learn, one line per model.
for idx in range(quantidade_de_modelos):
    precisao_sklearn = metrics.precision_score(
        y_test, y_pred[idx], average='weighted', zero_division=0
    )
    print(precisao_sklearn)

0.551372409099045
0.551372409099045
0.5588608045587584
0.5601667444678968
0.5656872841104581
0.5655225087609819
0.5823927123481587
0.5885700374762742


## Recall

In [447]:
# Weighted recall from the hand-written function, one line per model.
for idx in range(quantidade_de_modelos):
    revocacao = calc_recall(y_test, y_pred[idx])
    print(revocacao)

0.5544217687074829
0.5544217687074829
0.5653061224489796
0.5687074829931973
0.5727891156462585
0.5727891156462585
0.5870748299319728
0.5918367346938775


In [448]:
# Reference weighted recall from scikit-learn, one line per model.
for idx in range(quantidade_de_modelos):
    revocacao_sklearn = metrics.recall_score(
        y_test, y_pred[idx], average='weighted', zero_division=0
    )
    print(revocacao_sklearn)

0.5544217687074829
0.5544217687074829
0.5653061224489796
0.5687074829931973
0.5727891156462585
0.5727891156462585
0.5870748299319728
0.5918367346938775


## F1-Measure

In [449]:
# Weighted F1 from the hand-written function, one line per model.
for idx in range(quantidade_de_modelos):
    f1 = calc_f1_measure(y_test, y_pred[idx])
    print(f1)

0.5525828044529364
0.5525828044529364
0.5608003403781048
0.5620532942136799
0.5659486037984914
0.5653618280396214
0.5786898439574746
0.5824994441038994


In [450]:
# Reference weighted F1 from scikit-learn, one line per model.
for idx in range(quantidade_de_modelos):
    f1_sklearn = metrics.f1_score(
        y_test, y_pred[idx], average='weighted', zero_division=0
    )
    print(f1_sklearn)

0.5525828044529364
0.5525828044529364
0.5608003403781048
0.5620532942136799
0.5659486037984914
0.5653618280396214
0.5786898439574746
0.5824994441038994


## 3. Calcule MAE e RMSE para sua solução da questão 3.3 da Lista 05. Caso não tenha feito a questão 3.3 da Lista 05 terá que fazê-la agora.

In [451]:
# Fit a kNN regressor (k = 5) on the training split and predict the test
# split. Note that y_pred is rebound here to the regressor's predictions.
k = 5
kNNr_model = KNeighborsRegressor(n_neighbors=k).fit(X_train, y_train)
y_pred = kNNr_model.predict(X_test)
print(y_pred)

[6.4 6.4 6.4 ... 5.6 6.4 6.2]


## MAE

In [452]:
# MAE of the regressor, computed with the hand-written function.
mae = calc_mae(y_test, y_pred)
print(mae)

0.6337414965986394


In [453]:
# Reference MAE from scikit-learn.
mae_sklearn = metrics.mean_absolute_error(y_test, y_pred)
print(mae_sklearn)

0.6337414965986394


## RMSE

In [454]:
# RMSE of the regressor, computed with the hand-written function.
rmse = calc_rmse(y_test, y_pred)
print(rmse)

0.8166298756116486


In [455]:
# Reference RMSE from scikit-learn. The `squared=False` keyword of
# mean_squared_error was deprecated in scikit-learn 1.4 and later removed,
# so take the square root of the MSE explicitly; this works on every version.
print(np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

0.8166298756116486
