# Autor: Natan Nobre Chaves
## Atividade 06

## 1. Implemente diferentes funções em Python, usando o NumPy, para calcular:
    a) Acurácia
    b) Precisão
    c) Recall
    d) F1-Measure
    e) MAE
    f) RMSE
---
Observações:

* Cada item acima deve ter uma função própria para calculá-lo.
* Todas as funções recebem como parâmetros de entrada y_true e y_pred
* As funções para cálculo da Precisão, Recall e F1-Measure devem retornar um único valor: a média da métrica calculada por classe, ponderada pelo número de amostras verdadeiras de cada classe.
* As funções podem gerar e usar a matriz de confusão do scikit-learn, mas não podem usar as métricas já implementadas por ele.

In [348]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn import metrics

In [349]:
def calc_tn_fp_fn_tp(y_true, y_pred):
    """Compute one-vs-rest confusion counts for every class label.

    A confusion matrix is built with NumPy: rows are true labels, columns are
    predicted labels, and both follow the sorted union of the labels found in
    ``y_true`` and ``y_pred`` (the default layout of
    ``sklearn.metrics.confusion_matrix``).

    Args:
        y_true: Array-like of ground-truth labels (flattened to 1-D).
        y_pred: Array-like of predicted labels, same length as ``y_true``.

    Returns:
        Tuple ``(tn, fp, fn, tp, size_each_label)`` of integer arrays with one
        entry per label. ``size_each_label`` is the number of true samples of
        each label (the row sums of the confusion matrix).

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` have different lengths.
    """
    true_labels = np.asarray(y_true).ravel()
    pred_labels = np.asarray(y_pred).ravel()
    n_samples = true_labels.shape[0]
    if pred_labels.shape[0] != n_samples:
        raise ValueError("y_true and y_pred must have the same number of samples")

    # Encode every label as its position in the sorted union of labels.
    labels, codes = np.unique(
        np.concatenate((true_labels, pred_labels)), return_inverse=True
    )
    codes = codes.ravel()
    n_labels = labels.shape[0]

    # np.add.at accumulates repeated (row, col) pairs, unlike fancy assignment.
    cm = np.zeros((n_labels, n_labels), dtype=np.int64)
    np.add.at(cm, (codes[:n_samples], codes[n_samples:]), 1)

    size_each_label = cm.sum(axis=1)   # true samples per label
    predicted_each_label = cm.sum(axis=0)
    tp = np.diag(cm).copy()            # copy: the diagonal may be a read-only view
    fp = predicted_each_label - tp
    fn = size_each_label - tp
    # The row sum and the column sum both contain the diagonal cell, so it has
    # to be added back once; otherwise TN is underestimated by TP.
    tn = cm.sum() - size_each_label - predicted_each_label + tp

    return tn, fp, fn, tp, size_each_label
    

## a) Função da Acurácia

In [350]:
def calc_accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Args:
        y_true: Array-like of ground-truth labels (flattened to 1-D).
        y_pred: Array-like of predicted labels, same length as ``y_true``.

    Returns:
        Number of matching positions divided by the number of samples.

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` have different lengths.
    """
    # Coerce first so lists, tuples and pandas objects are compared
    # element-wise instead of as whole containers.
    true_labels = np.asarray(y_true).ravel()
    pred_labels = np.asarray(y_pred).ravel()
    if true_labels.shape[0] != pred_labels.shape[0]:
        raise ValueError("y_true and y_pred must have the same number of samples")
    return np.sum(true_labels == pred_labels) / true_labels.shape[0]

## b) Função de Precisão

In [351]:
def calc_precision(y_true, y_pred):
    """Return the support-weighted average precision over all classes.

    Per-class precision is ``tp / (tp + fp)``; a class that was never
    predicted (``tp + fp == 0``) gets precision 0. The per-class values are
    averaged using the number of true samples of each class as weights.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.

    Returns:
        The weighted-average precision as a single float.
    """
    _, fp, _, tp, size = calc_tn_fp_fn_tp(y_true, y_pred)
    # astype(float) replaces np.vectorize(np.float): the np.float alias was
    # removed in NumPy 1.24 and raised AttributeError.
    true_pos = tp.astype(float)
    predicted_pos = (tp + fp).astype(float)
    precision = np.divide(
        true_pos,
        predicted_pos,
        out=np.zeros_like(true_pos),
        where=predicted_pos != 0,
    )
    return np.sum(precision * size) / np.sum(size)

## c) Função de Recall

In [352]:
def calc_recall(y_true, y_pred):
    """Return the support-weighted average recall over all classes.

    Per-class recall is ``tp / (tp + fn)``; a class with no true samples
    (``tp + fn == 0``, i.e. a label that only appears in ``y_pred``) gets
    recall 0 instead of NaN, so it cannot poison the weighted mean.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.

    Returns:
        The weighted-average recall as a single float.
    """
    _, _, fn, tp, size = calc_tn_fp_fn_tp(y_true, y_pred)
    true_pos = tp.astype(float)
    actual_pos = (tp + fn).astype(float)
    # Guarded division, matching the zero-division handling of calc_precision.
    recall = np.divide(
        true_pos,
        actual_pos,
        out=np.zeros_like(true_pos),
        where=actual_pos != 0,
    )
    return np.sum(recall * size) / np.sum(size)

## d) Função de F1-Measure

In [353]:
def calc_f1_measure(y_true, y_pred):
    """Return the support-weighted average F1 score over all classes.

    Per-class F1 is the harmonic mean ``2 * p * r / (p + r)`` of precision
    ``p`` and recall ``r``. Any undefined ratio (zero denominator) counts as
    0. The per-class values are averaged using the number of true samples of
    each class as weights.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.

    Returns:
        The weighted-average F1 score as a single float.
    """
    _, fp, fn, tp, size = calc_tn_fp_fn_tp(y_true, y_pred)
    # astype(float) replaces np.vectorize(np.float): the np.float alias was
    # removed in NumPy 1.24 and raised AttributeError.
    true_pos = tp.astype(float)
    predicted_pos = (tp + fp).astype(float)
    actual_pos = (tp + fn).astype(float)

    precision = np.divide(
        true_pos,
        predicted_pos,
        out=np.zeros_like(true_pos),
        where=predicted_pos != 0,
    )
    # Guarded as well: a label present only in y_pred has no true samples.
    recall = np.divide(
        true_pos,
        actual_pos,
        out=np.zeros_like(true_pos),
        where=actual_pos != 0,
    )

    numerator = 2 * precision * recall
    denominator = precision + recall
    f1_measure = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=denominator != 0,
    )
    return np.sum(f1_measure * size) / np.sum(size)

## e) MAE

In [354]:
def calc_mae(y_true, y_pred):
    """Return the mean absolute error between targets and predictions.

    Args:
        y_true: Array-like of true numeric values (flattened to 1-D).
        y_pred: Array-like of predicted values, same length as ``y_true``.

    Returns:
        Sum of ``|y_true - y_pred|`` divided by the number of samples.

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` have different lengths.
    """
    # Converting to float accepts plain sequences and prevents wrap-around
    # when the inputs are unsigned-integer arrays.
    true_values = np.asarray(y_true, dtype=float).ravel()
    pred_values = np.asarray(y_pred, dtype=float).ravel()
    if true_values.shape[0] != pred_values.shape[0]:
        raise ValueError("y_true and y_pred must have the same number of samples")
    return np.sum(np.abs(true_values - pred_values)) / true_values.shape[0]

## f) RMSE

In [355]:
def calc_rmse(y_true, y_pred):
    """Return the root mean squared error between targets and predictions.

    Args:
        y_true: Array-like of true numeric values (flattened to 1-D).
        y_pred: Array-like of predicted values, same length as ``y_true``.

    Returns:
        Square root of the mean of ``(y_pred - y_true) ** 2``.

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` have different lengths.
    """
    # Converting to float accepts plain sequences and prevents integer
    # wrap-around/overflow in the subtraction and the square.
    true_values = np.asarray(y_true, dtype=float).ravel()
    pred_values = np.asarray(y_pred, dtype=float).ravel()
    if true_values.shape[0] != pred_values.shape[0]:
        raise ValueError("y_true and y_pred must have the same number of samples")
    mean_squared_error = np.sum((pred_values - true_values) ** 2) / true_values.shape[0]
    return mean_squared_error ** 0.5

## 2. Calcule Acurácia, Precisão, Recall e F1-Measure para sua solução da questão 2 da Lista 04. Caso não tenha feito a questão 2 da Lista 04 terá que fazê-la agora.

In [356]:
# Load the white-wine quality CSV; its fields are separated by ';'.
wine = pd.read_csv("dataset/winequality-white.csv", delimiter=';')

In [357]:
# Separate the target vector from the feature matrix.
# `pop` returns the 'quality' column and removes it from `wine` in place,
# so every remaining column of `wine` is a feature.
y = wine.pop('quality').values
X = wine.values

In [358]:
# Randomly split the data into 70% train / 30% test.
# stratify=y splits in a stratified fashion using the quality labels, and
# random_state=42 makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=42, stratify=y)

In [359]:
# Build one kNN classifier for each neighbourhood size
# k = 1 .. quantidade_de_modelos (modelokNN[i] uses k = i + 1).
quantidade_de_modelos = 50
modelokNN = [
    KNeighborsClassifier(n_neighbors=k)
    for k in range(1, quantidade_de_modelos + 1)
]

In [360]:
# Fit every classifier on the training split.
for modelo in modelokNN:
    modelo.fit(X_train, y_train)

In [361]:
# Predict the test split with each fitted model;
# y_pred[i] holds the predictions of modelokNN[i].
y_pred = [np.array(modelo.predict(X_test)) for modelo in modelokNN]

## Acurácia

In [362]:
# Accuracy of every kNN model, computed with calc_accuracy.
for predictions in y_pred:
    print(calc_accuracy(y_test, predictions))

0.5544217687074829
0.47891156462585033
0.45918367346938777
0.47210884353741495
0.46462585034013604
0.4605442176870748
0.45714285714285713
0.4748299319727891
0.4639455782312925
0.46530612244897956
0.4448979591836735
0.4530612244897959
0.4414965986394558
0.42857142857142855
0.44013605442176873
0.4340136054421769
0.4346938775510204
0.42993197278911566
0.43945578231292515
0.44013605442176873
0.43537414965986393
0.43333333333333335
0.4387755102040816
0.44081632653061226
0.43197278911564624
0.4421768707482993
0.4380952380952381
0.4387755102040816
0.4448979591836735
0.43945578231292515
0.43673469387755104
0.43945578231292515
0.4421768707482993
0.44421768707482995
0.4496598639455782
0.44829931972789117
0.44761904761904764
0.45170068027210886
0.45510204081632655
0.45034013605442175
0.45374149659863944
0.4435374149659864
0.45102040816326533
0.4530612244897959
0.4557823129251701
0.45782312925170066
0.4564625850340136
0.4605442176870748
0.45374149659863944
0.46122448979591835


In [363]:
# Reference values from scikit-learn, for comparison with calc_accuracy.
for predictions in y_pred:
    print(metrics.accuracy_score(y_test, predictions))

0.5544217687074829
0.47891156462585033
0.45918367346938777
0.47210884353741495
0.46462585034013604
0.4605442176870748
0.45714285714285713
0.4748299319727891
0.4639455782312925
0.46530612244897956
0.4448979591836735
0.4530612244897959
0.4414965986394558
0.42857142857142855
0.44013605442176873
0.4340136054421769
0.4346938775510204
0.42993197278911566
0.43945578231292515
0.44013605442176873
0.43537414965986393
0.43333333333333335
0.4387755102040816
0.44081632653061226
0.43197278911564624
0.4421768707482993
0.4380952380952381
0.4387755102040816
0.4448979591836735
0.43945578231292515
0.43673469387755104
0.43945578231292515
0.4421768707482993
0.44421768707482995
0.4496598639455782
0.44829931972789117
0.44761904761904764
0.45170068027210886
0.45510204081632655
0.45034013605442175
0.45374149659863944
0.4435374149659864
0.45102040816326533
0.4530612244897959
0.4557823129251701
0.45782312925170066
0.4564625850340136
0.4605442176870748
0.45374149659863944
0.46122448979591835


## Precisão

In [364]:
# Weighted precision of every kNN model, computed with calc_precision.
for predictions in y_pred:
    print(calc_precision(y_test, predictions))

0.551372409099045
0.5039397069455999
0.46050698583322397
0.4614086390928045
0.45136332103547094
0.453793921925357
0.44517746665668884
0.46145249820488
0.4465121070184717
0.44695008130433667
0.42271579740539283
0.4268376233273899
0.41456510175530753
0.387687305787882
0.39702019846550535
0.3924246205176857
0.3897171693552583
0.38350079056305186
0.39639284527884555
0.3990620002785731
0.3938278098780971
0.3892615375060606
0.3954150496954499
0.39103856496369527
0.4153095010151393
0.39290484546996335
0.3799826722242763
0.37979100942511984
0.38897844992167174
0.4128193372263136
0.37536798964007395
0.3786501131592932
0.3812873235904552
0.3890583074818012
0.38816569670413287
0.3886876148790183
0.38400734136554754
0.38935323835748037
0.3940175288101555
0.3901900455745082
0.39314407703926213
0.3810962243590468
0.3915772806150906
0.39565156486665864
0.3956877670316268
0.3994500571280436
0.3985008743027983
0.39560581595091204
0.39176429375624544
0.40712608554579094


In [365]:
# Reference values from scikit-learn, for comparison with calc_precision.
for predictions in y_pred:
    print(
        metrics.precision_score(
            y_test, predictions, average='weighted', zero_division=0
        )
    )

0.551372409099045
0.5039397069455999
0.46050698583322397
0.4614086390928045
0.45136332103547094
0.453793921925357
0.44517746665668884
0.46145249820488
0.4465121070184717
0.44695008130433667
0.42271579740539283
0.4268376233273899
0.41456510175530753
0.387687305787882
0.39702019846550535
0.3924246205176857
0.3897171693552583
0.38350079056305186
0.39639284527884555
0.3990620002785731
0.3938278098780971
0.3892615375060606
0.3954150496954499
0.39103856496369527
0.4153095010151393
0.39290484546996335
0.3799826722242763
0.37979100942511984
0.38897844992167174
0.4128193372263136
0.37536798964007395
0.3786501131592932
0.3812873235904552
0.3890583074818012
0.38816569670413287
0.3886876148790183
0.38400734136554754
0.38935323835748037
0.3940175288101555
0.3901900455745082
0.39314407703926213
0.3810962243590468
0.3915772806150906
0.39565156486665864
0.3956877670316268
0.3994500571280436
0.3985008743027983
0.39560581595091204
0.39176429375624544
0.40712608554579094


## Recall

In [366]:
# Weighted recall of every kNN model, computed with calc_recall.
for predictions in y_pred:
    print(calc_recall(y_test, predictions))

0.5544217687074829
0.47891156462585033
0.45918367346938777
0.47210884353741495
0.46462585034013604
0.4605442176870748
0.45714285714285713
0.4748299319727891
0.4639455782312925
0.46530612244897956
0.4448979591836735
0.4530612244897959
0.4414965986394558
0.42857142857142855
0.44013605442176873
0.4340136054421769
0.4346938775510204
0.42993197278911566
0.43945578231292515
0.44013605442176873
0.43537414965986393
0.43333333333333335
0.4387755102040816
0.44081632653061226
0.43197278911564624
0.4421768707482993
0.4380952380952381
0.4387755102040816
0.4448979591836735
0.43945578231292515
0.43673469387755104
0.43945578231292515
0.4421768707482993
0.44421768707482995
0.4496598639455782
0.44829931972789117
0.44761904761904764
0.45170068027210886
0.45510204081632655
0.45034013605442175
0.45374149659863944
0.4435374149659864
0.45102040816326533
0.4530612244897959
0.4557823129251701
0.45782312925170066
0.4564625850340136
0.4605442176870748
0.45374149659863944
0.46122448979591835


In [367]:
# Reference values from scikit-learn, for comparison with calc_recall.
for predictions in y_pred:
    print(
        metrics.recall_score(
            y_test, predictions, average='weighted', zero_division=0
        )
    )

0.5544217687074829
0.47891156462585033
0.45918367346938777
0.47210884353741495
0.46462585034013604
0.4605442176870748
0.45714285714285713
0.4748299319727891
0.4639455782312925
0.46530612244897956
0.4448979591836735
0.4530612244897959
0.4414965986394558
0.42857142857142855
0.44013605442176873
0.4340136054421769
0.4346938775510204
0.42993197278911566
0.43945578231292515
0.44013605442176873
0.43537414965986393
0.43333333333333335
0.4387755102040816
0.44081632653061226
0.43197278911564624
0.4421768707482993
0.4380952380952381
0.4387755102040816
0.4448979591836735
0.43945578231292515
0.43673469387755104
0.43945578231292515
0.4421768707482993
0.44421768707482995
0.4496598639455782
0.44829931972789117
0.44761904761904764
0.45170068027210886
0.45510204081632655
0.45034013605442175
0.45374149659863944
0.4435374149659864
0.45102040816326533
0.4530612244897959
0.4557823129251701
0.45782312925170066
0.4564625850340136
0.4605442176870748
0.45374149659863944
0.46122448979591835


## F1-Measure

In [368]:
# Weighted F1 of every kNN model, computed with calc_f1_measure.
for predictions in y_pred:
    print(calc_f1_measure(y_test, predictions))

0.5525828044529364
0.46954377846244816
0.45515113346859987
0.4611921012780785
0.4503368523033497
0.4441468744675037
0.44033575604653447
0.45459790715684356
0.44429360862740175
0.4455957781745911
0.42179876609215483
0.42695327160936913
0.4155096237607401
0.4005157764562704
0.40818881655647965
0.4021784217307167
0.4009411118944877
0.395597897702573
0.4038682005310935
0.4043417154503377
0.39926177478057145
0.3946997056078115
0.40114395153231325
0.3995454449967573
0.3933034400191946
0.3969294333280574
0.3926398327878084
0.39271131085384364
0.39970061061056844
0.39207191901595745
0.3883348754035206
0.3902389330385154
0.392530990402904
0.3956320483784425
0.40032653263609713
0.39904872050838325
0.3957354019607614
0.39914066248605506
0.40147999758867425
0.39846790357643314
0.4008919082821906
0.3893584893325842
0.3967973156334076
0.3990423961034498
0.4001734632063886
0.40229011438928125
0.40018719182586915
0.40214571594775056
0.3964191853724275
0.4056580438835534


In [369]:
# Reference values from scikit-learn, for comparison with calc_f1_measure.
for predictions in y_pred:
    print(
        metrics.f1_score(
            y_test, predictions, average='weighted', zero_division=0
        )
    )

0.5525828044529364
0.46954377846244816
0.45515113346859987
0.4611921012780785
0.4503368523033497
0.4441468744675037
0.44033575604653447
0.45459790715684356
0.44429360862740175
0.4455957781745911
0.42179876609215483
0.42695327160936913
0.4155096237607401
0.4005157764562704
0.40818881655647965
0.4021784217307167
0.4009411118944877
0.395597897702573
0.4038682005310935
0.4043417154503377
0.39926177478057145
0.3946997056078115
0.40114395153231325
0.3995454449967573
0.3933034400191946
0.3969294333280574
0.3926398327878084
0.39271131085384364
0.39970061061056844
0.39207191901595745
0.3883348754035206
0.3902389330385154
0.392530990402904
0.3956320483784425
0.40032653263609713
0.39904872050838325
0.3957354019607614
0.39914066248605506
0.40147999758867425
0.39846790357643314
0.4008919082821906
0.3893584893325842
0.3967973156334076
0.3990423961034498
0.4001734632063886
0.40229011438928125
0.40018719182586915
0.40214571594775056
0.3964191853724275
0.4056580438835534


## 3. Calcule MAE e RMSE para sua solução da questão 3.3 da Lista 05. Caso não tenha feito a questão 3.3 da Lista 05 terá que fazê-la agora.

In [370]:
# Fit a k-nearest-neighbours regressor (k = 5) on the training split and
# predict the test split. `fit` returns the estimator itself, so it can be
# chained onto the constructor. Note that `y_pred` is rebound here from the
# list of classifier predictions to the regressor's output.
k = 5
kNNr_model = KNeighborsRegressor(n_neighbors=k).fit(X_train, y_train)
y_pred = kNNr_model.predict(X_test)
print(y_pred)

[6.4 6.4 6.4 ... 5.6 6.4 6.2]


## MAE

In [371]:
# MAE of the regressor predictions, computed with calc_mae.
print(calc_mae(y_test, y_pred))

0.6337414965986394


In [372]:
# Reference value from scikit-learn, for comparison with calc_mae.
print(metrics.mean_absolute_error(y_test, y_pred))

0.6337414965986394


## RMSE

In [373]:
# RMSE of the regressor predictions, computed with calc_rmse.
print(calc_rmse(y_test, y_pred))

0.8166298756116486


In [374]:
# Reference RMSE from scikit-learn, for comparison with calc_rmse.
# The `squared=False` argument was deprecated in scikit-learn 1.4 and removed
# in 1.6; taking the square root of the MSE gives the same value on every
# version.
print(np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

0.8166298756116486
