<a href="https://colab.research.google.com/github/pawel0508/MachineLearningBootcamp_I/blob/master/Klasyfikacja_ocena_modeli.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Klasyfikacja - ocena modeli

- accuracy = poprawne_przewidywania / wszystkie_próbki
- macierz konfuzji:
  - w wierszach wartości rzeczywiste (konwencja scikit-learn, używana w tym notebooku)
  - w kolumnach wartości przewidywane
  - na podstawie macierzy konfuzji możemy wyznaczyć wiele charakterystyk, które pozwolą nam ocenić model
  - macierz konfuzji może być stworzona dla problemów gdzie mamy więcej klas np. 3 klasy 

###### Oznaczenia: TN - True Negative, FN - False Negative, FP - False Positive, TP - True Positive.

- ###### FPR(False Positive Rate) = FP/(FP + TN) (type I error)
- ###### FNR(False Negative Rate) = FN/(FN + TP) (type II error)
- ###### Precision = TP/(TP + FP)
(precyzja mówi o tym, jaki % obserwacji przewidzianych jako pozytywne jest w rzeczywistości pozytywnych)
- ###### Recall = TP/(TP + FN)
(recall mówi, jaka część wszystkich rzeczywiście pozytywnych obserwacji została sklasyfikowana jako pozytywne)
- ###### F1_score = 2 * (Precision * Recall)/(Precision + Recall) 
średnia harmoniczna precyzji i recall

###### Krzywa ROC:
  - krzywa oparta o dwie charakterystyki: FPR i TPR (TPR = True Positive Rate = Recall = TP/(TP + FN))
  - oś X - False Positive Rate
  - oś Y - True Positive Rate

In [2]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots


In [3]:
# Toy binary-classification data: ground-truth labels and the matching model
# predictions, aligned position by position.
y_true = np.array([1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1])
y_pred = np.array([0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1])

In [4]:
# One row per sample: the true label next to the predicted label.
df = pd.DataFrame(dict(y_true=y_true, y_pred=y_pred))
df

Unnamed: 0,y_true,y_pred
0,1,0
1,0,0
2,1,1
3,1,1
4,0,0
5,1,1
6,1,0
7,0,0
8,1,1
9,0,1


In [5]:
from sklearn.metrics import accuracy_score

# Share of samples whose predicted label matches the true label.
accuracy_score(y_true=y_true, y_pred=y_pred)

0.8

In [9]:
# Group the samples by true class, renumber the rows from 0, and add a
# 1-based `sample` id that the scatter plots below use as the x axis.
df = (
    df.sort_values(by='y_true')
      .reset_index(drop=True)
      .assign(sample=lambda frame: frame.index + 1)
)

In [10]:
# Show the frame after it was sorted by y_true and given the `sample` column.
df

Unnamed: 0,y_true,y_pred,sample
0,0,0,1
1,0,1,2
2,0,0,3
3,0,0,4
4,0,0,5
5,0,1,6
6,0,0,7
7,0,0,8
8,0,0,9
9,0,0,10


In [12]:
# Two stacked panels sharing the sample id on x: true labels on top,
# predicted labels underneath.
fig = make_subplots(rows=2, cols=1)
for panel_row, column in enumerate(['y_true', 'y_pred'], start=1):
    fig.add_trace(
        go.Scatter(x=df['sample'], y=df[column], mode='markers', name=column),
        row=panel_row,
        col=1,
    )
fig.update_layout(width=800, height=600, title='Klasyfikator binarny')
fig.show()

In [32]:
from sklearn.metrics import confusion_matrix

# Rows correspond to true classes, columns to predicted classes.
cm = confusion_matrix(y_true=y_true, y_pred=y_pred)
cm

array([[ 8,  2],
       [ 4, 16]])

In [33]:
import plotly.figure_factory as ff
from sklearn.metrics import confusion_matrix
# Same call as in the previous cell; the matrix is rebuilt here before plotting.
cm = confusion_matrix(y_true, y_pred)
def plot_cm(cm):
    """Draw a confusion matrix as an annotated Plotly heatmap.

    Parameters
    ----------
    cm : array-like, shape (n_true, n_pred)
        Confusion matrix with true classes in rows and predicted classes in
        columns (the layout used for ``cm`` elsewhere in this notebook).

    Notes
    -----
    Axis labels are generated from the matrix shape (``pred_0 .. pred_k`` and
    ``true_0 .. true_k``), so the function works for any number of classes.
    The figure is shown directly; nothing is returned.
    """
    matrix = np.asarray(cm)
    n_true, n_pred = matrix.shape
    pred_labels = [f'pred_{i}' for i in range(n_pred)]
    # The rows are reversed for display, so the row labels run from the
    # highest class index down to 0 to stay aligned with the flipped data.
    true_labels = [f'true_{i}' for i in reversed(range(n_true))]

    fig = ff.create_annotated_heatmap(z=matrix[::-1], x=pred_labels, y=true_labels,
                                      colorscale='ice', showscale=True, reversescale=True)
    fig.update_layout(width=400, height=400, title='Confusion Matrix', font_size=16)
    fig.show()

# Render the binary confusion matrix computed at the top of this cell.
plot_cm(cm)


In [35]:
# Labelled view of the confusion matrix: true classes as the index,
# predicted classes as the columns.
cm = confusion_matrix(y_true, y_pred)
cm_df = pd.DataFrame(
    data=cm,
    index=['true_0', 'true_1'],
    columns=['pred_0', 'pred_1'],
)
cm_df


Unnamed: 0,pred_0,pred_1
true_0,8,2
true_1,4,16


In [41]:
# Unpack the 2x2 matrix row by row: first row is true class 0, second is true class 1.
(tn, fp), (fn, tp) = cm

In [42]:
# Report the four confusion-matrix counts, one per line.
print(
    f'TN - True Negative: {tn}',
    f'FP - False Positive: {fp}',
    f'FN - False Negative: {fn}',
    f'TP - True Positive: {tp}',
    sep='\n',
)

TN - True Negative: 8
FP - False Positive: 2
FN - False Negative: 4
TP - True Positive: 16


In [44]:
# False Positive Rate: false positives over all actual negatives (TN + FP).
fpr = fp / (tn + fp)
print(fpr)

0.2


In [46]:
# False Negative Rate: false negatives over all actual positives (TP + FN).
fnr = fn / (tp + fn)
print(fnr)

0.2


In [47]:
# Precision: true positives over everything predicted positive (FP + TP).
precision = tp / (fp + tp)
print(precision)

0.8888888888888888


In [48]:
# Recall: true positives over all actual positives (FN + TP).
recall = tp / (fn + tp)
print(recall)

0.8


In [49]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 plus the averaged rows, as plain text.
print(
    classification_report(y_true=y_true, y_pred=y_pred)
)

              precision    recall  f1-score   support

           0       0.67      0.80      0.73        10
           1       0.89      0.80      0.84        20

    accuracy                           0.80        30
   macro avg       0.78      0.80      0.78        30
weighted avg       0.81      0.80      0.80        30



###### Krzywa ROC

In [52]:

from sklearn.metrics import roc_curve

# ROC points with class 1 as the positive class.
# NOTE(review): this rebinds `fpr`, which an earlier cell set to a scalar rate;
# from here on `fpr` / `tpr` are the arrays returned by roc_curve.
fpr, tpr, tresh = roc_curve(y_true, y_pred, pos_label=1)

# Tabular form of the curve, one row per ROC point.
roc = pd.DataFrame(dict(fpr=fpr, tpr=tpr))
# NOTE(review): a bare name is only rendered when it is the last expression of
# a cell, so this line does not display the frame here.
roc

def plot_roc_curve(y_true, y_pred):
    """Plot the ROC curve of a binary classifier together with the chance diagonal.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels; class 1 is treated as the positive class.
    y_pred : array-like
        Values handed to ``roc_curve`` as the score argument (this notebook
        passes hard 0/1 predictions).

    The figure is shown directly; nothing is returned.
    """
    # Binary classification. The curve is built from this call's arguments
    # only, never from notebook-level state such as a global `roc` frame.
    fpr, tpr, _ = roc_curve(y_true, y_pred, pos_label=1)

    fig = go.Figure(
        data=[
            go.Scatter(x=fpr, y=tpr,
                    line_color='red',
                    name='ROC Curve'),
            # Diagonal from (0, 0) to (1, 1) drawn as a dashed reference line.
            go.Scatter(x=[0, 1], y=[0, 1],
                    mode='lines',
                    line_dash='dash',
                    line_color='navy')
        ],
        layout=go.Layout(xaxis_title='False Positive Rate',
                         yaxis_title='True Positive Rate',
                         title='ROC Curve',
                         showlegend=False,
                         width=700))
    fig.show()

# Draw the ROC curve for the binary labels and predictions defined earlier.
plot_roc_curve(y_true, y_pred)



###### Klasyfikacja wieloklasowa

In [53]:
# Three-class toy data (labels 0, 1, 2).
# NOTE(review): this rebinds y_true / y_pred, replacing the binary arrays used
# in the cells above; every later cell sees the multiclass data.
y_true = np.array([1, 0, 1, 2, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 2, 1, 1, 2, 2, 1, 0, 1, 1, 0, 2, 1, 1, 2, 2])
y_pred = np.array([0, 0, 1, 2, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 2, 1, 2, 1, 2, 1, 0, 2, 1, 0, 1, 1, 1, 2, 2])

from sklearn.metrics import accuracy_score

# Share of samples whose predicted class equals the true class.
accuracy_score(y_true, y_pred)


0.7241379310344828

In [54]:
# 3x3 confusion matrix for the multiclass data: rows are true classes,
# columns are predicted classes.
cm = confusion_matrix(y_true=y_true, y_pred=y_pred)
cm

array([[ 6,  1,  0],
       [ 3, 10,  2],
       [ 0,  2,  5]])

In [55]:
def plot_confusion_matrix(cm):
    """Draw a confusion matrix of any size as an annotated Plotly heatmap.

    Parameters
    ----------
    cm : array-like, shape (n_true, n_pred)
        Confusion matrix with true classes in rows and predicted classes in
        columns (the layout used for ``cm`` elsewhere in this notebook).

    Notes
    -----
    Axis labels (``pred_0 .. pred_k`` and ``true_0 .. true_k``) are derived
    from the matrix shape rather than fixed at three classes.
    The figure is shown directly; nothing is returned.
    """
    matrix = np.asarray(cm)
    n_true, n_pred = matrix.shape
    pred_labels = [f'pred_{i}' for i in range(n_pred)]
    # The rows are reversed for display, so the row labels run from the
    # highest class index down to 0 to stay aligned with the flipped data.
    true_labels = [f'true_{i}' for i in reversed(range(n_true))]

    fig = ff.create_annotated_heatmap(z=matrix[::-1], x=pred_labels, y=true_labels,
                                      colorscale='ice', showscale=True, reversescale=True)
    fig.update_layout(width=400, height=400, title='Confusion Matrix', font_size=16)
    fig.show()

# Render the multiclass confusion matrix computed in the previous cell.
plot_confusion_matrix(cm)

###### Raport klasyfikacji

In [56]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 for the three-class data.
print(
    classification_report(y_true=y_true, y_pred=y_pred)
)

              precision    recall  f1-score   support

           0       0.67      0.86      0.75         7
           1       0.77      0.67      0.71        15
           2       0.71      0.71      0.71         7

    accuracy                           0.72        29
   macro avg       0.72      0.75      0.73        29
weighted avg       0.73      0.72      0.72        29



In [57]:
# Same report, with display names supplied for the three classes.
print(
    classification_report(
        y_true,
        y_pred,
        target_names=['label_1', 'label_2', 'label_3'],
    )
)

              precision    recall  f1-score   support

     label_1       0.67      0.86      0.75         7
     label_2       0.77      0.67      0.71        15
     label_3       0.71      0.71      0.71         7

    accuracy                           0.72        29
   macro avg       0.72      0.75      0.73        29
weighted avg       0.73      0.72      0.72        29

