# Performance Metrics

In [1]:
from math import sqrt
from typing import List
from itertools import combinations

import numpy as np
import pandas as pd

from sklearn import metrics
from sklearn.preprocessing import minmax_scale
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

import matplotlib.pyplot as plt

## Classification Performance Metrics

In [2]:
# Toy binary-classification data: the ground-truth labels and the labels a model
# predicted for the same ten samples. Every classification metric below uses these.
actual_values = [1, 1, 0, 1, 0, 0, 1, 0, 0, 0]
predictions = [1, 0, 1, 1, 1, 0, 1, 1, 0, 0]

### Confusion Matrix

### Complete Classification Report

In [3]:
# Text table with per-class precision, recall, F1 and support, plus the averaged rows.
print(
    metrics.classification_report(
        y_true=actual_values,
        y_pred=predictions,
    )
)

              precision    recall  f1-score   support

           0       0.75      0.50      0.60         6
           1       0.50      0.75      0.60         4

    accuracy                           0.60        10
   macro avg       0.62      0.62      0.60        10
weighted avg       0.65      0.60      0.60        10



### Accuracy Score

In [None]:
# accuracy_score returns the fraction of matching labels; scale it by 100 to show a percentage.
print(f'Accuracy Score is: {metrics.accuracy_score(y_true=actual_values, y_pred=predictions) * 100} % ')

### Precision Score

In [None]:
# Precision: of the samples predicted as class 1, the share that really are class 1.
print(f'Precision Score is: {metrics.precision_score(y_true=actual_values, y_pred=predictions)}')

### Recall Score

In [None]:
# Recall: of the samples that really are class 1, the share the model predicted as class 1.
print(f'Recall Score is: {metrics.recall_score(y_true=actual_values, y_pred=predictions)}')

### Precision - Recall Curve

In [None]:
# Build a two-class penguin dataset and fit a depth-1 decision tree (a "stump") on it.
data = pd.read_csv('./data/penguins_size.csv')

# Remove rows with any missing value first, then the columns that are not used as
# features, and finally the 'Chinstrap' rows so that only two species remain.
data = data.dropna()
data = data.drop(['sex', 'island', 'flipper_length_mm', 'body_mass_g'], axis=1)
data = data[data['species'] != 'Chinstrap']

# Feature matrix: every remaining column except the target.
X = data.drop(['species'], axis=1).values

# Encode the two remaining species as -1 / +1. An unexpected species name raises
# KeyError here instead of slipping through silently.
spicies = {'Adelie': -1, 'Gentoo': 1}
y = np.array([spicies[name] for name in data['species']])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=33)

# random_state pins how the tree breaks ties between equally good splits, so that
# Restart & Run All reproduces the same stump (the split above is already seeded).
dt_model = DecisionTreeClassifier(max_depth=1, random_state=33)
dt_model.fit(X_train, y_train)
dt_preditions = dt_model.predict(X_test)

In [None]:
# metrics.plot_precision_recall_curve was deprecated in scikit-learn 1.0 and removed in 1.2.
# PrecisionRecallDisplay.from_estimator is its replacement: it takes the same
# (estimator, X, y) arguments, forwards extra keywords such as `color` to matplotlib,
# and returns the display object.
disp = metrics.PrecisionRecallDisplay.from_estimator(dt_model, X_test, y_test, color='orange')

### F1 Score

In [None]:
# F1 is the harmonic mean of precision and recall for class 1.
print('F1 Score:', metrics.f1_score(y_true=actual_values, y_pred=predictions))

### AUC-ROC

In [None]:
# Same data preparation and decision stump as in the precision-recall section,
# rebuilt here so the AUC-ROC section can be run on its own.
data = pd.read_csv('./data/penguins_size.csv')

# Drop incomplete rows, then the unused columns, then the 'Chinstrap' rows,
# leaving a binary Adelie-vs-Gentoo problem.
data = data.dropna()
data = data.drop(['sex', 'island', 'flipper_length_mm', 'body_mass_g'], axis=1)
data = data[data['species'] != 'Chinstrap']

# Features are all remaining columns except the target.
X = data.drop(['species'], axis=1).values

# Targets encoded as -1 (Adelie) / +1 (Gentoo); any other species name raises KeyError.
spicies = {'Adelie': -1, 'Gentoo': 1}
y = np.array([spicies[name] for name in data['species']])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=33)

# Seed the tree as well: without random_state, ties between equally good splits
# may be resolved differently from one run to the next.
dt_model = DecisionTreeClassifier(max_depth=1, random_state=33)
dt_model.fit(X_train, y_train)
dt_preditions = dt_model.predict(X_test)

In [None]:
# metrics.plot_roc_curve was deprecated in scikit-learn 1.0 and removed in 1.2.
# RocCurveDisplay.from_estimator is its replacement: same (estimator, X, y) arguments,
# with extra keywords such as `color` forwarded to matplotlib.
metrics.RocCurveDisplay.from_estimator(dt_model, X_test, y_test, color='orange')

In [None]:
# Area under the ROC curve for the toy labels.
# NOTE(review): `predictions` appears to hold hard 0/1 labels here rather than scores or
# probabilities, so the ROC curve has a single operating point - confirm this is intended.
print('AUC-ROC:', metrics.roc_auc_score(y_true=actual_values, y_score=predictions))

### LOGLOSS

In [None]:
# Logarithmic loss for the toy labels.
# NOTE(review): log loss is normally computed from predicted probabilities; `predictions`
# appears to hold hard 0/1 labels here - confirm this is intended.
print(
    'LOGLOSS:',
    metrics.log_loss(actual_values, predictions),
)

## Regression Performance Metrics

In [None]:
# Toy regression data: true target values and the model's predictions for the same
# four samples. These names replace the classification lists defined earlier.
actual_values = [9, -3.3, 6, 11]
predictions = [8.5, -2.9, 6, 9.2]

### Mean Absolute Error - MAE

In [None]:
# Mean of the absolute differences between targets and predictions.
print(f'MAE: {metrics.mean_absolute_error(y_true=actual_values, y_pred=predictions)}')

### Mean Squared Error - MSE

In [None]:
# Mean of the squared differences between targets and predictions.
print(f'MSE: {metrics.mean_squared_error(y_true=actual_values, y_pred=predictions)}')

### Root Mean Squared Error - RMSE

In [None]:
def rmse(actual_values, predictions):
    """Return the root mean squared error between two array-likes.

    Both arguments are converted with ``np.asarray``. The element-wise
    differences are squared, averaged over all elements, and the square
    root of that mean is returned.
    """
    truth = np.asarray(actual_values)
    estimate = np.asarray(predictions)
    squared_errors = np.square(estimate - truth)
    return np.sqrt(np.mean(squared_errors))

In [None]:
# RMSE computed with the hand-written helper defined above.
print(f'RMSE: {rmse(actual_values=actual_values, predictions=predictions)}')

In [None]:
# Same quantity via scikit-learn: the square root of its mean squared error.
print(f'RMSE: {sqrt(metrics.mean_squared_error(y_true=actual_values, y_pred=predictions))}')

### Root Mean Squared Logarithmic Error - RMSLE

In [None]:
# mean_squared_log_error rejects negative values, so both series are min-max scaled
# into [0, 1] first. They are scaled TOGETHER, with one shared min/max: scaling each
# series on its own maps that series' own extremes to exactly 0 and 1, which erases
# the error between them (e.g. actual 11 and predicted 9.2 would both become 1.0).
combined_ranged = minmax_scale(np.concatenate([actual_values, predictions]), feature_range=(0, 1))
actual_values_ranged = combined_ranged[:len(actual_values)]
predictions_ranged = combined_ranged[len(actual_values):]

print(f'RMSLE: {sqrt(metrics.mean_squared_log_error(actual_values_ranged, predictions_ranged))}')

### R Squared

In [None]:
# Coefficient of determination of the predictions against the true targets.
print(f'R Squared: {metrics.r2_score(y_true=actual_values, y_pred=predictions)}')