<a href="https://colab.research.google.com/github/kszymon/neural-network/blob/main/02_basics%20/05_regression_metrics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Metryki - Problem regresji:
1. [Import bibliotek](#a0)
2. [Metryki](#a1)
    1. [Interpretacja graficzna](#a2)
    2. [Mean Absolute Error - MAE](#a3)
    3. [Mean Squared Error - MSE](#a4)
    4. [Root Mean Squared Error - RMSE](#a5)
    5. [Max Error](#a6)
    6. [R2 score - współczynnik determinacji](#a7)

### <a name='a0'></a> 1. Import bibliotek

In [62]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.graph_objects as go
import tensorflow as tf
tf.__version__

'2.18.0'

In [63]:
# Simulated ground truth: 50 values of the form 100 + 20 * z, where z comes
# from np.random.randn (standard-normal draws).
# NOTE(review): no random seed is set in the visible cells, so these values
# change on every run.
y_true = 100 + 20 * np.random.randn(50)
y_true

array([ 86.88577136, 101.21691617,  87.7004557 , 106.4141384 ,
       105.04447397, 103.89994889, 115.45921279,  73.07322112,
        72.70445554, 140.85915019, 120.18182478, 100.71419223,
       114.7230065 , 115.65974201, 122.74228504,  87.25059073,
        68.73104086,  87.03852721, 104.24559793,  67.55753311,
        93.59668712, 108.63801198,  96.47039767, 114.21034187,
        82.70322443,  76.15650886,  87.46694651,  88.67668858,
       132.21102509, 135.18771298, 101.13214027, 117.4739779 ,
        55.9983938 ,  91.45742553, 117.84423611, 128.61678172,
        90.37637786,  90.14181797,  85.92099873, 104.44521969,
       100.15777501, 122.03356822, 124.71098552,  79.51797003,
        97.39510134,  87.51043675,  97.94350454,  79.37356283,
       151.77306891,  82.23464863])

In [64]:
# Simulated predictions: the true values plus additive noise
# (10 * standard-normal draws), one noise term per sample.
y_pred = y_true + 10 * np.random.randn(50)
y_pred

array([ 99.97089822,  96.44756725,  89.23151786,  98.74861986,
       106.9717447 , 114.14743573, 114.11476941,  63.6907353 ,
        64.35915878, 129.14563054,  96.73276349, 100.64038923,
       120.176497  , 117.50392286, 127.52033424,  90.55055847,
        75.81808283, 109.1344751 ,  88.05478157,  80.76858803,
       108.7102855 , 119.81362264,  98.70209282, 114.709725  ,
        86.98919744,  74.13728216,  82.91783998, 107.22071136,
       132.23436062, 128.17824369,  97.63204679, 116.4120973 ,
        55.23996245, 109.11079737, 113.32776245, 143.39435858,
        73.43255604, 104.13732827,  95.16148036, 101.81232108,
        88.92520784, 108.67313808, 127.25337227,  89.99640169,
       100.30633441,  64.42685194, 105.349771  ,  74.07841286,
       172.58825831,  74.89643961])

In [65]:
# Put targets and predictions side by side, one row per sample.
results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
results.head()

Unnamed: 0,y_true,y_pred
0,86.885771,99.970898
1,101.216916,96.447567
2,87.700456,89.231518
3,106.414138,98.74862
4,105.044474,106.971745


In [66]:
# Signed error per sample (true minus predicted): a negative value means the
# prediction is above the true value.
results['error'] = results['y_true']- results['y_pred']
results.head()

Unnamed: 0,y_true,y_pred,error
0,86.885771,99.970898,-13.085127
1,101.216916,96.447567,4.769349
2,87.700456,89.231518,-1.531062
3,106.414138,98.74862,7.665519
4,105.044474,106.971745,-1.927271


### <a name='a1'></a> 2. Metryki
### <a name='a2'></a> 2.1 Interpretacja graficzna

In [67]:
def plot_regression_results(y_true, y_pred):
    """Scatter predicted values against true values with the identity line.

    Points lying on the diagonal are perfect predictions; the further a point
    is from the diagonal, the larger its prediction error.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted values, one per entry of ``y_true``.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
    # Common value range of both columns; used as the end points of the
    # y = x reference line. Named lower/upper so that the built-in min() and
    # max() functions are not shadowed inside this function.
    lower = results[['y_true', 'y_pred']].min().min()
    upper = results[['y_true', 'y_pred']].max().max()

    fig = go.Figure(data=[go.Scatter(x=results['y_true'], y=results['y_pred'], mode='markers'),
                          go.Scatter(x=[lower, upper], y=[lower, upper])],
                    layout=go.Layout(showlegend=False, width=800,
                                     xaxis_title='y_true',
                                     yaxis_title='y_pred',
                                     title='Regression results'))
    fig.show()
plot_regression_results(y_true, y_pred)

In [68]:
# Regenerate the simulated data with 1000 samples (presumably so the error
# histogram below is smoother than with 50 points).
# This rebinds y_true, y_pred and results; the metric cells that follow are
# evaluated on these 1000-sample arrays.
# NOTE(review): no random seed is set in the visible cells, so the metric
# values shown below change on every run.
y_true = 100 + 20 * np.random.randn(1000)
y_pred = y_true + 10 * np.random.randn(1000)
results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
results['error'] = results['y_true'] - results['y_pred']

# Distribution of the signed errors (true minus predicted).
px.histogram(results, x='error', nbins=50, width=800)

### <a name='a3'></a> 2.2 Mean Absolute Error
### $$MAE = \frac{1}{N}\sum_{i=1}^{N}|y_{true,i} - y_{pred,i}|$$

In [69]:
def mean_absolute_error(y_true, y_pred):
    """Return the mean absolute error (MAE) between targets and predictions.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted values.

    Returns
    -------
    numpy.float64
        Sum of absolute differences divided by ``len(y_true)``.
    """
    # Convert to float arrays so that plain lists/tuples are accepted and
    # unsigned-integer inputs cannot wrap around when subtracted.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.abs(y_true - y_pred).sum() / len(y_true)

mean_absolute_error(y_true, y_pred)

np.float64(8.31332754219073)

In [70]:
from sklearn.metrics import mean_absolute_error
mean_absolute_error(y_true, y_pred)

8.31332754219073

### <a name='a4'></a> 2.3 Mean Squared Error
### $$MSE = \frac{1}{N}\sum_{i=1}^{N}(y_{true,i} - y_{pred,i})^{2}$$

In [71]:
def mean_squared_error(y_true, y_pred):
    """Return the mean squared error (MSE) between targets and predictions.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted values.

    Returns
    -------
    numpy.float64
        Sum of squared differences divided by ``len(y_true)``.
    """
    # Convert to float arrays so that plain lists/tuples are accepted and
    # integer inputs cannot wrap around when subtracted or squared.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return ((y_true - y_pred) ** 2).sum() / len(y_true)
mean_squared_error(y_true, y_pred)

np.float64(108.16105574599645)

In [72]:
from sklearn.metrics import mean_squared_error
mean_squared_error(y_true, y_pred)

108.16105574599645

### <a name='a5'></a> 2.4 Root Mean Squared Error
### $$RMSE = \sqrt{MSE}$$

In [73]:
def root_mean_squared_error(y_true, y_pred):
    """Return the root mean squared error (RMSE), i.e. the square root of MSE.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted values.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared differences divided by
        ``len(y_true)``; expressed in the same units as the inputs.
    """
    # Convert to float arrays so that plain lists/tuples are accepted and
    # integer inputs cannot wrap around when subtracted or squared.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    mse = ((y_true - y_pred) ** 2).sum() / len(y_true)
    return np.sqrt(mse)

root_mean_squared_error(y_true, y_pred)

np.float64(10.400050756895201)

In [74]:
np.sqrt(mean_squared_error(y_true, y_pred))

np.float64(10.400050756895201)

### <a name='a6'></a> 2.5 Max Error
### $$ME = \max_{1 \le i \le N}|y_{true,i} - y_{pred,i}|$$

In [75]:
def max_error(y_true, y_pred):
    """Return the largest absolute difference between targets and predictions.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted values.

    Returns
    -------
    numpy.float64
        Maximum of the element-wise absolute differences.
    """
    # Convert to float arrays so that plain lists/tuples are accepted and
    # unsigned-integer inputs cannot wrap around when subtracted.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.abs(y_true - y_pred).max()

In [76]:
max_error(y_true, y_pred)

np.float64(31.78802110752376)

In [77]:
from sklearn.metrics import max_error
max_error(y_true, y_pred)

np.float64(31.78802110752376)

### <a name='a7'></a> 2.6 R2 score - współczynnik determinacji
### $$R2\_score = 1 - \frac{\sum_{i=1}^{N}(y_{true,i} - y_{pred,i})^{2}}{\sum_{i=1}^{N}(y_{true,i} - \overline{y_{true}})^{2}}$$

In [78]:
from sklearn.metrics import r2_score

r2_score(y_true, y_pred)

0.7359421241456381

In [79]:
def r2_score(y_true, y_pred):
    """Return the coefficient of determination (R^2) of the predictions.

    Computed as ``1 - SS_res / SS_tot`` where ``SS_res`` is the sum of squared
    residuals and ``SS_tot`` is the sum of squared deviations of ``y_true``
    from its mean.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted values.

    Returns
    -------
    numpy.float64
        The R^2 score. When ``y_true`` is constant (``SS_tot == 0``) the ratio
        is undefined; a notice is printed and the score is 1.0 if the
        predictions are perfect, otherwise 0.0 (the same finite convention
        scikit-learn uses by default).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    numerator = ((y_true - y_pred) ** 2).sum()
    denominator = ((y_true - y_true.mean()) ** 2).sum()
    # NumPy floating-point division by zero does not raise ZeroDivisionError
    # (it yields nan/inf with a RuntimeWarning), so the degenerate case has to
    # be detected explicitly instead of with try/except.
    if denominator == 0:
        print('Dzielenie przez zero')
        return np.float64(1.0 if numerator == 0 else 0.0)
    return 1 - numerator / denominator

In [80]:
r2_score(y_true, y_pred)

np.float64(0.7359421241456381)