<a href="https://colab.research.google.com/github/kurek0010/neutral-network/blob/main/02_basics/05_regression_metrics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

* @author: krakowiakpawel9@gmail.com  
* @site: e-smartdata.org

### Metryki - Problem regresji:
1. [Import bibliotek](#a0)
2. [Metryki](#a1)
    1. [Interpretacja graficzna](#a2)
    2. [Mean Absolute Error - MAE](#a3)
    3. [Mean Squared Error - MSE](#a4)
    4. [Root Mean Squared Error - RMSE](#a5)
    5. [Max Error](#a6)
    6. [R2 score - współczynnik determinacji](#a7)

    

### <a name='a0'></a> 1. Import bibliotek

In [None]:
# Prepare the environment for working with TensorFlow 2.0.
# If the TensorFlow installation fails with an error, run this cell once more.

!pip uninstall -y tensorflow
!pip install -q tensorflow==2.0.0

Found existing installation: tensorflow 2.0.0
Uninstalling tensorflow-2.0.0:
  Successfully uninstalled tensorflow-2.0.0


In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import tensorflow as tf
tf.__version__

'2.0.0'

In [None]:
# Seed NumPy's global RNG so that Restart & Run All reproduces the same sample.
np.random.seed(42)

# Simulated ground truth: 50 draws from a normal distribution (mean 100, std 20).
y_true = 100 + 20 * np.random.randn(50)
y_true

array([ 80.21153202, 117.41849848, 106.96972112, 117.62482445,
       121.02237481, 135.26687079,  95.09878587, 103.41994835,
        68.46804306,  96.19897614,  72.38428487, 100.90670045,
       128.5663837 , 109.64935051, 100.61417877,  97.91350021,
        96.84856174,  97.19197252,  79.15854831, 133.8765057 ,
       111.51061815, 126.38948705, 101.4095637 ,  95.64522427,
       114.40255534, 123.52971506, 111.1692031 ,  82.92852907,
       106.16462415, 104.81789229,  80.62585827,  98.37327751,
       128.01738854,  81.74022205,  81.35956436, 101.32173903,
       125.47292787,  86.25802091,  68.40793432, 111.78948718,
       142.37250444,  93.52877496,  74.58593417, 105.36060718,
        61.68002339, 135.91877799,  67.44089045, 118.13165484,
       133.93699028,  98.00469155])

In [None]:
# Simulated predictions: the ground truth plus Gaussian noise (std 10).
# The noise vector is sized from y_true so both arrays always have matching
# shapes, even if the sample size of y_true is changed.
y_pred = y_true + 10 * np.random.randn(*y_true.shape)
y_pred

array([ 96.0625596 , 111.29268492, 101.47880953, 105.32795672,
       130.32232059, 111.10122275,  95.64620474,  90.30632398,
        72.03520332,  96.50475241,  76.82511339,  94.73087104,
       113.99937456, 124.04078549, 111.20254153,  97.52690292,
        91.74092993, 100.89523952,  75.07332313, 127.36423501,
       113.56617887, 144.77789822, 111.31745291,  84.30870035,
       126.91724199,  98.37431735, 111.08794641,  73.45314787,
       123.68872617, 128.18986422,  79.66689277, 104.99682484,
       128.60350517,  94.88641235,  87.79197528,  98.32096417,
       121.11665449,  97.30220226,  53.21996222, 126.19815715,
       128.52786729, 102.75378699,  64.09274567, 113.04610873,
        76.72748552, 124.32283735,  83.6258364 , 126.83069332,
       141.59499742,  99.28236427])

In [None]:
# Put ground truth and predictions side by side, one row per sample.
results = pd.DataFrame(dict(y_true=y_true, y_pred=y_pred))
results.head(5)

Unnamed: 0,y_true,y_pred
0,80.211532,96.06256
1,117.418498,111.292685
2,106.969721,101.47881
3,117.624824,105.327957
4,121.022375,130.322321


In [None]:
# Signed prediction error per sample: positive when the prediction is too low.
results['error'] = results['y_true'].sub(results['y_pred'])
results.head(5)

Unnamed: 0,y_true,y_pred,error
0,80.211532,96.06256,-15.851028
1,117.418498,111.292685,6.125814
2,106.969721,101.47881,5.490912
3,117.624824,105.327957,12.296868
4,121.022375,130.322321,-9.299946


### <a name='a1'></a> 2. Metryki
### <a name='a2'></a> 2.1 Interpretacja graficzna

In [None]:
def plot_regression_results(y_true, y_pred):
    """Scatter predictions against ground truth together with the identity line.

    Points lying on the line are perfect predictions; the vertical distance
    between a point and the line is the absolute error of that prediction.

    Args:
        y_true: ground-truth target values.
        y_pred: predicted values, one per entry of ``y_true``.

    Returns:
        None. The figure is rendered with ``fig.show()``.
    """
    results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})

    # Common value range of both columns, used as the end points of the
    # reference line. Named lower/upper so the built-in min()/max() are not
    # shadowed inside the function.
    lower = results[['y_true', 'y_pred']].min().min()
    upper = results[['y_true', 'y_pred']].max().max()

    points = go.Scatter(x=results['y_true'], y=results['y_pred'], mode='markers')
    identity_line = go.Scatter(x=[lower, upper], y=[lower, upper])

    fig = go.Figure(data=[points, identity_line],
                    layout=go.Layout(showlegend=False, width=800,
                                     xaxis_title='y_true',
                                     yaxis_title='y_pred',
                                     title='Regression results'))
    fig.show()
plot_regression_results(y_true, y_pred)

In [None]:
# Regenerate a larger sample (1000 points) and inspect the distribution of the
# signed errors (y_true - y_pred).
y_true = 100 + 20 * np.random.randn(1000)
y_pred = y_true + 10 * np.random.randn(1000)
results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred}).assign(
    error=lambda frame: frame['y_true'] - frame['y_pred']
)

px.histogram(results, x='error', nbins=50, width=800)

### <a name='a3'></a> 2.2 Mean Absolute Error
### $$MAE = \frac{1}{N}\sum_{i=1}^{N}|y_{true,i} - y_{pred,i}|$$

In [None]:
def mean_absolute_error(y_true, y_pred):
    """Return the mean absolute error (MAE) between targets and predictions.

    Args:
        y_true: array-like of ground-truth values (NumPy array, list, tuple, ...).
        y_pred: array-like of predicted values, broadcastable against ``y_true``.

    Returns:
        The sum of ``|y_true - y_pred|`` divided by ``len(y_true)``.
    """
    # Convert up front so plain Python sequences work as well; the ``-``
    # operator is not defined for lists.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.abs(y_true - y_pred).sum() / len(y_true)

mean_absolute_error(y_true, y_pred)

7.8227757679314625

In [None]:
from tensorflow.keras.losses import mean_absolute_error
mean_absolute_error(y_true, y_pred)

<tf.Tensor: id=5, shape=(), dtype=float64, numpy=7.822775767931465>

In [None]:
from sklearn.metrics import mean_absolute_error
mean_absolute_error(y_true, y_pred)

7.8227757679314625

### <a name='a4'></a> 2.3 Mean Squared Error
### $$MSE = \frac{1}{N}\sum_{i=1}^{N}(y_{true,i} - y_{pred,i})^{2}$$

In [None]:
def mean_squared_error(y_true, y_pred):
    """Return the mean squared error (MSE) between targets and predictions.

    Args:
        y_true: array-like of ground-truth values (NumPy array, list, tuple, ...).
        y_pred: array-like of predicted values, broadcastable against ``y_true``.

    Returns:
        The sum of ``(y_true - y_pred) ** 2`` divided by ``len(y_true)``.
    """
    # Convert up front so plain Python sequences work as well; the ``-``
    # operator is not defined for lists.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    squared_errors = (y_true - y_pred) ** 2
    return squared_errors.sum() / len(y_true)

mean_squared_error(y_true, y_pred)

97.74807190779724

In [None]:
from tensorflow.keras.losses import mean_squared_error
mean_squared_error(y_true, y_pred)

<tf.Tensor: id=10, shape=(), dtype=float64, numpy=97.74807190779724>

In [None]:
from sklearn.metrics import mean_squared_error

mean_squared_error(y_true, y_pred)

97.74807190779724

### <a name='a5'></a> 2.4 Root Mean Squared Error
### $$RMSE = \sqrt{MSE}$$

In [None]:
def root_mean_squared_error(y_true, y_pred):
    """Return the root mean squared error (RMSE): the square root of the MSE.

    Args:
        y_true: array-like of ground-truth values (NumPy array, list, tuple, ...).
        y_pred: array-like of predicted values, broadcastable against ``y_true``.

    Returns:
        ``sqrt(sum((y_true - y_pred) ** 2) / len(y_true))``.
    """
    # Convert up front so plain Python sequences work as well; the ``-``
    # operator is not defined for lists.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    mse = ((y_true - y_pred) ** 2).sum() / len(y_true)
    return np.sqrt(mse)

root_mean_squared_error(y_true, y_pred)

9.886762458347892

In [None]:
np.sqrt(mean_squared_error(y_true, y_pred))

9.886762458347892

### <a name='a6'></a> 2.5 Max Error

In [None]:
def max_error(y_true, y_pred):
    """Return the largest absolute difference between targets and predictions.

    Args:
        y_true: array-like of ground-truth values (NumPy array, list, tuple, ...).
        y_pred: array-like of predicted values, broadcastable against ``y_true``.

    Returns:
        The maximum of ``|y_true - y_pred|``.
    """
    # Convert up front so plain Python sequences work as well; the ``-``
    # operator is not defined for lists.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.abs(y_true - y_pred).max()

In [None]:
max_error(y_true, y_pred)

34.155026647943814

In [None]:
from sklearn.metrics import max_error

max_error(y_true, y_pred)

34.155026647943814

### <a name='a7'></a> 2.6 R2 score - współczynnik determinacji
### $$R2\_score = 1 - \frac{\sum_{i=1}^{N}(y_{true,i} - y_{pred,i})^{2}}{\sum_{i=1}^{N}(y_{true,i} - \overline{y_{true}})^{2}}$$

In [None]:
from sklearn.metrics import r2_score

r2_score(y_true, y_pred)

0.7589842234966283

In [None]:
def r2_score(y_true, y_pred):
    """Return the coefficient of determination (R^2) of the predictions.

    Computed as ``1 - SS_res / SS_tot`` where ``SS_res`` is the sum of squared
    residuals and ``SS_tot`` is the sum of squared deviations of ``y_true``
    from its mean.

    Args:
        y_true: array-like of ground-truth values (NumPy array, list, tuple, ...).
        y_pred: array-like of predicted values, broadcastable against ``y_true``.

    Returns:
        The R^2 score. When ``y_true`` is constant (``SS_tot == 0``) the ratio
        is undefined; a message is printed and, following scikit-learn's
        default convention, 1.0 is returned for perfect predictions and 0.0
        otherwise.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    numerator = ((y_true - y_pred) ** 2).sum()
    denominator = ((y_true - y_true.mean()) ** 2).sum()

    # NumPy scalars do not raise ZeroDivisionError on division by zero (they
    # yield nan/inf with a RuntimeWarning), so the degenerate case has to be
    # detected explicitly instead of with try/except.
    if denominator == 0:
        print('Dzielenie przez zero')
        return 1.0 if numerator == 0 else 0.0

    return 1 - numerator / denominator

In [None]:
r2_score(y_true, y_pred)

0.7589842234966283