## Install Evidently

In [None]:
#%pip install evidently -Uq

Use the Jupyter nbextension to display Evidently dashboards inside a Jupyter notebook.

Install the Jupyter nbextension:
`$ jupyter nbextension install --sys-prefix --symlink --overwrite --py evidently`

Then run the following command to enable it:
`$ jupyter nbextension enable evidently --py --sys-prefix`


In [None]:
import evidently
evidently.__version__

'0.1.22.dev0'

## References

- [Step by Step Guide for Jupyter Notebooks](https://docs.evidentlyai.com/step-by-step-guides/step-by-step-guide-for-jupyter-notebooks)
- [Data Drift Iris dataset](https://github.com/evidentlyai/evidently/blob/main/evidently/examples/iris_data_drift.ipynb)
- [Report - Data Drift](https://docs.evidentlyai.com/reports/data-drift#how-it-works)
- [Report - Classification Performance](https://docs.evidentlyai.com/reports/classification-performance)

## Outline

- Data Drift & Model Classification Performance for Iris dataset
  - Prepare the data as pandas DF
  - Pass column_mapping into Dashboard
  - Generate the Report
  - Explore the dashboard in Jupyter Notebook
  - Export the report as HTML file
  - Create a JSON profile

## Data Drift Iris dataset

**When**
- Model maintenance: decide when to retrain and which features to drop
- Debugging model decay: explore where the changes come from when model quality has dropped
- A/B test or trial use: detect training-serving skew in order to interpret test results
- Pre-deployment validation: detect drift in an offline environment. Explore past shifts to define retraining needs and determine thresholds for monitoring

**What**
- The report detects changes in feature distributions in the input data. 
- Performs a suitable **statistical test** for numerical and categorical features
- Plots **feature values and distributions** for the two datasets.

Questions
- Which statistical tests are performed? (Source: [Data Drift Report](https://docs.evidentlyai.com/reports/data-drift#how-it-works))
  - For numerical features, two-sample Kolmogorov-Smirnov test. 
  - For categorical features, chi-squared test. 
  - For binary categorical features, the proportion difference test for independent samples based on Z-score.

In [None]:
import pandas as pd

from sklearn import datasets

from evidently.dashboard import Dashboard
from evidently.tabs import DataDriftTab

from evidently.model_profile import Profile
from evidently.profile_sections import DataDriftProfileSection

### Iris Data

In [None]:
iris = datasets.load_iris()
iris_frame = pd.DataFrame(data=iris.data, columns=iris.feature_names)

In [None]:
iris_frame.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


### Data Drift Dashboard

In [None]:
# Tip: run `Dashboard??` or `DataDriftTab??` in a scratch cell to inspect their source.

# Build a dashboard whose only tab is the data-drift tab.
iris_dd_dash = Dashboard(tabs=[DataDriftTab])

In [None]:
# Compute the data-drift dashboard contents.
# NOTE(review): reference_data and current_data are the same frame here, so the
# two distributions are identical by construction and this run cannot show any
# drift. Pass two different slices/datasets to get a meaningful comparison.
iris_dd_dash.calculate(reference_data=iris_frame,
                       current_data=iris_frame,
                       column_mapping=None)

In [None]:
iris_dd_dash.show()

In [None]:
iris_dd_dash.save('iris_data_drift.html')

### Data Drift Profile

In [None]:
iris_dd_profile = Profile(sections=[DataDriftProfileSection])

In [None]:
# Compute the data-drift profile (serialized to JSON in the next cell).
# NOTE(review): the same frame is passed as both reference and current data,
# so the reference and current statistics in the profile are identical.
iris_dd_profile.calculate(iris_frame, iris_frame, column_mapping=None)

In [None]:
import json
json.loads(iris_dd_profile.json())

{'data_drift': {'name': 'data_drift',
  'datetime': '2021-08-13 14:39:23.203146',
  'data': {'utility_columns': {'date': None,
    'id': None,
    'target': None,
    'prediction': None},
   'cat_feature_names': [],
   'num_feature_names': ['sepal length (cm)',
    'sepal width (cm)',
    'petal length (cm)',
    'petal width (cm)'],
   'metrics': {'sepal length (cm)': {'current_small_hist': [[0.16666666666666652,
       0.4259259259259266,
       0.259259259259259,
       0.4999999999999995,
       0.2962962962962968,
       0.48148148148148107,
       0.33333333333333304,
       0.11111111111111129,
       0.09259259259259252,
       0.11111111111111102],
      [4.3, 4.66, 5.02, 5.38, 5.74, 6.1, 6.46, 6.82, 7.18, 7.54, 7.9]],
     'ref_small_hist': [[0.16666666666666652,
       0.4259259259259266,
       0.259259259259259,
       0.4999999999999995,
       0.2962962962962968,
       0.48148148148148107,
       0.33333333333333304,
       0.11111111111111129,
       0.0925925925925925

## Classification Performance Dashboards for Iris Dataset

**When**
- Analyze the results of a model test (evaluate the results of an online/offline test and compare them with the performance in training)
- Generate regular reports on the performance of a production model
- Analyze the performance on slices of data
- Trigger model retraining (if the performance falls below a threshold)
- Debug or improve model performance (identify underperforming segments)

**What**
- Reports the performance of a classification model
- Works for a single model or helps compare two models
- Works for binary and multi-class classification
- Plots related to model performance
- Identifies regions where the model makes different types of errors

### Iris Data

In [None]:
from sklearn import datasets
iris = datasets.load_iris()

import pandas as pd
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

### Model Performance Dashboard (Non-probabilistic)

In [None]:
from sklearn.model_selection import train_test_split

# Train Test Splits
# ref: Training prod: Test
ref, prod, y_train, y_test = train_test_split(iris_df, 
                                              iris.target, 
                                              test_size=0.25, 
                                              random_state=0)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Fit the model
# Non probabilistic
model = KNeighborsClassifier(n_neighbors=1)

model.fit(ref, y_train)

KNeighborsClassifier(n_neighbors=1)

In [None]:
# Get the predictions
train_preds = model.predict(ref)
test_preds = model.predict(prod)

In [None]:
ref['target'] = y_train
ref['prediction'] = train_preds

prod['target'] = y_test
prod['prediction'] = test_preds

In [None]:
ref.head(1)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target,prediction
61,5.9,3.0,4.2,1.5,1,1


In [None]:
prod.head(1)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target,prediction
114,5.8,2.8,5.1,2.4,2,2


In [None]:
# Reverse Label Mapping

def reverse_map(df, col:str, names=None):
    """Translate the integer class codes in ``df[col]`` into class-name labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the encoded column.
    col : str
        Name of the column whose values index into ``names``.
    names : sequence of str, optional
        Label for each class code, indexed by code. Defaults to
        ``iris.target_names`` from the notebook-level ``iris`` dataset.

    Returns
    -------
    pandas.Series
        The decoded labels, aligned on ``df``'s index. ``df`` is not modified.
    """
    # Resolve the default lazily so callers that pass ``names`` do not depend
    # on the notebook-level ``iris`` variable being defined.
    labels = iris.target_names if names is None else names
    return df[col].map(lambda code: labels[code])

ref.target = reverse_map(ref, col='target')
prod.target = reverse_map(prod, col='target')

ref.prediction = reverse_map(ref, col='prediction')
prod.prediction = reverse_map(prod, col='prediction')

In [None]:
ref.head(1)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target,prediction
61,5.9,3.0,4.2,1.5,versicolor,versicolor


In [None]:
prod.head(1)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target,prediction
114,5.8,2.8,5.1,2.4,virginica,virginica


### Create column mapping

In [None]:
# Column mapping passed to Evidently: which columns hold the label, the model
# output, and the numerical features.
iris_col_map = {
    'target': 'target',
    'prediction': 'prediction',
    'numerical_features': iris.feature_names,
}

### Classification Dashboard

In [None]:
from evidently.dashboard import Dashboard
from evidently.tabs import ClassificationPerformanceTab

In [None]:
# Tip: run `ClassificationPerformanceTab??` in a scratch cell to inspect its source.
# Build a dashboard whose only tab is the classification-performance tab.
iris_model_perf_dash = Dashboard(tabs=[ClassificationPerformanceTab])

In [None]:
iris_model_perf_dash.calculate(reference_data=ref,current_data=prod,column_mapping=iris_col_map)

In [None]:
iris_model_perf_dash.show()

In [None]:
iris_model_perf_dash.save('iris_classification_performance.html')

### Model Performance Profile

In [None]:
from evidently.model_profile import Profile
from evidently.profile_sections import ClassificationPerformanceProfileSection

In [None]:
iris_clas_perf_profile = Profile(sections=[ClassificationPerformanceProfileSection])

In [None]:
iris_clas_perf_profile.calculate(reference_data=ref, 
                                 current_data=prod, 
                                 column_mapping=iris_col_map)

In [None]:
import json
json.loads(iris_clas_perf_profile.json())

{'classification_performance': {'name': 'classification_performance',
  'datetime': '2021-08-13 14:39:25.219935',
  'data': {'utility_columns': {'date': None,
    'id': None,
    'target': 'target',
    'prediction': 'prediction'},
   'cat_feature_names': [],
   'num_feature_names': ['sepal length (cm)',
    'sepal width (cm)',
    'petal length (cm)',
    'petal width (cm)'],
   'target_names': None,
   'metrics': {'reference': {'accuracy': 1.0,
     'precision': 1.0,
     'recall': 1.0,
     'f1': 1.0,
     'metrics_matrix': {'setosa': {'precision': 1.0,
       'recall': 1.0,
       'f1-score': 1.0,
       'support': 37},
      'versicolor': {'precision': 1.0,
       'recall': 1.0,
       'f1-score': 1.0,
       'support': 34},
      'virginica': {'precision': 1.0,
       'recall': 1.0,
       'f1-score': 1.0,
       'support': 41},
      'accuracy': 1.0,
      'macro avg': {'precision': 1.0,
       'recall': 1.0,
       'f1-score': 1.0,
       'support': 112},
      'weighted avg': 

## [Probabilistic] Data Drift & Model Performance for Iris dataset 

In [None]:
import warnings
# Silence every warning from here on. Both calls below install a catch-all
# 'ignore' filter, so either one alone would have the same effect.
# NOTE(review): this also hides any pandas / scikit-learn warnings raised by
# the cells that follow, which can mask real problems.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

### Iris Data

In [None]:
from sklearn import datasets
iris=datasets.load_iris()

import pandas as pd
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)

### Probabilistic Performance Dashboard

In [None]:
from sklearn.model_selection import train_test_split

# Train Test Splits
# ref: Training prod: Test
ref, prod, y_train, y_test = train_test_split(iris_df, 
                                              iris.target, 
                                              test_size=0.25, 
                                              random_state=0)

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit the model
# Probabilistic
model = LogisticRegression()
model.fit(ref, y_train)

LogisticRegression()

In [None]:
# Get the prediction probas for train & test
train_probas = pd.DataFrame(data=model.predict_proba(ref), 
                            columns=iris.target_names)
test_probas = pd.DataFrame(data=model.predict_proba(prod), 
                            columns=iris.target_names)

In [None]:
train_probas.head(2)

Unnamed: 0,setosa,versicolor,virginica
0,0.017768,0.860582,0.121651
1,0.022194,0.934421,0.043385


In [None]:
test_probas.head(2)

Unnamed: 0,setosa,versicolor,virginica
0,0.000118,0.056148,0.943735
1,0.012629,0.960454,0.026917


In [None]:
# NOTE(review): `target` is not an existing column of ref/prod at this point, so
# these attribute assignments do not add a DataFrame column (the merged frames
# displayed below have no `target` column). pandas normally warns about this,
# but warnings were silenced earlier in this section. The labels actually used
# by the column mapping live in the `result` column created in the next cells.
ref.target = [iris.target_names[each] for each in y_train]
prod.target = [iris.target_names[each] for each in y_test]

In [None]:
ref.reset_index(inplace=True, drop=True)

ref['result'] = [iris.target_names[x] for x in y_train]
merged_ref = pd.concat([ref, train_probas], axis=1)
merged_ref.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),result,setosa,versicolor,virginica
0,5.9,3.0,4.2,1.5,versicolor,0.017768,0.860582,0.1216506
1,5.8,2.6,4.0,1.2,versicolor,0.022194,0.934421,0.04338547
2,6.8,3.0,5.5,2.1,virginica,3.6e-05,0.073558,0.9264064
3,4.7,3.2,1.3,0.2,setosa,0.981263,0.018737,7.458671e-08
4,6.9,3.1,5.1,2.3,virginica,0.000152,0.139633,0.8602151


In [None]:
prod.reset_index(inplace=True, drop=True)

prod['result'] = [iris.target_names[x] for x in y_test]
merged_prod = pd.concat([prod, test_probas], axis=1)
merged_prod.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),result,setosa,versicolor,virginica
0,5.8,2.8,5.1,2.4,virginica,0.000118,0.056148,0.9437345
1,6.0,2.2,4.0,1.0,versicolor,0.012629,0.960454,0.02691666
2,5.5,4.2,1.4,0.2,setosa,0.984398,0.015602,3.856086e-08
3,7.3,2.9,6.3,1.8,virginica,1e-06,0.023152,0.9768464
4,5.0,3.4,1.5,0.2,setosa,0.970235,0.029765,1.625964e-07


### Create column mapping

In [None]:
# Column mapping for the probabilistic reports: 'prediction' lists the
# per-class columns (one per target name) instead of a single column name.
iris_col_map = {
    'target': 'result',  # This is incorrect in the example notebook
    'prediction': iris.target_names.tolist(),
    'numerical_features': iris.feature_names,
}

### Probabilistic Classification Dashboard

In [None]:
from evidently.dashboard import Dashboard
from evidently.tabs import ProbClassificationPerformanceTab

In [None]:
iris_prob_clas_dash = Dashboard(tabs=[ProbClassificationPerformanceTab])
iris_prob_clas_dash.calculate(reference_data=merged_ref, 
                              current_data=merged_prod, 
                              column_mapping=iris_col_map)

In [None]:
## Why was this failing?
## Ans: the label column in merged_ref / merged_prod is named 'result', so the
## column mapping must use iris_col_map['target'] = 'result' rather than
## iris_col_map['target'] = 'target'.
iris_prob_clas_dash.show()

### Probabilistic Model Performance Profile

In [None]:
iris_col_map

{'target': 'result',
 'prediction': ['setosa', 'versicolor', 'virginica'],
 'numerical_features': ['sepal length (cm)',
  'sepal width (cm)',
  'petal length (cm)',
  'petal width (cm)']}

In [None]:
from evidently.model_profile import Profile
from evidently.profile_sections import ProbClassificationPerformanceProfileSection

In [None]:
iris_prob_clas_profile = Profile(sections=[ProbClassificationPerformanceProfileSection])

In [None]:
# Run the probabilistic profile created in the previous cell. The
# non-probabilistic `iris_clas_perf_profile` must not be used here: with
# column_mapping['prediction'] set to a list of per-class probability columns it
# fails with "ValueError: Classification metrics can't handle a mix of
# multiclass and continuous-multioutput targets".
iris_prob_clas_profile.calculate(merged_ref, merged_prod, column_mapping=iris_col_map)

ValueError: Classification metrics can't handle a mix of multiclass and continuous-multioutput targets