# Probabilistic Classification Performance Dashboard and Profile for the Iris Dataset

In [1]:
import pandas as pd

from sklearn import datasets, model_selection, linear_model

from evidently.dashboard import Dashboard
from evidently.pipeline.column_mapping import ColumnMapping
from evidently.tabs import ProbClassificationPerformanceTab

from evidently.model_profile import Profile
from evidently.profile_sections import ProbClassificationPerformanceProfileSection

In [2]:
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

## Iris Data

In [3]:
# Load the iris dataset object; the cells below read its .data, .target,
# .feature_names and .target_names attributes.
iris = datasets.load_iris()

In [4]:
# Wrap the raw measurement matrix in a DataFrame whose columns are the
# dataset's feature names.
iris_frame = pd.DataFrame(data=iris.data, columns=iris.feature_names)

## Probabilistic Model Performance Dashboard

In [5]:
# Split features and integer labels into a "reference" part (used to fit the
# model) and a "production" part (scored as if it were live data).
# No split size is passed, so scikit-learn's default proportion applies;
# random_state pins the shuffle so the split is reproducible.
reference, production, y_train, y_test = model_selection.train_test_split(
    iris_frame,
    iris.target,
    random_state=111,
)

In [6]:
# Classifier used for the rest of the notebook: logistic regression with
# default settings.
model = linear_model.LogisticRegression()
# Alternative classifier, left disabled. `neighbors` is not among the sklearn
# submodules imported at the top of this notebook, so enabling this line also
# requires adding it to that import.
#model = neighbors.KNeighborsClassifier(n_neighbors=1)

In [7]:
# Fit on the feature columns only, selected by name. On a fresh run
# `reference` contains exactly these columns, so the fitted model is the same;
# the explicit selection keeps this cell re-runnable after later cells have
# added string label columns (e.g. 'target') to `reference`.
model.fit(reference[iris.feature_names], y_train)

LogisticRegression()

In [8]:
# Per-class probabilities for both splits, one column per class name.
# Only the feature columns are passed to the model, selected by name, so this
# cell still works when re-run after label columns such as 'target' have been
# added to `reference` / `production` further down.
# The frames keep pandas' default 0..n-1 index; the later column-wise concat
# with the re-indexed splits depends on that.
train_probas = pd.DataFrame(model.predict_proba(reference[iris.feature_names]),
                            columns=iris.target_names)
test_probas = pd.DataFrame(model.predict_proba(production[iris.feature_names]),
                           columns=iris.target_names)

In [9]:
# Attach the ground-truth class name to each split.
# `iris.target_names` is indexed with the whole integer label array at once
# instead of element by element, and `assign` returns a new frame rather than
# writing a column into the object handed back by train_test_split (an
# in-place write there is what pandas may flag with SettingWithCopyWarning,
# which this notebook silences globally).
reference = reference.assign(target=iris.target_names[y_train])
production = production.assign(target=iris.target_names[y_test])

In [10]:
# Re-index both splits to 0..n-1 so they line up row-for-row with the
# probability frames (built with a default index) when concatenated
# column-wise. reset_index is used in its returning form rather than in place.
#
# No extra 'result' column is added: it was a second copy of the ground-truth
# labels (derived from y_train / y_test, not from model output), the column
# mapping never refers to it, and an exact copy of the target left among the
# unmapped columns risks being treated as a feature.
# NOTE(review): confirm nothing outside this section reads a 'result' column.
reference = reference.reset_index(drop=True)
merged_reference = pd.concat([reference, train_probas], axis=1)

production = production.reset_index(drop=True)
merged_production = pd.concat([production, test_probas], axis=1)

In [11]:
# Tell Evidently how to read the merged frames:
#   target             -> column holding the true class name
#   prediction         -> the per-class probability columns (one per class name)
#   numerical_features -> the iris measurement columns
iris_column_mapping = ColumnMapping(
    target='target',
    prediction=iris.target_names.tolist(),
    numerical_features=iris.feature_names,
)

In [12]:
# Build a dashboard with a single probabilistic-classification performance
# tab, compute it from the reference and production frames using the column
# mapping defined above, then display it.
iris_prob_classification_dashboard = Dashboard(
    tabs=[ProbClassificationPerformanceTab()],
)
iris_prob_classification_dashboard.calculate(
    merged_reference,
    merged_production,
    column_mapping=iris_column_mapping,
)
iris_prob_classification_dashboard.show()

In [13]:
# Optional: uncomment the next line to also save the dashboard to an HTML file.
#iris_prob_classification_dashboard.save('iris_prob_classification_performance.html')

## Probabilistic Model Performance Profile

In [14]:
# Same inputs as the dashboard, but computed as a profile with a single
# probabilistic-classification performance section.
iris_prob_classification_profile = Profile(
    sections=[ProbClassificationPerformanceProfileSection()],
)
iris_prob_classification_profile.calculate(
    merged_reference,
    merged_production,
    column_mapping=iris_column_mapping,
)

In [15]:
# Serialise the computed profile to a JSON string; as the last expression in
# the cell, that string is what gets displayed below.
iris_prob_classification_profile.json() 

'{"probabilistic_classification_performance": {"name": "probabilistic_classification_performance", "datetime": "2021-07-15 20:32:52.164870", "data": {"utility_columns": {"date": null, "id": null, "target": "target", "prediction": ["setosa", "versicolor", "virginica"]}, "cat_feature_names": [], "num_feature_names": ["sepal length (cm)", "sepal width (cm)", "petal length (cm)", "petal width (cm)"], "target_names": null, "metrics": {"reference": {"accuracy": 0.9821428571428571, "precision": 0.9821428571428572, "recall": 0.9821428571428572, "f1": 0.9821428571428572, "roc_auc": 0.9992817202638632, "log_loss": 0.12463753144048624, "metrics_matrix": {"setosa": {"precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 37}, "versicolor": {"precision": 0.975, "recall": 0.975, "f1-score": 0.975, "support": 40}, "virginica": {"precision": 0.9714285714285714, "recall": 0.9714285714285714, "f1-score": 0.9714285714285714, "support": 35}, "accuracy": 0.9821428571428571, "macro avg": {"precision": 