In [4]:
from sklearn import datasets
import pandas as pd

from evidently.pipeline.column_mapping import ColumnMapping
from evidently.model_profile import Profile
from evidently.model_profile.sections import (
    ClassificationPerformanceProfileSection ,
    ProbClassificationPerformanceProfileSection
    )
from sklearn import datasets, model_selection, linear_model, neighbors
from sklearn.neighbors import KNeighborsClassifier

from evidently.dashboard import Dashboard
from evidently.dashboard.tabs import (
    DataDriftTab,
    CatTargetDriftTab,
    DataQualityTab,
    ProbClassificationPerformanceTab,
    ClassificationPerformanceTab
)



In [5]:
import warnings

# Silence all warnings for the rest of the session so cell outputs stay readable.
# One blanket "ignore" filter is sufficient: filterwarnings('ignore') and
# simplefilter('ignore') register the same catch-all rule, so calling both
# was redundant.
# NOTE(review): this also hides deprecation notices from the libraries in use.
warnings.filterwarnings('ignore')

## **Iris Data**


A public multivariate machine-learning dataset with a total of 150 records and 5 attributes.


**Attributes:** Petal Length, Petal Width, Sepal Length, Sepal Width, Class.


**Target Variable:** "Class" - it has 3 classes of 50 records each: Iris Setosa, Iris Virginica, Iris Versicolor.

In [26]:
# Load Iris, put the measurements into a DataFrame with the class label
# attached, and split it into train/test parts (fixed seed for a repeatable split).
iris = datasets.load_iris()
iris_frame = pd.DataFrame(data=iris.data, columns=iris.feature_names).assign(target=iris.target)
train_data, test_data = model_selection.train_test_split(iris_frame, random_state=0)

# Names of the label column and of the column that will hold model predictions.
target, prediction = 'target', 'prediction'

# All Iris features are numeric; the categorical list is intentionally empty.
numerical_features = iris.feature_names
categorical_features = []
features = [*numerical_features, *categorical_features]


In [27]:
# Display the dataset's target_names array (the species names behind the class labels).
iris.target_names



array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [8]:
# Preview the first rows of the combined feature + target frame.
iris_frame.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


### **Data Quality And Data Drift Reports**

In [4]:
# Reload Iris as pandas objects and build a single frame of features plus label.
iris = datasets.load_iris(as_frame=True)
# assign() builds a new DataFrame, so the label column is added without
# writing into iris.data itself.
iris_frame = iris.data.assign(target=iris.target)

# Data drift: the first 100 rows are the reference set, the remaining rows the current set.
iris_data_drift_report = Dashboard(tabs=[DataDriftTab()])
iris_data_drift_report.calculate(iris_frame[:100], iris_frame[100:], column_mapping=None)
# NOTE(review): absolute, user-specific output path; it will not exist on other
# machines - consider a configurable output directory.
iris_data_drift_report.save("C:/Users/k.yadav/Downloads/newminiconda_vs/Evidently_AI/DataDrift.html")

# Data quality: same reference/current split, rendered with the data-quality tab.
dashboard_quality = Dashboard(tabs=[DataQualityTab()])
dashboard_quality.calculate(iris_frame[:100], iris_frame[100:], column_mapping=None)
dashboard_quality.save("C:/Users/k.yadav/Downloads/newminiconda_vs/Evidently_AI/DataQuality.html")

### **Target Drift Reports**

In [5]:
# Target drift report: the first 100 rows act as reference, the rest as current data.
target_drift_tabs = [CatTargetDriftTab()]
iris_data_and_target_drift_report = Dashboard(tabs=target_drift_tabs)
iris_data_and_target_drift_report.calculate(
    iris_frame.iloc[:100],
    iris_frame.iloc[100:],
    column_mapping=None,
)
iris_data_and_target_drift_report.save(
    "C:/Users/k.yadav/Downloads/newminiconda_vs/Evidently_AI/TargetDrift.html"
)

### **Model Performance Dashboard**

In [8]:
## Training Model

# Fit a k-nearest-neighbours classifier on the training split.
model = neighbors.KNeighborsClassifier(n_neighbors=1) # can change the value to view the results
model.fit(train_data[features], train_data.target)

# Predict on both splits and store the labels next to the true target,
# because the performance dashboards read the 'prediction' column.
train_predictions = model.predict(train_data[features])
test_predictions = model.predict(test_data[features])
train_data['prediction'] = train_predictions
test_data['prediction'] = test_predictions

# Tell Evidently which columns hold the label, the prediction and the numeric features.
iris_column_mapping = ColumnMapping()
iris_column_mapping.target = target
iris_column_mapping.prediction = prediction
iris_column_mapping.numerical_features = numerical_features



In [9]:

## Model Performance Dashboard: full (verbose_level=1) first, then short (verbose_level=0)
# Each pass compares train_data with test_data using the shared column mapping
# and writes the result to its own HTML file.
performance_report_paths = {
    1: "C:/Users/k.yadav/Downloads/newminiconda_vs/Evidently_AI/classificationPerformance_level1.html",
    0: "C:/Users/k.yadav/Downloads/newminiconda_vs/Evidently_AI/classificationPerformance_level0.html",
}
for verbose_level, report_path in performance_report_paths.items():
    iris_model_performance_dashboard = Dashboard(
        tabs=[ClassificationPerformanceTab(verbose_level=verbose_level)]
    )
    iris_model_performance_dashboard.calculate(
        train_data,
        test_data,
        column_mapping=iris_column_mapping,
    )
    iris_model_performance_dashboard.save(report_path)

In [10]:

# Model performance profile: same train/test comparison, shown as JSON text.
performance_sections = [ClassificationPerformanceProfileSection()]
iris_classification_performance_profile = Profile(sections=performance_sections)
iris_classification_performance_profile.calculate(
    train_data,
    test_data,
    column_mapping=iris_column_mapping,
)
iris_classification_performance_profile.json()

'{"classification_performance": {"name": "classification_performance", "datetime": "2022-07-21 16:10:00.489513", "data": {"utility_columns": {"date": null, "id": null, "target": "target", "prediction": "prediction"}, "cat_feature_names": [], "num_feature_names": ["petal length (cm)", "petal width (cm)", "sepal length (cm)", "sepal width (cm)"], "datetime_feature_names": [], "target_names": null, "metrics": {"reference": {"accuracy": 1.0, "precision": 1.0, "recall": 1.0, "f1": 1.0, "metrics_matrix": {"0": {"precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 37}, "1": {"precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 34}, "2": {"precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 41}, "accuracy": 1.0, "macro avg": {"precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 112}, "weighted avg": {"precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 112}}, "confusion_matrix": {"labels": [0, 1, 2], "values": [[37, 0, 0], [0, 34, 0], [0, 0, 41]]}}, "cur

### **Probabilistic Model Performance Dashboard**

In [11]:
### Probabilistic Model Performance Dashboard

# Fit a logistic-regression model so per-class probabilities are available.
model = linear_model.LogisticRegression()
model.fit(train_data[features], train_data.target)

# make a data frame with predictions for each class
# (one probability column per class, named after the species)
train_probas = pd.DataFrame(model.predict_proba(train_data[features]))
train_probas.columns = iris.target_names
test_probas = pd.DataFrame(model.predict_proba(test_data[features]))
test_probas.columns = iris.target_names

# get labels for target: [0, 1, 0, 2] -> ['setosa', 'versicolor', 'setosa', 'virginica']
# Values that are not integer codes (i.e. already converted to names) are left
# untouched, so re-running this cell does not fail on labels that are already strings.
label_by_code = dict(enumerate(iris.target_names))
train_data['target'] = [label_by_code.get(x, x) for x in train_data['target']]
test_data['target'] = [label_by_code.get(x, x) for x in test_data['target']]





In [12]:

# merge train and test data with predictions

# Give both splits a fresh 0..n-1 index so their rows line up with the
# probability frames when concatenated side by side. Rebinding the names
# (instead of inplace=True) keeps the step explicit and safe to re-run.
train_data = train_data.reset_index(drop=True)
test_data = test_data.reset_index(drop=True)

merged_train_data = pd.concat([train_data, train_probas], axis=1)
merged_test_data = pd.concat([test_data, test_probas], axis=1)

# Column mapping for the probabilistic case: the prediction is the list of
# per-class probability columns rather than a single label column.
iris_column_mapping = ColumnMapping()
iris_column_mapping.target = 'target'
iris_column_mapping.prediction = iris.target_names.tolist()
iris_column_mapping.numerical_features = iris.feature_names



In [13]:
#Probabilistic Model Performance Dashboard full (verbose_level=1)
full_prob_tabs = [ProbClassificationPerformanceTab(verbose_level=1)]
iris_prob_classification_dashboard = Dashboard(tabs=full_prob_tabs)
iris_prob_classification_dashboard.calculate(
    merged_train_data,
    merged_test_data,
    column_mapping=iris_column_mapping,
)

# The full report is written to an HTML file rather than rendered inline.
iris_prob_classification_dashboard.save(
    "C:/Users/k.yadav/Downloads/newminiconda_vs/Evidently_AI/Probability_classificationPerformance_level1.html"
)




In [14]:

#Probabilistic Model Performance Dashboard short (verbose_level=0)
short_prob_tabs = [ProbClassificationPerformanceTab(verbose_level=0)]
iris_prob_classification_dashboard = Dashboard(tabs=short_prob_tabs)
iris_prob_classification_dashboard.calculate(
    merged_train_data,
    merged_test_data,
    column_mapping=iris_column_mapping,
)
# Render the short report inline in the notebook.
iris_prob_classification_dashboard.show()


In [15]:
#Probabilistic model performance profile, computed on the merged train/test frames
prob_profile_sections = [ProbClassificationPerformanceProfileSection()]
iris_prob_classification_profile = Profile(sections=prob_profile_sections)
iris_prob_classification_profile.calculate(
    merged_train_data,
    merged_test_data,
    column_mapping=iris_column_mapping,
)

In [16]:
# Show the computed probabilistic-classification profile as a JSON string.
iris_prob_classification_profile.json()

'{"probabilistic_classification_performance": {"name": "probabilistic_classification_performance", "datetime": "2022-07-21 16:10:23.173213", "data": {"utility_columns": {"date": null, "id": null, "target": "target", "prediction": ["setosa", "versicolor", "virginica"]}, "cat_feature_names": [], "num_feature_names": ["petal length (cm)", "petal width (cm)", "sepal length (cm)", "sepal width (cm)"], "datetime_feature_names": [], "target_names": null, "options": {"conf_interval_n_sigmas": 1, "classification_threshold": 0.5, "cut_quantile": null}, "metrics": {"reference": {"accuracy": 0.9821428571428571, "precision": 0.9844961240310077, "recall": 0.9803921568627452, "f1": 0.981962481962482, "pr_curve": {"setosa": {"pr": [0.33035714285714285, 0.3333333333333333, 0.33636363636363636, 0.3394495412844037, 0.3425925925925926, 0.34579439252336447, 0.3490566037735849, 0.3523809523809524, 0.3557692307692308, 0.3592233009708738, 0.3627450980392157, 0.36633663366336633, 0.37, 0.37373737373737376, 0.3

: 