In [71]:
import datetime
import os

import pandas as pd

from evidently.metric_preset import DataQualityPreset
from evidently.report import Report
from evidently.ui.workspace.cloud import CloudWorkspace

In [72]:
from evidently.ui.workspace.cloud import CloudWorkspace

from evidently.report import Report
from evidently.metric_preset import DataQualityPreset
from evidently.metric_preset import DataDriftPreset
from evidently.metric_preset import ClassificationPreset
from evidently import ColumnMapping
from evidently.metrics import *
from evidently.test_suite import TestSuite
from evidently.tests import *
from evidently.test_preset import DataDriftTestPreset
from evidently.tests.base_test import TestResult, TestStatus

In [73]:
from evidently import metrics
from evidently.ui.dashboards import DashboardPanelPlot
from evidently.ui.dashboards import DashboardPanelTestSuite
from evidently.ui.dashboards import PanelValue
from evidently.ui.dashboards import PlotType
from evidently.ui.dashboards import ReportFilter
from evidently.ui.dashboards import TestFilter
from evidently.ui.dashboards import TestSuitePanelType
from evidently.renderers.html_widgets import WidgetSize

### What is Evidently?
Evidently helps evaluate, test, and monitor data and ML models from validation to production. It works with tabular data, text data, and embeddings.

### Preparing The Data

In [88]:
# Reference dataset: passed as `reference_data` to the reports built further down.
ref_df = pd.read_csv('../bank-model-experimentation/bank-full.csv')

In [38]:
# Preview the first rows of the reference data.
ref_df.head()

Unnamed: 0,age,job,martial,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no


In [87]:
import json

# Every row of the log file carries two JSON strings: `request_payload`
# decodes to a list of input records and `response_payload` to a list of
# outputs that lines up with it position by position.
temp_df = pd.read_csv('bank-model-logs.csv')
request_payload_list = temp_df['request_payload'].apply(json.loads).to_list()
response_payload_list = temp_df['response_payload'].apply(json.loads).to_list()

# Flatten the per-row lists into one record per input, storing the matching
# output under the 'y' key of that record.
records = []
for row_pos, inputs in enumerate(request_payload_list):
    outputs = response_payload_list[row_pos]
    for item_pos, record in enumerate(inputs):
        record['y'] = outputs[item_pos]
        records.append(record)

# One DataFrame row per flattened record.
log_df = pd.DataFrame(records)

In [45]:
# Column names of the reference data.
ref_df.columns

Index(['age', 'job', 'martial', 'education', 'default', 'balance', 'housing',
       'loan', 'contact', 'day', 'month', 'duration', 'campaign', 'pdays',
       'previous', 'poutcome', 'y'],
      dtype='object')

In [75]:
# Column names of the parsed log data, to compare against the reference columns.
log_df.columns

Index(['martial', 'loan', 'previous', 'default', 'age', 'education', 'housing',
       'day', 'duration', 'month', 'campaign', 'contact', 'balance',
       'poutcome', 'job', 'pdays', 'y'],
      dtype='object')

In [47]:
# Compare the number of columns in the reference and log frames.
print(len(ref_df.columns), len(log_df.columns))

17 17


### Connecting to Evidently Dashboard

In [30]:
# The API token is a secret, so it is read from the environment instead of
# being embedded in the notebook (where it would be committed and shared).
# NOTE(review): any token that was previously stored in this notebook should
# be treated as leaked and revoked.
evidently_token = os.environ.get("EVIDENTLY_API_TOKEN")
if not evidently_token:
    raise RuntimeError(
        "Set the EVIDENTLY_API_TOKEN environment variable before connecting to Evidently Cloud."
    )

ws = CloudWorkspace(token=evidently_token, url="https://app.evidently.cloud")
# Project that the reports below are uploaded to.
bank_project = ws.get_project("f051d11c-3182-4079-b4ae-1d1d6e8887cb")

### Creating Data Quality and Data Drift Metric Report

In [48]:
def create_report(reference_data=None, current_data=None):
    """Build and run a combined data drift and data quality report.

    Args:
        reference_data: DataFrame passed to the report as the reference
            dataset. Defaults to the notebook-level ``ref_df``.
        current_data: DataFrame passed to the report as the current dataset.
            Defaults to the notebook-level ``log_df``.

    Returns:
        The ``Report`` instance after ``run`` has been called on it.
    """
    # Fall back to the notebook globals so a bare `create_report()` call keeps
    # working. `is None` is used because a DataFrame has no unambiguous truth
    # value, so `reference_data or ref_df` would raise.
    if reference_data is None:
        reference_data = ref_df
    if current_data is None:
        current_data = log_df

    data_report = Report(
        metrics=[
            # Drift is evaluated with the PSI stat test. Read more about PSI here:
            # https://medium.com/model-monitoring-psi/population-stability-index-psi-ab133b0a5d42
            DataDriftPreset(stattest='psi'),
            DataQualityPreset(),
        ],
        # Timestamp attached to the report: the moment it is created.
        timestamp=datetime.datetime.now(),
    )

    data_report.run(reference_data=reference_data, current_data=current_data)
    return data_report

In [49]:
# Build the drift/quality report and add it to the project in the workspace.
report = create_report()
ws.add_report(bank_project.id, report)

### Evidently Terminology

**Metric** - A Metric is a component that evaluates a specific aspect of data or model quality (e.g., the number of missing values). Metrics exist at the dataset level and at the column level.

**Report** - A Report is a combination of different Metrics that evaluate data or ML model quality.

**Metric Preset** - A Metric Preset is a pre-built Report that combines Metrics for a particular use case (e.g., `DataDriftPreset`).

**Test** - A Test is a Metric with a condition. It calculates a value and compares it against the defined threshold.

**Test Suite** - You can list multiple Tests and execute them together in a Test Suite.

**Test Preset** - A Test Preset is a pre-built Test Suite that combines checks for a particular use case.


### Classification Report

In [89]:
# The 'y' column plays a different role in each frame: in the log data it is
# renamed to 'prediction', in the reference data it is renamed to 'target'.
ref_df.rename({'y': 'target'}, axis='columns', inplace=True)
log_df.rename({'y': 'prediction'}, axis='columns', inplace=True)

In [92]:
# Each frame only has one of the two label columns, so the missing one is
# filled with randomly generated yes/no values: a `target` for log_df and a
# `prediction` for ref_df. These labels are fabricated, so the resulting
# classification metrics only demonstrate the report; they do not measure the model.
import random

# A dedicated, seeded generator makes the fabricated labels (and therefore
# the uploaded report) reproducible across kernel restarts.
RANDOM_SEED = 42
label_rng = random.Random(RANDOM_SEED)


def encode_label(label):
    """Encode a label as 1/0: 'yes' or an already-encoded 1 gives 1, anything else 0.

    Accepting an already-encoded 1 makes the conversion idempotent. Without
    it, re-running this cell would turn every previously encoded value into 0,
    because an integer never equals 'yes'.
    """
    if isinstance(label, str):
        return 1 if label == 'yes' else 0
    return 1 if label == 1 else 0


log_df['target'] = log_df['prediction'].apply(lambda _: label_rng.choice(['yes', 'no']))
ref_df['prediction'] = ref_df['target'].apply(lambda _: label_rng.choice(['yes', 'no']))

# convert yes to 1 and no to 0
for frame in (log_df, ref_df):
    for label_column in ('target', 'prediction'):
        frame[label_column] = frame[label_column].apply(encode_label)

classification_performance_report = Report(metrics=[
    ClassificationPreset(),
])

classification_performance_report.run(reference_data=ref_df, current_data=log_df)

ws.add_report(bank_project.id, classification_performance_report)