# Data drift reports with evidently.ai

In [1]:
from evidently import ColumnMapping

from evidently.report import Report
from evidently.metrics.base_metric import generate_column_metrics
from evidently.metric_preset import DataDriftPreset, TargetDriftPreset, DataQualityPreset, RegressionPreset
from evidently.metrics import *

from evidently.test_suite import TestSuite
from evidently.tests.base_test import generate_column_tests
from evidently.test_preset import DataStabilityTestPreset, NoTargetPerformanceTestPreset
from evidently.tests import *

import pandas as pd

In [2]:
def get_raw_data(
    train_path: str = 'data/crop_train.csv',
    test_path: str = 'data/crop_test.csv',
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Load the train and test datasets from CSV files.

    Args:
        train_path: Path of the CSV file holding the training split.
        test_path: Path of the CSV file holding the test split.

    Returns:
        A ``(df_train, df_test)`` tuple of DataFrames, in that order.
    """
    df_train = pd.read_csv(train_path)
    df_test = pd.read_csv(test_path)

    return df_train, df_test

In [3]:
# Load the train and test splits into DataFrames.
df_train, df_test = get_raw_data()

In [4]:
# Work on copies so the loaded frames stay untouched: the training split is
# the reference dataset and the test split is the current dataset.
reference, current = df_train.copy(), df_test.copy()

# Get data drift

In [5]:
# Report made of the data-drift preset only, evaluated on the current
# dataset against the reference dataset.
report = Report(metrics=[DataDriftPreset()])
report.run(current_data=current, reference_data=reference)

In [6]:
# Display the computed report as a nested dict with one entry per metric.
report.as_dict()

{'metrics': [{'metric': 'DatasetDriftMetric',
   'result': {'drift_share': 0.5,
    'number_of_columns': 8,
    'number_of_drifted_columns': 0,
    'share_of_drifted_columns': 0.0,
    'dataset_drift': False}},
  {'metric': 'DataDriftTable',
   'result': {'number_of_columns': 8,
    'number_of_drifted_columns': 0,
    'share_of_drifted_columns': 0.0,
    'dataset_drift': False,
    'drift_by_columns': {'K': {'column_name': 'K',
      'column_type': 'num',
      'stattest_name': 'Wasserstein distance (normed)',
      'stattest_threshold': 0.1,
      'drift_score': np.float64(0.0),
      'drift_detected': False,
      'current': {'small_distribution': {'x': [5.0,
         25.0,
         45.0,
         65.0,
         85.0,
         105.0,
         125.0,
         145.0,
         165.0,
         185.0,
         205.0],
        'y': [0.01948863636363636,
         0.014204545454545454,
         0.009573863636363637,
         0.001846590909090909,
         0.0002556818181818182,
         0.0,

In [None]:
# Display the report serialized as a JSON string.
report.json()

'{"version": "0.6.7", "metrics": [{"metric": "DatasetDriftMetric", "result": {"drift_share": 0.5, "number_of_columns": 8, "number_of_drifted_columns": 0, "share_of_drifted_columns": 0.0, "dataset_drift": false}}, {"metric": "DataDriftTable", "result": {"number_of_columns": 8, "number_of_drifted_columns": 0, "share_of_drifted_columns": 0.0, "dataset_drift": false, "drift_by_columns": {"K": {"column_name": "K", "column_type": "num", "stattest_name": "Wasserstein distance (normed)", "stattest_threshold": 0.1, "drift_score": 0.0, "drift_detected": false, "current": {"small_distribution": {"x": [5.0, 25.0, 45.0, 65.0, 85.0, 105.0, 125.0, 145.0, 165.0, 185.0, 205.0], "y": [0.01948863636363636, 0.014204545454545454, 0.009573863636363637, 0.001846590909090909, 0.0002556818181818182, 0.0, 0.0, 0.0, 0.0, 0.0046306818181818185]}}, "reference": {"small_distribution": {"x": [5.0, 25.0, 45.0, 65.0, 85.0, 105.0, 125.0, 145.0, 165.0, 185.0, 205.0], "y": [0.01948863636363636, 0.014204545454545454, 0.00

In [8]:
# Export the drift report to an HTML file.
report.save_html('report.html')

# Run tests

In [9]:
# Suite of individual checks, each created with its default settings and
# passed to the suite in the order listed below.
tests = TestSuite(tests=[
    check()
    for check in (
        TestNumberOfColumnsWithMissingValues,
        TestNumberOfRowsWithMissingValues,
        TestNumberOfConstantColumns,
        TestNumberOfDuplicatedRows,
        TestNumberOfDuplicatedColumns,
        TestColumnsType,
        TestNumberOfDriftedColumns,
    )
])

tests.run(current_data=current, reference_data=reference)

In [10]:
# Export the test suite results to an HTML file.
tests.save_html('tests.html')

In [11]:
# Suite built from the no-target performance preset alone, evaluated on the
# current dataset against the reference dataset.
suite = TestSuite(tests=[NoTargetPerformanceTestPreset()])
suite.run(current_data=current, reference_data=reference)

In [12]:
# Export the preset suite results to an HTML file.
suite.save_html('suite.html')