# Debugging with Evidently Test Suites and Reports

In [1]:
import datetime
import pandas as pd

from evidently import ColumnMapping
from evidently.report import Report
# there are other resets e.g. classification performance, regression performance, data quality
# addressing different parts of data pipeline
from evidently.metric_preset import DataDriftPreset

# test suites contain a lot of different tests to help you figure out whether the metric values are as expected
from evidently.test_suite import TestSuite
from evidently.test_preset import DataDriftTestPreset

from joblib import dump, load

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error

# Load data and model

In [2]:
ref_data = pd.read_parquet('data/reference.parquet')

In [3]:
current_data = pd.read_parquet('data/green_tripdata_2022-02.parquet')

In [4]:
with open('models/lin_reg.bin', 'rb') as f_in:
    model = load(f_in)

In [5]:
# data labeling
target = "duration_min"
num_features = ["passenger_count", "trip_distance", "fare_amount", "total_amount"]
cat_features = ["PULocationID", "DOLocationID"]

In [6]:
# select problematic part of current dataset
problematic_data = current_data.loc[(current_data.lpep_pickup_datetime >= datetime.datetime(2022,2,2,0,0)) & 
                               (current_data.lpep_pickup_datetime < datetime.datetime(2022,2,3,0,0))]

## Generate test suite and report

interested in prediction drift

In [7]:
column_mapping = ColumnMapping(
    prediction='prediction',
    numerical_features=num_features,
    categorical_features=cat_features,
    target=None
)

In [8]:
problematic_data['prediction'] = model.predict(problematic_data[num_features + cat_features].fillna(0))

In [12]:
print(dir(DataDriftPreset))

['Config', '__abstractmethods__', '__annotations__', '__class__', '__class_vars__', '__config__', '__custom_root_type__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__exclude_fields__', '__fields__', '__fields_set__', '__format__', '__ge__', '__get_classpath__', '__get_type__', '__get_validators__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__include_fields__', '__init__', '__init_subclass__', '__init_values__', '__is_base_type__', '__iter__', '__json_encoder__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__post_root_validators__', '__pre_root_validators__', '__pretty__', '__private_attributes__', '__reduce__', '__reduce_ex__', '__repr__', '__repr_args__', '__repr_name__', '__repr_str__', '__rich_repr__', '__schema_cache__', '__setattr__', '__setstate__', '__signature__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__subtypes__', '__try_update_forward_refs__', '__validators__', '_abc_impl', '_calculate_keys', '_copy_and_set_

In [11]:
test_suite = TestSuite(tests=[DataDriftPreset(),])
test_suite.run(current_data=problematic_data,reference_data=ref_data,column_mapping=column_mapping)

AttributeError: 'DataDriftPreset' object has no attribute 'set_context'