# Debugging with Evidently Test Suites and Reports

In [1]:
import pandas as pd
import datetime

from joblib import dump, load
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error

from evidently import ColumnMapping
from evidently.report import report
from evidently.metric_preset import DataDriftPreset

from evidently.test_suite import TestSuite
from evidently.test_preset import DataDriftTestPreset

In [2]:
reference_data = pd.read_parquet("data/reference.parquet")
current_data = pd.read_parquet("data/green_tripdata_2022-02.parquet")

In [3]:
with open("models/lin_reg.bin", "rb") as f_in:
    model = load(f_in)

In [4]:
num_features = ["passenger_count", "trip_distance", "fare_amount", "total_amount"]
cat_features = ["PULocationID", "DOLocationID"]
target = "duration_min"

In [6]:
problematic_data = current_data.loc[(current_data.lpep_pickup_datetime >= (datetime.datetime(2022, 2, 2, 0, 9)))
                                    & (current_data.lpep_pickup_datetime < (datetime.datetime(2022, 2, 3, 0, 9)))]

In [8]:
column_mapping = ColumnMapping(
    prediction="prediction",
    numerical_features=num_features,
    categorical_features=cat_features,
    target=None
)

In [11]:
test_suite = TestSuite(tests = [DataDriftTestPreset()])
test_suite.run(reference_data=reference_data,
               current_data=problematic_data,
               column_mapping=column_mapping)

ValueError: Prediction column (prediction) is partially present in data