# Solutions to the workshop exercises

In [None]:
# Load IPython's autoreload extension and switch it to mode 2.
# NOTE(review): per the IPython docs, mode 2 re-imports modules before each
# cell executes — confirm this is still wanted in a solutions notebook.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd

from evidently import ColumnMapping
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset, DataQualityPreset, TargetDriftPreset, RegressionPreset
from evidently.test_suite import TestSuite
from evidently.test_preset import NoTargetPerformanceTestPreset, DataQualityTestPreset, DataStabilityTestPreset, DataDriftTestPreset, RegressionTestPreset

from pathlib import Path

from sklearn import datasets, ensemble, model_selection

### Regression model and California housing dataset

In [None]:
# One seed for every random step in this cell (split, sampling, model), so the
# reports built from these frames are reproducible between notebook runs.
RANDOM_STATE = 42

# Load the California housing data as a pandas DataFrame. `as_frame` is a
# boolean flag; newer scikit-learn releases validate it and reject strings
# such as "auto", so pass True explicitly.
housing_data = datasets.fetch_california_housing(as_frame=True)
housing = housing_data.frame

# The column mapping used in this notebook refers to the label as "target".
housing.rename(columns={"MedHouseVal": "target"}, inplace=True)
numerical_features_reg = [
    "MedInc",
    "HouseAge",
    "AveRooms",
    "AveBedrms",
    "Population",
    "AveOccup",
    "Latitude",
    "Longitude",
]
categorical_features_reg = []
features_reg = numerical_features_reg

# Reference = data the model is trained on; current = held-out data.
housing_ref, housing_cur = model_selection.train_test_split(
    housing, test_size=0.3, random_state=RANDOM_STATE
)

# Down-sample both frames (without replacement) to keep the reports small.
housing_ref = housing_ref.sample(n=5000, replace=False, random_state=RANDOM_STATE)
housing_cur = housing_cur.sample(n=1000, replace=False, random_state=RANDOM_STATE)

reg_model = ensemble.RandomForestRegressor(random_state=RANDOM_STATE)
reg_model.fit(housing_ref[features_reg], housing_ref.target)

# Store the model output next to the features and the target.
housing_ref["prediction"] = reg_model.predict(housing_ref[features_reg])
housing_cur["prediction"] = reg_model.predict(housing_cur[features_reg])

# Input-only views: every column except the target and the prediction.
housing_ref_input_data = housing_ref.drop(columns=["target", "prediction"])
housing_cur_input_data = housing_cur.drop(columns=["target", "prediction"])

## Model performance

### Column mapping

#### Exercise 1: Solution

Map columns for the regression model


In [None]:
# Describe the regression dataset layout in a single constructor call:
# which columns hold the label and the model output, which columns are
# numerical features, and what kind of task the model solves.
reg_column_mapping = ColumnMapping(
    target="target",
    prediction="prediction",
    numerical_features=numerical_features_reg,
    task="regression",
)

### Regression Performance report

#### Exercise 2: Solution

Create a Regression Performance report for the regression model, run it, show it and save it as html and json

In [None]:
# Build a report from the regression preset, then evaluate it on the
# reference and current housing frames using the regression column mapping.
reg_performance_report = Report(metrics=[RegressionPreset()])
reg_performance_report.run(
    reference_data=housing_ref,
    current_data=housing_cur,
    column_mapping=reg_column_mapping,
)

In [None]:
# Display the regression performance report using the "inline" mode.
reg_performance_report.show(mode="inline")

In [None]:
# Save the regression performance report as HTML and JSON.
# Create the output directory first; writing into a missing "reports/"
# directory would otherwise fail with FileNotFoundError.
reports_dir = Path("reports")
reports_dir.mkdir(parents=True, exist_ok=True)

reg_performance_report.save_html(reports_dir / "reg_perf_report.html")
reg_performance_report.save_json(reports_dir / "reg_perf_report.json")

## Handling the drift

#### Exercise 3: Solution

Create a Data Quality report, a Data Drift report, and a Target Drift report for the regression model, run them, show them, and save them as HTML and JSON

In [None]:
# Instantiate the three reports, one preset each.
reg_data_quality_report = Report(metrics=[DataQualityPreset()])
reg_data_drift_report = Report(metrics=[DataDriftPreset()])
reg_target_drift_report = Report(metrics=[TargetDriftPreset()])

# Data quality and data drift are computed on the input-only frames
# (no target / prediction columns).
reg_data_quality_report.run(
    reference_data=housing_ref_input_data,
    current_data=housing_cur_input_data,
    column_mapping=reg_column_mapping,
)
reg_data_drift_report.run(
    reference_data=housing_ref_input_data,
    current_data=housing_cur_input_data,
    column_mapping=reg_column_mapping,
)

# Target drift is computed on the full frames, which still contain the
# target and prediction columns.
reg_target_drift_report.run(
    reference_data=housing_ref,
    current_data=housing_cur,
    column_mapping=reg_column_mapping,
)

In [None]:
# Display the data quality report using the "inline" mode.
reg_data_quality_report.show(mode="inline")

In [None]:
# Display the data drift report using the "inline" mode.
reg_data_drift_report.show(mode="inline")

In [None]:
# Display the target drift report using the "inline" mode.
reg_target_drift_report.show(mode="inline")

In [None]:
# Save the three reports as HTML and JSON.
# Create the output directory first; writing into a missing "reports/"
# directory would otherwise fail with FileNotFoundError.
reports_dir = Path("reports")
reports_dir.mkdir(parents=True, exist_ok=True)

reg_data_quality_report.save_html(reports_dir / "reg_data_quality_report.html")
reg_data_quality_report.save_json(reports_dir / "reg_data_quality_report.json")

reg_data_drift_report.save_html(reports_dir / "reg_data_drift_report.html")
reg_data_drift_report.save_json(reports_dir / "reg_data_drift_report.json")

reg_target_drift_report.save_html(reports_dir / "reg_target_drift_report.html")
reg_target_drift_report.save_json(reports_dir / "reg_target_drift_report.json")

## Test-based monitoring

### Pre-built test suites and their usage for the regression model
#### Exercise 4: Solution

Create a test suite with the No Target Performance, Data Quality, Data Stability, Data Drift, and Regression test presets for the regression model, run it, show it, and save it as HTML and JSON

In [None]:
# Assemble a test suite from the five pre-built test presets, then run it on
# the reference and current housing frames.
reg_test_suite = TestSuite(
    tests=[
        NoTargetPerformanceTestPreset(),
        DataQualityTestPreset(),
        DataStabilityTestPreset(),
        DataDriftTestPreset(),
        RegressionTestPreset(),
    ]
)
reg_test_suite.run(
    reference_data=housing_ref,
    current_data=housing_cur,
    column_mapping=reg_column_mapping,
)

In [None]:
# Display the test suite results using the "inline" mode.
reg_test_suite.show(mode="inline")

In [21]:
# Save the test suite results as HTML and JSON.
# Create the output directory first; writing into a missing "reports/"
# directory would otherwise fail with FileNotFoundError.
reports_dir = Path("reports")
reports_dir.mkdir(parents=True, exist_ok=True)

reg_test_suite.save_html(reports_dir / "reg_test_suite.html")
reg_test_suite.save_json(reports_dir / "reg_test_suite.json")