In [2]:
import pandas as pd
import numpy as np

from sklearn.datasets import fetch_california_housing

from evidently import ColumnMapping

from evidently.report import Report
from evidently.metrics.base_metric import generate_column_metrics
from evidently.metric_preset import DataDriftPreset, TargetDriftPreset, DataQualityPreset, RegressionPreset
from evidently.metrics import *

from evidently.test_suite import TestSuite
from evidently.tests.base_test import generate_column_tests
from evidently.test_preset import DataStabilityTestPreset, NoTargetPerformanceTestPreset, RegressionTestPreset
from evidently.tests import *

In [3]:
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

In [4]:
# load data

file1 = "../../data/processed/reference_data.csv" 
file2 = "../../data/processed/current_data.csv" 

reference = pd.read_csv(file1)
current = pd.read_csv(file2)

In [5]:
reference

Unnamed: 0,datetime,temps,no2,pm2.5,o3,pm10
0,2023-03-17-00:00:00,3.6,3.0,4.0,66.677136,5.0
1,2023-03-17-01:00:00,3.5,3.0,3.0,66.677136,4.0
2,2023-03-17-02:00:00,3.1,8.0,2.0,66.677136,3.0
3,2023-03-17-03:00:00,2.9,5.0,3.0,66.677136,4.0
4,2023-03-17-04:00:00,2.5,2.0,3.0,66.677136,4.0
...,...,...,...,...,...,...
187,2023-03-24-19:00:00,14.0,15.0,15.0,92.000000,18.0
188,2023-03-24-20:00:00,13.0,19.0,25.0,87.000000,29.0
189,2023-03-24-21:00:00,13.0,34.0,26.0,70.000000,30.0
190,2023-03-24-22:00:00,13.8,35.0,22.0,60.000000,27.0


In [6]:
current

Unnamed: 0,datetime,temps,no2,pm2.5,o3,pm10
0,2023-03-18-00:00:00,4.1,8.0,11.0,100.0,13.0
1,2023-03-18-01:00:00,4.2,12.0,10.0,91.0,12.0
2,2023-03-18-02:00:00,3.4,9.0,10.0,90.0,12.0
3,2023-03-18-03:00:00,2.9,9.0,11.0,88.0,12.0
4,2023-03-18-04:00:00,3.3,6.0,10.0,96.0,12.0
...,...,...,...,...,...,...
187,2023-03-25-19:00:00,11.9,21.0,6.0,60.0,8.0
188,2023-03-25-20:00:00,11.3,14.0,6.0,67.0,8.0
189,2023-03-25-21:00:00,10.4,8.0,5.0,76.0,6.0
190,2023-03-25-22:00:00,9.3,9.0,3.0,76.0,5.0


In [7]:
report = Report(metrics=[
    DataDriftPreset(), 
])

report.run(reference_data=reference, current_data=current)
report

In [14]:
report.as_dict()
report.json()
report.save_html('../../reports/report.html')
report.save_json('../../reports/report.json')

In [8]:
tests = TestSuite(tests=[
    TestNumberOfColumnsWithMissingValues(),
    TestNumberOfRowsWithMissingValues(),
    TestNumberOfConstantColumns(),
    TestNumberOfDuplicatedRows(),
    TestNumberOfDuplicatedColumns(),
    TestColumnsType(),
    TestNumberOfDriftedColumns(),
])

tests.run(reference_data=reference, current_data=current)
tests

In [15]:

tests.as_dict()
tests.json()
tests.save_html('../../reports/tests.html')
tests.save_json('../../reports/tests.json')

In [9]:
suite = TestSuite(tests=[
    TestNumberOfColumnsWithMissingValues(),
    TestNumberOfRowsWithMissingValues(),
    TestNumberOfConstantColumns(),
    TestNumberOfDuplicatedRows(),
    TestNumberOfDuplicatedColumns(),
    TestColumnsType(),
    TestNumberOfDriftedColumns(),
    TestColumnDrift('Population'),
    TestShareOfOutRangeValues('Population'),
    DataStabilityTestPreset(),
    RegressionTestPreset()
    
])

suite.run(reference_data=reference, current_data=current)
suite

ValueError: Cannot find column 'Population' in current dataset

<evidently.test_suite.test_suite.TestSuite at 0x2cd17f90a30>

In [None]:
suite.as_dict()
suite.json()
suite.save_html('test_suite.html')
suite.save_json('test_suite.json')