In [7]:
import io
import json
import os
import zipfile

import pandas as pd
import requests

from evidently.pipeline.column_mapping import ColumnMapping
from evidently.report import Report
from evidently.metrics.base_metric import generate_column_metrics
from evidently.metric_preset import DataDriftPreset, TargetDriftPreset
from evidently.metrics import TextDescriptorsDriftMetric

from evidently.test_suite import TestSuite
from evidently.tests.base_test import generate_column_tests
from evidently.test_preset import DataStabilityTestPreset, NoTargetPerformanceTestPreset
from evidently.tests import *

from evidently.features.text_length_feature import TextLength
from evidently.features.OOV_words_percentage_feature import OOVWordsPercentage

# Prepare legal acts dataset

In [3]:
# Download the zipped legal-acts dataset into memory.
# The timeout stops the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() reports an HTTP failure here rather than letting an
# error page reach the zip-parsing cell.
legal_acts_response = requests.get(
    'https://drive.usercontent.google.com/u/0/uc?id=11xy6a6cbteD2TG9r3CU16ysO50xrqZf2&export=download',
    timeout=60,
)
legal_acts_response.raise_for_status()
legal_acts_content = legal_acts_response.content

In [8]:
# Open the downloaded bytes as an in-memory zip archive and parse the
# "data.json" member into a DataFrame.
archive_buffer = io.BytesIO(legal_acts_content)
with zipfile.ZipFile(archive_buffer) as arc:
    legal_acts_data = pd.read_json(arc.open("data.json"))

# Collapse each row's iterable of labels into one comma-separated string.
legal_acts_data['labels'] = [
    ','.join(str(label) for label in row_labels)
    for row_labels in legal_acts_data['labels']
]

# Take characters 5..8 of the id and store them as an integer year.
# NOTE(review): assumes ids look like "ipzs-YYYYMMDD_..." — confirm for this dataset.
legal_acts_data['year'] = legal_acts_data['id'].str[5:9].astype(int)

In [4]:
# Load the locally generated JSON export and display it.
# This frame is separate from `legal_acts_data`; the cells visible below do not
# reference it.
local_json_path = "../output-folder/data.json"
data = pd.read_json(local_json_path)
data

Unnamed: 0,text,id,labels
0,Ratifica ed esecuzione dei seguenti Trattati: ...,ipzs-20210604_21G00088,[A1810]
1,Governance del Piano nazionale di ripresa e re...,ipzs-20210531_21G00087,[A1490]
2,"Conversione in legge, con modificazioni, del d...",ipzs-20210531_21G00086,[A6040]
3,Ratifica ed esecuzione dell'Accordo di coopera...,ipzs-20210526_21G00082,[A1810]
4,Ratifica ed esecuzione dell'Accordo tra il Gov...,ipzs-20210525_21G00083,[A1810]
...,...,...,...
130,Regolamento di organizzazione del Ministero pe...,ipzs-20200121_20G00006,[A0330]
131,Regolamento concernente la banca dati nazional...,ipzs-20200117_20G00005,[A6040]
132,Disposizioni integrative e correttive del decr...,ipzs-20200109_20G00003,[A1490]
133,Regolamento recante valutazione del personale ...,ipzs-20200103_19G00167,[A6040]


In [15]:
# Split the acts into a reference window (before 2000) and a current window
# (2000 onwards). The current mask is inclusive so that rows from the year 2000
# land in one of the two windows instead of being dropped by both filters.
reference = legal_acts_data[legal_acts_data['year'] < 2000]
current = legal_acts_data[legal_acts_data['year'] >= 2000]

# Map the schema: "labels" is the target and "text" is the only text feature.
# The categorical and numerical feature lists are set to empty explicitly.
# To include model predictions later, also set the prediction column on this
# mapping, e.g. column_mapping_legal.prediction = "predicted_labels".
# NOTE(review): confirm the attribute name against the installed evidently version.
column_mapping_legal = ColumnMapping(
    target="labels",
    text_features=['text'],
    categorical_features=[],
    numerical_features=[],
)

In [16]:
# Build a report from the data-drift preset and run it on the reference and
# current windows using the legal-acts column mapping.
report = Report(metrics=[
    DataDriftPreset(),
    # Optional: TextDescriptorsDriftMetric(column_name="text")
])
report.run(reference_data=reference, current_data=current, column_mapping=column_mapping_legal)

# Create the output folder first so that saving works on a fresh checkout
# where "reports/" does not exist yet.
os.makedirs("reports", exist_ok=True)
report.save_html("reports/data_drift.html")