In [None]:
from jenga.tasks.reviews import VideogameReviewsTask
from jenga.evaluation.schema_stresstest import SchemaStresstest

import tensorflow_data_validation as tfdv

import numpy as np
import pandas as pd

### Instantiate the video game reviews task with a randomly chosen seed

In [None]:
# Draw a fresh random seed in [0, 2**32 - 1) for this run.
# dtype=np.int64 is requested explicitly: with NumPy's default integer dtype
# the bound 2**32 - 1 is out of range on platforms where that default is
# 32-bit (e.g. Windows with NumPy < 2.0), and randint raises ValueError there.
# int(...) converts the NumPy scalar back into a plain Python int.
seed = int(np.random.randint(2**32 - 1, dtype=np.int64))
# Show the seed so that this exact run can be reproduced later.
print("seed =", seed)

# Build the video game reviews task, handing it the seed drawn earlier in this cell.
# NOTE(review): presumably the seed governs the task's internal randomness
# (e.g. data sampling/splitting) — confirm against VideogameReviewsTask.
task = VideogameReviewsTask(seed=seed)

### Create a tfdv schema by first auto-inferring it from training data and then adjusting it

In [None]:
# Step 1: compute summary statistics over the task's training dataframe.
train_data_stats = tfdv.generate_statistics_from_dataframe(
    task.train_data
)

# Step 2: let TFDV infer an initial schema from those statistics.
schema = tfdv.infer_schema(
    statistics=train_data_stats
)

# Step 3: hand-adjust the inferred schema by setting the minimum domain mass
# of the `review_date` feature to zero.
# NOTE(review): presumably this lets dates unseen in training pass validation
# — confirm against the TFDV distribution-constraints documentation.
review_date_feature = tfdv.get_feature(schema, "review_date")
review_date_feature.distribution_constraints.min_domain_mass = 0.0

In [None]:
# Display the inferred-and-adjusted schema as this cell's output.
schema

### Train the baseline model for the task

In [None]:
# Fit the task's baseline model on the training data and labels; the
# resulting `model` is what gets passed to the stress test further down.
model = task.fit_baseline_model(
    task.train_data,
    task.train_labels,
)

### Run a stress test for the schema and the model with 250 randomly chosen corruptions, and mark performance drops of 3% as failures

In [None]:
# Run the schema stress test against the task, the baseline model and the
# schema. The two literals mirror the section heading: 250 randomly chosen
# corruptions, and a performance threshold of .03 (drops of 3% are failures).
stress_test = SchemaStresstest()
results = stress_test.run(
    task,
    model,
    schema,
    num_corruptions=250,
    performance_threshold=.03,
)

### Look at the dataframe containing the results

In [None]:
# Show the stress-test results (a dataframe, per the heading above) as this cell's output.
results