In [None]:
%load_ext autoreload
%autoreload 2

import os
import pickle

import checklist
import pandas as pd
from checklist.test_suite import TestSuite

from suite_utils import get_test_info

In [None]:
try:
    suite_path = './data/release_data/sentiment/sentiment_suite.pkl'
    suite = TestSuite.from_file(suite_path)
except FileNotFoundError:
    print("Downloading CheckList release files...")
    ! wget -P "./data" https://github.com/marcotcr/checklist/raw/master/release_data.tar.gz
    ! tar xvzf data/release_data.tar.gz -C ./data
    suite_path = './data/release_data/sentiment/sentiment_suite.pkl'
    suite = TestSuite.from_file(suite_path)

## Sentiment

In [None]:
# Path handed to run_from_file below; going by its name it points at the BERT
# predictions that ship with the release data.
pred_path = './data/release_data/sentiment/predictions/bert'

In [None]:
# Feed the predictions stored at pred_path to the sentiment suite.
# NOTE(review): overwrite=True presumably replaces results already attached to
# the suite — confirm against the CheckList TestSuite documentation.
suite.run_from_file(pred_path, overwrite=True)

In [None]:
# Display CheckList's summary table for the suite (output only).
suite.visual_summary_table()

In [None]:
def suite_to_df(suite):
    """Flatten every test of a suite into one long DataFrame.

    Parameters
    ----------
    suite
        Object exposing ``tests``, a mapping of test name -> test. Every test
        must provide ``example_list_and_indices()``, ``labels``,
        ``capability``, ``name`` and ``form_test_info()``.

    Returns
    -------
    pandas.DataFrame
        One row per example with the columns ``test_case``, ``test_id``,
        ``label``, ``capability``, ``functionality`` and ``type``, indexed
        0..n-1. An empty DataFrame is returned for a suite without tests.
    """
    frames = []
    for _, test in suite.tests.items():
        df = pd.DataFrame()
        test_cases, test_ids = test.example_list_and_indices()
        df["test_case"] = test_cases
        df["test_id"] = test_ids

        labels = test.labels
        # Labels come as None, a single int, a flat sequence, or a list of
        # lists (one sub-list per test case). Only the nested form needs
        # flattening; the length check keeps an empty sequence from raising
        # IndexError on labels[0].
        is_nested = (
            labels is not None
            and not isinstance(labels, int)
            and len(labels) > 0
            and not isinstance(labels[0], (int, str))
        )
        if is_nested:
            df["label"] = [item for sublist in labels for item in sublist]
        else:
            df["label"] = labels

        df["capability"] = test.capability
        df["functionality"] = test.name
        df["type"] = test.form_test_info()["type"]
        frames.append(df)

    # pd.concat raises on an empty list, so keep returning an empty frame.
    if not frames:
        return pd.DataFrame()
    # Concatenate once: avoids re-copying the accumulated rows on every
    # iteration and avoids seeding the concat with an empty, column-less frame
    # (which newer pandas versions warn about when inferring dtypes).
    return pd.concat(frames, axis=0, ignore_index=True)

In [None]:
suite_df = suite_to_df(suite); suite_df

In [None]:
suite_df[suite_df.functionality == "reducers"]

In [None]:
suite_df[suite_df.type == "dir"].groupby(["functionality"]).sample(1)

In [None]:
suite_df.loc[suite_df.functionality == '"used to" should reduce', "direction"] = "not_more_conf"
suite_df.loc[suite_df.functionality == '"used to" should reduce', "slice"] = "not_1"

In [None]:
suite_df.loc[suite_df.functionality == 'add negative phrases', "direction"] = "not_less_0"

In [None]:
suite_df.loc[suite_df.functionality == 'add positive phrases', "direction"] = "not_less_2"

In [None]:
suite_df.loc[suite_df.functionality == 'intensifiers', "direction"] = "not_less_conf"
suite_df.loc[suite_df.functionality == 'intensifiers', "slice"] = "not_1"

In [None]:
suite_df.loc[suite_df.functionality == 'reducers', "direction"] = "not_more_conf"
suite_df.loc[suite_df.functionality == 'reducers', "slice"] = "not_1"

In [None]:
suite_df[suite_df.type == "inv"].groupby(["functionality"]).sample(1)

In [None]:
suite_df[suite_df.type == "mft"].groupby(["functionality"]).sample(1)

In [None]:
# These functionalities all get the label "not_0"; apply the override to each
# of them in turn.
not_negative_tests = (
    "simple negations: not negative",
    "simple negations: I thought x was negative, but it was not (should be neutral or positive)",
    "Hard: Negation of negative with neutral stuff in the middle (should be positive or neutral)",
)
for functionality_name in not_negative_tests:
    suite_df.loc[suite_df["functionality"] == functionality_name, "label"] = "not_0"

In [None]:
suite_df.groupby(["functionality"]).sample(1)

In [None]:
suite_df.dtypes

In [None]:
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists first (a fresh checkout may not have it).
os.makedirs("data/sa", exist_ok=True)
suite_df.to_csv("data/sa/sa.csv", index=False)

## Quora Question Pairs

In [None]:
# Load the QQP suite pickle. This rebinds `suite_path` and `suite`, so the
# previously loaded suite is no longer reachable through these names.
suite_path = './data/release_data/qqp/qqp_suite.pkl'
suite = TestSuite.from_file(suite_path)

In [None]:
# Path handed to run_from_file below; going by its name it points at the BERT
# predictions that ship with the release data.
pred_path = './data/release_data/qqp/predictions/bert'

In [None]:
# Feed the predictions stored at pred_path to the QQP suite.
# NOTE(review): the meaning of file_format='binary_conf' and overwrite=True is
# defined by CheckList's TestSuite.run_from_file — confirm there.
suite.run_from_file(pred_path, overwrite=True, file_format='binary_conf')

In [None]:
# Display CheckList's summary table for the suite (output only).
suite.visual_summary_table()

In [None]:
# Show the per-test information produced by the suite_utils helper.
get_test_info(suite)

In [None]:
suite_df = suite_to_df(suite); suite_df

In [None]:
suite_df[suite_df.type == "mft"].groupby(["functionality"]).sample(1)

In [None]:
suite_df[suite_df.type == "dir"].groupby(["functionality"]).sample(1)

In [None]:
suite_df[suite_df.type == "inv"].groupby(["functionality"]).sample(1)

In [None]:
suite_df.loc[suite_df.functionality == "(q, paraphrase(q))", "direction"] = "label"
suite_df.loc[suite_df.functionality == "(q, paraphrase(q))", "label"]  = "1"

In [None]:
suite_df.loc[suite_df.functionality == "Change first and last name in one of the questions", "direction"] = "label"
suite_df.loc[suite_df.functionality == "Change first and last name in one of the questions", "label"] = "0"
suite_df.loc[suite_df.functionality == "Change first and last name in one of the questions", "slice"] = "1"

In [None]:
suite_df.loc[suite_df.functionality == "Change first name in one of the questions", "direction"] = "label"
suite_df.loc[suite_df.functionality == "Change first name in one of the questions", "label"] = "0"
suite_df.loc[suite_df.functionality == "Change first name in one of the questions", "slice"] = "1"

In [None]:
suite_df.loc[suite_df.functionality == "Change location in one of the questions", "direction"] = "label"
suite_df.loc[suite_df.functionality == "Change location in one of the questions", "label"] = "0"
suite_df.loc[suite_df.functionality == "Change location in one of the questions", "slice"] = "1"

In [None]:
suite_df.loc[suite_df.functionality == "Change numbers in one of the questions", "direction"] = "label"
suite_df.loc[suite_df.functionality == "Change numbers in one of the questions", "label"] = "0"
suite_df.loc[suite_df.functionality == "Change numbers in one of the questions", "slice"] = "1"

In [None]:
suite_df.loc[suite_df.functionality == "Keep entitites, fill in with gibberish", "direction"] = "label"
suite_df.loc[suite_df.functionality == "Keep entitites, fill in with gibberish", "label"] = "0"

In [None]:
suite_df.loc[suite_df.functionality == "Testing implications", "direction"] = "label"

In [None]:
# original lable incorrect
suite_df.loc[suite_df.functionality == "Order does matter for asymmetric relations", "label"] = "0"

In [None]:
# As implemented, it seem as it is actually a MFT
suite_df.loc[suite_df.functionality == "(question, f(question)) where f(question) replaces synonyms?", "label"] = "1"
suite_df.loc[suite_df.functionality == "(question, f(question)) where f(question) replaces synonyms?", "type"] = "mft"

In [None]:
# One random example per functionality, to eyeball the annotated QQP frame.
suite_df.groupby("functionality").sample(n=1)

In [None]:
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists first (a fresh checkout may not have it).
os.makedirs("data/qqp", exist_ok=True)
suite_df.to_csv("data/qqp/qqp.csv", index=False)

## Machine Comprehension

In [None]:
# Load the SQuAD suite pickle. This rebinds `suite_path` and `suite`, so the
# previously loaded suite is no longer reachable through these names.
suite_path = './data/release_data/squad/squad_suite.pkl'
suite = TestSuite.from_file(suite_path)

In [None]:
# Path handed to run_from_file below; going by its name it points at the BERT
# predictions that ship with the release data.
pred_path = './data/release_data/squad/predictions/bert'

In [None]:
# Feed the predictions stored at pred_path to the SQuAD suite.
# NOTE(review): the meaning of file_format='pred_only' and overwrite=True is
# defined by CheckList's TestSuite.run_from_file — confirm there.
suite.run_from_file(pred_path, overwrite=True, file_format='pred_only')

In [None]:
# Display CheckList's summary table for the suite (output only).
suite.visual_summary_table()

In [None]:
# Show the per-test information produced by the suite_utils helper.
get_test_info(suite)

In [None]:
suite_df = suite_to_df(suite); suite_df

In [None]:
suite_df[suite_df.type == "mft"].groupby(["functionality"]).sample(1)

In [None]:
suite_df[suite_df.type == "inv"].groupby(["functionality"]).sample(1)

In [None]:
suite_df.groupby(["functionality"]).sample(1)

In [None]:
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists first (a fresh checkout may not have it).
os.makedirs("data/squad", exist_ok=True)
suite_df.to_csv("data/squad/squad.csv", index=False)