In [12]:
import os
import sys

import pandas as pd

# Put the parent directory on the import path (once) so the local packages imported
# below can be resolved — presumably that is where data_input/, evaluation/ and models/ live.
module_path = os.path.abspath(os.pardir)
already_importable = module_path in sys.path
if not already_importable:
    sys.path.append(module_path)

from data_input.load_data import load_corpus, load_queries
from evaluation.evaluation import evaluate_predictions
from models.rankers import SentenceTransformerRanker
from models.utils import process_predictions

Note: `Scenario 1: Baseline` is identical to the SBERT baseline for all test cycles.

# Test Cycle 1

## Scenario 2: Explicit Perspectivism

In [13]:
# Test cycle 1 inputs: the queries and the argument corpus, both read from the same release directory.
# The extra load_queries arguments are presumably (scenario, split) — they match the
# scenario="perspective", split="test" keywords passed to evaluate_predictions; TODO confirm.
queries_perspective = load_queries("../../data-release-test-1/", "perspective", "test")
corpus = load_corpus("../../data-release-test-1/")

In [14]:
# Read the generated arguments for test cycle 1 and blank out missing cells in one chained
# expression (no in-place mutation, so re-running the cell always starts from the CSV).
generated_arguments = pd.read_csv(
    "../../data/produced_arguments/culture_queries_test.csv"
).fillna("")  # replace NA with empty string

In [15]:
# Create the sentence-transformer ranker and hand it the corpus argument texts
# (presumably to embed them once for the later rank() calls — confirm in models.rankers).
# NOTE(review): `fit_trainsform` looks like a misspelling of `fit_transform`; the method is defined
# in models.rankers, which is not visible here, so the call is left unchanged.
retrieval_ranker = SentenceTransformerRanker()
retrieval_ranker.fit_trainsform(corpus["argument"].values)

Batches:   0%|          | 0/1013 [00:00<?, ?it/s]

In [5]:
# Build lookup tables from demographic values to corpus index labels:
#   preselected_per_attribute[column][value] -> list of index labels of the corpus rows
#   whose `column` equals `value`.
demographic_columns = ['gender', 'age',
                       'residence', 'civil_status', 'denomination', 'education',
                       'political_spectrum']
preselected_per_attribute = {}
for attribute in demographic_columns:
    column = corpus[attribute]
    preselected_per_attribute[attribute] = {
        key: list(corpus[column == key].index) for key in column.unique()
    }

# "important_political_issues" holds a collection of issues per row, so a row is listed under
# every issue it contains. One pass over the column replaces the previous approach of running
# a full corpus.iterrows() scan per distinct issue (O(#issues * #rows)).
issue_to_ids = {}
for row_id, issues in corpus["important_political_issues"].items():
    # dict.fromkeys de-duplicates while keeping order, so a row appears at most once per issue.
    for issue in dict.fromkeys(issues):
        issue_to_ids.setdefault(issue, []).append(row_id)
# Note the singular key: queries are looked up under "important_political_issue".
preselected_per_attribute["important_political_issue"] = issue_to_ids
vals = list(issue_to_ids)  # distinct issues; name kept in case another cell still reads it

In [6]:
# Explicit scenario, stage 1: rank each query text against only those corpus rows that
# share the query's demographic value.
# The rank() results are concatenated, so pred presumably ends up with one candidate list
# per query (it is indexed by position in the next cell) — confirm against rank().
pred = []
for query_idx, query in queries_perspective.iterrows():
    # Only the first (attribute, value) pair of the demographic property is used.
    attribute, key = list(query["demographic_property"].items())[0]
    # Queries name the attribute "age_bin"; the lookup table is keyed by the corpus column "age".
    attribute = "age" if attribute == "age_bin" else attribute
    allowed_ids = preselected_per_attribute[attribute][key]
    pred.extend(retrieval_ranker.rank([query["text"]], preselected_ids=allowed_ids, top_k=200))

In [8]:
# Explicit scenario, stage 2: re-rank each query's stage-1 candidates, using the generated
# argument as the query text. Row i of generated_arguments is paired with pred[i] by position —
# assumes the CSV rows are in the same order as queries_perspective (TODO confirm).
generated_texts = generated_arguments["culture_German_pro"].values
sbert_basic_prompt_predictions = [
    ranking
    for position, generated_text in enumerate(generated_texts)
    for ranking in retrieval_ranker.rank([generated_text], top_k=200, preselected_ids=pred[position])
]

In [9]:
# Convert the raw ranker output into the `predictions` object that is passed to
# evaluate_predictions (the exact format is defined in models.utils, not visible here).
predictions = process_predictions(corpus, queries_perspective, sbert_basic_prompt_predictions)

In [11]:
# Evaluate the explicit-perspectivism predictions of test cycle 1 on the "perspective" test split.
# output_dir is presumably where result files are written; diversity=True presumably enables the
# second table (alpha_ndcg@k / kl_divergence@k) seen in the cell output — TODO confirm.
evaluate_predictions(predictions=predictions,
                     data_dir="../../data-release-test-1/",
                     scenario="perspective",
                     split="test",
                     output_dir="../../results/generated_test1_explicit",
                     diversity=True)

100%|██████████| 2358/2358 [00:00<00:00, 7164.02it/s]


+----+-----+----------+---------------+
|    |   k |   ndcg@k |   precision@k |
|----+-----+----------+---------------|
|  0 |   4 | 0.660812 |      0.610051 |
|  1 |   8 | 0.64771  |      0.553965 |
|  2 |  16 | 0.637238 |      0.493268 |
|  3 |  20 | 0.63433  |      0.473585 |
+----+-----+----------+---------------+


100%|██████████| 2358/2358 [01:56<00:00, 20.18it/s]


+----+-----+----------------+-------------------+
|    |   k |   alpha_ndcg@k |   kl_divergence@k |
|----+-----+----------------+-------------------|
|  0 |   4 |       0.618968 |          0.205054 |
|  1 |   8 |       0.618594 |          0.19152  |
|  2 |  16 |       0.618306 |          0.163282 |
|  3 |  20 |       0.617711 |          0.155345 |
+----+-----+----------------+-------------------+


## Scenario 3: Implicit Perspectivism

In [18]:
# Implicit scenario, stage 1: rank all query texts in one call; no preselected_ids are passed,
# so no demographic pre-filter is applied here.
candidates = retrieval_ranker.rank(queries_perspective["text"].values, top_k=200)

# Stage 2: re-rank each query's candidates, using the generated argument as the query text.
# Row i of generated_arguments is paired with candidates[i] by position — assumes the CSV rows
# are in the same order as queries_perspective (TODO confirm).
generated_texts = generated_arguments["culture_German_pro"].values
sbert_basic_prompt_predictions = [
    ranking
    for position, generated_text in enumerate(generated_texts)
    for ranking in retrieval_ranker.rank([generated_text], top_k=200, preselected_ids=candidates[position])
]

In [19]:
# Convert the implicit-scenario ranker output into the `predictions` object and evaluate it
# for test cycle 1 on the "perspective" test split.
# implicit=True is the only argument that differs from the explicit evaluation (besides
# output_dir); its exact effect is defined in evaluation.evaluation — TODO confirm.
predictions = process_predictions(corpus, queries_perspective, sbert_basic_prompt_predictions)
evaluate_predictions(predictions=predictions,
                     data_dir="../../data-release-test-1/",
                     scenario="perspective",
                     split="test",
                     output_dir="../../results/generated_test1_implicit",
                     diversity=True,
                     implicit=True)

100%|██████████| 2358/2358 [00:00<00:00, 3606.84it/s]


+----+-----+----------+---------------+
|    |   k |   ndcg@k |   precision@k |
|----+-----+----------+---------------|
|  0 |   4 | 0.178108 |      0.178541 |
|  1 |   8 | 0.18168  |      0.182199 |
|  2 |  16 | 0.187526 |      0.185008 |
|  3 |  20 | 0.191808 |      0.18785  |
+----+-----+----------+---------------+


100%|██████████| 2358/2358 [02:05<00:00, 18.77it/s]


+----+-----+----------------+-------------------+
|    |   k |   alpha_ndcg@k |   kl_divergence@k |
|----+-----+----------------+-------------------|
|  0 |   4 |       0.164391 |          0.163773 |
|  1 |   8 |       0.169568 |          0.147226 |
|  2 |  16 |       0.177328 |          0.112381 |
|  3 |  20 |       0.18156  |          0.102674 |
+----+-----+----------------+-------------------+


# Test Cycle 2

## Scenario 2: Explicit Perspectivism

In [12]:
# Test cycle 2 inputs: the queries and the argument corpus, both read from the same release directory.
# The extra load_queries arguments are presumably (scenario, split) — they match the
# scenario="perspective", split="test" keywords passed to evaluate_predictions; TODO confirm.
# Note: this rebinds queries_perspective and corpus from test cycle 1.
queries_perspective = load_queries("../../data-release-test-2/", "perspective", "test")
corpus = load_corpus("../../data-release-test-2/")

In [13]:
# Read the generated arguments for test cycle 2 and blank out missing cells in one chained
# expression (no in-place mutation, so re-running the cell always starts from the CSV).
generated_arguments = pd.read_csv(
    "../../data/produced_arguments/culture_queries_test2.csv"
).fillna("")  # replace NA with empty string

In [14]:
# Create a fresh sentence-transformer ranker for the test cycle 2 corpus and hand it the argument
# texts (presumably to embed them once for the later rank() calls — confirm in models.rankers).
# NOTE(review): `fit_trainsform` looks like a misspelling of `fit_transform`; the method is defined
# in models.rankers, which is not visible here, so the call is left unchanged.
retrieval_ranker = SentenceTransformerRanker()
retrieval_ranker.fit_trainsform(corpus["argument"].values)

Batches:   0%|          | 0/1222 [00:00<?, ?it/s]

In [15]:
# Build lookup tables from demographic values to corpus index labels:
#   preselected_per_attribute[column][value] -> list of index labels of the corpus rows
#   whose `column` equals `value`.
demographic_columns = ['gender', 'age',
                       'residence', 'civil_status', 'denomination', 'education',
                       'political_spectrum']
preselected_per_attribute = {}
for attribute in demographic_columns:
    column = corpus[attribute]
    preselected_per_attribute[attribute] = {
        key: list(corpus[column == key].index) for key in column.unique()
    }

# "important_political_issues" holds a collection of issues per row, so a row is listed under
# every issue it contains. One pass over the column replaces the previous approach of running
# a full corpus.iterrows() scan per distinct issue (O(#issues * #rows)).
issue_to_ids = {}
for row_id, issues in corpus["important_political_issues"].items():
    # dict.fromkeys de-duplicates while keeping order, so a row appears at most once per issue.
    for issue in dict.fromkeys(issues):
        issue_to_ids.setdefault(issue, []).append(row_id)
# Note the singular key: queries are looked up under "important_political_issue".
preselected_per_attribute["important_political_issue"] = issue_to_ids
vals = list(issue_to_ids)  # distinct issues; name kept in case another cell still reads it

In [16]:
# Explicit scenario, stage 1: rank each query text against only those corpus rows that
# share the query's demographic value.
# The rank() results are concatenated, so pred presumably ends up with one candidate list
# per query (it is indexed by position in the next cell) — confirm against rank().
pred = []
for query_idx, query in queries_perspective.iterrows():
    # Only the first (attribute, value) pair of the demographic property is used.
    attribute, key = list(query["demographic_property"].items())[0]
    # Queries name the attribute "age_bin"; the lookup table is keyed by the corpus column "age".
    attribute = "age" if attribute == "age_bin" else attribute
    allowed_ids = preselected_per_attribute[attribute][key]
    pred.extend(retrieval_ranker.rank([query["text"]], preselected_ids=allowed_ids, top_k=200))

In [17]:
# Explicit scenario, stage 2: re-rank each query's stage-1 candidates, using the generated
# argument as the query text. Row i of generated_arguments is paired with pred[i] by position —
# assumes the CSV rows are in the same order as queries_perspective (TODO confirm).
generated_texts = generated_arguments["culture_German_pro"].values
sbert_basic_prompt_predictions = [
    ranking
    for position, generated_text in enumerate(generated_texts)
    for ranking in retrieval_ranker.rank([generated_text], top_k=200, preselected_ids=pred[position])
]

In [18]:
# Convert the raw ranker output into the `predictions` object that is passed to
# evaluate_predictions (the exact format is defined in models.utils, not visible here).
predictions = process_predictions(corpus, queries_perspective, sbert_basic_prompt_predictions)

In [20]:
# Evaluate the explicit-perspectivism predictions of test cycle 2 on the "perspective" test split.
# output_dir is presumably where result files are written; diversity=True presumably enables the
# second table (alpha_ndcg@k / kl_divergence@k) seen in the cell output — TODO confirm.
evaluate_predictions(predictions=predictions,
                     data_dir="../../data-release-test-2/",
                     scenario="perspective",
                     split="test",
                     output_dir="../../results/generated_test2_explicit",
                     diversity=True)

100%|██████████| 1782/1782 [00:00<00:00, 2977.54it/s]


+----+-----+----------+---------------+
|    |   k |   ndcg@k |   precision@k |
|----+-----+----------+---------------|
|  0 |   4 | 0.622549 |      0.548681 |
|  1 |   8 | 0.602224 |      0.472854 |
|  2 |  16 | 0.583915 |      0.394851 |
|  3 |  20 | 0.579499 |      0.368743 |
+----+-----+----------+---------------+


100%|██████████| 1782/1782 [01:58<00:00, 15.06it/s]


+----+-----+----------------+-------------------+
|    |   k |   alpha_ndcg@k |   kl_divergence@k |
|----+-----+----------------+-------------------|
|  0 |   4 |       0.591646 |          0.188017 |
|  1 |   8 |       0.582609 |          0.175575 |
|  2 |  16 |       0.572135 |          0.150025 |
|  3 |  20 |       0.568865 |          0.142816 |
+----+-----+----------------+-------------------+


## Scenario 3: Implicit Perspectivism

In [23]:
# Implicit scenario, stage 1: rank all query texts in one call; no preselected_ids are passed,
# so no demographic pre-filter is applied here.
candidates = retrieval_ranker.rank(queries_perspective["text"].values, top_k=200)

In [24]:
# Implicit scenario, stage 2: re-rank each query's candidates, using the generated argument
# as the query text. Row i of generated_arguments is paired with candidates[i] by position —
# assumes the CSV rows are in the same order as queries_perspective (TODO confirm).
generated_texts = generated_arguments["culture_German_pro"].values
sbert_basic_prompt_predictions = [
    ranking
    for position, generated_text in enumerate(generated_texts)
    for ranking in retrieval_ranker.rank([generated_text], top_k=200, preselected_ids=candidates[position])
]

In [25]:
# Convert the implicit-scenario ranker output into the `predictions` object that is passed to
# evaluate_predictions (the exact format is defined in models.utils, not visible here).
predictions = process_predictions(corpus, queries_perspective, sbert_basic_prompt_predictions)

In [26]:
# Evaluate the implicit-perspectivism predictions of test cycle 2 on the "perspective" test split.
# implicit=True is the only argument that differs from the explicit evaluation (besides
# output_dir); its exact effect is defined in evaluation.evaluation — TODO confirm.
evaluate_predictions(predictions=predictions,
                     data_dir="../../data-release-test-2/",
                     scenario="perspective",
                     split="test",
                     output_dir="../../results/generated_test2_implicit",
                     diversity=True,
                     implicit=True)

100%|██████████| 1782/1782 [00:00<00:00, 4144.80it/s]


+----+-----+----------+---------------+
|    |   k |   ndcg@k |   precision@k |
|----+-----+----------+---------------|
|  0 |   4 | 0.130036 |      0.129349 |
|  1 |   8 | 0.126899 |      0.123106 |
|  2 |  16 | 0.126043 |      0.117565 |
|  3 |  20 | 0.126924 |      0.116891 |
+----+-----+----------+---------------+


100%|██████████| 1782/1782 [01:49<00:00, 16.21it/s]


+----+-----+----------------+-------------------+
|    |   k |   alpha_ndcg@k |   kl_divergence@k |
|----+-----+----------------+-------------------|
|  0 |   4 |       0.120722 |          0.155261 |
|  1 |   8 |       0.120515 |          0.139217 |
|  2 |  16 |       0.121494 |          0.105921 |
|  3 |  20 |       0.122604 |          0.096631 |
+----+-----+----------------+-------------------+


# Test Cycle 3

## Scenario 2: Explicit Perspectivism

In [29]:
# Test cycle 3 inputs: the queries and the argument corpus, both read from the same release directory.
# The extra load_queries arguments are presumably (scenario, split) — they match the
# scenario="perspective", split="test" keywords passed to evaluate_predictions; TODO confirm.
# Note: this rebinds queries_perspective and corpus from the previous test cycle.
queries_perspective = load_queries("../../data-release-test-3/", "perspective", "test")
corpus = load_corpus("../../data-release-test-3/")

In [30]:
# Read the generated arguments for test cycle 3 and blank out missing cells in one chained
# expression (no in-place mutation, so re-running the cell always starts from the CSV).
generated_arguments = pd.read_csv(
    "../../data/produced_arguments/culture_queries_test3.csv"
).fillna("")  # replace NA with empty string

In [31]:
# Create a fresh sentence-transformer ranker for the test cycle 3 corpus and hand it the argument
# texts (presumably to embed them once for the later rank() calls — confirm in models.rankers).
# NOTE(review): `fit_trainsform` looks like a misspelling of `fit_transform`; the method is defined
# in models.rankers, which is not visible here, so the call is left unchanged.
retrieval_ranker = SentenceTransformerRanker()
retrieval_ranker.fit_trainsform(corpus["argument"].values)

Batches:   0%|          | 0/897 [00:00<?, ?it/s]

In [32]:
# Build lookup tables from demographic values to corpus index labels:
#   preselected_per_attribute[column][value] -> list of index labels of the corpus rows
#   whose `column` equals `value`.
demographic_columns = ['gender', 'age',
                       'residence', 'civil_status', 'denomination', 'education',
                       'political_spectrum']
preselected_per_attribute = {}
for attribute in demographic_columns:
    column = corpus[attribute]
    preselected_per_attribute[attribute] = {
        key: list(corpus[column == key].index) for key in column.unique()
    }

# "important_political_issues" holds a collection of issues per row, so a row is listed under
# every issue it contains. One pass over the column replaces the previous approach of running
# a full corpus.iterrows() scan per distinct issue (O(#issues * #rows)).
issue_to_ids = {}
for row_id, issues in corpus["important_political_issues"].items():
    # dict.fromkeys de-duplicates while keeping order, so a row appears at most once per issue.
    for issue in dict.fromkeys(issues):
        issue_to_ids.setdefault(issue, []).append(row_id)
# Note the singular key: queries are looked up under "important_political_issue".
preselected_per_attribute["important_political_issue"] = issue_to_ids
vals = list(issue_to_ids)  # distinct issues; name kept in case another cell still reads it

In [33]:
# Explicit scenario, stage 1: rank each query text against only those corpus rows that
# share the query's demographic value.
# The rank() results are concatenated, so pred presumably ends up with one candidate list
# per query (it is indexed by position in the next cell) — confirm against rank().
pred = []
for query_idx, query in queries_perspective.iterrows():
    # Only the first (attribute, value) pair of the demographic property is used.
    attribute, key = list(query["demographic_property"].items())[0]
    # Queries name the attribute "age_bin"; the lookup table is keyed by the corpus column "age".
    attribute = "age" if attribute == "age_bin" else attribute
    allowed_ids = preselected_per_attribute[attribute][key]
    pred.extend(retrieval_ranker.rank([query["text"]], preselected_ids=allowed_ids, top_k=200))

In [34]:
# Explicit scenario, stage 2: re-rank each query's stage-1 candidates, using the generated
# argument as the query text. Row i of generated_arguments is paired with pred[i] by position —
# assumes the CSV rows are in the same order as queries_perspective (TODO confirm).
generated_texts = generated_arguments["culture_German_pro"].values
sbert_basic_prompt_predictions = [
    ranking
    for position, generated_text in enumerate(generated_texts)
    for ranking in retrieval_ranker.rank([generated_text], top_k=200, preselected_ids=pred[position])
]

In [35]:
# Convert the raw ranker output into the `predictions` object that is passed to
# evaluate_predictions (the exact format is defined in models.utils, not visible here).
predictions = process_predictions(corpus, queries_perspective, sbert_basic_prompt_predictions)

In [36]:
# Evaluate the explicit-perspectivism predictions of test cycle 3 on the "perspective" test split.
# output_dir is presumably where result files are written; diversity=True presumably enables the
# second table (alpha_ndcg@k / kl_divergence@k) seen in the cell output — TODO confirm.
evaluate_predictions(predictions=predictions,
                     data_dir="../../data-release-test-3/",
                     scenario="perspective",
                     split="test",
                     output_dir="../../results/generated_test3_explicit",
                     diversity=True)

100%|██████████| 729/729 [00:00<00:00, 7323.37it/s]


+----+-----+----------+---------------+
|    |   k |   ndcg@k |   precision@k |
|----+-----+----------+---------------|
|  0 |   4 | 0.515849 |      0.491084 |
|  1 |   8 | 0.494304 |      0.410151 |
|  2 |  16 | 0.481858 |      0.319616 |
|  3 |  20 | 0.48077  |      0.290192 |
+----+-----+----------+---------------+


100%|██████████| 729/729 [00:33<00:00, 21.64it/s]


+----+-----+----------------+-------------------+
|    |   k |   alpha_ndcg@k |   kl_divergence@k |
|----+-----+----------------+-------------------|
|  0 |   4 |       0.503821 |          0.239436 |
|  1 |   8 |       0.494901 |          0.228403 |
|  2 |  16 |       0.486398 |          0.204929 |
|  3 |  20 |       0.485007 |          0.198093 |
+----+-----+----------------+-------------------+


## Scenario 3: Implicit Perspectivism

In [39]:
# Implicit scenario, stage 1: rank all query texts in one call; no preselected_ids are passed,
# so no demographic pre-filter is applied here.
candidates = retrieval_ranker.rank(queries_perspective["text"].values, top_k=200)

In [40]:
# Implicit scenario, stage 2: re-rank each query's candidates, using the generated argument
# as the query text. Row i of generated_arguments is paired with candidates[i] by position —
# assumes the CSV rows are in the same order as queries_perspective (TODO confirm).
generated_texts = generated_arguments["culture_German_pro"].values
sbert_basic_prompt_predictions = [
    ranking
    for position, generated_text in enumerate(generated_texts)
    for ranking in retrieval_ranker.rank([generated_text], top_k=200, preselected_ids=candidates[position])
]

In [41]:
# Convert the implicit-scenario ranker output into the `predictions` object that is passed to
# evaluate_predictions (the exact format is defined in models.utils, not visible here).
predictions = process_predictions(corpus, queries_perspective, sbert_basic_prompt_predictions)

In [42]:
# Evaluate the implicit-perspectivism predictions of test cycle 3 on the "perspective" test split.
# implicit=True is the only argument that differs from the explicit evaluation (besides
# output_dir); its exact effect is defined in evaluation.evaluation — TODO confirm.
evaluate_predictions(predictions=predictions,
                     data_dir="../../data-release-test-3/",
                     scenario="perspective",
                     split="test",
                     output_dir="../../results/generated_test3_implicit",
                     diversity=True,
                     implicit=True)

100%|██████████| 729/729 [00:00<00:00, 3561.38it/s]


+----+-----+----------+---------------+
|    |   k |   ndcg@k |   precision@k |
|----+-----+----------+---------------|
|  0 |   4 | 0.303507 |      0.318244 |
|  1 |   8 | 0.336466 |      0.335219 |
|  2 |  16 | 0.368269 |      0.297754 |
|  3 |  20 | 0.385001 |      0.283608 |
+----+-----+----------+---------------+


100%|██████████| 729/729 [00:35<00:00, 20.64it/s]


+----+-----+----------------+-------------------+
|    |   k |   alpha_ndcg@k |   kl_divergence@k |
|----+-----+----------------+-------------------|
|  0 |   4 |       0.287694 |          0.185971 |
|  1 |   8 |       0.322824 |          0.17126  |
|  2 |  16 |       0.35076  |          0.138318 |
|  3 |  20 |       0.36382  |          0.128291 |
+----+-----+----------------+-------------------+
