In [4]:
import os
import subprocess
import sys

import pandas as pd
import sentence_transformers
from sklearn.metrics.pairwise import cosine_similarity
from tqdm.notebook import tqdm

# Register the notebook's parent directory on the import path (only once),
# presumably so that the project-local import that follows can be resolved.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from data_input.load_data import load_corpus, load_queries

In [5]:
# Instantiate the SentenceTransformer checkpoint by name; this single `model`
# object produces every corpus and query embedding in the cells below.
model = sentence_transformers.SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')

Load the corpus and compute embeddings for the arguments.

In [7]:
# Load the argument corpus from the shared data directory.
# NOTE(review): `load_corpus` is project code not shown here; the later cells use its
# result like a pandas DataFrame with "argument" and "argument_id" columns -- confirm.
corpus = load_corpus("../../data")
# Embed every argument text once; `corpus_embeddings` is reused for all splits and scenarios.
corpus_embeddings = model.encode(corpus["argument"].values, show_progress_bar=True)

Batches:   0%|          | 0/1013 [00:00<?, ?it/s]

# Results Vanilla SBERT
This is essentially the baseline provided by the organizers; for reference, we also evaluate it on the perspective scenario.

In [9]:
# Vanilla SBERT retrieval: for every (split, scenario) pair, rank the corpus arguments by
# cosine similarity to each query, write the ranked ids to a JSONL predictions file, and
# score that file with the project's evaluation script.
TOP_K = 1000  # number of top-ranked candidates written out per query

# Extract the id column once. Indexing a NumPy array by position is far cheaper than
# building a full row Series with `corpus.iloc[...]` for every single candidate.
corpus_argument_ids = corpus["argument_id"].to_numpy()

for split in ["train", "dev"]:
    for scenario in ["baseline", "perspective"]:
        queries = load_queries("../../data", scenario, split)
        queries_embeddings = model.encode(queries["text"].values, show_progress_bar=True)
        # One row per query, one column per corpus argument.
        similarities = cosine_similarity(queries_embeddings, corpus_embeddings)

        query_ids = queries["query_id"].tolist()
        predictions = [
            {
                "query_id": query_id,
                # argsort() is ascending, so reverse it to rank by descending similarity.
                "relevant_candidates": corpus_argument_ids[candidates.argsort()[::-1][:TOP_K]].tolist(),
            }
            for query_id, candidates in zip(query_ids, similarities)
        ]

        predictions_path = f"sbert-{split}-{scenario}-predictions.jsonl"
        pd.DataFrame(predictions).to_json(predictions_path, orient="records", lines=True)
        print(f"Results {split} {scenario}:")
        # Pass an argument list instead of a shell string (no quoting pitfalls), and let
        # check=True raise if the evaluation script fails instead of silently continuing.
        subprocess.run(
            [
                "python3", "../evaluation/scripts/evaluation.py",
                "--data", "../../data",
                "--scenario", scenario,
                "--split", split,
                "--predictions", predictions_path,
                "--output_dir", "../../results/sbert",
                "--diversity", "True",
            ],
            check=True,
        )

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Results train baseline:


100%|██████████| 105/105 [00:00<00:00, 5014.60it/s]


+----+-----+----------+---------------+
|    |   k |   ndcg@k |   precision@k |
|----+-----+----------+---------------|
|  0 |   4 | 0.957958 |      0.957143 |
|  1 |   8 | 0.953623 |      0.95119  |
|  2 |  16 | 0.935205 |      0.925595 |
|  3 |  20 | 0.92902  |      0.918095 |
+----+-----+----------+---------------+


100%|██████████| 105/105 [00:05<00:00, 17.97it/s]


+----+-----+----------------+-------------------+
|    |   k |   alpha_ndcg@k |   kl_divergence@k |
|----+-----+----------------+-------------------|
|  0 |   4 |       0.876534 |         0.157353  |
|  1 |   8 |       0.879313 |         0.14083   |
|  2 |  16 |       0.881375 |         0.107892  |
|  3 |  20 |       0.883236 |         0.0988534 |
+----+-----+----------------+-------------------+


Batches:   0%|          | 0/175 [00:00<?, ?it/s]

Results train perspective:


100%|██████████| 5577/5577 [00:00<00:00, 6790.24it/s]


+----+-----+----------+---------------+
|    |   k |   ndcg@k |   precision@k |
|----+-----+----------+---------------|
|  0 |   4 | 0.183197 |      0.182939 |
|  1 |   8 | 0.18268  |      0.18128  |
|  2 |  16 | 0.180829 |      0.17691  |
|  3 |  20 | 0.180404 |      0.17548  |
+----+-----+----------+---------------+


100%|██████████| 5577/5577 [04:55<00:00, 18.84it/s]


+----+-----+----------------+-------------------+
|    |   k |   alpha_ndcg@k |   kl_divergence@k |
|----+-----+----------------+-------------------|
|  0 |   4 |       0.170985 |         0.157367  |
|  1 |   8 |       0.17241  |         0.140879  |
|  2 |  16 |       0.173679 |         0.108009  |
|  3 |  20 |       0.17426  |         0.0989848 |
+----+-----+----------------+-------------------+


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Results dev baseline:


100%|██████████| 30/30 [00:00<00:00, 3593.58it/s]


+----+-----+----------+---------------+
|    |   k |   ndcg@k |   precision@k |
|----+-----+----------+---------------|
|  0 |   4 | 0.96837  |      0.975    |
|  1 |   8 | 0.96466  |      0.966667 |
|  2 |  16 | 0.955535 |      0.952083 |
|  3 |  20 | 0.95034  |      0.945    |
+----+-----+----------+---------------+


100%|██████████| 30/30 [00:01<00:00, 18.64it/s]


+----+-----+----------------+-------------------+
|    |   k |   alpha_ndcg@k |   kl_divergence@k |
|----+-----+----------------+-------------------|
|  0 |   4 |       0.877609 |         0.150392  |
|  1 |   8 |       0.879654 |         0.136491  |
|  2 |  16 |       0.890723 |         0.107088  |
|  3 |  20 |       0.893866 |         0.0995033 |
+----+-----+----------------+-------------------+


Batches:   0%|          | 0/51 [00:00<?, ?it/s]

Results dev perspective:


100%|██████████| 1611/1611 [00:00<00:00, 5728.94it/s]


+----+-----+----------+---------------+
|    |   k |   ndcg@k |   precision@k |
|----+-----+----------+---------------|
|  0 |   4 | 0.180462 |      0.181719 |
|  1 |   8 | 0.180812 |      0.180866 |
|  2 |  16 | 0.180292 |      0.177917 |
|  3 |  20 | 0.179676 |      0.176381 |
+----+-----+----------+---------------+


100%|██████████| 1611/1611 [01:24<00:00, 18.96it/s]


+----+-----+----------------+-------------------+
|    |   k |   alpha_ndcg@k |   kl_divergence@k |
|----+-----+----------------+-------------------|
|  0 |   4 |       0.166714 |         0.150345  |
|  1 |   8 |       0.168643 |         0.136453  |
|  2 |  16 |       0.171306 |         0.107077  |
|  3 |  20 |       0.171819 |         0.0994725 |
+----+-----+----------------+-------------------+


The performance of SBERT here for the perspective sets gives us a baseline for the implicit perspectivism setting.

# Oracle Sampling of candidates from SBERT (explicit perspectivism setting)
Of the candidates we retrieve using SBERT, we keep only those whose sociocultural attribute matches the one given in the query.

In [10]:
# Oracle filtering (explicit perspectivism): rank the corpus by cosine similarity exactly as
# in the plain SBERT run, then keep only the candidates whose sociocultural attribute matches
# the one attached to the query. Predictions are written to JSONL and scored with the
# project's evaluation script.
TOP_K = 1000  # number of top-ranked candidates inspected per query (before filtering)

# Dealing with mismatch between the demographic property names in the queries and the corpus;
# "age_bin" in the queries is "age" in the corpus, and "important_political_issue" in the queries
# is "important_political_issues" in the corpus with multiple values.
CORPUS_COLUMN_FOR_PROPERTY = {
    "age_bin": "age",
    "important_political_issue": "important_political_issues",
}
# Query properties whose corpus column holds several values: match by membership, not equality.
MEMBERSHIP_PROPERTIES = {"important_political_issue"}

# Extract columns once instead of building a row Series with `corpus.iloc[...]` per candidate.
corpus_argument_ids = corpus["argument_id"].tolist()
corpus_attribute_cache = {}  # corpus column name -> list of per-argument values, filled lazily

for split in ["train", "dev"]:
    for scenario in ["perspective"]:
        queries = load_queries("../../data", scenario, split)
        queries_embeddings = model.encode(queries["text"].values, show_progress_bar=True)
        # One row per query, one column per corpus argument.
        similarities = cosine_similarity(queries_embeddings, corpus_embeddings)

        query_ids = queries["query_id"].tolist()
        query_properties = queries["demographic_property"].tolist()

        predictions = []
        for query_id, properties, candidates in tqdm(
            zip(query_ids, query_properties, similarities), total=len(queries)
        ):
            # The property and its value depend only on the query, so resolve them once
            # per query. Only the first key of the mapping is considered.
            demographic_property = next(iter(properties))
            value = properties[demographic_property]

            column = CORPUS_COLUMN_FOR_PROPERTY.get(demographic_property, demographic_property)
            if column not in corpus_attribute_cache:
                corpus_attribute_cache[column] = corpus[column].tolist()
            attributes = corpus_attribute_cache[column]

            # argsort() is ascending, so reverse it to rank by descending similarity.
            ranked = candidates.argsort()[::-1][:TOP_K]
            if demographic_property in MEMBERSHIP_PROPERTIES:
                relevant_candidates = [
                    corpus_argument_ids[index] for index in ranked if value in attributes[index]
                ]
            else:
                relevant_candidates = [
                    corpus_argument_ids[index] for index in ranked if attributes[index] == value
                ]

            predictions.append({
                "query_id": query_id,
                "relevant_candidates": relevant_candidates
            })

        predictions_path = f"sbert-oracle-{split}-{scenario}-predictions.jsonl"
        pd.DataFrame(predictions).to_json(predictions_path, orient="records", lines=True)
        print(f"Results {split} {scenario}:")
        # Pass an argument list instead of a shell string (no quoting pitfalls), and let
        # check=True raise if the evaluation script fails instead of silently continuing.
        subprocess.run(
            [
                "python3", "../evaluation/scripts/evaluation.py",
                "--data", "../../data",
                "--scenario", scenario,
                "--split", split,
                "--predictions", predictions_path,
                "--output_dir", "../../results/sbert-oracle",
                "--diversity", "True",
            ],
            check=True,
        )

Batches:   0%|          | 0/175 [00:00<?, ?it/s]

  0%|          | 0/5577 [00:00<?, ?it/s]

Results train perspective:


100%|██████████| 5577/5577 [00:00<00:00, 8310.42it/s]


+----+-----+----------+---------------+
|    |   k |   ndcg@k |   precision@k |
|----+-----+----------+---------------|
|  0 |   4 | 0.815208 |      0.788163 |
|  1 |   8 | 0.788282 |      0.732634 |
|  2 |  16 | 0.755618 |      0.672123 |
|  3 |  20 | 0.744415 |      0.653217 |
+----+-----+----------+---------------+


100%|██████████| 5577/5577 [04:49<00:00, 19.24it/s]


+----+-----+----------------+-------------------+
|    |   k |   alpha_ndcg@k |   kl_divergence@k |
|----+-----+----------------+-------------------|
|  0 |   4 |       0.767332 |          0.195655 |
|  1 |   8 |       0.757438 |          0.182934 |
|  2 |  16 |       0.742814 |          0.156376 |
|  3 |  20 |       0.736475 |          0.148968 |
+----+-----+----------------+-------------------+


Batches:   0%|          | 0/51 [00:00<?, ?it/s]

  0%|          | 0/1611 [00:00<?, ?it/s]

Results dev perspective:


100%|██████████| 1611/1611 [00:00<00:00, 2752.23it/s]


+----+-----+----------+---------------+
|    |   k |   ndcg@k |   precision@k |
|----+-----+----------+---------------|
|  0 |   4 | 0.823443 |      0.788382 |
|  1 |   8 | 0.795688 |      0.728695 |
|  2 |  16 | 0.767779 |      0.67078  |
|  3 |  20 | 0.756538 |      0.651607 |
+----+-----+----------+---------------+


100%|██████████| 1611/1611 [01:27<00:00, 18.31it/s]


+----+-----+----------------+-------------------+
|    |   k |   alpha_ndcg@k |   kl_divergence@k |
|----+-----+----------------+-------------------|
|  0 |   4 |       0.774376 |          0.198324 |
|  1 |   8 |       0.764345 |          0.185526 |
|  2 |  16 |       0.753354 |          0.158378 |
|  3 |  20 |       0.747223 |          0.150637 |
+----+-----+----------------+-------------------+
