In [1]:
import sys 
sys.path.append("..")

In [2]:
from src.datasets import HateXplainRace

In [3]:
import faiss

In [4]:
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
def filter_demographics(
        demographics, overall_demographics
    ) -> str:

        set_of_demographics = set(demographics)

        intersection = set_of_demographics.intersection(overall_demographics)

        if len(intersection) == 0:
            return ""

        else:
            return list(intersection)[0]


In [6]:
embedding = SentenceTransformer('all-mpnet-base-v2')

In [7]:
hate = HateXplainRace("../data/HateXplain/")

In [8]:
train_df, test_df, overall_demographics = hate.create_prompts()

In [9]:
train_df["filtered_demographics"] = train_df["demographics"].apply(
    lambda x: filter_demographics(x, set(hate.demographics))
)

test_df["filtered_demographics"] = test_df["demographics"].apply(
    lambda x: filter_demographics(x, set(hate.demographics))
)

filtered_train_df = train_df[train_df.filtered_demographics != ""].copy()

test_df = test_df[test_df.filtered_demographics != ""]

In [10]:
train_vectors = embedding.encode(train_df['prompts'].tolist(), batch_size = 32, show_progress_bar=True)
test_vectors = embedding.encode(test_df['prompts'].tolist(), batch_size = 32, show_progress_bar=True)

Batches: 100%|██████████| 221/221 [00:06<00:00, 36.52it/s]
Batches: 100%|██████████| 22/22 [00:00<00:00, 41.13it/s]


In [11]:
faiss.normalize_L2(train_vectors)

faiss.normalize_L2(test_vectors)

In [12]:
vector_dim = train_vectors.shape[1]

index = faiss.IndexFlatIP(vector_dim)

index.add(train_vectors)

distances, neighbors = index.search(test_vectors, 5)

In [13]:
pre_computed_inclusions = dict()

for demographic in set(hate.demographics):
    pre_computed_inclusions[demographic] = filtered_train_df[filtered_train_df.filtered_demographics == demographic]

In [14]:
within_samples = []
for row in test_df.itertuples():
    filtered_df = pre_computed_inclusions[row.filtered_demographics]

    within_sample = filtered_df["prompts"].sample(n=5).index.tolist()

    within_samples.append(within_sample)

In [15]:
import numpy as np

In [16]:
average_cosine_differences_between_groups = []
for i in range(len(within_samples)):

    within_vectors = train_vectors[within_samples[i]]
    sim_vectors = train_vectors[neighbors[i]]

    average_cosine_differences_between_groups.append((within_vectors @ sim_vectors.T).mean())

In [17]:
np.array(average_cosine_differences_between_groups).mean()

0.7786888