In [1]:
from functools import lru_cache

import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer, util

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the preliminary-study CSV, then keep one frame per value of the `race` column.
data = pd.read_csv('../../Data/prelim_study.csv')
black = data[data['race'] == 'black']
white = data[data['race'] == 'white']

## Define Functions

In [3]:
@lru_cache(maxsize=1)
def _load_model(model_name):
    """Build the SentenceTransformer for `model_name`, reusing the most recently built one."""
    return SentenceTransformer(model_name)


def return_cosines(model_name, list_of_text):
    """Return the cosine similarity of every unordered pair of texts.

    The texts are encoded with the sentence-transformers model identified by
    `model_name`, the full pairwise cosine matrix is computed with
    `util.cos_sim`, and only the entries strictly above the diagonal are
    kept, so each pair (i, j) with i < j appears exactly once and
    self-similarities are excluded.

    Parameters
    ----------
    model_name : str
        Model identifier passed to `SentenceTransformer`.
    list_of_text : list
        Texts to encode; its length determines which matrix entries are kept.

    Returns
    -------
    One-dimensional result of indexing the `util.cos_sim` output with the
    upper-triangle indices; it holds n * (n - 1) / 2 values for n texts.
    """
    # The notebook calls this function twice in a row per model (once per
    # group); the single-slot cache avoids building the same model twice
    # while keeping at most one model alive.
    model = _load_model(model_name)
    embedding = model.encode(list_of_text)
    cosines = util.cos_sim(embedding, embedding)
    # k=1 skips the main diagonal, i.e. each text compared with itself.
    upper_triangle = np.triu_indices(len(list_of_text), 1)
    return cosines[upper_triangle]

## Results using all-mpnet-base-v2

In [4]:
# Within-group pairwise cosines for all-mpnet-base-v2 (black group first, then white).
mpnetbase_black, mpnetbase_white = (
    return_cosines('sentence-transformers/all-mpnet-base-v2', group['text'].tolist())
    for group in (black, white)
)

In [5]:
# Combine both groups' cosines into one long-format table and write it to CSV.
mpnetbase_cosines = np.concatenate([mpnetbase_black.numpy(), mpnetbase_white.numpy()])
mpnetbase_model = ['all-mpnet-base-v2' for _ in range(len(mpnetbase_cosines))]
mpnetbase_race = (
    ['African Americans'] * len(mpnetbase_black)
    + ['White Americans'] * len(mpnetbase_white)
)
mpnetbase_df = pd.DataFrame(
    {
        'model': mpnetbase_model,
        'race': mpnetbase_race,
        'cosine': mpnetbase_cosines,
    }
)
mpnetbase_df.to_csv('../Cosine/mpnetbase.csv', index=False)

## Results using all-distilroberta-v1

In [6]:
# Within-group pairwise cosines for all-distilroberta-v1 (black group first, then white).
distilroberta_black, distilroberta_white = (
    return_cosines('sentence-transformers/all-distilroberta-v1', group['text'].tolist())
    for group in (black, white)
)

In [7]:
# Combine both groups' cosines into one long-format table and write it to CSV.
distilroberta_cosines = np.concatenate([distilroberta_black.numpy(), distilroberta_white.numpy()])
distilroberta_model = ['all-distilroberta-v1' for _ in range(len(distilroberta_cosines))]
distilroberta_race = (
    ['African Americans'] * len(distilroberta_black)
    + ['White Americans'] * len(distilroberta_white)
)
distilroberta_df = pd.DataFrame(
    {
        'model': distilroberta_model,
        'race': distilroberta_race,
        'cosine': distilroberta_cosines,
    }
)
distilroberta_df.to_csv('../Cosine/distilroberta.csv', index=False)

## Results using all-MiniLM-L12-v2

In [8]:
# Within-group pairwise cosines for all-MiniLM-L12-v2 (black group first, then white).
allminilm_black, allminilm_white = (
    return_cosines('sentence-transformers/all-MiniLM-L12-v2', group['text'].tolist())
    for group in (black, white)
)

In [9]:
# Combine both groups' cosines into one long-format table and write it to CSV.
allminilm_cosines = np.concatenate([allminilm_black.numpy(), allminilm_white.numpy()])
allminilm_model = ['all-MiniLM-L12-v2' for _ in range(len(allminilm_cosines))]
allminilm_race = (
    ['African Americans'] * len(allminilm_black)
    + ['White Americans'] * len(allminilm_white)
)
allminilm_df = pd.DataFrame(
    {
        'model': allminilm_model,
        'race': allminilm_race,
        'cosine': allminilm_cosines,
    }
)
allminilm_df.to_csv('../Cosine/allminilm.csv', index=False)