In [ ]:
!pip install scikit-learn pandas numpy plotly

In [43]:
import pandas as pd
import re

df = pd.read_pickle('perplexity_results.pkl')
df=df.drop(columns=['time','prompt'])

def extract_value(json_string:str, key: str):
    json_string = json_string.replace("'", "\"").replace("\_", "_")
    pattern = f'{key}"?: ?("?[^"]*?"?)(?:,|\n)'
    match = re.search(pattern, json_string)
    return match.group(1).strip() if match else None

keys = [
    "relevant_facts_from_the_cultural_context",
"relevant_word_definitions",
"contained_explicit_racist_vocabulary",
"contained_explicit_sexist_vocabulary",
"contained_explicit_toxic_vocabulary",
"argument_for_hate_speech",
"argument_against_hate_speech",
"likelihood_of_presence_of_implicit_hate_speech",
"likelihood_of_presence_of_explicit_hate_speech",
"likelihood_of_ad_hominem_attack",
"likelihood_of_minority_attack",
"likelihood_of_takedown_on_social_media",
"hate_speech_probability",
]

for key in keys:
    df[key] = df['answer'].apply(lambda x: extract_value(x, key))

In [44]:
def maybe_float(i:str):
    try:
        return float(i)
    except Exception:
        return None
float_keys=[
"likelihood_of_presence_of_implicit_hate_speech",
"likelihood_of_presence_of_explicit_hate_speech",
"likelihood_of_ad_hominem_attack",
"likelihood_of_minority_attack",
"likelihood_of_takedown_on_social_media",
"hate_speech_probability",]
for key in float_keys:
    df[key] = df[key].apply(maybe_float)

# Check for missing values
missing_values = df.isnull().sum()
print("Missing values for each column:")
print(missing_values)

# Check for duplicated rows
duplicated_rows = df.duplicated().sum()
print(f"\nNumber of duplicated rows: {duplicated_rows}")

data_types = df.dtypes

Missing values for each column:
model                                                0
text                                                 0
answer                                               0
truth                                                0
relevant_facts_from_the_cultural_context            23
relevant_word_definitions                          334
contained_explicit_racist_vocabulary               484
contained_explicit_sexist_vocabulary               947
contained_explicit_toxic_vocabulary               1238
argument_for_hate_speech                           806
argument_against_hate_speech                       672
likelihood_of_presence_of_implicit_hate_speech      42
likelihood_of_presence_of_explicit_hate_speech      41
likelihood_of_ad_hominem_attack                   2867
likelihood_of_minority_attack                       58
likelihood_of_takedown_on_social_media              69
hate_speech_probability                            114
dtype: int64

Number of duplicate

In [47]:
import numpy as np
import plotly.express as px
df["bin"] = pd.cut(df['hate_speech_probability']*100, bins=np.arange(0, 105, 5),include_lowest=True)
df['bin'] = df["bin"].apply(lambda bin: str(bin))

df_grouped = df.groupby(["bin", "model", "truth"]).count().reset_index()
fig = px.box(df_grouped, x="bin", y="text", color="truth", hover_name="model")
fig.show()





In [49]:
import numpy as np
import plotly.express as px
df["bin"] = pd.cut(df['hate_speech_probability']*100, bins=np.arange(0, 110, 10),include_lowest=True)
df['bin'] = df["bin"].apply(lambda bin: str(bin))

true_df = df.query("not truth")
df_grouped = true_df.groupby(["bin", "model", "truth"]).count().reset_index()
fig = px.scatter(df_grouped, x="bin", y="text", color="model", hover_name="truth", title="Hate Speech Probability (only non-hatefull, our probability should be 0)")
fig.show()





In [50]:
import numpy as np
import plotly.express as px
df["bin"] = pd.cut(df['hate_speech_probability']*100, bins=np.arange(0, 110, 10),include_lowest=True)
df['bin'] = df["bin"].apply(lambda bin: str(bin))

true_df = df.query("truth")
df_grouped = true_df.groupby(["bin", "model"]).count().reset_index()
fig = px.scatter(df_grouped, x="bin", y="text", color="model", hover_name="truth", title="Hate Speech Probability (only hatefull, our probability should be 1)")
fig.show()





In [29]:
from sklearn.metrics import roc_auc_score, log_loss, brier_score_loss

for model in df["model"].unique():
    # Assuming y_true is your array of true labels and y_pred is your array of predicted probabilities
    no_nans = df[df["hate_speech_probability"].notna()]
    no_nans = no_nans[no_nans["model"]==model]
    y_true = no_nans['truth']
    y_pred = no_nans['hate_speech_probability']
    
    roc_auc = roc_auc_score(y_true, y_pred)
    logloss = log_loss(y_true, y_pred)
    brier_score = brier_score_loss(y_true, y_pred)
    
    print(f" {model} ".center(40,'-'))
    print(f"ROC AUC Score: {roc_auc}")
    print(f"Log Loss:      {logloss}")
    print(f"Brier Score:   {brier_score}")

ValueError: could not convert string to float: '0.8.'

In [35]:
df

Unnamed: 0,model,text,answer,truth,relevant_facts,contained_explicit_hate_speech_vocabular,argument_for_hate_speech,argument_against_hate_speech,likelihood_of_presence_of_implicit_hate_speech,likelihood_of_presence_of_explicit_hate_speech,likelihood_of_ad_hominin_attack,likelihood_of_minority_attack,likelihood_of_takedown_on_social_media,hate_speech_probability
0,pplx-7b-chat,I 've always considered teaching as one of the...,"{\n ""relevant_facts_from_the_cultural_context...",True,,,"""The statement expresses a powerful negative e...",,0.3,0,0.2,0.1,0.5,0.2
1,pplx-7b-chat,And the sad thing is the white students at tho...,"{\n ""relevant_facts_from_the_cultural_context...",True,,,"""The statement implies that white students wil...","""The statement is an observation",0.5,0.2,0.1,0,0.5,0.3
2,pplx-7b-chat,This whole situation starts to become really b...,"{\n ""relevant_facts_from_the_cultural_context...",False,,,"""The situation mentioned is not explicit in an...","""There is no explicit hate speech in the given...",0.3,0,0,0,0,0
3,pplx-7b-chat,That's all I needed to hear .,"```json\n{\n ""relevant_facts_from_the_cultura...",False,,,"""The sentence contains derogatory language tow...","""The sentence may be protected under the First...",0.5,0.6,0.1,1,0.8,0.7
4,pplx-7b-chat,Excellent Article !! * * * * * Why Were n't Th...,"{\n ""relevant_facts_from_the_cultural_context...",False,,,"""The question suggests that there is a perceiv...","""The sentence is a question and does not conta...",0.1,0,0,0,0.1,0.
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2863,mixtral-8x7b-instruct,I have one word for you ....... MOVE . We chos...,"{\n ""relevant_facts_from_the_cultural_context...",False,,,"""The statement explicitly expresses a preferen...","""The statement does not contain any overt thre...",0.5,0.3,0.1,0.4,0.5,0.4
2864,mixtral-8x7b-instruct,There are only eight people living within a mi...,"{\n ""relevant_facts_from_the_cultural_context...",False,,,"""This statement does not contain hate speech""","""""",0,0,0,0,0,0.
2865,mixtral-8x7b-instruct,you just say blacks hate the cold. the only re...,"{\n ""relevant_facts_from_the_cultural_context...",True,,,"""The statement suggests that Black people are ...",,0.8,0.7,0.0,1.0,0.8,0.8
2866,mixtral-8x7b-instruct,"And all those holes below 40,000 feet are fill...","{\n ""relevant_facts_from_the_cultural_context...",False,,,"""The statement does not contain any explicit h...","""The statement does not target a specific ethn...",0,0,0,0,0,0
