In [2]:
import pandas as pd
import re

df = pd.read_pickle('mistral_results.pkl')

def extract_value(json_string, key):
    pattern = f'"{key}": (.*?)(?:,|\n)'
    match = re.search(pattern, json_string)
    return match.group(1).strip() if match else None

keys = [
    "relevant_facts",
    "contained_explicit_hate_speech_vocabular",
    "argument_for_hate_speech",
    "argument_against_hate_speech",
    "likelihood_of_presence_of_implicit_hate_speech",
    "likelihood_of_presence_of_explicit_hate_speech",
    "likelihood_of_ad_hominem_attack",
    "likelihood_of_minority_attack",
    "likelyhood_of_takedown_on_social_media",
    "hate_speech_probability"
]

for key in keys:
    df[key] = df['answer'].apply(lambda x: extract_value(x, key))

In [10]:
def maybe_float(i:str):
    try:
        return float(i)
    except Exception:
        return None
for key in ["likelihood_of_presence_of_implicit_hate_speech","likelihood_of_presence_of_explicit_hate_speech","likelihood_of_ad_hominem_attack","likelihood_of_minority_attack","likelyhood_of_takedown_on_social_media", "hate_speech_probability"]:
    df[key] = df[key].apply(maybe_float)

# Check for missing values
missing_values = df.isnull().sum()
print("Missing values for each column:")
print(missing_values)

# Check for duplicated rows
duplicated_rows = df.duplicated().sum()
print(f"\nNumber of duplicated rows: {duplicated_rows}")

data_types = df.dtypes

Missing values for each column:
model                                              0
time                                               0
promt_hash                                         0
prompt                                             0
answer                                             0
truth                                              0
relevant_facts                                     3
contained_explicit_hate_speech_vocabular          10
argument_for_hate_speech                          24
argument_against_hate_speech                      53
likelihood_of_presence_of_implicit_hate_speech    10
likelihood_of_presence_of_explicit_hate_speech    18
likelihood_of_ad_hominem_attack                    6
likelihood_of_minority_attack                      3
likelyhood_of_takedown_on_social_media            15
hate_speech_probability                           13
dtype: int64

Number of duplicated rows: 0


In [27]:
import numpy as np
import plotly.express as px
df["bin"] = pd.cut(df['hate_speech_probability']*100, bins=np.arange(0, 105, 5),include_lowest=True)
df['bin'] = df["bin"].apply(lambda bin: str(bin))

df_grouped = df.groupby(["bin", "truth"]).count().reset_index()
fig = px.box(df_grouped, x="bin", y="time", color="truth")
fig.show()





In [36]:
!pip install scikit-learn
from sklearn.metrics import roc_auc_score, log_loss, brier_score_loss

# Assuming y_true is your array of true labels and y_pred is your array of predicted probabilities
no_nans = df[df["hate_speech_probability"].notna()]
y_true = no_nans['truth']
y_pred = no_nans['hate_speech_probability']

roc_auc = roc_auc_score(y_true, y_pred)
logloss = log_loss(y_true, y_pred)
brier_score = brier_score_loss(y_true, y_pred)

print(f"ROC AUC Score: {roc_auc}")
print(f"Log Loss: {logloss}")
print(f"Brier Score: {brier_score}")

ROC AUC Score: 0.8702778702778704
Log Loss: 1.4708352350980267
Brier Score: 0.18307827956989248
