In [39]:
import pandas as pd
import matplotlib.pyplot as plt

In [40]:
# Load the two raw evaluation tables. The _cn / _en file suffixes presumably
# mark the Chinese and English variants of the same set — TODO confirm.
df2, df = (
    pd.read_csv('data/old_alignment_eval_mistral-small_cn.csv'),
    pd.read_csv('data/old_alignment_eval_mistral-small_en.csv'),
)

In [41]:
import pandas as pd
import ast

# Default margin below a row's best similarity score. An option whose score is
# lower than (best score - CUTOFF) gets masked.
CUTOFF = 0.459

def mark_wrong_options(row, cutoff=None):
    """Mask the answer options whose similarity score trails the best one.

    The row's ``SimilarityScores`` entry is read as a sequence of scores that
    is paired positionally with the option columns ``A``, ``B``, ``C``, ``D``.
    Every paired option whose score is not at least ``max(scores) - cutoff``
    has its value replaced by the literal ``"DO NOT PICK THIS OPTION"``.

    Parameters
    ----------
    row : pandas.Series or dict-like
        Must support item access and ``.copy()``, and hold the keys
        ``A``-``D`` and ``SimilarityScores``. ``SimilarityScores`` may be the
        string form of a Python list (as read back from a CSV) or an
        already-parsed sequence of numbers.
    cutoff : float, optional
        Margin subtracted from the best score. When omitted, the module-level
        ``CUTOFF`` is looked up at call time.

    Returns
    -------
    A copy of ``row`` with the low-scoring options masked. The input row is
    left untouched, because functions handed to ``DataFrame.apply`` should
    not mutate the object they receive.

    Raises
    ------
    ValueError
        If the score sequence is empty, or if the score string cannot be
        parsed (``ast.literal_eval`` may also raise ``SyntaxError``).
    KeyError
        If ``SimilarityScores`` or a scored option column is missing.
    """
    margin = CUTOFF if cutoff is None else cutoff

    raw_scores = row["SimilarityScores"]
    # CSV round-trips store the list as text; in-memory callers may pass a list.
    scores = ast.literal_eval(raw_scores) if isinstance(raw_scores, str) else list(raw_scores)

    threshold = max(scores) - margin

    masked = row.copy()
    # zip stops at the shorter sequence, so options without a score are left as-is.
    for opt, score in zip(["A", "B", "C", "D"], scores):
        masked[opt] = row[opt] if score >= threshold else "DO NOT PICK THIS OPTION"

    return masked

# Run the masking function over every row (axis=1) of each table.
df = df.apply(func=mark_wrong_options, axis=1)
df2 = df2.apply(func=mark_wrong_options, axis=1)

# Write the masked tables to disk, leaving out the index column.
df.to_csv(
    path_or_buf="output/new_0.459_alignment_eval_mistral-small_en.csv",
    index=False,
)
df2.to_csv(
    path_or_buf="output/new_0.459_alignment_eval_mistral-small_cn.csv",
    index=False,
)

# Analyze the masked output (cutoff 0.459)

In [43]:
# Read the masked tables back from the files written with the 0.459 cutoff.
df1_res, df2_res = (
    pd.read_csv('output/new_0.459_alignment_eval_mistral-small_en.csv'),
    pd.read_csv('output/new_0.459_alignment_eval_mistral-small_cn.csv'),
)

In [44]:
# Number of marker cells anywhere in the frame: per-column hits, then their sum.
total_wrong = (
    df1_res
    .isin(["DO NOT PICK THIS OPTION"])
    .sum()
    .sum()
)
print(f"Total DO NOT PICK THIS OPTION occurrences: {total_wrong}")

# Same count restricted to the four option columns, one figure per column.
col_counts = (
    df1_res.loc[:, ['A','B','C','D']]
    .isin(["DO NOT PICK THIS OPTION"])
    .sum()
)
print("Counts per option column:")
print(col_counts)

Total DO NOT PICK THIS OPTION occurrences: 238
Counts per option column:
A    58
B    53
C    52
D    75
dtype: int64


In [45]:
# Number of marker cells anywhere in the frame: per-column hits, then their sum.
total_wrong = (
    df2_res
    .isin(["DO NOT PICK THIS OPTION"])
    .sum()
    .sum()
)
print(f"Total DO NOT PICK THIS OPTION occurrences: {total_wrong}")

# Same count restricted to the four option columns, one figure per column.
col_counts = (
    df2_res.loc[:, ['A','B','C','D']]
    .isin(["DO NOT PICK THIS OPTION"])
    .sum()
)
print("Counts per option column:")
print(col_counts)

Total DO NOT PICK THIS OPTION occurrences: 142
Counts per option column:
A    34
B    27
C    31
D    50
dtype: int64


# Let's test a lower cutoff (0.2)

In [46]:
# Start again from the raw files so this run does not build on tables that
# were already masked.
df2, df = (
    pd.read_csv('data/old_alignment_eval_mistral-small_cn.csv'),
    pd.read_csv('data/old_alignment_eval_mistral-small_en.csv'),
)

# Default margin below a row's best similarity score. An option whose score is
# lower than (best score - CUTOFF) gets masked.
CUTOFF = 0.2

def mark_wrong_options(row, cutoff=None):
    """Mask the answer options whose similarity score trails the best one.

    The row's ``SimilarityScores`` entry is read as a sequence of scores that
    is paired positionally with the option columns ``A``, ``B``, ``C``, ``D``.
    Every paired option whose score is not at least ``max(scores) - cutoff``
    has its value replaced by the literal ``"DO NOT PICK THIS OPTION"``.

    NOTE(review): a function with this name is also defined in an earlier
    cell, and this definition shadows it. Consider keeping a single definition
    and passing ``cutoff`` explicitly instead of rebinding ``CUTOFF``.

    Parameters
    ----------
    row : pandas.Series or dict-like
        Must support item access and ``.copy()``, and hold the keys
        ``A``-``D`` and ``SimilarityScores``. ``SimilarityScores`` may be the
        string form of a Python list (as read back from a CSV) or an
        already-parsed sequence of numbers.
    cutoff : float, optional
        Margin subtracted from the best score. When omitted, the module-level
        ``CUTOFF`` is looked up at call time.

    Returns
    -------
    A copy of ``row`` with the low-scoring options masked. The input row is
    left untouched, because functions handed to ``DataFrame.apply`` should
    not mutate the object they receive.

    Raises
    ------
    ValueError
        If the score sequence is empty, or if the score string cannot be
        parsed (``ast.literal_eval`` may also raise ``SyntaxError``).
    KeyError
        If ``SimilarityScores`` or a scored option column is missing.
    """
    margin = CUTOFF if cutoff is None else cutoff

    raw_scores = row["SimilarityScores"]
    # CSV round-trips store the list as text; in-memory callers may pass a list.
    scores = ast.literal_eval(raw_scores) if isinstance(raw_scores, str) else list(raw_scores)

    threshold = max(scores) - margin

    masked = row.copy()
    # zip stops at the shorter sequence, so options without a score are left as-is.
    for opt, score in zip(["A", "B", "C", "D"], scores):
        masked[opt] = row[opt] if score >= threshold else "DO NOT PICK THIS OPTION"

    return masked

# Run the masking function over every row (axis=1) of each table.
df = df.apply(func=mark_wrong_options, axis=1)
df2 = df2.apply(func=mark_wrong_options, axis=1)

# Write the masked tables to disk, leaving out the index column.
df.to_csv(
    path_or_buf="output/new_0.2_alignment_eval_mistral-small_en.csv",
    index=False,
)
df2.to_csv(
    path_or_buf="output/new_0.2_alignment_eval_mistral-small_cn.csv",
    index=False,
)

In [47]:
# Read the masked tables back from the files written with the 0.2 cutoff.
df1_res, df2_res = (
    pd.read_csv('output/new_0.2_alignment_eval_mistral-small_en.csv'),
    pd.read_csv('output/new_0.2_alignment_eval_mistral-small_cn.csv'),
)

In [48]:
# Number of marker cells anywhere in the frame: per-column hits, then their sum.
total_wrong = (
    df1_res
    .isin(["DO NOT PICK THIS OPTION"])
    .sum()
    .sum()
)
print(f"Total DO NOT PICK THIS OPTION occurrences: {total_wrong}")

# Same count restricted to the four option columns, one figure per column.
col_counts = (
    df1_res.loc[:, ['A','B','C','D']]
    .isin(["DO NOT PICK THIS OPTION"])
    .sum()
)
print("Counts per option column:")
print(col_counts)

Total DO NOT PICK THIS OPTION occurrences: 1270
Counts per option column:
A    320
B    296
C    305
D    349
dtype: int64


In [50]:
# Number of marker cells anywhere in the frame: per-column hits, then their sum.
total_wrong = (
    df2_res
    .isin(["DO NOT PICK THIS OPTION"])
    .sum()
    .sum()
)
print(f"Total DO NOT PICK THIS OPTION occurrences: {total_wrong}")

# Same count restricted to the four option columns, one figure per column.
col_counts = (
    df2_res.loc[:, ['A','B','C','D']]
    .isin(["DO NOT PICK THIS OPTION"])
    .sum()
)
print("Counts per option column:")
print(col_counts)

Total DO NOT PICK THIS OPTION occurrences: 1254
Counts per option column:
A    297
B    286
C    306
D    365
dtype: int64
