In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from scipy.stats import pearsonr, spearmanr

# Notebook-wide pandas display settings: wrap printed frames at 80 characters
# and never truncate the contents of a column (max_colwidth=None).
pd.options.display.width = 80
pd.options.display.max_colwidth = None

In [None]:
# Load the scored generations and derive the columns used by the analysis:
#   * generated_text   -> sliced by len(prompt), i.e. the first len(prompt)
#                         characters are dropped (assumes each generation
#                         starts with its prompt -- TODO confirm)
#   * log_probability  -> sign-flipped copy of negative_log_probability
# Exact duplicate rows are dropped after the derived columns are in place.
df = (
    pd.read_csv(
        "ethz-spylab-rlhf-7b-harmless_l256_promptseed42_numprompt500_numgenerations100_ancestral_scoredreward_scorednll.csv"
    )
    .assign(
        generated_text=lambda frame: [
            text[len(prompt) :]
            for prompt, text in zip(frame["prompt"], frame["generated_text"])
        ],
        log_probability=lambda frame: -frame["negative_log_probability"],
    )
    .drop_duplicates()
)
df.head()

In [None]:
# Summary statistics for the score and both signs of the log-probability.
df.loc[:, ["score", "negative_log_probability", "log_probability"]].describe()

In [None]:
# How many resampled corpora to draw for the corpus-level statistics; each
# corpus index doubles as the random seed for its draw.
num_corpuses = 1_000

In [None]:
def _report_correlations(frame, title):
    """Print rank and linear correlations of score vs. log-probability, then plot them.

    Args:
        frame: DataFrame with "score" and "log_probability" columns.
        title: Title for the scatter plot.

    Prints the Spearman result first, then the Pearson result, and finally
    shows (and closes) a scatter plot with log_probability on the x axis and
    score on the y axis.
    """
    scores = frame["score"]
    log_probs = frame["log_probability"]
    print(f"Spearman: {spearmanr(scores, log_probs)}")
    print(f"Pearson: {pearsonr(scores, log_probs)}")
    frame.plot.scatter(x="log_probability", y="score", title=title)
    plt.show()
    plt.close()


# ---- Sample level: one point per row of df ----
print("===== Sample level statistics =====")
_report_correlations(df, "Samples")

# ---- Corpus level: one point per resampled corpus ----
print("===== Corpus level statistics =====")
# Step 1: draw `num_corpuses` corpora by sampling rows of df with replacement.
# Each corpus has twice as many rows as df and is seeded by its own index, so
# the draws are reproducible.
# NOTE(review): every corpus keeps all columns of df, so memory grows with
# num_corpuses * 2 * len(df) rows -- consider sampling only the two numeric
# columns if nothing downstream needs the rest.
corpuses = {
    seed: df.sample(n=2 * len(df), replace=True, random_state=seed)
    for seed in range(num_corpuses)
}

# Step 2: collapse each corpus to its mean score and mean log-probability.
# The dict of per-corpus Series becomes one column per seed; transposing gives
# one row per corpus.
mean_corpuses_df = pd.DataFrame.from_dict(
    {
        seed: sampled.loc[:, ["score", "log_probability"]].mean()
        for seed, sampled in corpuses.items()
    }
).transpose()

# Step 3: same correlation report as above, now on the per-corpus means.
_report_correlations(mean_corpuses_df, "Means by corpus")