In [1]:
import wandb
import yaml
import pandas as pd
# Read the YAML settings file in full, then parse it. The resulting mapping
# supplies the W&B entity and project names used when querying runs.
with open('config.yml', 'r') as file:
    raw_yaml = file.read()
config = yaml.safe_load(raw_yaml)

In [2]:
# Fail fast when the config lacks a required value; otherwise the f-string
# below would silently build a path such as "None/None".
missing_keys = [key for key in ('wandb_entity', 'wandb_project') if not config.get(key)]
if missing_keys:
    raise KeyError(f"config.yml is missing required key(s): {', '.join(missing_keys)}")

wandb_path = f"{config['wandb_entity']}/{config['wandb_project']}"

api = wandb.Api(timeout=300)
runs = api.runs(wandb_path)

# Keep only the runs that carry the 'mmlu' tag.
filtered_runs = [run for run in runs if 'mmlu' in run.tags]
if not filtered_runs:
    # pd.concat([]) would raise an opaque "No objects to concatenate" error.
    raise ValueError(f"No runs tagged 'mmlu' found in {wandb_path}")

# One frame per run history, stacked row-wise. ignore_index=True gives the
# combined frame a unique RangeIndex instead of repeating 0..n-1 for each run.
df = pd.concat(
    [pd.DataFrame(run.scan_history()) for run in filtered_runs],
    ignore_index=True,
)


ValueError: Could not find project wandb_project

In [None]:
import re
def score_row(row):
    """Score every completion of one row against the row's correct answer.

    Each completion is split into alphanumeric words; the last word that is
    exactly 'A', 'B', 'C' or 'D' is taken as the completion's answer.

    Args:
        row: Mapping-like object (dict or pandas Series) with
            'completions' (iterable of completion strings) and
            'answer_idx' (index of the correct choice into A/B/C/D).

    Returns:
        A list with one entry per completion: 1 when the extracted answer
        equals the correct letter, 0 otherwise (including completions with
        no answer letter and non-string completions such as None/NaN).
    """
    classes = ('A', 'B', 'C', 'D')
    # pandas stores the column as float when any row lacks the value, and a
    # float cannot index a tuple, so coerce to int once, outside the loop.
    expected = classes[int(row['answer_idx'])]

    results = []
    for completion in row['completions']:
        if not isinstance(completion, str):
            # Missing completion (None/NaN): nothing to parse, count as wrong.
            results.append(0)
            continue

        # Maximal runs of word characters, i.e. the same tokens obtained by
        # replacing every non-word character with a space and splitting.
        matches = [word for word in re.findall(r"\w+", completion) if word in classes]

        # Take the last match as the answer.
        results.append(1 if matches and matches[-1] == expected else 0)
    return results

def score_df(df):
    """Add per-completion and per-row scores to ``df`` and return it.

    Two columns are written onto the frame that was passed in:

    * ``scores`` - the list produced by ``score_row`` for each row.
    * ``score``  - the score (0 or 1) of the non-empty completion that has
      the highest incentive, or 0 when every completion is the empty string.
    """
    df['scores'] = df.apply(score_row, axis=1)

    def _top_incentive_score(row):
        # Pair each incentive with its score, skipping empty completions.
        candidates = [
            (incentive, score)
            for incentive, score, completion in zip(
                row['incentives'], row['scores'], row['completions']
            )
            if completion != ''
        ]

        if not candidates:
            return 0

        # The pair with the largest incentive decides the row's score.
        _, best_score = max(candidates, key=lambda pair: pair[0])
        return 1 if best_score == 1 else 0

    df['score'] = df.apply(_top_incentive_score, axis=1)
    return df


# Score every fetched row, then persist the result (without the row index)
# to the CSV file that the plotting cell reads back in.
scored_df = score_df(df)
scored_df.to_csv(path_or_buf='scored_df.csv', index=False)

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# --- Plot configuration ---
# Trailing, time-based window. The original code used a fixed 2000-row
# window, which did not match the "24-hour" column name and plot title.
ROLLING_WINDOW = pd.Timedelta(hours=24)
# Rows before this date are left out of the saved CSV and the plot.
PLOT_START = '2024-08-28'
# Reference line drawn at 90% of 0.82.
# NOTE(review): 0.82 is presumably a baseline accuracy - confirm.
REFERENCE_SCORE = 0.82 * 0.9

df = pd.read_csv('scored_df.csv')

# Parse the timestamps once, then sort and index by them. A time-based
# rolling window needs a monotonic datetime index without NaT, so rows with
# a missing timestamp are dropped (the date filter below excluded them anyway).
df['datetime'] = pd.to_datetime(df['datetime'])
df = df.dropna(subset=['datetime']).sort_values('datetime').set_index('datetime')

# Mean score over the trailing 24 hours at each row.
df['score_24h_rolling'] = df['score'].rolling(window=ROLLING_WINDOW, min_periods=1).mean()

# Cut the data to start at PLOT_START.
df_filled = df[df.index >= PLOT_START]

# Save to csv
df_filled.to_csv('moving_averaged.csv')

# Set the palette before the axes are created so the line picks it up.
sns.set_palette("pastel")

fig, ax = plt.subplots(figsize=(12, 4))

# Plot the 24-hour rolling average
df_filled['score_24h_rolling'].plot(ax=ax)
ax.set_title('24-Hour Rolling Average of Score')
ax.set_xlabel('datetime')
ax.set_ylabel('score')

# Orange horizontal reference line
ax.axhline(y=REFERENCE_SCORE, color='orange', linestyle='-')

plt.show()


