In [None]:
# !pip install anthropic

In [1]:
import sys
import time
import string
import numpy as np
import pandas as pd
import os
from pydantic import BaseModel, Field
import matplotlib.pyplot as plt
from google import genai
from google.genai import types
import openai
from openai import OpenAI

# from anthropic import Anthropic

# key from delegation OpenAI project


# MODEL = "gpt-4o-2024-05-13"
# MODEL = "gpt-4o-mini-2024-07-18"
MODEL = "gpt-5-nano" # cheaper than 4o-mini!
# MODEL = "o1-2024-12-17"
# MODEL = "o3-mini-2025-01-31"
# MODEL = "meta-llama/Llama-3.2-3B-Instruct-Turbo"
# MODEL = "gemini-2.5-flash"

# list of claude models here: https://docs.anthropic.com/en/docs/about-claude/models/overview
# MODEL = "claude-3-5-haiku@20241022"
# MODEL = "claude-sonnet-4@20250514"
# MODEL = "claude-opus-4-20250514"

# list of llama models here: https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/llama/llama4-scout?utm_source=chatgpt.com
# MODEL = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
# MODEL = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"

# initialize client
client = genai.Client(
    vertexai=True,
    # project="accuracy-obsession",  # this links to my CC! Although credits might be in there before. Exceptions project is exceptions-467800
    project = "exceptions-467800",
    location="us-central1"
)


In [None]:
# load all Movielens data (large!)
# df_movies = pd.read_csv("gs://exceptions-data/LLM Delegation/MovieLens/data/movies.csv")
# df_ratings = pd.read_csv("gs://exceptions-data/LLM Delegation/MovieLens/data/ratings.csv")

# merge and save data (32M rows!)
# df_all = df_ratings.merge(df_movies, left_on = 'movieId', right_on = 'movieId')
# df_all.to_csv("gs://exceptions-data/LLM Delegation/MovieLens/data/movies_and_ratings.csv")

# save subset (for speed)
# temp = df_all.tail(1000000)
# temp.to_csv("gs://exceptions-data/LLM Delegation/MovieLens/data/movies_and_ratings_last1000000.csv")

In [2]:
# load subset of merged data
df = pd.read_csv("gs://exceptions-data/LLM Delegation/MovieLens/data/movies_and_ratings_last1000000.csv")

In [3]:
import statsmodels.formula.api as smf
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score


##################
#### Train OLS ###
##################

# clean training dataset
df_ols = df.copy()

# extract 4-digit year at end of title, else 0
df_ols['year'] = (
    df_ols['title']
    .str.extract(r'\((\d{4})\)$', expand=False)
    .pipe(pd.to_numeric, errors='coerce')
    .fillna(0)
    .astype(int)
)

# fast vectorized genre one-hot encoding (spreads)
genre_counts = (
    df_ols['genres']
    .fillna('')
    .str.get_dummies(sep='|')
    .astype('int8')
)

# join back to dataframe
df_ols = df_ols.join(genre_counts)


# build formula: C(...) tells statsmodels to treat as categorical
# but if I do categorical, everything breaks!
# no userId or movieId because then everything breaks
formula = (
    "rating ~ year + "
    "Action + Adventure + Animation + Children + Comedy + Crime + "
    "Documentary + Drama + Fantasy + Q('Film-Noir') + Horror + IMAX + "
    "Musical + Mystery + Romance + Q('Sci-Fi') + Thriller + War + Western"
)

# create merge key (string is safest)
df_ols['user_movie_key'] = (
    df_ols['userId'].astype(str) + "_" + df_ols['movieId'].astype(str)
)

# train / test split (keep indices for later!)
train_df, test_df = train_test_split(
    df_ols, test_size=0.2, random_state=42
)

# mark train / test
train_df = train_df.copy()
test_df = test_df.copy()
train_df['split'] = 'train'
test_df['split'] = 'test'

# fit model on training data
ols = smf.ols(formula=formula, data=train_df).fit()

# optional: full model summary
print(ols.summary())

# generate predictions for ALL rows
df_ols['pred'] = ols.predict(df_ols)

# keep only what we need for merging back
pred_df = df_ols[['user_movie_key', 'pred']]

split_df = pd.concat([
    train_df[['user_movie_key', 'split']],
    test_df[['user_movie_key', 'split']]
])

# save r^2
r2 = ols.rsquared

# merge predictions + split info back to original df
df['user_movie_key'] = (
    df['userId'].astype(str) + "_" + df['movieId'].astype(str)
)
df = df.merge(pred_df, on='user_movie_key', how='left')
df = df.merge(split_df, on='user_movie_key', how='left')

                            OLS Regression Results                            
Dep. Variable:                 rating   R-squared:                       0.033
Model:                            OLS   Adj. R-squared:                  0.033
Method:                 Least Squares   F-statistic:                     1374.
Date:                Wed, 04 Feb 2026   Prob (F-statistic):               0.00
Time:                        18:07:19   Log-Likelihood:            -1.1614e+06
No. Observations:              800000   AIC:                         2.323e+06
Df Residuals:                  799979   BIC:                         2.323e+06
Df Model:                          20                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          3.8330      0.032    121.

In [4]:
import pandas as pd
import random

def create_prompt_base(row):

    # 1. check if given row has at least 7 ratings
    temp_ID = df.iloc[row]['userId']
    temp_ratings = df.loc[df['userId'] == temp_ID].shape[0]

    if temp_ratings < 7:
        return "Not enough reviews per user to generate this prompt.", None, None

    # grab user data
    user_data = df[df['userId'] == temp_ID].copy()

    # 2. Select two movies with DIFFERENT ratings
    shuffled_user_data = user_data.sample(frac=1, random_state=None)

    test_movie_1 = shuffled_user_data.iloc[0]
    test_movie_2 = None

    for i in range(1, len(shuffled_user_data)):
        if shuffled_user_data.iloc[i]['rating'] != test_movie_1['rating']:
            test_movie_2 = shuffled_user_data.iloc[i]
            used_indices = [0, i]
            break

    if test_movie_2 is None:
        return "Could not find two movies with different ratings for this user.", None, None

    # 3. Grab 5 other movies for the history
    history_pool = shuffled_user_data.drop(shuffled_user_data.index[used_indices])
    history_movies = history_pool.head(5)

    # 4. Format the Prompt
    prompt = "Person 1 has reviewed the following movies:\n\n"
    for _, r in history_movies.iterrows():
        prompt += f"- {r['title']} ({r['genres']}): Rated {r['rating']}/5\n"

    prompt += "\nConsider these two movies they have not seen:\n\n"

    test_pair = [test_movie_1, test_movie_2]
    random.shuffle(test_pair)

    for movie in test_pair:
        prompt += f"- {movie['title']} ({movie['genres']})\n"

    # 5. Return prompt, true ratings, and predicted ratings
    answer_key = {
        test_movie_1['title']: test_movie_1['rating'],
        test_movie_2['title']: test_movie_2['rating']
    }

    pred_key = {
        test_movie_1['title']: test_movie_1['pred'],
        test_movie_2['title']: test_movie_2['pred']
    }

    return prompt, answer_key, pred_key


In [5]:
# test one prompt
create_prompt_base(0)

("Person 1 has reviewed the following movies:\n\n- She's Gotta Have It (1986) (Comedy|Romance): Rated 3.0/5\n- Ladyhawke (1985) (Adventure|Fantasy|Romance): Rated 2.0/5\n- Tao of Steve, The (2000) (Comedy): Rated 3.0/5\n- X-Men (2000) (Action|Adventure|Sci-Fi): Rated 4.0/5\n- Animal House (1978) (Comedy): Rated 2.0/5\n\nConsider these two movies they have not seen:\n\n- Sarafina! (1992) (Drama)\n- Jagged Edge (1985) (Crime|Romance|Thriller)\n",
 {'Jagged Edge (1985)': 3.0, 'Sarafina! (1992)': 2.0},
 {'Jagged Edge (1985)': 3.6179953204067603,
  'Sarafina! (1992)': 3.654780256109197})

In [6]:
from google import genai


# base LLM prompt
def get_llm_base(base_prompt, model_id=MODEL):

    # 1. Construct the one-shot prompt
    instructions = (
        "\n\nRespond with two numbers ONLY on two lines. "
        "First, respond 1 (first movie) or 2 (second movie) based on your prediction of which movie Person 1 would rate higher. \n"
        "Second, respond 0 to recommend the movie you predicted, "
        "or 1 if you would like to delegate to Person 1 so they can choose the movie. Person 1 will always choose the movie they would rate higher \n"
        "A decision is considered 'correct' if it is the movie Person 1 would rate higher."
    )

    full_prompt = base_prompt + instructions

    # 2. Call the LLM

    # openAI version
    messages = [{"role": "user", "content": full_prompt}]
    response = openai.chat.completions.create(model=model_id, messages=messages)
    lines = response.choices[0].message.content.strip().split('\n')


    # gemini version
    # response = client.models.generate_content(
    #     model=model_id,
    #     contents=full_prompt
    # )
    # lines = response.text.strip().split('\n')

    # 3. Parse the response (extract numbers from the two lines)
    try:


        # Filter out empty lines if the LLM added extra spacing
        clean_lines = [line.strip() for line in lines if line.strip()]

        # save integer if there is one
        try:
            prediction = int(clean_lines[0])
            delegation = int(clean_lines[1])
        except (ValueError, TypeError, IndexError):
            prediction = None
            delegation = None

        return {
              "pred": prediction,
              "del": delegation
          }


    except (ValueError, IndexError):
        print(f"Error parsing LLM response: {response.text}")
        return None, None

In [7]:
import re
from google import genai

# sequential inference (generator -> auditor -> judge)
def get_sequential_inference(base_prompt, model_id=MODEL):
    try:
        # --- STEP 1: PREDICT & EXPLAIN (Generator) ---
        p1 = (
            f"{base_prompt}\n\n"
            "TASK: Predict which movie Person 1 would rate higher.\n"
            "Explain your reasoning in 1 sentence. Then conclude with exactly:\n"
            "'PREDICTION: 1' (first movie) or 'PREDICTION: 2' (second movie)."
        )


        # OpenAI version
        messages = [{"role": "user", "content": p1}]
        r1 = openai.chat.completions.create(model=model_id, messages=messages)
        full_output = r1.choices[0].message.content.strip()

        # gemini version?
        # r1 = client.models.generate_content(model=model_id, contents=p1)
        # full_output = r1.text.strip()

        preds = re.findall(r'PREDICTION:\s*([12])', full_output)
        pred = int(preds[-1]) if preds else 1  # default to 1 if parsing fails

        # --- STEP 2: CRITIQUE (Auditor) ---
        p2 = (
            f"PROMPT:\n{base_prompt}\n\n"
            f"PROPOSED LOGIC:\n{full_output}\n\n"
            "TASK: Critique this reasoning in 1 sentence."
        )

        # OpenAI version
        messages = [{"role": "user", "content": p2}]
        r2 = openai.chat.completions.create(model=model_id, messages=messages)
        critique = r2.choices[0].message.content.strip()


        # gemini version?
        # r2 = client.models.generate_content(model=model_id, contents=p2)
        # critique = r2.text.strip()

        # --- STEP 3: FINAL DECISION (Judge) ---
        p3 = (
            f"PROMPT:\n{base_prompt}\n\n"
            f"PREDICTION & REASONING:\n{full_output}\n\n"
            f"CRITIQUE:\n{critique}\n\n"
            "TASK: Respond with two numbers ONLY on two lines.\n"
            "Line 1: output 1 or 2 for which movie Person 1 would rate higher.\n"
            "Line 2: output 0 to implement your prediction, or 1 to delegate to Person 1.\n"
            "A decision is correct if it matches the movie Person 1 would rate higher."
        )

        # openai version
        messages = [{"role": "user", "content": p3}]
        r3 = openai.chat.completions.create(model=model_id, messages=messages)
        final_text = r3.choices[0].message.content.strip()

        # gemini version
        # r3 = client.models.generate_content(model=model_id, contents=p3)
        # final_text = r3.text.strip()

        # parse final response (two digits on two lines)
        lines = [ln.strip() for ln in final_text.split('\n') if ln.strip()]
        final_pred = int(re.findall(r'[12]', lines[0])[0]) if len(lines) > 0 else pred
        final_del = int(re.findall(r'[01]', lines[1])[0]) if len(lines) > 1 else 1

        return {
            "full_thought": full_output,
            "pred": final_pred,
            "critique": critique,
            "del": final_del
        }

    except Exception as e:
        print(f"Error in sequential inference: {e}")
        return {"full_thought": str(e), "pred": None, "critique": None, "del": None}


In [8]:
def get_llm_ols(base_prompt, ols_pred1, ols_pred2, r2, model_id=MODEL):


    # round for ease of use
    ols_pred1 = round(ols_pred1, 2)
    ols_pred2 = round(ols_pred2, 2)
    r2 = round(r2, 2)

    # 1. Construct the one-shot prompt
    instructions = (
        f"An OLS model trained on a similar dataset predicts Person 1 would rate the first movie {ols_pred1} and the second movie {ols_pred2}."
        f"The OLS model has an R-squared of {r2}. "
        "\n\nRespond with two numbers ONLY on two lines. "
        "First, respond 1 (first movie) or 2 (second movie) based on your prediction of which movie Person 1 would rate higher. \n"
        "Second, respond 0 to recommend the movie you predicted, "
        "or 1 if you would like to delegate to Person 1 so they can choose the movie. Person 1 will always choose the movie they would rate higher \n"
        "A decision is considered 'correct' if it is the movie Person 1 would rate higher."
    )

    full_prompt = base_prompt + instructions

    # 2. Call the LLM

    # OpenAI version
    messages = [{"role": "user", "content": full_prompt}]
    response = openai.chat.completions.create(model=model_id, messages=messages)
    lines = response.choices[0].message.content.strip().split('\n')

    # gemini version
    # response = client.models.generate_content(
    #     model=model_id,
    #     contents=full_prompt
    # )
    # lines = response.text.strip().split('\n')

    # 3. Parse the response (extract numbers from the two lines)
    try:

        # Filter out empty lines if the LLM added extra spacing
        clean_lines = [line.strip() for line in lines if line.strip()]

        # save integer if there is one
        try:
            prediction = int(clean_lines[0])
            delegation = int(clean_lines[1])
        except (ValueError, TypeError, IndexError):
            prediction = None
            delegation = None

        return {
              "pred": prediction,
              "del": delegation
          }

    except (ValueError, IndexError):
        print(f"Error parsing LLM response: {response.text}")
        return None, None




In [15]:
import pandas as pd
import re
from concurrent.futures import ThreadPoolExecutor, as_completed



# THE MULTI-THREADED RUNNER
def call_llm(row_idx):


    # avoid rate limits
    # time.sleep(10)

    ########################
    # DECIDE method to run #
    ########################
    # method to run
    # TEMP_METHOD = "base"
    # TEMP_METHOD = "sft"
    # TEMP_METHOD = "ols"
    TEMP_METHOD = "auditor"

    # base
    if TEMP_METHOD == "base":

      # get prompt and answers
      base, answer_key, pred_key = create_prompt_base(row_idx)

      # unpack in a stable order
      titles = list(answer_key.keys())
      title_1, title_2 = titles[0], titles[1]

      # actual ratings
      rating_1 = answer_key[title_1]
      rating_2 = answer_key[title_2]
      human_response = 1 if rating_1 >= rating_2 else 2

      # OLS predictions
      ols_pred_1 = pred_key[title_1]
      ols_pred_2 = pred_key[title_2]

      # run LLM (OLS!)
      result = get_llm_base(base, model_id=MODEL)

      # grab metadata
      user_id = df.iloc[row_idx]['userId']
      movie_id_1 = df.loc[df['title'] == title_1, 'movieId'].iloc[0]
      movie_id_2 = df.loc[df['title'] == title_2, 'movieId'].iloc[0]

      # build output row (new object, not tied to df rows)
      row_copy = pd.Series({
          'userId': user_id,
          'movieId1': movie_id_1,
          'movieId2': movie_id_2,
          'rating_1': rating_1,
          'rating_2': rating_2,
          'ols_pred_1': ols_pred_1,
          'ols_pred_2': ols_pred_2,
          'human_response': human_response,
          'prompt': base,
          'llm_prediction': result['pred'],
          'llm_delegate': result['del'],
          'method': TEMP_METHOD
      })


      return row_copy


    # ols
    if TEMP_METHOD == "ols":

      # get prompt and answers
      base, answer_key, pred_key = create_prompt_base(row_idx)

      # unpack in a stable order
      titles = list(answer_key.keys())
      title_1, title_2 = titles[0], titles[1]

      # actual ratings
      rating_1 = answer_key[title_1]
      rating_2 = answer_key[title_2]
      human_response = 1 if rating_1 >= rating_2 else 2

      # OLS predictions
      ols_pred_1 = pred_key[title_1]
      ols_pred_2 = pred_key[title_2]

      # run LLM (OLS!)
      result = get_llm_ols(base, ols_pred_1, ols_pred_2, r2, model_id=MODEL)

      # grab metadata
      user_id = df.iloc[row_idx]['userId']
      movie_id_1 = df.loc[df['title'] == title_1, 'movieId'].iloc[0]
      movie_id_2 = df.loc[df['title'] == title_2, 'movieId'].iloc[0]

      # build output row (new object, not tied to df rows)
      row_copy = pd.Series({
          'userId': user_id,
          'movieId1': movie_id_1,
          'movieId2': movie_id_2,
          'rating_1': rating_1,
          'rating_2': rating_2,
          'ols_pred_1': ols_pred_1,
          'ols_pred_2': ols_pred_2,
          'human_response': human_response,
          'prompt': base,
          'llm_prediction': result['pred'],
          'llm_delegate': result['del'],
          'method': TEMP_METHOD
      })


      return row_copy



    # auditor
    if TEMP_METHOD == "auditor":

      # get prompt and answers
      base, answer_key, pred_key = create_prompt_base(row_idx)

      # unpack in a stable order
      titles = list(answer_key.keys())
      title_1, title_2 = titles[0], titles[1]

      # actual ratings
      rating_1 = answer_key[title_1]
      rating_2 = answer_key[title_2]
      human_response = 1 if rating_1 >= rating_2 else 2

      # OLS predictions
      ols_pred_1 = pred_key[title_1]
      ols_pred_2 = pred_key[title_2]

      # run LLM (SEQUENTIAL INFERENCE)
      result = get_sequential_inference(base, MODEL)

      # grab metadata
      user_id = df.iloc[row_idx]['userId']
      movie_id_1 = df.loc[df['title'] == title_1, 'movieId'].iloc[0]
      movie_id_2 = df.loc[df['title'] == title_2, 'movieId'].iloc[0]

      # build output row (new object, not tied to df rows)
      row_copy = pd.Series({
          'userId': user_id,
          'movieId1': movie_id_1,
          'movieId2': movie_id_2,
          'rating_1': rating_1,
          'rating_2': rating_2,
          'ols_pred_1': ols_pred_1,
          'ols_pred_2': ols_pred_2,
          'human_response': human_response,
          'prompt': base,
          'llm_prediction': result['pred'],
          'llm_delegate': result['del'],
          'full_thought': result.get('full_thought'),
          'critique': result.get('critique'),
          'method': TEMP_METHOD
      })

      return row_copy






In [16]:
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd

# get sample indices from test split only
N_SAMPLES = 200 # pretty fast with OpenAI
sampled_row_idxs = (
    df.loc[df['split'] == 'test']
    .sample(n=N_SAMPLES) # can't reproduce seed otherwise we'll always do the same ones
    .index
    .tolist()
)

# initialize results
results = []

# make call (call_llm should accept row_idx only)
with ThreadPoolExecutor(max_workers=5) as executor:
    futures = [executor.submit(call_llm, row_idx) for row_idx in sampled_row_idxs]
    for f in as_completed(futures):
        results.append(f.result())

# save
df_results = pd.DataFrame(results)


In [17]:
import datetime

# write file; add timestamp
df_results['timestamp'] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

###########################################
# MAKE SURE THE FILE MATCHES TEMP_METHOD! #
###########################################
# path = 'gs://exceptions-data/LLM Delegation/MovieLens/Results/base_' + MODEL + '.csv'
# path = 'gs://exceptions-data/LLM Delegation/MovieLens/Results/ols_' + MODEL + '.csv'
path = 'gs://exceptions-data/LLM Delegation/MovieLens/Results/auditor_' + MODEL + '.csv'

# Load, append, and re-save
try:
    df_existing = pd.read_csv(path)
    df_results = pd.concat([df_existing, df_results], ignore_index=True)
except FileNotFoundError:
    pass

df_results.to_csv(path, index=False)