In [71]:
import pandas as pd
import os
import requests
from dotenv import load_dotenv
import gsm_prompts

In [54]:
# Show the kernel's working directory: the relative "datasets/..." paths read in later cells resolve against it.
os.getcwd()

'/home/sam/code/cohere/cohere-reasoning-v5/gsm8k'

In [67]:
# Load variables from a local .env file into the process environment (python-dotenv).
# The HEADERS cell below reads OPENROUTER_API_KEY via os.getenv, so this must run first.
load_dotenv()

True

In [68]:
# Read the API key once so a missing key is reported here, at configuration time,
# rather than surfacing later as an opaque authentication failure from the API.
_openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
if not _openrouter_api_key:
    print(
        "WARNING: OPENROUTER_API_KEY is not set; requests will be sent with an "
        "invalid Authorization header. Check your .env file and re-run load_dotenv()."
    )

# HTTP headers shared by every OpenRouter request made in this notebook.
HEADERS = {
    "Authorization": f"Bearer {_openrouter_api_key}",
    "Content-Type": "application/json",
}

# Chat-completions endpoint used for both completion and verification calls.
OPENROUTER_COMPLETION_URL = "https://openrouter.ai/api/v1/chat/completions"


In [70]:
def load_failed_rows(csv_path: str) -> pd.DataFrame:
    """Load a completions CSV and keep only the rows whose completion failed.

    A row counts as failed when its ``perturbed_stub_lm_completion`` column holds
    the ``<RETRIES FAILED>`` sentinel. The distinct completion models among the
    failed rows and the number of failed rows are printed as a quick summary.

    Args:
        csv_path: Path of the completions CSV to read.

    Returns:
        The failed rows, keeping their original index labels.
    """
    completions = pd.read_csv(csv_path)
    failed = completions[completions["perturbed_stub_lm_completion"] == "<RETRIES FAILED>"]
    print(failed["completion_model"].unique())
    print(f"Found {len(failed)} rows with retries failed")
    return failed


# MEND THESE MANUALLY!
off_policy_failed = load_failed_rows("datasets/gsm8k_completions_off_policy.csv")

# MEND THESE MANUALLY!
on_policy_failed = load_failed_rows("datasets/gsm8k_completions_on_policy.csv")

# Later cells operate on `df`; as before, it ends up holding the on-policy failures.
df = on_policy_failed

df.head(1)

['qwen/qwq-32b-preview']
Found 13 rows with retries failed
['qwen/qwq-32b-preview']
Found 29 rows with retries failed


Unnamed: 0,problem_id,problem,answer,stub,perturb_model,perturb_model_provider,completion_model,completion_model_provider,perturbed_stub_lm,perturbed_stub_lm_completion,perturbed_stub_lm_solution_verified
2543,43,"For $0 \le x \le 40$ and $0 \le y \le 50,$ fin...",70 \sqrt{2},So I've got this problem here that I need to s...,meta-llama/llama-3.1-405b-instruct,DeepInfra,qwen/qwq-32b-preview,DeepInfra,So I've got this problem here that I need to s...,<RETRIES FAILED>,False


In [78]:
# Get completion
from json import JSONDecodeError


def _get_completion(problem: str, stub: str):
    response = requests.post(
        OPENROUTER_COMPLETION_URL,
        headers=HEADERS,
        json={
            "model": "qwen/qwq-32b-preview",
            "messages": [
                {"role": "user", "content": gsm_prompts.get_solution_prompt(problem)},
                {"role": "assistant", "content": stub}
            ],
            "provider": {
                "order": [
                    "DeepInfra"
                ],
                "allow_fallbacks": False,
            },
            "temperature": 0.2,
            "top_p": 0.8
        }
    )
    try:
        return response.json()["choices"][0]["message"]["content"]
    except JSONDecodeError as e:
        print(f"JSON decode error at position {e.pos}: {e.msg}")
        print(f"Character at error position: {repr(response.text[e.pos:e.pos+10])}")
        return "<JSON_PARSE_ERROR>"

# Get verification
def _get_verification(problem: str, answer: int, solution: str):
    response = requests.post(
        OPENROUTER_COMPLETION_URL,
        headers=HEADERS,
        json={
            "model": "meta-llama/llama-3.1-405b-instruct",
            "messages": [
                {"role": "user", "content": gsm_prompts.get_verification_prompt(problem, answer, solution)},
            ],
            "provider": {
                "order": [
                    "DeepInfra"
                ],
                "allow_fallbacks": False,
            },
            "temperature": 0,
            "top_k": 0
        }
    )
    print(f"Verification response.status_code: {response.status_code}")
    response = response.json()["choices"][0]["message"]["content"]
    return response.lower() == "correct"


In [79]:
def repair_row(row: pd.Series):
    """Regenerate the completion for one failed row and re-verify the full solution.

    Reads ``problem``, ``perturbed_stub_lm`` and ``answer`` from ``row``, requests
    a new completion for the perturbed stub, and then asks the verifier to judge
    the stub concatenated with that completion.

    If the completion call reports ``"<JSON_PARSE_ERROR>"`` the verifier is not
    called: judging "stub + error sentinel" would spend an API call on text that
    is not a real solution, and could record a spurious positive verdict.

    Args:
        row: One row of the completions frame.

    Returns:
        A dict with the new ``perturbed_stub_lm_completion`` (str) and
        ``perturbed_stub_lm_solution_verified`` (bool) values for the row.
    """
    problem = row["problem"]
    perturbed_stub = row["perturbed_stub_lm"]
    answer = row["answer"]

    # Get completion
    completion: str = _get_completion(problem, perturbed_stub)

    # Get verification (skipped when the completion itself failed)
    if completion == "<JSON_PARSE_ERROR>":
        verification: bool = False
    else:
        solution = f"{perturbed_stub}{completion}"
        verification = _get_verification(problem, answer, solution)

    return {
        "perturbed_stub_lm_completion": completion,
        "perturbed_stub_lm_solution_verified": verification
    }


In [80]:
# Dry run on a two-row copy so the loaded frame is left untouched.
test_df = df.head(2).copy()
print("Before Update")
print(test_df[["perturbed_stub_lm_completion", "perturbed_stub_lm_solution_verified"]])

# repair_row returns a dict per row. Without result_type="expand", apply() yields
# an unnamed Series of dicts, which shares no column labels with test_df, so
# update() would silently change nothing. Expanding turns the dict keys into
# columns that update() can align on.
repaired = test_df.apply(repair_row, axis=1, result_type="expand")
test_df.update(repaired)
print("After update:")
print(test_df[["perturbed_stub_lm_completion", "perturbed_stub_lm_solution_verified"]])



Before Update
     perturbed_stub_lm_completion  perturbed_stub_lm_solution_verified
2543             <RETRIES FAILED>                                False
2580             <RETRIES FAILED>                                False
Completion response.status_code: 200


JSONDecodeError: Expecting value: line 11855 column 1 (char 65197)

In [60]:
# Reload the off-policy completions and keep only the rows still carrying the
# "<RETRIES FAILED>" sentinel; `df` is rebound to that subset.
df = pd.read_csv("datasets/gsm8k_completions_off_policy.csv").loc[
    lambda frame: frame["perturbed_stub_lm_completion"] == "<RETRIES FAILED>"
]
# Which completion models produced the failed rows.
print(df["completion_model"].unique())
df.head(1)

['qwen/qwq-32b-preview']


Unnamed: 0,problem_id,problem,answer,stub,stub_and_perturb_model,stub_and_perturb_model_provider,completion_model,completion_model_provider,perturbed_stub_lm,perturbed_stub_lm_completion,perturbed_stub_lm_solution_verified
2604,104,The graphs of $x^2 + y^2 + 6x - 24y + 72 = 0$ ...,40,## Step 1: The problem asks us to find the su...,meta-llama/llama-3.1-405b-instruct,DeepInfra,qwen/qwq-32b-preview,DeepInfra,## Step 1: The problem asks us to find the su...,<RETRIES FAILED>,False


In [None]:
# REMEMBER TO RUN THE REPAIR (repair_row) ON COPIES OF THE DATA FIRST TO MAKE SURE IT WORKS!
