In [None]:
# Install the third-party packages imported by the next cell; --quiet trims pip's output.
# NOTE(review): no versions are pinned here, so a fresh environment may resolve newer releases.
!pip install openai aiohttp tqdm nest_asyncio pandas --quiet

In [None]:
import os
import json
import asyncio
import aiohttp
import nest_asyncio
import pandas as pd
from tqdm import tqdm
from openai import AsyncOpenAI

# Patch asyncio so an event loop can be re-entered while the notebook's own
# loop is already running.
# NOTE(review): the cells below only use top-level `await` — confirm whether
# this patch is still required.
nest_asyncio.apply()


In [None]:
import getpass

# Prompt for the API key interactively (getpass does not echo the input), so
# the secret is never written into the notebook source.
# The key is stored in the process environment and also passed explicitly to
# the async client that the labelling function uses.
os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")
client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])

Enter your OpenAI API key: ··········


In [None]:
def clean_instruction(instr: str) -> str:
    """Return the part of an instruction that precedes the turn marker.

    Everything from the first occurrence of ``"Now it is your turn"`` onward
    is discarded, and the remaining text is stripped of leading and trailing
    whitespace. Falsy input (``None`` or an empty string) yields ``""``.
    """
    if instr:
        head, _marker, _rest = instr.partition("Now it is your turn")
        return head.strip()
    return ""

def build_bluff_prompt(instruction: str, llm_move: str) -> str:
    """Build the yes/no bluff-classification prompt for one record.

    Args:
        instruction: Raw instruction text; it is passed through
            ``clean_instruction`` before being embedded in the prompt.
        llm_move: The move text. Only its first whitespace-separated word is
            used. ``None``, an empty string, or whitespace-only text falls
            back to ``"Raise"``.

    Returns:
        The cleaned instruction followed by the stated move and the question
        asking for a 'Yes' or 'No' reply.
    """
    # `or ""` lets a missing move (None, e.g. a JSON null) take the same
    # fallback path as an empty string instead of raising AttributeError.
    move_words = (llm_move or "").split()
    first_word = move_words[0] if move_words else "Raise"
    clean_instr = clean_instruction(instruction)
    return f"""{clean_instr}

The player decided to: {first_word}.
Is this a bluff? Reply with only 'Yes' or 'No'."""


In [None]:
async def get_bluff_label(session, prompt, model="gpt-4o", max_retries=3):
    """Ask the chat model whether a move is a bluff and return a label.

    The request is sent through the module-level ``client``. The reply is
    reduced to ``"Yes"`` or ``"No"``: an exact match is returned as is, and
    any other reply becomes ``"Yes"`` if it contains "yes" (case-insensitive)
    and ``"No"`` otherwise.

    Args:
        session: Not used by this function; kept in the signature so existing
            callers that pass a session keep working.
        prompt: Text sent as the user message.
        model: Model name passed to the chat completions call.
        max_retries: Total number of attempts before giving up. Defaults to 3.

    Returns:
        ``"Yes"``, ``"No"``, or ``"Error"`` when every attempt raised.
    """
    for attempt in range(max_retries):
        try:
            response = await client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": "You are an expert poker analyst."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.2,
                max_tokens=5,
            )
            # A reply without text content makes .strip() raise, which is
            # handled below like any other failed attempt.
            result = response.choices[0].message.content.strip()
            if result not in ("Yes", "No"):
                # Fallback for replies such as "yes", "Yes." or a short
                # explanation: keep only whether "yes" appears in the text.
                result = "Yes" if "yes" in result.lower() else "No"
            return result
        except Exception:
            # Best-effort labelling: any failure is retried with exponential
            # backoff (1s, 2s, ...); no sleep after the final attempt.
            if attempt < max_retries - 1:
                await asyncio.sleep(2 ** attempt)
    # All attempts failed (or max_retries was 0).
    return "Error"

In [None]:
train_data_path = "/content/llm_raise.jsonl"

# The file holds one JSON object per line (JSONL). Blank lines are skipped
# because json.loads raises on them, which would abort the whole load.
with open(train_data_path, "r", encoding="utf-8") as f:
    train_data = [json.loads(line) for line in f if line.strip()]

print(f"Loaded {len(train_data)} records")

Loaded 70144 records


In [None]:
async def process_batch(batch, output_file, session):
    """Label every row in ``batch`` and append the labelled rows to ``output_file``.

    A prompt is built from each row's ``"instruction"`` and ``"llm_move"``
    values, all label requests for the batch are awaited together, and each
    row receives the result under the ``"is_bluff"`` key. Rows are updated in
    place and then appended to ``output_file`` as one JSON object per line.
    """
    pending = [
        get_bluff_label(
            session,
            build_bluff_prompt(item["instruction"], item["llm_move"]),
        )
        for item in batch
    ]
    labels = await asyncio.gather(*pending)

    # gather() returns results in the order the awaitables were passed, so
    # the labels line up with the rows of the batch.
    labelled = []
    for item, label in zip(batch, labels):
        item["is_bluff"] = label
        labelled.append(item)

    # Append mode: earlier batches already written to the file are preserved.
    with open(output_file, "a", encoding="utf-8") as out:
        out.writelines(
            json.dumps(rec, ensure_ascii=False) + "\n" for rec in labelled
        )

async def main(output_file="is_bluff_openai_gpt-4o.jsonl", batch_size=100):
    """Label all of ``train_data`` in batches and write the results as JSONL.

    Args:
        output_file: Path of the JSONL file that receives the labelled rows.
            Rows are appended by ``process_batch``, so running this twice
            against the same file produces duplicate records.
        batch_size: Number of rows labelled concurrently per batch; adjust
            based on rate limits.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
    """
    if batch_size < 1:
        # range() rejects a zero step and silently yields nothing for a
        # negative one, so fail early with a clear message.
        raise ValueError("batch_size must be a positive integer")

    # The aiohttp session is only handed down to process_batch.
    # NOTE(review): the labelling function ignores its session argument, so
    # this connector limit presumably does not throttle the API calls —
    # confirm before relying on it.
    connector = aiohttp.TCPConnector(limit=20)
    async with aiohttp.ClientSession(connector=connector) as session:
        for i in tqdm(range(0, len(train_data), batch_size)):
            batch = train_data[i:i + batch_size]
            await process_batch(batch, output_file, session)

    print(f"\n✅ Done! Results saved to {output_file}")

In [None]:
# Run the full labelling pass (top-level `await` is supported in notebook cells).
# The recorded run below took about 45 minutes for 702 batches.
await main()

100%|██████████| 702/702 [44:55<00:00,  3.84s/it]


✅ Done! Results saved to is_bluff_openai_gpt-4o.jsonl





In [None]:
# Reload the labelled records from disk and show how many rows got each label.
# Rows whose API calls all failed carry the label "Error" and would appear
# here as a separate value.
df = pd.read_json("is_bluff_openai_gpt-4o.jsonl", lines=True)
print("\nLabel distribution:")
print(df["is_bluff"].value_counts())


Label distribution:
is_bluff
Yes    36907
No     33237
Name: count, dtype: int64


In [None]:
import pandas as pd
import json

input_path = "is_bluff_openai_gpt-4o.jsonl"
output_path = "is_bluff_yes_gpt-4o.jsonl"

# Load JSONL file into a DataFrame
df = pd.read_json(input_path, lines=True)

# Keep only the rows labelled as a bluff (case-insensitive match on "yes")
df_yes = df[df["is_bluff"].str.lower() == "yes"]

# Save filtered rows to new JSONL file.
# to_dict(orient="records") yields one plain dict per row. Unlike iterrows(),
# it does not build a Series per row, which is slow and does not preserve
# dtypes across the row.
with open(output_path, "w", encoding="utf-8") as f:
    for record in df_yes.to_dict(orient="records"):
        f.write(json.dumps(record, ensure_ascii=False) + "\n")

print(f"✅ Saved {len(df_yes)} bluff rows to {output_path}")

✅ Saved 36907 bluff rows to is_bluff_yes_gpt-4o.jsonl
