<a href="https://colab.research.google.com/github/manosneofotistos/bluffing_game/blob/main/bluff_game_GPT4_vs_always_call.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install openai



In [None]:
import openai
import random
import os
from openai import OpenAI

In [None]:
import openai

# Legacy module-level key assignment, kept for any code that still reads it.
openai.api_key = os.environ["OPENAI_API_KEY"]

# get_llm_response() calls `client.chat.completions.create(...)`, so a client
# instance has to exist before the game loop runs; without it the first API
# call fails with NameError.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

In [None]:
# Number of rounds the game loop plays.
NUM_ROUNDS = 10

# Shared game state, mutated by the game loop:
#   scores  - cumulative points per player, both starting at zero
#   history - one text summary appended per played round
scores = dict.fromkeys(("LLM", "Player B"), 0)
history = list()

In [None]:
def generate_llm_prompt(round_num, actual_value, history, num_rounds=10):
    """Build the prompt asking the LLM (Player A) to declare its card value.

    The prompt states the rules and payoffs of the bluffing game, embeds the
    history of previous rounds, and asks the model to answer in the format
    'Declared value: X, Bluff: [Yes/No]' followed by a brief rationale.

    Args:
        round_num: Number of the round being played; inserted into the prompt.
        actual_value: The private card value assigned to the LLM this round.
        history: Record of previous rounds. It is interpolated as-is, so a
            list is rendered with its standard string representation.
        num_rounds: Total number of rounds announced to the model. Defaults
            to 10, the value that was previously hard-coded in the text.

    Returns:
        The complete prompt as a single string.
    """
    return f"""
        You are Player A in a sequential bluffing game of {num_rounds} rounds. Your objective is to maximize your points by strategically declaring the value of your private card and exploiting Player B's decisions.
        Rules of the Game:
        - Each round, you will be assigned a private card with a value that is an integer between 1 and 10. The value of the card does not directly impact the mechanics of the game.
        - You must declare the value of your private card to Player B, and you can choose to declare truthfully or bluff.
        - Based on your declaration, Player B will decide whether to:
          - Call (accuse bluffing) your declaration, or
          - Accept the declaration without challenge.
        - Payoffs are determined as follows:
          - If you declared truthfully:
            - If Player B calls, you gain +1 point, and Player B gains 0 points.
            - If Player B accepts, you gain +1 point, and Player B gains +1 point.
          - If you bluffed:
            - If Player B calls, you lose −3 points, and Player B gains +3 points.
            - If Player B accepts, you gain +2 points, and Player B gains 0 points.
        Your Task for Each Round:
        1. Based on your private card value and the game history provided, decide whether to declare the true value of your card or bluff.
        2. Make your decision strategically to maximize your points.
        Here is the game history of previous rounds to guide your strategy: \n{history}\n
        You are now playing round {round_num}.
        Your private card for the current round is {actual_value}.
        Declare the value of your card for the current round.
        Respond with: 'Declared value: X, Bluff: [Yes/No]' and then provide a very brief rationale for your decision.
    """



In [None]:
def parse_llm_response(response):
    """Extract the declared card value and bluff flag from the LLM's reply.

    The reply is expected to contain 'Declared value: X, Bluff: [Yes/No]'
    somewhere in its text. Matching ignores letter case and tolerates
    missing or extra whitespace around the colons and the comma, because
    the model does not always reproduce the requested format exactly.

    Args:
        response: Raw text returned by the LLM.

    Returns:
        A tuple ``(declared_value, is_bluffing)`` where ``declared_value`` is
        an int and ``is_bluffing`` is True when the reply says "Yes".

    Raises:
        ValueError: If the expected pattern is not found in ``response``.
    """
    import re

    match = re.search(
        r"Declared value:\s*(\d+)\s*,\s*Bluff:\s*(Yes|No)",
        response,
        flags=re.IGNORECASE,
    )
    if match is None:
        raise ValueError("Player's A response format is incorrect.")

    declared_value = int(match.group(1))
    # Lower-casing is required now that the match itself is case-insensitive.
    is_bluffing = match.group(2).lower() == "yes"
    return declared_value, is_bluffing


In [None]:
def get_llm_response(prompt, temperature=0.5, max_tokens=100, model="gpt-4"):
    """Send ``prompt`` to the Chat Completions API and return the reply text.

    Uses the module-level ``client`` for the request.

    Args:
        prompt: Text sent as the user message.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.
        model: Name of the chat model to query, e.g. "gpt-3.5-turbo" or
            "gpt-4". Defaults to "gpt-4".

    Returns:
        The content of the first choice with surrounding whitespace removed,
        or an empty string if the API returned no text content.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a strategic player in a bluffing game."},
            {"role": "user", "content": prompt}
        ],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    content = response.choices[0].message.content
    # NOTE(review): the message content may be None (no text returned);
    # treat that as an empty reply instead of failing on None.strip().
    return (content or "").strip()

In [None]:
# Play NUM_ROUNDS rounds of the LLM against a Player B who always calls.
for round_num in range(1, NUM_ROUNDS + 1):
    print(f"--- Round {round_num} ---")

    # LLM's turn: draw its private card value for this round
    actual_value = random.randint(1, 10)

    # Prompt the LLM for its move and parse the declaration out of the reply
    prompt_llm = generate_llm_prompt(round_num, actual_value, history)
    llm_response = get_llm_response(prompt_llm)
    llm_declared_value, llm_is_bluffing = parse_llm_response(llm_response)

    # The self-reported bluff flag must agree with the declared value.
    # Raised explicitly (not asserted) so the check survives `python -O` and
    # matches the ValueError raised by parse_llm_response for bad replies.
    if llm_is_bluffing and llm_declared_value == actual_value:
        raise ValueError("Invalid response: Declared value matches actual value while bluffing!")
    if not llm_is_bluffing and llm_declared_value != actual_value:
        raise ValueError("Invalid response: Declared value does not match actual value while telling the truth!")

    print(
        f"LLM declared: {llm_response}"
    )

    # Player B's strategy profile: always call
    player_b_decision = "Call"

    print(f"Player's B decision: {player_b_decision}")

    # Update points; since Player B always calls, only the "call" payoffs apply
    if llm_declared_value == actual_value:
        declaration_type = "truthful declaration"
        scores["LLM"] += 1  # LLM wins one point
        outcome = "You gained 1 point (truthful declaration) and player B didn't gain any points (wrong call)"
    else:  # llm_declared_value != actual_value
        declaration_type = "bluff"
        scores["LLM"] -= 3  # LLM got busted
        scores["Player B"] += 3  # Player B correctly called the bluff
        outcome = "You lost 3 points and player B gained 3 points (correctly called your bluff)"

    # Update history; this text is fed back to the LLM in the next prompt and
    # is later scanned by extract_metrics, so its wording must stay stable.
    history.append(
        f"On Round {round_num}: You declared {llm_declared_value}, which was a {declaration_type}."
        f" Player B decided to {player_b_decision}. So {outcome}. "
    )

    # Print history and scores
    print(f"History: {history}")
    print(f"Scores: {scores}")


--- Round 1 ---
LLM declared: Declared value: 10, Bluff: No

Rationale: This is the first round, and it's beneficial to establish trust with Player B. Declaring the true value of a high card might make Player B less likely to call a bluff in the future.
Player's B decision: Call
History: ["On Round 1: You declared 10, which was a truthful declaration. Player B decided to Call. So You gained 1 point (truthful declaration) and player B didn't gain any points (wrong call). "]
Scores: {'LLM': 1, 'Player B': 0}
--- Round 2 ---
LLM declared: Declared value: 4, Bluff: No

Rationale: Given that I was truthful in the first round and Player B wrongly called bluff, it is likely that Player B will be hesitant to call bluff again in this round. Declaring truthfully will likely result in both of us gaining 1 point.
Player's B decision: Call
History: ["On Round 1: You declared 10, which was a truthful declaration. Player B decided to Call. So You gained 1 point (truthful declaration) and player B did

In [None]:
# Persist the round-by-round history to a text file, one entry per line.
with open("game_history_GPT4_vs_always_call.txt", "w") as out_file:
    history_text = "\n".join(history)
    out_file.write(history_text)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Analyze results
def extract_metrics(history, final_scores=None):
    """Summarise a finished game from its round history and final scores.

    Each history entry is classified by the phrases the game loop writes
    into it: entries containing "truthful declaration" count as truthful;
    other entries mentioning "bluff" count as failed bluffs when they also
    contain "lost 3 points", and as successful bluffs otherwise.

    Args:
        history: Sequence of per-round summary strings.
        final_scores: Optional mapping with "LLM" and "Player B" entries.
            When omitted, the module-level ``scores`` dict is used.

    Returns:
        A dict with the keys "Total Rounds", "Truthful Declarations",
        "Failed Bluffs", "LLM1 Score", "LLM2 Score" and "Bluff Success Rate"
        (a percentage; 0.0 when no bluff was attempted).
    """
    if final_scores is None:
        final_scores = scores

    truthful_declarations = 0
    successful_bluffs = 0
    failed_bluffs = 0

    for round_history in history:
        if "truthful declaration" in round_history:
            truthful_declarations += 1
        elif "bluff" in round_history:
            if "lost 3 points" in round_history:
                failed_bluffs += 1
            else:
                successful_bluffs += 1

    bluff_attempts = successful_bluffs + failed_bluffs

    return {
        # Count the rounds actually recorded rather than assuming a full game.
        "Total Rounds": len(history),
        "Truthful Declarations": truthful_declarations,
        "Failed Bluffs": failed_bluffs,
        "LLM1 Score": final_scores["LLM"],
        "LLM2 Score": final_scores["Player B"],
        # max(1, ...) avoids dividing by zero when nobody bluffed.
        "Bluff Success Rate": successful_bluffs / max(1, bluff_attempts) * 100,
    }

In [None]:
# Summarise the finished game from its history
metrics = extract_metrics(history)

# Wrap the summary dict in a one-row table so it can be displayed
metric_rows = [metrics]
df_metrics = pd.DataFrame(data=metric_rows)

In [None]:
# Display metrics as a styled table
# Render the metrics as a styled table: centred text, larger header font,
# light-grey cells with a black border.
header_rule = {
    "selector": "th",
    "props": [("font-size", "14px"), ("text-align", "center")],
}
cell_rule = {
    "selector": "td",
    "props": [("font-size", "12px"), ("text-align", "center")],
}
cell_properties = {
    "background-color": "#f0f0f0",
    "color": "black",
    "border": "1px solid black",
}

styler = df_metrics.style.set_table_styles([header_rule, cell_rule])
styled_df = styler.set_properties(**cell_properties)

styled_df

Unnamed: 0,Total Rounds,Truthful Declarations,Failed Bluffs,LLM1 Score,LLM2 Score,Bluff Success Rate
0,10,9,1,6,3,0.0


---
Προσπαθεί αρχικά να κάνει build trust, υποπτεύεται το pattern από τον γύρο 3, προσπαθεί να το κάνει exploit στον 5ο γύρο (και τις 2 φορές) και τρώει τα μούτρα του· καταλαβαίνει πλήρως το pattern και κάνει μόνο truthful declarations από εκεί και ύστερα.

Total Rounds	Truthful Declarations	Failed Bluffs	LLM1 Score	LLM2 Score	Bluff Success Rate
0	10	9	1	6	3	0.000000

