# LLM 20 Questions Rule Based Baseline

## Prerequisites
Set accelerator to GPU T4

In [None]:
%%bash
# Create the submission directory (archived at the end of the notebook) and a
# scratch directory that receives the raw model download.
mkdir -p /kaggle/working/submission
mkdir -p /tmp/model
# Install the quantization/runtime helpers quietly, then upgrade transformers
# to the newest available release (-U).
pip install -q bitsandbytes accelerate
pip install -qU transformers

## Download model

### HuggingFace Login

Add your Hugging Face access token to the Kaggle secrets under the label `HF_TOKEN`. You can find the secrets panel in `Add-ons -> Secrets`.

In [None]:
from kaggle_secrets import UserSecretsClient
secrets = UserSecretsClient()

# Token handed to the Hugging Face download cell; stays None when no secret
# named "HF_TOKEN" can be read, in which case the download is attempted
# without authentication.
HF_TOKEN: str | None = None

try:
    HF_TOKEN = secrets.get_secret("HF_TOKEN")
except Exception as err:
    # Best effort: keep going without a token, but say so instead of hiding
    # the failure (a bare `except:` would also swallow KeyboardInterrupt).
    print(
        f"Could not read the HF_TOKEN secret ({type(err).__name__}: {err}); "
        "continuing without a Hugging Face token."
    )

### Select Model

Find your desired model from [HuggingFace Model Hub](https://huggingface.co/models) and use the model name in the next command.

Supported models:
- `LLAMA3 variants`
- `Phi-3 variants`
- `Qwen-2 variants`

In [None]:
# Hugging Face Hub repository id of the model that the download cell fetches.
repo_id = "abacusai/Llama-3-Smaug-8B"

### Download Model via HuggingFace
To reduce disk usage in the working directory, download the model to `/tmp/model`.

In [None]:
from huggingface_hub import snapshot_download
from pathlib import Path
import shutil

# Always start from an empty target directory so files from a previous
# download cannot linger next to the new snapshot.
g_model_path = Path("/tmp/model")
if g_model_path.exists():
    shutil.rmtree(g_model_path)
g_model_path.mkdir(parents=True)

# Gather the download arguments in one place before issuing the call.
download_kwargs = {
    "repo_id": repo_id,
    # Repository files matching this pattern are not downloaded.
    "ignore_patterns": "original*",
    "local_dir": g_model_path,
    # Resolves to None when the login cell was skipped and HF_TOKEN was never
    # defined in this kernel.
    "token": globals().get("HF_TOKEN", None),
}
snapshot_download(**download_kwargs)

In [None]:
# List the downloaded files to confirm the snapshot landed in /tmp/model.
!ls -l /tmp/model

### Save quantized model
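Now load the downloaded model into memory with 4-bit quantization and save the quantized weights.  
This saves storage in the submission.


Moreover, since the saved model has already been quantized, `main.py` does not need to configure `bitsandbytes` itself: it loads the saved model without passing a `BitsAndBytesConfig`.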

In [None]:
# load model on memory
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# Switch off the flash and memory-efficient scaled-dot-product attention
# backends before any model code runs.
torch.backends.cuda.enable_flash_sdp(False)
torch.backends.cuda.enable_mem_efficient_sdp(False)

# Directory filled by the download cell above.
downloaded_model = "/tmp/model"

tokenizer = AutoTokenizer.from_pretrained(downloaded_model)

# Quantize the weights to 4 bit on load; computations run in float16.
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)

model = AutoModelForCausalLM.from_pretrained(
    downloaded_model,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

In [None]:
# Store the quantized weights and the tokenizer files together in the
# submission directory, which is where main.py looks for them.
submission_model_dir = "/kaggle/working/submission/model"
model.save_pretrained(submission_model_dir)
tokenizer.save_pretrained(submission_model_dir)

In [None]:
# Drop the notebook's references to the model and tokenizer, then ask Python
# and the CUDA caching allocator to release the memory they held.
import gc
import torch

del model
del tokenizer
gc.collect()
torch.cuda.empty_cache()

## Agent

### Rules

#### BasicQuestions
In this notebook, the category is specified in the system prompt after the first question.  
So `BasicQuestions` contains a single question that determines the category.

#### PlaceQuestions
`PlaceQuestions` has 3 distinct questions that narrow down the kind of place.  
Once an answer is 'yes', the hard-coded questions are disabled.

#### ThingsQuestions
`ThingsQuestions` has 4 questions.
All of them are asked in order, regardless of the answers.


In [None]:
%%writefile submission/questions.py

# Opening question. RuleBasedQuestions.logAnswer switches to the "place"
# branch when the answer is a "yes" and to the "things" branch otherwise.
# NOTE(review): the branch is chosen right after the first answer and the
# follow-up lists are indexed with `count - len(BasicQuestions)`, so this list
# appears to be expected to hold exactly one entry - confirm before adding more.
BasicQuestions = [
    "is it a place?",
    # "is it a thing?",
]

# Asked in order while the answers are "no"; the first "yes" (or running out
# of entries) ends the scripted phase.
PlaceQuestions = [
    "is it a country?",
    "is it a city?",
    "is it a natural feature?",
    # "is it a mountain?",
    # "is it a river?",
]

# All asked in order, whatever the answers are; the scripted phase ends after
# the last entry.
ThingsQuestions = [
    "is it a living thing?",
    "is it edible?",
    "is it something that can be held in your hand?",
    "Does it require electricity to operate?",
    # "Would the keyword be included in the broad category of [Group]?",
]


### Rule Based Agent

In [None]:
%%writefile submission/rulebased.py

from questions import *


class RuleBasedQuestions:
    """Scripted opening questions for the asker.

    The sequence starts with the entries of ``BasicQuestions``; the first
    logged answer selects either the "place" or the "things" branch, whose
    questions are then served in order until the branch is finished.

    Attributes:
        log (list): One bool per logged answer (True unless it contained "no").
        count (int): Number of answers logged so far.
        enabled (bool): True while scripted questions are still available.
        category (str): Active branch: "basic", "place" or "things".
    """

    def __init__(self):
        """Start in the initial state (same values as after ``reset``)."""
        self.reset()

    def getQuestion(self):
        """Return the scripted question for the current position.

        Returns:
            str: The next question, or "No more available questions." once
            the scripted phase is disabled. None if ``category`` holds a value
            other than "basic", "place" or "things".
        """
        if not self.enabled:
            return "No more available questions."

        # Branch questions are indexed relative to the end of the basic list.
        branch_index = self.count - len(BasicQuestions)
        lookup = {
            "basic": (BasicQuestions, self.count),
            "place": (PlaceQuestions, branch_index),
            "things": (ThingsQuestions, branch_index),
        }
        if self.category not in lookup:
            return None
        questions, index = lookup[self.category]
        return questions[index]

    def logAnswer(self, answer):
        """Record an answer and advance the scripted sequence.

        Parameters:
        - answer (str): The answer text; it counts as "no" when the lowercased
          text contains "no", and as "yes" otherwise.

        Returns:
        None
        """
        answer_yes = "no" not in answer.lower()
        self.log.append(answer_yes)
        self.count += 1

        if self.category == "basic":
            # The answer to the opening question picks the branch.
            self.category = "place" if answer_yes else "things"
            return

        if self.category == "place":
            # A confirmed place type, or an exhausted list, ends the script.
            place_done = self.count == len(BasicQuestions) + len(PlaceQuestions)
            if answer_yes or place_done:
                self.enabled = False
            return

        if self.category == "things":
            # Thing questions are all asked, whatever the answers are.
            if self.count == len(BasicQuestions) + len(ThingsQuestions):
                self.enabled = False

    def reset(self):
        """Return to the initial state: empty log, first question, enabled."""
        self.category = "basic"
        self.enabled = True
        self.count = 0
        self.log = []


### Prompts

Prompts are adapted from the [Anthropic Prompt Library](https://docs.anthropic.com/en/prompt-library/library)

In [None]:
%%writefile submission/prompts.py

def asker_prompt(category, obs):
    """Build the chat messages used to generate the next yes/no question.

    Args:
        category: Category label inserted into the system prompt. main.py
            passes the category tracked by the rule-based questions.
        obs: Observation exposing parallel ``questions`` and ``answers``
            sequences for the game so far.

    Returns:
        list[dict]: A system message followed by the history, with every
        earlier question as an "assistant" turn and its answer as a "user"
        turn.
    """
    # Use the caller-supplied category, as guesser_prompt does. The previous
    # version ignored this parameter and read obs.category instead.
    ask_prompt = f"""You are a helpful AI assistant with expertise in playing 20 questions game.
Your task is to ask questions to the user to guess the word the user is thinking of.
The keyword is of category: "{category}"
Narrow down the possibilities by asking yes/no questions.
Think step by step and try to ask the most informative questions.
\n"""

    message = [{"role": "system", "content": ask_prompt}]

    # Chat history: the model asked the questions, the user answered them.
    for q, a in zip(obs.questions, obs.answers):
        message.append({"role": "assistant", "content": q})
        message.append({"role": "user", "content": a})

    return message


def guesser_prompt(category, obs):
    """Build the single-message chat used to generate a keyword guess.

    Args:
        category: Category label inserted into the prompt.
        obs: Observation exposing parallel ``questions`` and ``answers``
            sequences for the game so far.

    Returns:
        list[dict]: One "system" message holding the instructions, the
        question/answer transcript and the request for a one-word guess.
    """
    # Transcript of the game: one "Question/Answer" pair per completed round.
    chat_history = "".join(
        f"""Question: {q}\nAnswer: {a}\n"""
        for q, a in zip(obs.questions, obs.answers)
    )

    # Instructions that precede the transcript.
    guess_prompt = f"""You are a helpful AI assistant with expertise in playing 20 questions game.
Your task is to guess the word the user is thinking of.
The keyword is of category: "{category}"
Think step by step.
\n"""

    prompt = (
            guess_prompt + f"""so far, the current state of the game is as following:\n{chat_history}
        based on the conversation, can you guess the word, please give only the word, no verbosity around"""
    )

    return [{"role": "system", "content": prompt}]


def answerer_prompt(obs):
    """Build the chat messages used to answer the latest question.

    Args:
        obs: Observation exposing ``keyword``, ``category``, ``questions``
            (at least one entry) and ``answers``.

    Returns:
        list[dict]: A system message revealing the keyword and category,
        followed by the dialogue with questions as "user" turns and earlier
        answers as "assistant" turns, ending on a question.
    """
    prompt = f"""You are a helpful AI assistant with expertise in playing 20 questions game.
Your task is to answer the questions of the user to help him guess the word you're thinking of.
Your answers must be 'yes' or 'no'.
The keyword is: "{obs.keyword}", it is of category: "{obs.category}"
Provide accurate answers to help the user to guess the keyword.
"""

    # The dialogue always opens with the first question.
    message = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": obs.questions[0]},
    ]

    # Every earlier answer is followed by the question that came after it.
    for earlier_answer, next_question in zip(obs.answers, obs.questions[1:]):
        message.append({"role": "assistant", "content": earlier_answer})
        message.append({"role": "user", "content": next_question})

    return message



### Agent

Rule based mode is set to True by default.  
If you want to disable rule based mode, 
uncomment `self.RuleBasedAgent.enabled = False` in `__init__` method.

In [None]:
%%writefile submission/main.py
# Comment out the %%writefile magic above before running the simulation cells

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
import sys

from prompts import *
from rulebased import RuleBasedQuestions

# Switch off the flash and memory-efficient scaled-dot-product attention
# backends before the model is loaded.
torch.backends.cuda.enable_flash_sdp(False)
torch.backends.cuda.enable_mem_efficient_sdp(False)


# When the Kaggle simulation directory exists the model is read from there;
# otherwise the copy saved in the notebook's working directory is used.
KAGGLE_AGENT_PATH = "/kaggle_simulations/agent/"
MODEL_PATH = (
    os.path.join(KAGGLE_AGENT_PATH, "model")
    if os.path.exists(KAGGLE_AGENT_PATH)
    else "/kaggle/working/submission/model"
)


tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, device_map="auto", trust_remote_code=True)

# Token ids at which generation stops, starting with the tokenizer's own
# end-of-sequence token.
terminators = [tokenizer.eos_token_id]

# Additional end-of-turn markers used by chat models, in order:
# "<|eot_id|>" (Llama 3), "<|end|>" (Phi-3), "<end_of_turn>" (Gemma).
potential_terminators = ["<|eot_id|>", "<|end|>", "<end_of_turn>"]

for token in potential_terminators:
    token_id = tokenizer.convert_tokens_to_ids(token)
    # A token missing from the vocabulary resolves to None or, when the
    # tokenizer defines one, to the unknown-token id. Neither is an
    # end-of-turn marker, so both are skipped.
    if token_id is None or token_id == tokenizer.unk_token_id:
        continue
    # Skip ids already registered (e.g. when the eos token is one of the markers).
    if token_id not in terminators:
        terminators.append(token_id)

def generate_response(chat):
    """Generate the model's reply to a chat and return it as plain text.

    Args:
        chat: List of ``{"role": ..., "content": ...}`` messages, rendered
            with the tokenizer's chat template.

    Returns:
        str: The decoded continuation (at most 24 new tokens) with special
        tokens removed and the prompt tokens stripped off.
    """
    prompt_ids = tokenizer.apply_chat_template(
        chat,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    generated = model.generate(
        prompt_ids,
        max_new_tokens=24,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=terminators,
    )

    # The first returned sequence repeats the prompt; keep only what follows it.
    prompt_length = prompt_ids.shape[-1]
    completion_ids = generated[0][prompt_length:]

    return tokenizer.decode(completion_ids, skip_special_tokens=True)


class Robot:
    """Plays the three roles of the game: asker, guesser and answerer.

    The asker serves scripted questions while ``RuleBasedAgent`` is enabled
    and falls back to the language model afterwards; guesses and answers are
    always produced by the language model.
    """

    def __init__(self):
        """Create the rule-based question tracker used by asker and guesser."""
        self.RuleBasedAgent = RuleBasedQuestions()

        # To disable the rule-based agent, uncomment the following line
        # self.RuleBasedAgent.enabled = False

    def on(self, mode, obs):
        """Dispatch one turn to the handler for ``mode``.

        Args:
            mode: One of "asking", "guessing" or "answering".
            obs: Observation forwarded unchanged to the handler.

        Returns:
            str: The question, the guess, or the answer. Answers are always
            exactly "yes" or "no".
        """
        assert mode in [
            "asking", "guessing", "answering",
        ], "mode can only take one of these values: asking, answering, guessing"

        if mode == "asking":
            return self.asker(obs)
        if mode == "guessing":
            return self.guesser(obs)

        # Answering: the reply is "no" only when it mentions "no" without
        # mentioning "yes"; anything else, including unclear text, is "yes".
        reply = self.answerer(obs).lower()
        if "yes" not in reply and "no" in reply:
            return "no"
        return "yes"

    def asker(self, obs):
        """Return the next question: scripted while enabled, generated after."""
        rules = self.RuleBasedAgent
        if rules.enabled:
            return rules.getQuestion()

        messages = asker_prompt(rules.category, obs)
        return generate_response(messages)

    def guesser(self, obs):
        """Log the latest answer while the script is active, then guess."""
        rules = self.RuleBasedAgent
        if rules.enabled:
            # Advances the scripted sequence using the most recent answer.
            rules.logAnswer(obs.answers[-1])

        messages = guesser_prompt(rules.category, obs)
        return generate_response(messages)

    def answerer(self, obs):
        """Return the model's raw reply to the latest question."""
        messages = answerer_prompt(obs)
        return generate_response(messages)


# Single module-level instance reused by every agent() call, so the state of
# the rule-based questions is kept from one turn to the next.
robot = Robot()


def agent(obs, cfg):
    """Entry point called for each turn: route it to the shared robot.

    Args:
        obs: Observation whose ``turnType`` is "ask", "guess" or "answer";
            it is forwarded unchanged to the robot.
        cfg: Unused.

    Returns:
        str: The robot's response, or "yes" when the turn type is not
        recognised or the response is missing or at most one character long.
    """
    modes = {"ask": "asking", "guess": "guessing", "answer": "answering"}
    mode = modes.get(obs.turnType)

    # An unrecognised turn type used to leave `response` unbound and raise
    # UnboundLocalError; it now falls through to the default below.
    response = robot.on(mode=mode, obs=obs) if mode is not None else None

    if response is None or len(response) <= 1:
        response = "yes"

    return response


## Simulation

### Install pygame

In [None]:
# !pip install pygame

To run the game, the agent must be defined in the notebook. Before executing the next cell, run the `main.py` cell above with its `%%writefile submission/main.py` line commented out.

In [None]:
# %%time

# from kaggle_environments import make
# env = make("llm_20_questions", debug=True)
# game_output = env.run(agents=[agent, agent, agent, agent])

In [None]:
# env.render(mode="ipython", width=600, height=500)

## Submit Agent

In [None]:
# Install pigz and pv, which the archive command in the next cell uses; the
# installer's standard output is discarded.
!apt install pigz pv > /dev/null

In [None]:
# Archive the contents of /kaggle/working/submission into submission.tar.gz,
# compressing with pigz and piping the compressed stream through pv.
!tar --use-compress-program='pigz --fast --recursive | pv' -cf submission.tar.gz -C /kaggle/working/submission .