This notebook illustrates the agent creation process for the **LLM 20 Questions** competition. Running this notebook produces a `submission.tar.gz` file. You may submit this file directly from the **Submit to competition** heading to the right. Alternatively, from the notebook viewer, click the *Output* tab, then find and download `submission.tar.gz`. Click **Submit Agent** at the upper-left of the competition homepage to upload your file and make your submission.

In [1]:
%%bash
# Build the submission's vendored library directory (submission/lib).
cd /kaggle/working
# Install the extra packages into submission/lib (-t sets the target directory)
# so they are packed into the submission archive; main.py puts this directory on sys.path.
pip install -q -U -t /kaggle/working/submission/lib immutabledict sentencepiece
# Fetch the Gemma PyTorch sources; only stdout is discarded here.
git clone https://github.com/google/gemma_pytorch.git > /dev/null
# Create the package directory (-p: no error if it already exists).
mkdir -p /kaggle/working/submission/lib/gemma/
# Move the `gemma` package files next to the other vendored libraries
# (-n: never overwrite a file that is already there).
mv -n /kaggle/working/gemma_pytorch/gemma/* /kaggle/working/submission/lib/gemma/
echo "done"

Cloning into 'gemma_pytorch'...


done


In [2]:
%%writefile submission/main.py
import os
import sys

# **IMPORTANT:** the vendored libraries live in a different directory depending
# on where this file runs (simulation environment vs. notebook). Put the right
# `lib` directory first on sys.path before importing anything from it.
KAGGLE_AGENT_PATH = "/kaggle_simulations/agent/"
sys.path.insert(
    0,
    os.path.join(KAGGLE_AGENT_PATH, 'lib')
    if os.path.exists(KAGGLE_AGENT_PATH)
    else "/kaggle/working/submission/lib",
)
    
import contextlib
from pathlib import Path

import torch
from gemma.config import get_config_for_7b, get_config_for_2b
from gemma.model import GemmaForCausalLM
import re

# Location of the Gemma weights: inside the agent directory when it exists
# (the weights are packed into the submission archive under this relative
# path), otherwise under /kaggle/input when running in the notebook.
WEIGHTS_PATH = (
    os.path.join(KAGGLE_AGENT_PATH, "gemma/pytorch/7b-it-quant/2")
    if os.path.exists(KAGGLE_AGENT_PATH)
    else "/kaggle/input/gemma/pytorch/7b-it-quant/2"
)

# os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"

# Helper used while loading the model: temporarily changes torch's default dtype
@contextlib.contextmanager
def _set_default_tensor_type(dtype: torch.dtype):
  """Temporarily make `dtype` the default torch dtype.

  The dtype that was the default when the context was entered is restored on
  exit, including when the body raises, so a failed model load cannot leave
  the process with a changed global default.

  Args:
    dtype: dtype to pass to `torch.set_default_dtype` for the duration of the
      `with` block.
  """
  previous_dtype = torch.get_default_dtype()
  torch.set_default_dtype(dtype)
  try:
    yield
  finally:
    # Always undo the global change, even if the body raised.
    torch.set_default_dtype(previous_dtype)
    
### Utility functions
def extract_bold_text(text):
  """Return the first span of `text` wrapped in `*` or `"` characters.

  If no such span exists, the whole text is used instead. When the selected
  text contains a colon, only the stripped segment between the first and the
  second colon is returned (so a "Label: value" span yields "value").

  Args:
    text: raw string to search.

  Returns:
    The extracted string.
  """
  # Only `*` and `"` are delimiters. Inside a character class `|` is a literal
  # pipe, not alternation, so it must not be listed here; otherwise any pipe
  # in the text is treated as a delimiter and cuts the extracted span short.
  pattern = r"[*\"]+(.*?)[*\"]+"
  matches = re.findall(pattern, text)
  to_ret = matches[0] if matches else text
  parts = to_ret.split(":")
  return parts[1].strip() if len(parts) > 1 else to_ret

class BaseAgent:
    """Common base for the game agents: owns the Gemma model and the prompt/response plumbing."""

    # Boilerplate fragments removed from the raw model output, applied in this
    # order, before the wrapped answer is extracted. "**Anwser:**" (sic) is kept
    # for backward compatibility; the correctly spelled "**Answer:**" is also
    # stripped, because otherwise the label itself is picked up as the wrapped
    # text and the extracted answer comes out empty.
    _BOILERPLATE = (
        "**Sure, here's the question:**",
        "Sure, here's the conversion:",
        "Sure, ",
        "here's the ",
        "question:",
        "conversion:",
        "**Guess:**",
        "**Anwser:**",
        "**Answer:**",
        "**Question:**",
    )

    def __init__(self, variant, device_str):
        """Build the model for `variant` and load its checkpoint onto `device_str`.

        Args:
            variant: model variant name. The 2b config is used when it contains
                "2b" (otherwise the 7b config), and quantization is enabled
                when it contains "quant". It also names the checkpoint file
                (`gemma-{variant}.ckpt`).
            device_str: device string passed to `torch.device`.
        """
        self._variant = variant
        self._device = torch.device(device_str)
        # NOTE: the attribute name is misspelled ("weigts"); kept as-is so any
        # code that reads it keeps working.
        self._weigts_path = WEIGHTS_PATH

        model_config = get_config_for_2b() if "2b" in self._variant else get_config_for_7b()
        model_config.tokenizer = os.path.join(self._weigts_path, "tokenizer.model")
        model_config.quant = "quant" in variant

        # Create and load the model while the config's dtype is the torch default.
        with _set_default_tensor_type(model_config.get_dtype()):
            self._model = GemmaForCausalLM(model_config)
            ckpt_path = os.path.join(self._weigts_path, f'gemma-{variant}.ckpt')
            self._model.load_weights(ckpt_path)
            self._model = self._model.to(self._device).eval()

    def get_response_from_llm(self, obs, text):
        """Send `text` to the model as a single user turn and return the cleaned reply.

        Args:
            obs: current observation (not used here; kept for the call signature).
            text: prompt to place inside the user turn of the chat template.

        Returns:
            The result of `extract_bold_text` on the model output after the
            known boilerplate fragments have been removed. If generation raises
            `RuntimeError`, the fallback output "**no**" is used instead.
        """
        CHAT_TEMPLATE = "<start_of_turn>user\n{prompts}<end_of_turn>\n<start_of_turn>model\n"

        sampler_kwargs = {
            'temperature': 0.01,
            'top_p': 0.1,
            'top_k': 1,
        }

        prompts = CHAT_TEMPLATE.format(prompts=text)

        try:
            resp = self._model.generate(prompts, device=self._device, output_len=150, **sampler_kwargs)
            print(f"""prompts: {prompts}\nresp: {resp}""")
        except RuntimeError as e:
            # Best effort: keep playing with a default reply, but leave a trace
            # of what went wrong instead of hiding it.
            print(f"error: {e}")
            resp = "**no**"

        for fragment in self._BOILERPLATE:
            resp = resp.replace(fragment, "")

        return extract_bold_text(resp)

class Questioner(BaseAgent):
    """Agent for the asking side of the game: produces questions and guesses."""

    def __init__(self, variant, device_str):
        super().__init__(variant, device_str)
        # Text describing the latest question/answer pair; refreshed on every
        # guess turn after step 0 and injected into later prompts.
        self._information = ""

    def call(self, obs):
        """Build the prompt for the current turn and return the model's reply.

        Args:
            obs: observation providing `turnType`, `step`, `questions` and
                `answers`.

        Returns:
            The extracted model reply for 'guess' and 'ask' turns; `None` for
            any other turn type.
        """
        turn = obs.turnType
        if turn not in ('guess', 'ask'):
            return None

        instructions = """You are a AI assistant in the 20 Questions game. The questioner "asks" the yes-or-no-question to identify the secret, and "guesses" to identify the secret word. The answerer reply only "yes or "no". The secret could be a person, a place / geographic location, or a physical object in the world."""

        if turn == 'guess':
            if obs.step > 0:
                # Remember the latest exchange verbatim. (An alternative that
                # asks the model to rewrite the pair as a declarative sentence
                # is left disabled.)
                self._information = f"""Previous question: {obs.questions[-1]}. The anwser is: {obs.answers[-1]}"""
            instructions += f"""
Now, you are the guesser. Use this answer to guess at what that object might be. The game is over when you guess correctly the object or place or person the answerer is thining of.
Note:
- The guess "MUST" be the keyword only. DO NOT provider the reason.
- Example "**Grand Palace**".
- The guess must less than 100 charactors. Wrapped the question within **.
{self._information}
Now guess for the keyword.
"""
        else:
            instructions += f"""
Now, you are the questioner. It is best to ask questions that are as broad as possible; i.e. if the answer to the question is \"No\", it means that millions of possibilities can be eliminated. 
The goal of the questions should be to identify what category the object or place in question is. All questions asked should pertain to ascertaining the category of the item.
Once the category has been ascertained, then use the questions and answers already given to ask subsequent questions, that will further eliminate possibilities.
Note:
-The question must not have any text before or after.
-The question must less than 100 charactors. Wrapped the question within **.
"""
            # The very first question gets sample questions; later ones get the
            # remembered question/answer pair instead.
            if obs.step == 0:
                instructions += """
BEGIN EXAMPLE
 - Is it a man-made?
 - Is its size bigger than a house?
 - Is it used in the kitchen?
 - Is it the place in Europe?
END EXAMPLE"""
            else:
                instructions += f"""Note: {self._information}"""

        return super().get_response_from_llm(obs, instructions)
                

class Answerer(BaseAgent):
    """Agent for the answering side of the game: replies "yes" or "no" about the keyword."""

    def __init__(self, variant, device_str):
        super().__init__(variant, device_str)
        self._information = ""

    def call(self, obs):
        """Ask the model whether the latest question holds for the keyword.

        Args:
            obs: observation providing `questions` (the last one is answered)
                and `keyword`.

        Returns:
            "yes" or "no". Anything the model returns that does not normalize
            to one of those two words is mapped to "no".
        """
        prompts = f"""You are a helpful AI assistant in the 20 Questions game. The questioner "asks" the yes-or-no-question and "guesses" to identify the secret word. The answerer reply only "yes or "no". The keyword is a thing.
        You are the answerer, Yes or no if the question: \"{obs.questions[-1]}\", is the right answer for \"{obs.keyword}\".
        Note:
 -Answer only yes or no.
 -Example: no
 -Wrapped the answer within **."""

        response = super().get_response_from_llm(obs, prompts)

        if response is None:
            return "no"

        # Normalize before comparing: drop periods, surrounding whitespace and
        # letter case, so replies such as "Yes" or " yes." count as "yes"
        # instead of silently falling through to "no".
        response = response.replace(".", "").strip().lower()
        if response not in ("yes", "no"):
            response = "no"

        return response
                
############################################################
# The single agent instance for this process; created on first use by get_agent().
agent = None

# Model variant and device handed to the agent constructor.
VARIANT = "7b-it-quant"
# VARIANT = "2b-it"
DEVICE = "cuda"
# DEVICE = "cpu"

# DEVICE =  "cuda:0" if torch.cuda.is_available() else "cpu"


def get_agent(name):
    """Return the process-wide agent, creating it on the first call.

    Only one agent is ever built: once `agent` is set, it is returned for every
    later call regardless of `name`.

    Args:
        name: 'questioner' or 'answerer'; selects the class to instantiate when
            no agent exists yet.

    Returns:
        The shared agent instance.

    Raises:
        AssertionError: if no agent exists and `name` is not a known role.
    """
    global agent

    if agent is None:
        if name == 'questioner':
            agent = Questioner(variant=VARIANT, device_str=DEVICE)
        elif name == 'answerer':
            agent = Answerer(variant=VARIANT, device_str=DEVICE)

    assert agent is not None, "Agent not initialized."
    return agent


def agent_fn(obs, cfg):
    """Entry point called for every turn: route the observation to the agent.

    Args:
        obs: observation for the current turn; `turnType` selects the role.
        cfg: configuration object (not used).

    Returns:
        The agent's reply, or "no" when the agent raised, returned `None`, or
        returned a reply of at most one character.
    """
    print(f"""--obsinfo: {obs}""")

    # Asking and guessing are both handled by the questioner; every other turn
    # type goes to the answerer.
    role = "questioner" if obs.turnType in ("ask", "guess") else "answerer"
    selected_agent = get_agent(role)

    try:
        response = selected_agent.call(obs)
    except Exception as e:
        print(f"error: {e}")
        response = "no"

    if response is None or len(response) <= 1:
        return "no"
    return response

Writing submission/main.py


In [3]:
# %run submission/main.py

# def agent_fn(obs, cfg):
#     if obs.turnType == "ask": response = "Is it a pig?"
#     elif obs.turnType == "guess": response = "pig"
#     elif obs.turnType == "answer": response = "yes"
#     return response

# debug_config = {'episodeSteps':20,     # initial step plus 3 steps per round (ask/answer/guess)
#                 'actTimeout': 120,       # agent time per round in seconds; default is 60
#                 'runTimeout': 1200,      # max time for the episode in seconds; default is 1200
#                 'agentTimeout': 3600}  # obsolete field; default is 3600
# from kaggle_environments import make
# # env = make("llm_20_questions", debug=True, configuration=debug_config)
# env = make("llm_20_questions", debug=True)
# agent = "/kaggle/working/submission/main.py"
# env.reset()
# logs = env.run([agent, agent_fn, agent_fn, agent_fn])
# #while not env.done: #add steps here for testing
# env.render(mode="ipython", width=800, height=800)

In [4]:
# def simple_agent(obs, cfg):
#     if obs.turnType == "ask": response = "Is it a pig?"
#     elif obs.turnType == "guess": response = "pig"
#     elif obs.turnType == "answer": response = "yes"
#     return response

# from kaggle_environments import make
# # For debugging, play game with only two rounds
# debug_config = {'episodeSteps': 31,     # initial step plus 3 steps per round (ask/answer/guess)
#                 'actTimeout': 60,       # agent time per round in seconds; default is 60
#                 'runTimeout': 1200,      # max time for the episode in seconds; default is 1200
#                 'agentTimeout': 3600}  # obsolete field; default is 3600

# env = make("llm_20_questions", configuration=debug_config, debug=True)

# print("start.....")
# game_output = env.run(agents=[agent_fn, agent_fn, agent_fn, agent_fn])
# print("finish....")
# env.render(mode="ipython", width=700, height=700)

In [5]:
# Install pigz and pv, which the packaging command in the next cell relies on.
# apt-get is the interface meant for scripted use, and -y answers the
# confirmation prompt automatically since the cell cannot respond to one.
!apt-get install -y pigz pv > /dev/null





In [6]:
# Pack the submission archive. Compression runs through `pigz --fast`, piped
# through `pv`. The first -C adds the contents of submission/ at the archive
# root; the second -C adds the model weights under the relative path
# gemma/pytorch/7b-it-quant/2, which is the path main.py joins onto the agent directory.
!tar --use-compress-program='pigz --fast --recursive | pv' -cf submission.tar.gz -C /kaggle/working/submission . -C /kaggle/input/ gemma/pytorch/7b-it-quant/2


