<a href="https://www.kaggle.com/code/matthewsfarmer/solution-l3-1-8b-inst-kraut?scriptVersionId=193016878" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
import subprocess

def execute_command(cmd):
    """Run *cmd* through the shell and print whether it succeeded.

    stdout and stderr are captured into a single stream. The captured output
    is printed only when the command exits with a non-zero status.

    Args:
        cmd: Shell command line. It is executed with ``shell=True``, so it
            must only ever be built from trusted text.
    """
    result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    if result.returncode == 0:
        print(f'Work Complete! {cmd} ')
        return
    print(f'We are being attacked!! {cmd}')
    # Tool output is not guaranteed to be valid UTF-8. Replacing undecodable
    # bytes keeps the diagnostic visible instead of raising UnicodeDecodeError
    # on top of the failure that is being reported.
    print(result.stdout.decode('utf-8', errors='replace'))

# Download the model into the submission directory, then install the runtime
# packages twice: once vendored under /tmp/submission/lib and once into the
# current environment.
_setup_commands = (
    "huggingface-cli download VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct --local-dir /tmp/submission/",
    "pip install -t /tmp/submission/lib bitsandbytes outlines",
    "pip install bitsandbytes outlines",
)
for _setup_command in _setup_commands:
    execute_command(_setup_command)

Work Complete! huggingface-cli download VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct --local-dir /tmp/submission/ 
Work Complete! pip install -t /tmp/submission/lib bitsandbytes outlines 
Work Complete! pip install bitsandbytes outlines 


In [8]:
import json
# Rewrite the model's config.json in place so that it carries a dynamic
# RoPE-scaling entry with factor 4.0.
config_path = "/tmp/submission/config.json"
with open(config_path, "r") as file:
    config = json.load(file)
config.update(rope_scaling={"factor": 4.0, "type": "dynamic"})
with open(config_path, "w") as file:
    json.dump(config, file)

In [13]:
#%%writefile main.py

# ====================ENV PATH========================

import os
import sys

# Use the competition sandbox directory when it exists, otherwise fall back to
# the local submission directory.
_SANDBOX_AGENT_DIR = "/kaggle_simulations/agent/"
_LOCAL_AGENT_DIR = "/tmp/submission/"
KAGGLE_AGENT_PATH = _SANDBOX_AGENT_DIR if os.path.exists(_SANDBOX_AGENT_DIR) else _LOCAL_AGENT_DIR

# Vendored packages live in "<agent dir>/lib"; put them first on the import
# path. When neither directory exists this still resolves to
# "/tmp/submission/lib".
sys.path.insert(0, os.path.join(KAGGLE_AGENT_PATH, 'lib'))
    
# ====================IMPORTS=========================

import json
import itertools
import re
import random
from enum import Enum
from pathlib import Path
from typing import List
import typing as t

import torch
from pydantic import BaseModel, Field
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

import outlines
from outlines import generate, samplers

# ====================CONSTANTS========================

# Run on the GPU when torch can see one, otherwise on the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# 4-bit NF4 quantisation with float16 compute, passed through to the model
# loader via ``model_kwargs``.
quantization_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)
model_kwargs = dict(
    quantization_config=quantization_config,
    trust_remote_code=True,
)

# Chat-markup tokens spliced into the prompt templates. The three role
# headers share one shape, so they are built from a single template.
_ROLE_HEADER = '<|start_header_id|>{role}<|end_header_id|>'
system_token = '<|begin_of_text|>' + _ROLE_HEADER.format(role='system')
user_token = _ROLE_HEADER.format(role='user')
assistant_token = _ROLE_HEADER.format(role='assistant')
eot = '<|eot_id|>'

# ====================INITIALIZATION========================

# Only load the weights when no usable ``model`` is bound yet, so that
# re-running this code in the same session does not load them again.
if 'model' not in locals() or model is None:
    model = outlines.models.transformers(KAGGLE_AGENT_PATH, device=device, model_kwargs=model_kwargs)
else:
    print("Model already loaded. Skipping creation.")
    
# =====================CLASSES=========================

class Observation(BaseModel):
    # Validated snapshot of the game state handed to the agent on each turn.
    step: int
    role: t.Literal["guesser", "answerer"]
    turnType: t.Literal["ask", "answer", "guess"]
    keyword: str
    category: str
    questions: list[str]
    answers: list[str]
    guesses: list[str]

    @property
    def empty(self) -> bool:
        """Return True while no question, answer or guess has been recorded."""
        return not (self.questions or self.answers or self.guesses)

    def get_history(self) -> t.Iterator[tuple[str, str, str]]:
        """Iterate over (question, answer, guess) rounds, padding gaps with "[none]"."""
        columns = (self.questions, self.answers, self.guesses)
        return itertools.zip_longest(*columns, fillvalue="[none]")

    def qa_history_as_numbered_list(self) -> str:
        """Render the question/answer history as a 1-based numbered list.

        Guesses are not included. Returns "none yet." when the game has no
        history at all.
        """
        if self.empty:
            return "none yet."
        numbered_lines = [
            f"{number}. {asked} : {replied}"
            for number, (asked, replied, _guessed) in enumerate(self.get_history(), start=1)
        ]
        return "\n".join(numbered_lines)

    def get_list_of_guesses(self) -> str:
        """Return all guesses so far as one comma-separated string."""
        separator = ', '
        return separator.join(self.guesses)

    def get_qa_as_dict(self) -> dict:
        """Return the step number plus the raw question and answer lists."""
        return dict(
            step=self.step,
            questions=self.questions,
            answers=self.answers,
        )
    

    
class YesNoEnum(str, Enum):
    # The only two replies the answerer may give; used as the type of
    # ``Answer.answer``. The ``str`` mixin makes members compare equal to the
    # plain strings "yes" / "no".
    yes = "yes"
    no = "no"
    
class Answer(BaseModel):
    # Output schema for the answerer turn: a free-text rationale followed by
    # the constrained yes/no verdict.
    # NOTE(review): the rationale is declared before the verdict, presumably so
    # the model writes its reasoning first - confirm that the JSON generator
    # emits fields in declaration order before reordering anything here.
    # NOTE(review): ``example=`` is an extra ``Field`` keyword; pydantic v2
    # deprecates such extras in favour of ``examples=[...]`` /
    # ``json_schema_extra`` - confirm against the installed version.
    internal_thoughts: str = Field(
        description="A short statement explaining the reasoning for your answer to the question.",
        example= "My keyword is cinnamon roll, since the question asked is it a living thing? my answer is no. A cinnamon roll is not a living thing",
    )
    answer: YesNoEnum

class Question(BaseModel):
    # Output schema for the ask turn. Only ``question`` is read by ``ask``;
    # the other two fields are generated but not used by the caller.
    # NOTE(review): ``example=`` is an extra ``Field`` keyword; pydantic v2
    # deprecates such extras in favour of ``examples=[...]`` /
    # ``json_schema_extra`` - confirm against the installed version.
    internal_thoughts: str = Field(
        description="A short statement deductively summarizing the game history and coming to a conclusion on the next question to ask",
        example="We know that the keyword is a living thing and a pet, so the question 'is it a mammal?' further narrows the possibilities.",
    )
    # The question is capped at 150 characters via ``max_length``.
    question: str = Field(
        description="A simple yes/no question. Ending in a question mark.", 
        example="Is it a living thing?", 
        max_length= 150
    )
    probable_secret_keyword: str = Field(
        description="The most probable keyword, based on the game history", 
        example="purse",  
    )

class Guess(BaseModel):
    # Output schema for the guess turn: a free-text rationale followed by the
    # guessed keyword. Only ``guess`` is read by the ``guess`` function.
    # NOTE(review): ``example=`` is an extra ``Field`` keyword; pydantic v2
    # deprecates such extras in favour of ``examples=[...]`` /
    # ``json_schema_extra`` - confirm against the installed version.
    internal_thoughts: str = Field(
        description="A short statement reviewing the game history and coming to a conclusion on most probably keyword", 
        example="We know that the keyword is a living thing and a pet, so dog would be a likely keyword", 
    )
    # The guess is capped at 50 characters via ``max_length``.
    guess: str = Field(
        description=" This is the most probable keyword based on the current game history and does not repeat previous keywords", 
        example = "jellybean", 
        max_length=50
    )
 
    
    
# =====================QUESTIONER AGENT=========================

def ask(observation):
    """Generate the guesser's next yes/no question.

    Builds the questioner prompt from the numbered question/answer history and
    samples a ``Question`` object from the model.

    Args:
        observation: Game state exposing ``qa_history_as_numbered_list()``.

    Returns:
        The generated question text. If anything fails along the way, the
        error is printed and a fixed fallback question is returned instead.
    """
    try:
        prompt = questioner(observation.qa_history_as_numbered_list())
        sampler = samplers.multinomial(1, temperature=0.45, top_p=0.95)
        q_generator = generate.json(model, Question, sampler=sampler)
        output = q_generator(prompt, max_tokens=550)
        # Read the field directly: converting the model with ``.dict()`` first
        # is deprecated in pydantic v2 and adds nothing here.
        return output.question
    except Exception as e:
        # Deliberately best-effort: the agent must always return a question,
        # so every failure degrades to the canned one.
        print(e)
        return "Is the keyword just a single word?"

def questioner(qa_hx, system_token=system_token, user_token=user_token, assistant_token=assistant_token, eot=eot):
    """Build the prompt used to ask the next question.

    The prompt is a system turn with the questioning strategy, a user turn
    holding the game history, and an opened assistant turn.

    Args:
        qa_hx: Question/answer history, already rendered as text.
        system_token: Marker that opens the system turn.
        user_token: Marker that opens the user turn.
        assistant_token: Marker that opens the assistant turn.
        eot: Marker that closes a turn.

    Returns:
        The complete prompt string. The template text is runtime behaviour, so
        whitespace and wording changes affect the model input.
    """
    return f"""
{system_token}
You are the questioner in a game of 20 Questions. Your goal is to ask questions that help discover the identity of the secret keyword by asking yes/no questions.
The user is thinking of a keyword. The keyword will not be a person or geography (city, country, mountain, river, etc.)

As questioner, here the optimal strategy you should follow:

- Start Broad: Begin with questions that can divide the category into large, distinct groups. This helps to halve the possible options quickly.
- Use Binary Splitting: Ask questions that roughly split the remaining possibilities in half to narrow down the options more efficiently.
- Narrow Down Attributes: Once you have a smaller subset, focus on specific attributes to further narrow down the options.
- Keep Track: You will receive a question and answer history to track the game progress.
- Balance Specificity and Breadth: Avoid questions that are too narrow too early, as they might not be as effective in eliminating large groups of possibilities.
- Adapt Based on Answers: Be flexible and ready to change your line of questioning based on the answers you receive. If you are getting many no's, consider changing your approach and backtracking to a previous question/reasoning that led to a yes.
- Avoid Redundancy: Make sure each question adds new information and doesn't rehash what you already know.
{eot}
{user_token}
Game History:

{qa_hx}

{eot}
{assistant_token}
"""


# =====================ANSWERER AGENT=========================

def answer(observation):
    """Answer the most recent question about the secret keyword.

    Builds the answerer prompt from the last question, the keyword and the
    category, and samples an ``Answer`` object from the model.

    Args:
        observation: Game state exposing ``questions``, ``keyword`` and
            ``category``.

    Returns:
        "yes" or "no". If anything fails (including an empty question list),
        the error is printed and "no" is returned.
    """
    try:
        prompt = answerer(observation.questions[-1], observation.keyword, observation.category)
        sampler = samplers.multinomial(1, temperature=0.25, top_p=0.95)
        a_generator = generate.json(model, Answer, sampler=sampler)
        output = a_generator(prompt, max_tokens=200)
        # ``output.answer`` is a YesNoEnum member; return its plain string.
        # Reading the attribute avoids the ``.dict()`` round-trip that
        # pydantic v2 deprecates.
        return output.answer.value
    except Exception as e:
        # Deliberately best-effort: the agent must always reply, so every
        # failure degrades to "no".
        print(f"Error: {e}")
        return "no"


def answerer(question, keyword, category, system_token=system_token, user_token=user_token, assistant_token=assistant_token, eot=eot):
    """Build the prompt used to answer a question about the keyword.

    The prompt is a system turn describing the answerer role, a user turn
    holding the question, keyword and category, and an opened assistant turn.

    Args:
        question: The question to answer.
        keyword: The secret keyword.
        category: The keyword's category. The instructions tell the model to
            weigh the category, so it is stated in the prompt alongside the
            keyword.
        system_token: Marker that opens the system turn.
        user_token: Marker that opens the user turn.
        assistant_token: Marker that opens the assistant turn.
        eot: Marker that closes a turn.

    Returns:
        The complete prompt string.
    """
    return f"""
{system_token}
You are an AI designed to truthfully answer yes/no questions about a secret keyword in the game 20 questions.
You are the answerer. You answer the user's questions about the secret keyword. 
If the question is nonsensical or not a valid question, you respond with no.
{eot}
{user_token}
Here is the question to be answered:

"{question}"

This is your keyword: {keyword}
This is the keyword's category: {category}

Think about the question in context with the keyword and category to form your yes or no answer.
Answer using the most common understanding of the keyword, do not use an obscure context or far-fetched assumption of the keyword that may not be well-known.
Is the answer yes or no? Provide your reasoning for your answer. 
{eot}
{assistant_token}
"""

# =====================GUESSER AGENT=========================
def guess(observation):
    """Produce the guesser's next keyword guess.

    Builds the guesser prompt from the numbered question/answer history and
    the previous guesses, then samples a ``Guess`` object from the model.

    Args:
        observation: Game state exposing ``guesses``,
            ``get_list_of_guesses()`` and ``qa_history_as_numbered_list()``.

    Returns:
        The guess, lower-cased, with underscores turned into spaces and outer
        whitespace stripped. If anything fails, the error is printed and a
        fixed placeholder string is returned.
    """
    try:
        # Kept inside the ``try`` so a malformed observation falls back to the
        # placeholder like every other failure, instead of escaping.
        if observation.guesses:
            guess_hx = observation.get_list_of_guesses()
        else:
            guess_hx = "no guess yet"
        qa_hx = observation.qa_history_as_numbered_list()
        prompt = guesser(qa_hx, guess_hx)
        sampler = samplers.multinomial(1, temperature=0.4, top_p=0.95)
        g_generator = generate.json(model, Guess, sampler=sampler)
        output = g_generator(prompt, max_tokens=550)
        # Read the field directly; the ``.dict()`` round-trip is deprecated in
        # pydantic v2.
        return output.guess.lower().replace("_", " ").strip()
    except Exception as e:
        # Deliberately best-effort: the agent must always return a string.
        print(f"Error: {e}")
        return "I think I need more information..."

def guesser(qa_hx, guess_hx, system_token=system_token, user_token=user_token, assistant_token=assistant_token, eot=eot):   
    """Build the prompt used to guess the keyword.

    The prompt is a system turn listing the candidate categories and
    guidelines, a user turn holding the game history, the earlier guesses and
    guessing strategies, and an opened assistant turn.

    Args:
        qa_hx: Question/answer history, already rendered as text.
        guess_hx: Earlier guesses, already rendered as text.
        system_token: Marker that opens the system turn.
        user_token: Marker that opens the user turn.
        assistant_token: Marker that opens the assistant turn.
        eot: Marker that closes a turn.

    Returns:
        The complete prompt string. The template text is runtime behaviour, so
        whitespace and wording changes affect the model input.

    NOTE(review): the category list spells "security" as "secrurity"; left
    untouched here because editing the prompt changes what the model sees.
    """
    return f"""
{system_token}
You are an intelligent agent tasked with guessing the keyword in a game of 20 Questions. 
The keyword belongs to one of the following categories but can overlap into multiple categories: 
home items, furniture, lifestyle, hygiene/beauty, technology/electronics, safety/secrurity, health/medical, tools/equipment, professional/office items, transportation/vehicles, food/drink, outdoors, living things (pets, animals, plants, etc.), non-living things found in nature, or toys/games/sports. 

The keyword will not be a person or a geographical location (such as cities, countries, or natural landmarks) and will never be longer than 3 words.

Guidelines:
    - Use the provided question and answer history to narrow down the possibilities.
    - Apply logical deduction to eliminate options that do not fit the answers.
    - Consider the categories and attributes that have been confirmed or ruled out by the questions.
    - Your output should be a single guess of the keyword, based on the history provided.
{eot}
{user_token}
Here is the game history:

{qa_hx}

Previously incorrect guesses: 

{guess_hx}

Strategies:
- The keyword is likely to be a common thing that would be known by most people.
- In early game, you will probably be making a wild guess. That's okay. 
- As the game progresses, and you become more confident in your guess, try variations of similar things to form new guesses.
- Don't repeat your guesses, since we know that they were incorrect.
- Just because the keyword was incorrect does not mean that the category the keyword belongs to is ruled out.
    - For example if the secret keyword is oak tree and the guess was tree, the keyword was not guessed correctly but it is on the right track!
- Guesses should deploy a wide variety of categories and vocabularies from common, less common, and even obscure words or multi-word things. 
    - For example a secret keyword could be: "cookie"(common food) or "mattress"(less common home item/furniture), or "turnstiles"(obscure man-made object), or "hot dog" (multi-word food)

What is our next guess that hasn't been used before and your reasoning process?
{eot}
{assistant_token}
"""
    
# ======================GAME FUNCTIONS========================
agent = None

def observe(obs: t.Any) -> str:
    """Validate the raw observation and run the handler for its turn type.

    The handler's reply is also stored in the module-level ``agent`` variable
    before being returned.

    Args:
        obs: Object whose ``__dict__`` holds the ``Observation`` fields.

    Returns:
        The string produced by ``ask``, ``answer`` or ``guess``.

    Raises:
        ValueError: If the turn type has no handler.
        Exception: Any error raised while dispatching is written to stderr
            and re-raised unchanged.
    """
    global agent
    observation = Observation(**obs.__dict__)

    try:
        turn_handlers = {"ask": ask, "answer": answer, "guess": guess}
        handler = turn_handlers.get(observation.turnType)
        if handler is None:
            raise ValueError("Unknown turn type")
        agent = handler(observation)
        return agent
    except Exception as e:
        print(str(e), file=sys.stderr)
        raise

def agent_fn(obs: t.Any, _: t.Any) -> str:
    """Agent entry point: forward the observation to ``observe``.

    Args:
        obs: The observation for the current turn.
        _: Second positional argument supplied by the caller; ignored.

    Returns:
        Whatever ``observe`` returns for this observation.
    """
    return observe(obs)

Model already loaded. Skipping creation.


In [None]:
import requests
import json
import re
import typing as t
import random
import time

# Keywords used as the secret word in the local evaluation games below; one
# game is played per entry.
keywords_list = [
    'alligator clip', 'armadillo', 'barracks', 'beech', 'boardwalk', 'bobcat', 'bubble', 'bucket', 'bunker', 'camera',
    'ceiling fan', 'celery', 'court file', 'cow', 'cruiser bike', 'cupping', 'door handle', 'emergency siren', 'eyelash',
    'fabric glue', 'fawn', 'garage', 'garlic', 'glacier', 'golf', 'guinea pig', 'hairpin', 'jug', 'kart', 'lilypad',
    'lobster', 'meringue', 'neon', 'orchid', 'peach', 'plate', 'plier', 'poster', 'pufferfish', 'railway', 'rat', 'sea',
    'spanish moss', 'stair', 'steel', 'styrofoam', 'terrace', 'trout', 'underwire', 'wallet'
]

class MockObservation:
    """Plain attribute holder standing in for a game observation in local tests.

    Every constructor argument is stored unchanged on the instance under the
    same name, so ``__dict__`` holds exactly these eight fields.
    """

    def __init__(
        self,
        step: int,
        role: str,
        turnType: str,
        keyword: str,
        category: str,
        questions: list[str],
        answers: list[str],
        guesses: list[str],
    ):
        self.step, self.role, self.turnType = step, role, turnType
        self.keyword, self.category = keyword, category
        self.questions, self.answers, self.guesses = questions, answers, guesses

def play_game(keyword, max_rounds=20):
    """Play one self-play game of 20 Questions and report whether it was won.

    Each round consists of three turns (ask, answer, guess), all served by
    ``agent_fn``. Every reply is printed with its round number and latency.

    Args:
        keyword: The secret keyword for this game.
        max_rounds: Maximum number of ask/answer/guess rounds to play.

    Returns:
        True as soon as a guess equals ``keyword`` (case-insensitively),
        False if no guess matches within ``max_rounds`` rounds.
    """
    category = "thing"
    questions = []
    answers = []
    guesses = []
    # The guesser both asks and guesses; only the answer turn is the answerer's.
    role_for_turn = {"ask": "guesser", "answer": "answerer", "guess": "guesser"}
    print("Starting 20 questions eval game...")
    print("Keyword:", keyword)

    for step in range(max_rounds):
        for turnType in ("ask", "answer", "guess"):
            obs = MockObservation(
                step, role_for_turn[turnType], turnType, keyword, category, questions, answers, guesses
            )

            # perf_counter is monotonic, so the latency cannot be skewed by
            # wall-clock adjustments.
            start_time = time.perf_counter()
            response = agent_fn(obs, None)
            response_time = time.perf_counter() - start_time

            # Log before any early return so the round label matches the round
            # the reply belongs to and the winning guess is shown as well.
            print(f"Round {step}: {response} {response_time:.2f} sec")

            if turnType == 'ask':
                questions.append(response)
            elif turnType == 'answer':
                answers.append(response)
            else:
                guesses.append(response)
                if response.lower() == keyword.lower():
                    print(f"Keyword '{keyword}' guessed correctly! Ending game.")
                    return True
    return False

# Play one game per test keyword, in list order, and report the share of
# games won.
total_games = len(keywords_list)
game_outcomes = [play_game(keyword) for keyword in keywords_list]
correct_guesses = sum(1 for won in game_outcomes if won)

win_rate = (correct_guesses / total_games) * 100
print(f"Win Rate: {win_rate:.2f}%")

Starting 20 questions eval game...
Keyword: alligator clip


Compiling FSM index for all state transitions: 100%|██████████| 990/990 [01:26<00:00, 11.45it/s]
We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
2024-08-17 22:44:00.818202: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-17 22:44:00.818308: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-17 22:44:01.099650: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Round 0: Is the keyword something man-made? 139.53 sec
