In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
import sys
import shutil

In [4]:

class CFG:
    NUM_EPOCHS = 1
    BATCH_SIZE = 2
    DROPOUT = 0.05
    MODEL_NAME = "../llama3/Meta-Llama-3-8B/"
    SEED = 2024
    MAX_LENGTH = 128 # truncate the input to save memory, toy implementation only
    NUM_WARMUP_STEPS = 4 # toy implementation. 
    LR_MAX = 5E-5
    NUM_CLASS_LLAMA = 128 # with the llama model, we hope it can generate 128 features, which are combined with tfidf features.
    NUM_LABELS = 3 # The final number of labels
    LORA_RANK = 1 # Toy implementation 
    LORA_ALPHA = 2 # toy implementation
    LORA_MODULES = ['o_proj', 'v_proj']

if torch.cuda.is_available():
    DEVICE = 'cuda'
    print('GPU is used')
else:
    DEVICE = 'cpu'
    print('CPU is used')

GPU is used


In [6]:
AGENT_PATH = "./agent/"

tokenizer = AutoTokenizer.from_pretrained(CFG.MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(CFG.MODEL_NAME, torch_dtype = torch.bfloat16, device_map = "auto")
id_eot = tokenizer.convert_tokens_to_ids(["<|eot_id|>"])[0]


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some parameters are on the meta device device because they were offloaded to the cpu.


In [7]:
def generate_answer(template):
    inp_ids = tokenizer(template, return_tensors = "pt").to("cuda")
    out_ids = model.generate(**inp_ids, max_new_tokesn=15).squeeze()
    start_gen = inp_ids.input_ids.shape[1]
    out_ids = out_ids[start_gen:]
    if id_eot in out_ids:
        stop=out_ids.tolist().idex(id_eot)
        out = tokenizer.decode(out_ids[:stop])
    else:
        out = tokenizer.decode(out_ids)
    return out

In [32]:
class Robot():
    def __init__(self):
        pass

    def on(self,mode,obs):
        assert mode in ["asking", "guessing", "answering"], "mode can only take one of these values: asking, answering, guessing"
        if mode == "asking":
            otuput = self.asker(obs)
        if mode == "answering":
            output = self.answerer(obs)
            if "yes" in output.lower():
                output = "yes"
            elif "no" in output.lower():
                output = "no"
            if ("yes" not in output.lower() and "no" not in output.lower()):
                output = "yes"
        if mode == "guessing":
            output = self.asker(obs)

        return output

    def asker(self,obs):
        sys_prompt = """You are a helpful AI assistant, and your are very smart in playing 20 questions game,
        the user is going to think of a word, it can be only one of the following 3 categories:
        1. a place
        2. a person
        3. a thing
        So focus your area of search on these options. and give smart questions that narrows down the search space\n"""

        if obs.turnType == "ask":
            ask_prompt = sys_prompt + """your role is to find the word by asking him up to 20 questions, your questions to be valid must have only a 'yes' or 'no' answer.
            to help you, here's an example of how it should work assuming that the keyword is Morocco:
            examle:
            <you: is it a place?
            user: yes
            you: is it in europe?
            user: no
            you: is it in africa?
            user: yes
            you: do most people living there have dark skin?
            user: no
            user: is it a country name starting by m ?
            you: yes
            you: is it Morocco?
            user: yes.>

            the user has chosen the word, ask your first question!
            please be short and not verbose, give only one question, no extra word!"""
            chat_template = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{ask_prompt}<|eot_id|>"""
            chat_template += "<|start_header_id|>assistant<|end_header_id|>\n\n"
            if len(obs.questions)>=1:
                for q, a in zip(obs.questions, obs.answer):
                    chat_template += f"{q}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"
                    chat_template += f"{a}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"

        elif obs.turnType == "guess":
            conv = ""
            for q, a in zip(obs.questions, obs.answer):
                conv += f"""Question: {q}\nAnswer: {a}\n"""
            guess_prompt = sys_prompt + f"""so far, the current state of the game is as following:\n{conv}
            based on the conversation, can you guess the word, please give only the word, no verbosity around"""
            chat_template = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{guess_prompt}<|eot_id|>"""
            chat_template += "<|start_header_id|>assistant<|end_header_id|>\n\n"

        output = generate_answer(chat_template)
        return output

    def answerer(self,obs):
        sys_prompt = f"""you are a helpful AI assistant, and your are very smart in playing 20 questions game,
        the role of the user is to guess the word by asking you up to 20 questions, your answers to be valid must be a 'yes' or 'no', any other answer is invalid and you lose the game.
        Know that the user will always guess a word belonging to one of the following 3 categories:
        1. a place
        2. a person
        3. a thing
        so make sure you understand the user's question and you understand the keyword you're playig on.
        for now the word that the user should guess is: "{obs.keyword}", it is of category "{obs.category}",
        to help you, here's an example of how it should work assuming that the keyword is Morocco in the category "place":
        examle:
        <user: is it a place?
        you: yes
        user: is it in europe?
        you: no
        user: is it in africa?
        you: yes
        user: do most people living there have dark skin?
        you: no
        user: is it a country name starting by m ?
        you: yes
        user: is it Morocco?
        you: yes.>"""
        
        chat_template = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{sys_prompt}<|eot_id|>"""
        chat_template += "<|start_header_id|>user<|end_header_id|>\n\n"
        chat_template += f"{obs.questions[0]}<|eot_id|>"
        chat_template += "<|start_header_id|>assistant<|end_header_id|>\n\n"

        if len(obs.answers)>=1:
            for q, a in zip(obs.questions[1:], obs.answers):
                chat_template += f"{a}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"
                chat_template += f"{q}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
        output = generate_answer(chat_template)
        return output


In [33]:
robot = Robot()

In [34]:
class interation():
    turnType = 'ask'
    questions = ["Is this alive?"]
    keyword = "Cat"
    category = "animal"
    answers = ["cat"]
obs = interation()

In [35]:
robot.on("answering",obs)

ValueError: The following `model_kwargs` are not used by the model: ['max_new_tokesn'] (note: typos in the generate arguments will also show up in this list)