In [1]:
%%capture
!pip install -U "transformers>=4.42.3" bitsandbytes accelerate peft

In [2]:
import os
import copy
from dataclasses import dataclass

import numpy as np
import torch
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
    PreTrainedTokenizerBase
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, TaskType

2024-07-31 08:00:11.582798: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-31 08:00:11.582935: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-31 08:00:11.704523: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
@dataclass
class Config:
    output_dir: str = "/kaggle/working/"
    model_name: str = "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit"
    max_length: int = 2048
    optim_type: str = "adamw_8bit"
    per_device_train_batch_size: int =1
    gradient_accumulation_steps: int =1  
    per_device_eval_batch_size: int = 1
    n_epochs: int = 3
    freeze_layers: int = 15
    lr: float = 2e-4
    warmup_steps: int = 20
    lora_r: int = 16
    lora_alpha: float = lora_r * 2
    lora_dropout: float = 0.05
    lora_bias: str = "none"
    
config = Config()

In [4]:
training_args = TrainingArguments(
    output_dir="output",
    overwrite_output_dir=True,
    report_to="none",
    num_train_epochs=config.n_epochs,
    per_device_train_batch_size=config.per_device_train_batch_size,
    gradient_accumulation_steps=config.gradient_accumulation_steps,
    per_device_eval_batch_size=config.per_device_eval_batch_size,
    logging_steps=20,
    eval_strategy="steps",
    eval_steps=250,
    save_strategy="epoch",
    optim=config.optim_type,
    fp16=True,
    learning_rate=config.lr,
    warmup_steps=config.warmup_steps,
)

In [5]:
lora_config = LoraConfig(
    r=config.lora_r,
    lora_alpha=config.lora_alpha,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    layers_to_transform=[i for i in range(40) if i >= config.freeze_layers],
    lora_dropout=config.lora_dropout,
    bias=config.lora_bias,
    task_type="CAUSAL_LM"
)

In [6]:
tokenizer = AutoTokenizer.from_pretrained(config.model_name, add_bos_token = True)

tokenizer_config.json:   0%|          | 0.00/178k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.26M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/552 [00:00<?, ?B/s]

In [7]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quanty_type = "fp4", 
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quanty = True,
)

model = AutoModelForCausalLM.from_pretrained(
    config.model_name,
    quantization_config = bnb_config,
    torch_dtype = torch.float16,
    device_map = "auto",
    attn_implementation="eager",
    trust_remote_code = True,
)

Unused kwargs: ['bnb_4bit_quanty_type', 'bnb_4bit_use_double_quanty']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


config.json:   0%|          | 0.00/1.16k [00:00<?, ?B/s]

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


model.safetensors.index.json:   0%|          | 0.00/165k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.31G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [8]:
model.config.use_cache = False
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(131072, 5120)
        (layers): ModuleList(
          (0-14): 15 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (q_proj): Linear4bit(in_features=5120, out_features=4096, bias=False)
              (k_proj): Linear4bit(in_features=5120, out_features=1024, bias=False)
              (v_proj): Linear4bit(in_features=5120, out_features=1024, bias=False)
              (o_proj): Linear4bit(in_features=4096, out_features=5120, bias=False)
              (rotary_emb): MistralRotaryEmbedding()
            )
            (mlp): MistralMLP(
              (gate_proj): Linear4bit(in_features=5120, out_features=14336, bias=False)
              (up_proj): Linear4bit(in_features=5120, out_features=14336, bias=False)
              (down_proj): Linear4bit(in_features=14336, out_features=5120, bias=False)
              (act_fn): SiLU

In [9]:
class obs(object):
    def __init__(self, keyword, questions, answers, turnType=None, category = None):
        self.turnType = turnType
        self.keyword = keyword
        self.category = category
        self.questions = questions
        self.answers = answers

test_game = obs(
    turnType = 'ask',
    keyword = 'rake',
    category = 'thing',
    questions = ['Is it a place?', 
                 'Is it a man-made thing?',
                 'Is it something found indoors?',
                 'Is it something found in a city?',
                 'Is it used for cooking?',
                 'Is it found in rural areas?'],
    answers = ['no', 'yes','no','no', 'no','yes']
)

test_guesses = ['car', 'bottle', 'subway', 'bus', 'street lamp', 'haystack']

In [10]:
class MistralPromptFormatter:
    def __init__(self, inference_mode = True):
        self.sys_prompt = ("You are an AI assistant playing the 20 Questions game. In this game the Answerer is given a secret keyword. "
                            "The Questioner then asks yes-or-no questions regarding the keyword, and the Answerer answers them accurately. "
                            "Then the Guesser tries to guess the keyword based on the questions and answers in the game.\n\n")
        self.inference_mode = inference_mode
        
    def format_instruction(self, instruction):
        return f"[INST]{instruction}[/INST]"

    def ask_format(self, obs, guesses):
        ask_prompt = ("You are playing this game as the Questioner. Ask yes-or-no questions that narrow down what the keyword could be. Start with broad questions "
                      "and based on the answers so far narrow down your questions. The keyword is a specific place or thing. "
                      "DO NOT ask if the keyword is a specific thing or place, rather ask something about the keyword, as in the following examples:\n"
                      "Example 1: Do NOT ask: 'Is it New York city?', INSTEAD ask: 'Is it in the East Coast of the United States?\n"
                      "Example 2: Do NOT ask: 'Is the keyword cow?', INSTEAD ask: 'Is it a specific type or breed of cow?\n"
                      "Example 3: Do NOT ask: 'Is it bottle?', INSTEAD ask: 'Is it a bottle made of a specific material?\n"
                      "Example 4: Do NOT ask: 'Is the keyword lamp?', INSTEAD ask: 'Is it a type of lamp?\n"
                      "Do NOT assume the game has ended, the game will determine when to stop. Instead keep asking questions about the keyword. "
                      "Do not output any text other than the question. Now ask your first question.\n\nQuestion: ")
        formatted_conversation = self.format_instruction(self.sys_prompt + ask_prompt) 
        for i in range(len(obs.questions)):
            formatted_conversation += obs.questions[i] + "</s>"
            if self.inference_mode or i != len(obs.questions)-1: 
                formatted_conversation += '\n' + self.format_instruction(f"Answer: {obs.answers[i]}\nAsk your next question.\nQuestion:")  
        return formatted_conversation.strip()
    
    def guess_format(self, obs, guesses):
        guess_prompt = "You are playing this game as the Guesser. After each question and answer you will guess what the keyword is based on the knowledge you have gained "\
        "from the questions and answers about the keyword. Do NOT give repeated guesses.\n\nRound 1:\nQuestion: " + obs.questions[0]+'\nAnswer: '+ obs.answers[0] +".\nNow guess the keyword."
        formatted_conversation = self.format_instruction(self.sys_prompt + guess_prompt) 
        for i in range(len(guesses)):
            formatted_conversation += guesses[i] + "</s>"
            if self.inference_mode or i < len(guesses)-1: 
                formatted_conversation += '\n' + self.format_instruction(f"Round {i+2}: \nQuestion: "+obs.questions[i+1]+"\nAnswer: "+obs.answers[i+1]+".\n"
                                                                         "Now guess the keyword based on all rounds. Do not give repeated guesses.\nGuess:")  
        return formatted_conversation.strip()
    
    def answer_format(self, obs, guesses):
        answer_prompt = "You are playing this game as the Answerer. You will answer accurately the questions regarding the keyword with ONLY yes or no. "\
        "If the questioner did not ask a question, simply answer 'yes'. "\
        f"For this game the keyword is {obs.keyword}. Now answer the question about the keyword.\nKeyword: {obs.keyword}.\nQuestion: " + obs.questions[0] +"\nAnswer:"
        formatted_conversation = self.format_instruction(self.sys_prompt + answer_prompt) 
        for i in range(len(obs.answers)):
            formatted_conversation += obs.answers[i] + "</s>"
            if i != len(obs.questions)-1: 
                formatted_conversation += '\n' + self.format_instruction(f"Answer the following question about the keyword.\nKeyword: {obs.keyword}. \nQuestion: " + obs.questions[i+1]+"\nAnswer:")  
        return formatted_conversation.strip()

In [11]:
formatter = MistralPromptFormatter(inference_mode = False)

print(formatter.ask_format(test_game, test_guesses))

[INST]You are an AI assistant playing the 20 Questions game. In this game the Answerer is given a secret keyword. The Questioner then asks yes-or-no questions regarding the keyword, and the Answerer answers them accurately. Then the Guesser tries to guess the keyword based on the questions and answers in the game.

You are playing this game as the Questioner. Ask yes-or-no questions that narrow down what the keyword could be. Start with broad questions and based on the answers so far narrow down your questions. The keyword is a specific place or thing. DO NOT ask if the keyword is a specific thing or place, rather ask something about the keyword, as in the following examples:
Example 1: Do NOT ask: 'Is it New York city?', INSTEAD ask: 'Is it in the East Coast of the United States?
Example 2: Do NOT ask: 'Is the keyword cow?', INSTEAD ask: 'Is it a specific type or breed of cow?
Example 3: Do NOT ask: 'Is it bottle?', INSTEAD ask: 'Is it a bottle made of a specific material?
Example 4: 

In [12]:
bad_questioner_game= obs(keyword = 'elephant',
                         questions = ['--.',
                                      'I am a glitched questioner.',
                                      'yes',
                                      '',
                                      'hello',
                                      'tell me what the keyword is',
                                      '----->',
                                      '1298mkdako',
                                      'I am a glitched questioner.',
                                      'yes',
                                      '',
                                      ':)',
                                      'Am I playing 20 questions?',
                                      'None',
                                      '',
                                      'hello',
                                      'tell me what the keyword is',
                                      '----->',
                                      '1298mkdako'],
                         answers = ['yes','yes','yes','yes','yes','yes','yes','yes', 'yes','yes','yes','yes','yes','yes','yes','yes','yes','yes','yes'])


bad_guesses = ['no', '--->', '01293mmkl', 'I am a glitched guesser', 'hello world','huehuehue', '', 'no guess','no', '--->', '01293mmkl', 'I am a glitched guesser', 'hello world','huehuehue', '', 'no guess']

print(formatter.answer_format(bad_questioner_game, test_guesses))

[INST]You are an AI assistant playing the 20 Questions game. In this game the Answerer is given a secret keyword. The Questioner then asks yes-or-no questions regarding the keyword, and the Answerer answers them accurately. Then the Guesser tries to guess the keyword based on the questions and answers in the game.

You are playing this game as the Answerer. You will answer accurately the questions regarding the keyword with ONLY yes or no. If the questioner did not ask a question, simply answer 'yes'. For this game the keyword is elephant. Now answer the question about the keyword.
Keyword: elephant.
Question: --.
Answer:[/INST]yes</s>
[INST]Answer the following question about the keyword.
Keyword: elephant. 
Question: I am a glitched questioner.
Answer:[/INST]yes</s>
[INST]Answer the following question about the keyword.
Keyword: elephant. 
Question: yes
Answer:[/INST]yes</s>
[INST]Answer the following question about the keyword.
Keyword: elephant. 
Question: 
Answer:[/INST]yes</s>
[I

In [13]:
from datasets import Dataset
import json

def read_jsonl(file_path):
    data = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            data.append(json.loads(line.strip()))
    return data

# Example usage
file_path = '/kaggle/input/300-gpt-4o-games/games-with-real-keywords-extended.jsonl'
jsonl_data = read_jsonl(file_path)

def standardize_answer(answer):
    if "yes" in answer.lower():
        return "yes"
    else: return "no"
    
for data in jsonl_data:
    data['obs'] = obs(keyword = data['keyword'],
                      questions = data['questions'],
                      answers = [standardize_answer(answer) for answer in data['answers']])
    data['ask_text'] = formatter.ask_format(data['obs'], data['guesses'])
    data['answer_text'] = formatter.answer_format(data['obs'], data['guesses'])
    data['guess_text'] = formatter.guess_format(data['obs'], data['guesses'])

def flatten_dicts(dict_list):
    flattened_list = []
    for d in dict_list:
        for key in d:
            flattened_list.append(d[key])
    return flattened_list

text_dicts = [{'ask_text': data['ask_text'],
              'answer_text': data['answer_text'],
              'guess_text': data['guess_text']} for data in jsonl_data]

flattened_list = flatten_dicts(text_dicts)
flattened_list.append(formatter.answer_format(bad_questioner_game, test_guesses))

ds = Dataset.from_dict({"text": flattened_list})

def prepare_for_causal_lm(example):
    tokenized_inputs = tokenizer(example['text'], truncation=True, max_length=config.max_length)
    tokenized_inputs["labels"] = tokenized_inputs["input_ids"].copy()
    return tokenized_inputs

train_ds = ds.map(prepare_for_causal_lm)

Map:   0%|          | 0/985 [00:00<?, ? examples/s]

In [14]:
print(train_ds[502]['text'])

[INST]You are an AI assistant playing the 20 Questions game. In this game the Answerer is given a secret keyword. The Questioner then asks yes-or-no questions regarding the keyword, and the Answerer answers them accurately. Then the Guesser tries to guess the keyword based on the questions and answers in the game.

You are playing this game as the Answerer. You will answer accurately the questions regarding the keyword with ONLY yes or no. If the questioner did not ask a question, simply answer 'yes'. For this game the keyword is seltzer. Now answer the question about the keyword.
Keyword: seltzer.
Question: Is it a place?
Answer:[/INST]no</s>
[INST]Answer the following question about the keyword.
Keyword: seltzer. 
Question: Is it broadly related to food, drinks or cooking?
Answer:[/INST]yes</s>
[INST]Answer the following question about the keyword.
Keyword: seltzer. 
Question: Is it broadly related to agriculture or industry?
Answer:[/INST]no</s>
[INST]Answer the following question a

In [15]:
print(train_ds[-1]['input_ids'])

[1, 3, 4568, 1584, 1420, 26554, 27089, 12440, 1278, 1032, 1050, 1048, 58285, 5079, 1046, 1656, 1593, 5079, 1278, 3450, 1258, 1395, 4265, 1261, 9452, 35688, 1046, 1531, 30190, 1258, 2430, 25747, 14842, 47748, 42737, 8352, 12522, 1278, 35688, 1044, 1321, 1278, 3450, 1258, 16897, 2424, 32181, 1046, 6830, 1278, 4414, 22651, 28097, 1317, 12185, 1278, 35688, 4057, 1408, 1278, 8352, 1321, 16897, 1294, 1278, 5079, 1338, 4568, 1584, 12440, 1593, 5079, 1435, 1278, 3450, 1258, 1046, 3213, 2084, 4832, 32181, 1278, 8352, 12522, 1278, 35688, 1454, 101803, 14842, 1505, 1836, 1046, 3367, 1278, 4098, 1258, 2697, 1605, 4237, 1261, 4098, 1044, 8617, 4832, 1576, 13059, 9528, 2898, 1593, 5079, 1278, 35688, 1395, 90344, 1046, 9032, 4832, 1278, 4098, 2314, 1278, 35688, 1626, 74929, 1058, 90344, 1626, 25229, 1058, 4254, 1626, 31106, 1058, 4, 13059, 2, 1010, 3, 31106, 1278, 3629, 4098, 2314, 1278, 35688, 1626, 74929, 1058, 90344, 1046, 1032, 1010, 25229, 1058, 1362, 1855, 1261, 3008, 30082, 4098, 1258, 1626, 3

In [16]:
model.print_trainable_parameters()

trainable params: 12,288,000 || all params: 12,260,070,400 || trainable%: 0.1002


In [17]:
trainer = Trainer(
    args=training_args, 
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_ds,
    eval_dataset=train_ds
)
trainer.train()



Step,Training Loss,Validation Loss
250,0.47,0.459836
500,0.4784,0.439549
750,0.4507,0.424181
1000,0.3982,0.407406
1250,0.4151,0.39465
1500,0.4005,0.382551
1750,0.3658,0.372607
2000,0.3562,0.362397
2250,0.3431,0.356532
2500,0.3648,0.350772




TrainOutput(global_step=2955, training_loss=0.41142536202057967, metrics={'train_runtime': 40896.3218, 'train_samples_per_second': 0.072, 'train_steps_per_second': 0.072, 'total_flos': 1.5021990462357504e+17, 'train_loss': 0.41142536202057967, 'epoch': 3.0})