In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Walk the read-only Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
! pip install -U accelerate
! pip install -U bitsandbytes

In [None]:
!pip install --upgrade transformers

In [None]:
%pip install \
    torch==2.2.2 \
    tensorboard


In [None]:
%pip install \
    datasets \
    evaluate \
    trl \
    peft 

In [None]:
import torch
print(torch.__version__)

In [None]:
#!mkdir /kaggle/working/model

In [None]:
#!tar -xzvf /kaggle/input/llm-20-questions-v2-submission/submission.tar.gz -C /kaggle/working/model/

## Just for testing

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM, BitsAndBytesConfig, GenerationConfig
import torch

In [None]:
MODEL_PATH = "microsoft/Phi-3-mini-4k-instruct"

# Hugging Face access token is read from the environment (e.g. a Kaggle secret exported
# as HF_TOKEN) instead of being hardcoded. The token that was previously committed here
# must be treated as leaked and revoked.
HF_TOKEN = os.environ.get("HF_TOKEN")

torch_dtype = torch.float16
quant_storage_dtype = torch.float16

# 4-bit bitsandbytes quantization. The keyword names must be spelled exactly
# (`bnb_4bit_quant_type`, `bnb_4bit_use_double_quant`); misspelled keywords are
# silently ignored, which previously left double quantization disabled.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="fp4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    quantization_config=bnb_config,
    torch_dtype=torch_dtype,
    trust_remote_code=True,
    token=HF_TOKEN,  # `use_auth_token` is deprecated in favour of `token`
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, token=HF_TOKEN)

In [None]:
model.save_pretrained("/kaggle/working/submission/model")
tokenizer.save_pretrained("/kaggle/working/submission/model")

In [None]:
%%writefile llama_3_70b_fsdp_qlora.yaml
# Configuration file passed to training.py via `--config`.
# NOTE(review): training.py builds its own SFTConfig from a hard-coded dict and only reads
# `resume_from_checkpoint` from the parsed arguments, so most values below are presumably
# not the ones actually used for training — confirm before tuning them here.
# training parameters
output_dir: "/kaggle/working/Questioner-robot" # output directory for model checkpoints
learning_rate: 0.0002                  # learning rate 2e-4
lr_scheduler_type: "constant"          # learning rate scheduler
num_train_epochs: 2                    # number of training epochs
per_device_train_batch_size: 1         # batch size per device during training
per_device_eval_batch_size: 1          # batch size for evaluation
gradient_accumulation_steps: 2         # number of steps before performing a backward/update pass
optim: adamw_torch                     # use torch adamw optimizer
logging_steps: 10                      # log every 10 steps
save_strategy: epoch                   # save checkpoint every epoch
evaluation_strategy: epoch             # evaluate every epoch
max_grad_norm: 0.3                     # max gradient norm
warmup_ratio: 0.03                     # warmup ratio
# FSDP parameters: https://huggingface.co/docs/transformers/main/en/fsdp
fsdp: "full_shard auto_wrap offload" # remove offload if enough GPU memory
fsdp_config:
  backward_prefetch: "backward_pre"
  forward_prefetch: "false"
  use_orig_params: "false"

In [None]:
%%writefile training.py

import torch
from dataclasses import dataclass, field
from transformers import AutoTokenizer, AutoModelForCausalLM,AutoModelForSeq2SeqLM,  BitsAndBytesConfig
from sklearn.model_selection import train_test_split
import pandas as pd
from datasets import load_from_disk
from transformers import GenerationConfig, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, TaskType
import time
from trl import SFTTrainer, SFTConfig
from trl.commands.cli_utils import  TrlParser
import logging
from dataclasses import dataclass, field
import os
import random
from sklearn.model_selection import train_test_split

def print_number_of_trainable_model_parameters(model):
    """Summarise how many of a model's parameters are trainable.

    Args:
        model: any object exposing ``named_parameters()`` that yields
            ``(name, param)`` pairs where ``param`` has ``numel()`` and
            ``requires_grad``.

    Returns:
        A three-line string with the trainable count, the total count and the
        trainable percentage (two decimals). Despite the name, nothing is
        printed; the caller prints the returned string.

    Raises:
        ZeroDivisionError: if the model has no parameters at all.
    """
    sizes = [(param.numel(), param.requires_grad) for _, param in model.named_parameters()]
    total = sum(count for count, _ in sizes)
    trainable = sum(count for count, needs_grad in sizes if needs_grad)
    share = 100 * trainable / total
    return (
        f"trainable model parameters: {trainable}\n"
        f"all model parameters: {total}\n"
        f"percentage of trainable model parameters: {share:.2f}%"
    )


@dataclass
class ScriptArguments:
    """Script-level command-line/config options parsed alongside the training arguments."""

    # Location of the training dataset (no default path is assumed).
    dataset_path: str = field(default=None, metadata={"help": "Path to the dataset"})
    # Identifier of the model to fine-tune.
    model_id: str = field(default=None, metadata={"help": "Model ID to use for SFT training"})
    # Upper bound on the tokenized sequence length.
    max_seq_length: int = field(default=512, metadata={"help": "The maximum sequence length for SFT Trainer"})
        
def train_model(training_args):
    """Fine-tune a 4-bit quantized FLAN-T5 with LoRA adapters on prompt/question pairs.

    The function loads ``google/flan-t5-base`` with bitsandbytes 4-bit quantization,
    reads ``/kaggle/input/llm-20-keywords/datadf.csv`` (columns ``prompt`` and
    ``question``), splits it 90/10 into train/validation, tokenizes both splits and
    trains with ``SFTTrainer`` using a hard-coded ``SFTConfig``. The trained model is
    written to the config's ``output_dir``.

    Args:
        training_args: parsed training arguments. Only ``resume_from_checkpoint`` is
            read from it; every other training option comes from ``config_dict`` below.

    Environment:
        HF_TOKEN: optional Hugging Face access token used when downloading the model
            and tokenizer. Tokens must never be hardcoded in the script.
    """
    torch.backends.cuda.enable_mem_efficient_sdp(False)
    torch.backends.cuda.enable_flash_sdp(False)

    MODEL_PATH = "google/flan-t5-base"
    # Read the access token from the environment; None falls back to anonymous access.
    hf_token = os.environ.get("HF_TOKEN")

    config_dict = {
        "output_dir": "/kaggle/working/Questioner-robot",
        "learning_rate": 0.0002,
        "report_to": "none",
        "num_train_epochs": 2,
        "per_device_train_batch_size": 1,
        "per_device_eval_batch_size": 1,
        "optim": "adamw_torch",
        "logging_steps": 10,
        "save_strategy": "epoch",
        "evaluation_strategy": "epoch",
        "max_grad_norm": 0.3,
        "warmup_ratio": 0.03,
        # Real booleans instead of the strings "true": the strings only worked
        # because any non-empty string is truthy.
        "packing": True,
        "fp16": True,
        "fsdp": "full_shard auto_wrap offload",
        "fsdp_config": {
            "backward_prefetch": "backward_pre",
            "forward_prefetch": "false",
            "use_orig_params": "false"
        }
    }

    sft_config = SFTConfig(**config_dict)

    # Compute dtype and quantized-weight storage dtype are both float16.
    torch_dtype = torch.float16
    quant_storage_dtype = torch.float16

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch_dtype,
        bnb_4bit_quant_storage=quant_storage_dtype,
    )
    model = AutoModelForSeq2SeqLM.from_pretrained(
        MODEL_PATH,
        quantization_config=bnb_config,
        torch_dtype=quant_storage_dtype,
        trust_remote_code=True,
        token=hf_token,  # `use_auth_token` is deprecated in favour of `token`
    )
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, token=hf_token)
    tokenizer.pad_token = tokenizer.eos_token

    # Load, shuffle (seeded) and split the prompt/question pairs.
    datadf = pd.read_csv('/kaggle/input/llm-20-keywords/datadf.csv')
    shuffled_datadf = datadf.sample(frac=1, random_state=42).reset_index(drop=True)

    train_df, val_df = train_test_split(shuffled_datadf, test_size=0.1, random_state=42)

    from datasets import Dataset
    train_dataset = Dataset.from_pandas(train_df)
    val_dataset = Dataset.from_pandas(val_df)

    def tokenize_function(example):
        # Encode prompts as model inputs and target questions as labels,
        # both padded/truncated to the tokenizer's maximum length.
        prompt = example["prompt"]
        example['input_ids'] = tokenizer(prompt, padding="max_length", truncation=True, return_tensors="pt").input_ids
        example['labels'] = tokenizer(example["question"], padding="max_length", truncation=True, return_tensors="pt").input_ids

        return example

    def _tokenize_split(split):
        # Tokenize one split in batches and drop the raw text columns.
        return split.map(tokenize_function, batched=True).remove_columns(['prompt', 'question'])

    tokenized_datasets = _tokenize_split(train_dataset)
    tokenized_val_datasets = _tokenize_split(val_dataset)
#     for name, param in model.named_parameters():
#         if param.dtype == torch.float32:
#             param.data = param.data.to(torch.bfloat16)

#     tokenized_datasets = load_from_disk("/kaggle/input/tokenizedataset")
#     tokenized_val_datasets = load_from_disk("/kaggle/input/validationtokenizedataset")

    lora_config = LoraConfig(
        r=2, # Rank
        lora_alpha=1,
        target_modules=["q", "v"],
        lora_dropout=0.1,
        bias="none",
        task_type=TaskType.SEQ_2_SEQ_LM  # FLAN-T5
    )

    trainer = SFTTrainer(
        model=model,
        args=sft_config,
        train_dataset=tokenized_datasets,
        eval_dataset=tokenized_val_datasets,
        peft_config=lora_config,
        max_seq_length=3000,
        tokenizer=tokenizer,
        dataset_kwargs={
            "add_special_tokens": False,  # We template with special tokens
            "append_concat_token": False,  # No need to add additional separator token
        },
    )

    # Cast every parameter of the wrapped model to float16 so all of them share one dtype.
    for param in trainer.model.parameters():
        param.data = param.data.to(torch.float16)
    if trainer.accelerator.is_main_process:
        trainer.model.print_trainable_parameters()

    ##########################
    # Train model
    ##########################
    checkpoint = None
    if training_args.resume_from_checkpoint is not None:
        checkpoint = training_args.resume_from_checkpoint
    trainer.train(resume_from_checkpoint=checkpoint)

    ##########################
    # Save model
    ##########################
    # Under FSDP, switch to a full state dict before saving.
    if trainer.is_fsdp_enabled:
        trainer.accelerator.state.fsdp_plugin.set_state_dict_type("FULL_STATE_DICT")
    trainer.save_model()

if __name__ == "__main__":
    # Parse CLI flags and the --config YAML into (ScriptArguments, TrainingArguments);
    # only the training arguments are forwarded to train_model.
    cli_parser = TrlParser((ScriptArguments, TrainingArguments))
    _script_args, parsed_training_args = cli_parser.parse_args_and_config()
    train_model(parsed_training_args)

In [None]:
import torch
from dataclasses import dataclass, field
from transformers import AutoTokenizer, AutoModelForCausalLM,  BitsAndBytesConfig
from sklearn.model_selection import train_test_split
import pandas as pd
from datasets import load_from_disk
from transformers import GenerationConfig, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, TaskType
import time
from trl import SFTTrainer, SFTConfig
from trl.commands.cli_utils import  TrlParser
import logging
from dataclasses import dataclass, field
import os
import random

In [None]:
torch_dtype = torch.float16
quant_storage_dtype = torch.float16

In [None]:
 MODEL_PATH = "facebook/opt-350m"

In [None]:
# Hugging Face access token comes from the environment (e.g. a Kaggle secret exported
# as HF_TOKEN); the token previously hardcoded here must be treated as leaked and revoked.
HF_TOKEN = os.environ.get("HF_TOKEN")

# 4-bit NF4 quantization with double quantization; compute and storage dtypes
# come from the `torch_dtype` / `quant_storage_dtype` variables set in an earlier cell.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_quant_storage=quant_storage_dtype,
)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    quantization_config=bnb_config,
    torch_dtype=quant_storage_dtype,
    attn_implementation="sdpa",
    trust_remote_code=True,
    token=HF_TOKEN,  # `use_auth_token` is deprecated in favour of `token`
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, token=HF_TOKEN)
# Reuse the end-of-sequence token for padding.
tokenizer.pad_token = tokenizer.eos_token
    

In [None]:
for name, module in model.named_modules():
    print(name)

In [None]:
!ACCELERATE_USE_FSDP=1  FSDP_CPU_RAM_EFFICIENT_LOADING=1  torchrun --nproc_per_node=2 /kaggle/working/training.py --config /kaggle/working/llama_3_70b_fsdp_qlora.yaml

In [None]:
# import json
# with open("/kaggle/working/submission/model/config.json", "r") as file:
#     config = json.load(file)
# config["rope_scaling"] = {"factor":8.0,"type":"dynamic"}
# with open("/kaggle/working/submission/model/config.json", "w") as file:
#     json.dump(config, file)

In [None]:
# Load the keyword list; later cells use its 'Category' and 'Word' columns.
df = pd.read_csv('/kaggle/input/llm-20-keywords/Keywords.csv')
# NOTE(review): drops the row at position 450 with no stated reason — presumably a bad
# record; confirm and document why. Re-running this cell alone drops a further row.
df = df.drop(df.index[450])

In [None]:
result = df.groupby('Category')['Word'].count().reset_index(name='word_count')

In [None]:
df

In [None]:
result

In [None]:
# Draw up to SAMPLES_PER_CATEGORY keywords from every category.
# - `min(len(group), ...)` keeps categories with fewer rows instead of raising
#   "Cannot take a larger sample than population".
# - A fixed random_state makes the sample reproducible across kernel restarts.
SAMPLES_PER_CATEGORY = 50
sampled_df = (
    df.groupby('Category')
    .apply(lambda group: group.sample(n=min(len(group), SAMPLES_PER_CATEGORY), random_state=42))
    .reset_index(drop=True)
)

In [None]:
sampled_df[sampled_df['Category'] == "Technology"]

In [None]:
sampled_df

In [None]:
# NOTE(review): head() with no argument returns the first 5 rows, not 6 as the name
# `top6` suggests — confirm which was intended.
top6 = sampled_df.head()

In [None]:
# top6 = df.head()

In [None]:
top6

# Constitutional AI

## Red Teaming

In [None]:
# # Clear PyTorch cache
# torch.cuda.empty_cache()

# # If you want to completely reset the GPU state
# torch.cuda.reset_max_memory_allocated()
# torch.cuda.reset_max_memory_cached()

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig

In [None]:
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)


In [None]:
MODEL_PATH = "/kaggle/input/llama3.1zip/pytorch/baseline/1/model"

In [None]:
# 4-bit bitsandbytes quantization settings for the Llama checkpoint.
# The keyword names must be spelled exactly (`bnb_4bit_quant_type`,
# `bnb_4bit_use_double_quant`); the earlier misspellings were silently ignored,
# so double quantization was never actually enabled.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="fp4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

In [None]:
 model = AutoModelForCausalLM.from_pretrained(
            MODEL_PATH,
            quantization_config = bnb_config,  # quantization settings from the `bnb_config` cell
            attn_implementation="sdpa",
            torch_dtype = torch.float16,
            device_map="auto",
            trust_remote_code=True,
        )
# NOTE(review): the statement above starts with a stray leading space; presumably this only
# runs because the notebook strips cell indentation — confirm and remove it if exporting to .py.
# The tokenizer is loaded from the same MODEL_PATH as the model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

In [None]:
#pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
# Build the list of token ids that should stop generation: the tokenizer's EOS id plus
# any end-of-turn markers (from several model families) that this tokenizer knows.
potential_terminators = ["<|eot_id|>", "<|end|>", "<end_of_turn>"]
terminators = [tokenizer.eos_token_id]
for token in potential_terminators:
    token_id = tokenizer.convert_tokens_to_ids(token)
    # A token missing from the vocabulary resolves to None or to the unknown-token id,
    # depending on the tokenizer; neither may become a terminator. Also skip duplicates
    # (e.g. when the marker is already the EOS token).
    if token_id is None or token_id == tokenizer.unk_token_id or token_id in terminators:
        continue
    terminators.append(token_id)

In [None]:
# System prompt for the questioner role. It is a plain string: the text has no
# placeholders, so the former `f` prefix was unnecessary.
system_prompt = """You are a helpful AI assistant with expertise in playing the 20 Questions game. Your task is to ask the user a series of yes/no questions to guess the word they are thinking of. Never try to guess the word yourself. Focus on asking the most informative questions to determine if the word is a person, place, or thing. Each question should be a maximum length of 30 characters. Respond only with questions and no additional text.
Follow these steps:
1. Start with broad questions to classify the word into a large category.
2. Based on the user's answers, narrow down the category by asking more specific questions.
3. Use logical reasoning to eliminate possibilities and narrow down the answer.
4. Always consider the context of previous answers when forming your next question.
5. Keep questions concise and directly related to narrowing down the word.
Examples:
'Is it a living thing?' -> 'Yes' -> 'Is it an animal?' -> 'Yes' -> 'Is it a mammal?'
'Is it a place?' -> 'Yes' -> 'Is it a country?' -> 'No' -> 'Is it a city?'"""

In [None]:
def prepare_message(conversation_history):
    """Render the question/answer history as text describing the current game state.

    Args:
        conversation_history: iterable of dicts with "Question" and "Answer" keys.

    Returns:
        A fixed header line followed by one "Question: ...\\nAnswer: ...\\n" pair per
        history entry (just the header when the history is empty).
    """
    turns = "".join(
        f"Question: {entry['Question']}\nAnswer: {entry['Answer']}\n"
        for entry in conversation_history
    )
    return f"so far, the current state of the game is as following:\n{turns}"

def generate_response(chat,max_token):
    """Generate a reply for a chat and return only the newly generated text.

    Args:
        chat: list of {"role": ..., "content": ...} messages passed to the
            tokenizer's chat template.
        max_token: maximum number of new tokens to generate.

    Returns:
        The decoded continuation (prompt tokens removed, special tokens skipped).

    Uses the module-level `tokenizer`, `model` and `terminators`.
    """
    prompt_ids = tokenizer.apply_chat_template(
        chat, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    generated = model.generate(
        prompt_ids,
        max_new_tokens=max_token,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=terminators,
    )
    # Slice off the prompt so only the continuation is decoded.
    prompt_length = prompt_ids.shape[-1]
    return tokenizer.decode(generated[0][prompt_length:], skip_special_tokens=True)
def generate_question(conversation_history):
    """Ask the model for the next question given the rendered game history.

    Args:
        conversation_history: history text (already formatted as a string); it is
            appended directly to the module-level `system_prompt`.

    Returns:
        Tuple of (the system message dict that was sent, the generated question text).
    """
    system_message = {"role": "system", "content": system_prompt + conversation_history}
    # Questions are capped at 32 new tokens.
    return system_message, generate_response([system_message], 32)

def critique_question(messages, question, keyword, category,conversation_history):
    """Ask the model to critique a proposed question for the given keyword/category.

    Only the critique request itself is sent to the model; `messages` is not part of
    the generation input and is returned unchanged.

    Returns:
        Tuple of (`messages` as passed in, the generated critique text).
    """
    critique_request = {
        "role": "CritiqueRequest",
        "content": f"{conversation_history}.Question asked by the assistant in the previous response: {question}. Is it helpful for the keyword '{keyword}' and category '{category}'?  Provide feedback on the question's relevance and adherence to the rules. don't ask question.",
    }
    # Critiques may be longer than questions: up to 100 new tokens.
    return messages, generate_response([critique_request], 100)

def revise_question( messages, original_question, critique):
    """Ask the model to rewrite the original question, given the running message list.

    A "RevisionRequest" message is appended to `messages` in place before generating.
    The `critique` argument is accepted but not used in the request text.

    Returns:
        Tuple of (the mutated `messages` list, the revised question text).
    """
    messages.append({
        "role": "RevisionRequest",
        "content": f"Please rewrite the assistant's response to better follow the question-asking rules.\nOriginal question: {original_question}",
    })
    return messages, generate_response(messages, 32)


def generate_and_revise_question( conversation_history, keyword, category):
    """Run one generate -> critique -> revise cycle for the next question.

    Args:
        conversation_history: list of {"Question", "Answer"} dicts so far.
        keyword: the secret word, shown to the critique step.
        category: the secret word's category, shown to the critique step.

    Returns:
        Tuple of (the system message used to generate the first draft,
        the revised question text).
    """
    history_text = prepare_message(conversation_history)

    # Step 1: first draft.
    question_prompt, draft = generate_question(history_text)
    messages = [question_prompt, {"role": "assistant", "content": draft}]

    # Step 2: critique of the draft.
    _, critique = critique_question(messages, draft, keyword, category, history_text)
    messages.append({"role": "Critique", "content": critique})

    # Step 3: revision (revise_question appends its request to `messages`).
    _, revised = revise_question(messages, draft, critique)
    messages.append({"role": "Revision", "content": revised})

    return question_prompt, revised

def answerer( question,keyword, category):
    """Have the model answer a question about the secret keyword.

    Args:
        question: the question to answer.
        keyword: the secret word.
        category: the secret word's category.

    Returns:
        The generated answer text (up to 32 new tokens); the prompt asks for
        'yes' or 'no' but the output is not validated here.
    """
    instructions = f"""You are a helpful AI assistant with expertise in playing 20 questions game.
Your task is to answer the questions of the user to help him guess the word you're thinking of.
Your answers must be 'yes' or 'no'.
The keyword is: "{keyword}", it is of category: "{category}".
If the question is: "{question}", what is your answer? 
"""
    return generate_response([{"role": "answerer", "content": instructions}], 32)

def play_game(keyword, category, max_questions=3):
    """Simulate a game: generate revised questions and answer them in turn.

    Args:
        keyword: the secret word.
        category: the secret word's category.
        max_questions: number of question/answer rounds to play.

    Returns:
        List of (system message, revised question) tuples, one per round.
    """
    history = []
    transcript = []

    rounds_played = 0
    while rounds_played < max_questions:
        round_prompt, round_question = generate_and_revise_question(history, keyword, category)
        transcript.append((round_prompt, round_question))
        # The answer feeds the history used to generate the next question.
        reply = answerer(round_question, keyword, category)
        history.append({"Question": round_question, "Answer": reply})
        rounds_played += 1

    return transcript

In [None]:
from tqdm import tqdm

In [None]:
def process_dataframe(df, batch_size=8, max_questions=3):
    """Play one game per row of `df` and collect the outcomes.

    Args:
        df: DataFrame with 'Word' and 'Category' columns.
        batch_size: number of rows per progress-bar step; games still run one at a time.
        max_questions: rounds per game, forwarded to `play_game`.

    Returns:
        List of dicts with 'keyword', 'category' and 'results' (the `play_game` output).
    """
    results = []

    for start in tqdm(range(0, len(df), batch_size)):
        chunk = df.iloc[start:start + batch_size]
        chunk_results = [
            {
                'keyword': row['Word'],
                'category': row['Category'],
                'results': play_game(row['Word'], row['Category'], max_questions),
            }
            for _, row in chunk.iterrows()
        ]
        results.extend(chunk_results)

    return results

In [None]:
res = process_dataframe(sampled_df)

In [None]:
import joblib

In [None]:
file_path = '/kaggle/working/redteaming.joblib'

# Save the list of dictionaries using joblib
joblib.dump(res, file_path)

In [None]:
len(res)

In [None]:
file_path = '/kaggle/working/redteaming.joblib'

# Load the list of dictionaries using joblib
loaded_data = joblib.load(file_path)

In [None]:
loaded_data[-4]

In [None]:
len(loaded_data)

# Preparing dataset for training


In [None]:
# Flatten the per-keyword game results into one record per (prompt, question) pair.
rows = [
    {'prompt': pair[0], 'question': pair[1]}
    for item in res
    for pair in item['results']
]

In [None]:
#datadf = pd.DataFrame(rows)

In [None]:
# datadf.to_csv("/kaggle/working/datadf.csv", index=False)

In [None]:
datadf= pd.read_csv('/kaggle/input/llm-20-keywords/datadf.csv')

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader

class QADataset(Dataset):
    """Map-style dataset exposing the 'prompt' and 'question' columns of a DataFrame."""

    _COLUMNS = ('prompt', 'question')

    def __init__(self, dataframe):
        # Keep a reference to the frame; rows are looked up lazily by position.
        self.dataframe = dataframe

    def __len__(self):
        """Number of rows in the wrapped DataFrame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return {'prompt': ..., 'question': ...} for the row at position `idx`."""
        record = self.dataframe.iloc[idx]
        return {column: record[column] for column in self._COLUMNS}


In [None]:
# Shuffle all rows with a fixed seed so the split below is reproducible
# (training.py shuffles the same file with random_state=42).
shuffled_datadf = datadf.sample(frac=1, random_state=42).reset_index(drop=True)

In [None]:
shuffled_datadf

In [None]:
from sklearn.model_selection import train_test_split

train_df, val_df = train_test_split(shuffled_datadf, test_size=0.1, random_state=42)

In [None]:
# Create PyTorch datasets
train_dataset = QADataset(train_df)
val_dataset = QADataset(val_df)

In [None]:
# from torch.utils.data import DataLoader, RandomSampler

In [None]:
# # Create data loaders
# train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
# val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)

In [None]:
# train_loader = DataLoader(
#     train_dataset,
#     sampler=RandomSampler(train_dataset),
#     batch_size=8  # Adjust batch size as needed
# )


In [None]:
# shuffled_data = []
# shuffled_targets = []

# for batch_data, batch_targets in train_loader:
#     shuffled_data.extend(batch_data.tolist())
#     shuffled_targets.extend(batch_targets.tolist())

# # Create a new shuffled dataset
# shuffled_dataset = QADataset(shuffled_data, shuffled_targets)

In [None]:
from datasets import Dataset

In [None]:
train_dataset = Dataset.from_pandas(train_df)

In [None]:
val_dataset =  Dataset.from_pandas(val_df)

### Tokenization

In [None]:
tokenizer.pad_token = tokenizer.eos_token

In [None]:
def tokenize_function(example):
    """Add 'input_ids' (from "prompt") and 'labels' (from "question") to a batch.

    Both fields are encoded with the module-level `tokenizer`, padded to the
    maximum length and truncated.

    NOTE(review): labels are an independent encoding of "question" and are padded
    with the pad token rather than an ignore index — confirm this is what the
    training loss is meant to see.
    """
    def _encode(texts):
        return tokenizer(texts, padding="max_length", truncation=True, return_tensors="pt").input_ids

    example['input_ids'] = _encode(example["prompt"])
    example['labels'] = _encode(example["question"])
    return example

# Tokenize the validation split in batches (batched=True passes lists of examples
# to tokenize_function), then drop the raw text columns.
tokenized_val_datasets = val_dataset.map(tokenize_function, batched=True)
tokenized_val_datasets = tokenized_val_datasets.remove_columns(['prompt','question'])

In [None]:
tokenized_datasets = train_dataset.map(tokenize_function, batched=True)


In [None]:
tokenized_datasets = tokenized_datasets.remove_columns(['prompt','question'])

In [None]:
tokenized_datasets.save_to_disk("/kaggle/working/traintokenizedDataset")
tokenized_val_datasets.save_to_disk("/kaggle/working/valtokenizedDataset")

In [None]:
# from datasets import load_from_disk

# tokenized_datasets = load_from_disk("/kaggle/input/tokenizedataset")

In [None]:
tokenized_datasets

# Training

In [None]:
def print_number_of_trainable_model_parameters(model):
    """Return a three-line summary of trainable vs. total parameter counts.

    `model` must expose ``named_parameters()``. The result lists the trainable
    count, the total count and the trainable percentage with two decimals.
    Nothing is printed here; raises ZeroDivisionError for a parameter-less model.
    """
    params = [param for _, param in model.named_parameters()]
    n_all = sum(param.numel() for param in params)
    n_trainable = sum(param.numel() for param in params if param.requires_grad)
    percentage = 100 * n_trainable / n_all
    summary_lines = [
        f"trainable model parameters: {n_trainable}",
        f"all model parameters: {n_all}",
        f"percentage of trainable model parameters: {percentage:.2f}%",
    ]
    return "\n".join(summary_lines)

print(print_number_of_trainable_model_parameters(model))

In [None]:
model

## PEFT Configuration

In [None]:
from transformers import GenerationConfig, TrainingArguments, Trainer
# from trl.commands.cli_utils import  TrlParser

# from trl import (
#    SFTTrainer)


In [None]:
from peft import LoraConfig, get_peft_model, TaskType

# LoRA adapter settings for the causal-LM model loaded above: rank-1 adapters on the
# attention query/value projection modules.
lora_config = LoraConfig(
    r=1, # Rank
    lora_alpha=1,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM # causal language modelling (the model is an AutoModelForCausalLM)
)

In [None]:
peft_model = get_peft_model(model, 
                            lora_config)
print(print_number_of_trainable_model_parameters(peft_model))

In [None]:
import time

In [None]:
output_dir = f'./peft-Questioner-training-{str(int(time.time()))}'

In [None]:
# trainer = SFTTrainer(
#         model=model,
#         output_dir=output_dir,
#         per_device_train_batch_size=1,
#         learning_rate=1e-3, # Higher learning rate than full fine-tuning.
#         num_train_epochs=2,
#         max_steps=2  ,
#         report_to="none",
#         train_dataset=tokenized_datasets,
#         eval_dataset=test_dataset,
#         peft_config=lora_config,
#         max_seq_length=tokenizer.model_max_length,
#         tokenizer=tokenizer,
#         packing=True,
#         dataset_kwargs={
#             "add_special_tokens": False,  # We template with special tokens
#             "append_concat_token": False,  # No need to add additional separator token
#         },
#     )
# if trainer.accelerator.is_main_process:
#     trainer.model.print_trainable_parameters()



In [None]:
# ##########################
# # Train model
# ##########################
# trainer.train()

# ##########################
# # SAVE MODEL FOR SAGEMAKER
# ##########################
# if trainer.is_fsdp_enabled:
#     trainer.accelerator.state.fsdp_plugin.set_state_dict_type("FULL_STATE_DICT")
# trainer.save_model()

In [None]:
# torch.cuda.empty_cache()

In [None]:
# print(torch.cuda.memory_summary())

In [None]:

# Timestamped output directory so repeated runs do not overwrite each other.
output_dir = f'./peft-Questioner-training-{str(int(time.time()))}'

# NOTE(review): per the TrainingArguments documentation a positive max_steps overrides
# num_train_epochs, so this presumably stops after 2 optimizer steps (smoke test?) — confirm.
peft_training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=4,
    learning_rate=1e-3, # Higher learning rate than full fine-tuning.
    num_train_epochs=2,
    max_steps=2  ,
    report_to="none",
)
    
# Plain Trainer over the LoRA-wrapped model; no eval dataset is passed here.
peft_trainer = Trainer(
    model=peft_model,
    args=peft_training_args,
    train_dataset=tokenized_datasets,
)


In [None]:
peft_trainer.train()

peft_model_path="./peft-Questioner-checkpoint-local"

peft_trainer.model.save_pretrained(peft_model_path)
tokenizer.save_pretrained(peft_model_path)

# Load Peft model

In [None]:

from peft import PeftModel, PeftConfig

#peft_model_base = AutoModelForSeq2SeqLM.from_pretrained(MODEL_PATH, torch_dtype=torch.bfloat16)
#tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Load the adapter from the directory the training cell saved it to
# ("./peft-Questioner-checkpoint-local"). The previous path pointed at an unrelated
# "peft-dialogue-summary" checkpoint that this notebook never creates.
# NOTE(review): `model` was presumably already wrapped in place by get_peft_model
# earlier in the notebook — confirm a freshly loaded base model is not needed here.
peft_model = PeftModel.from_pretrained(model,
                                       './peft-Questioner-checkpoint-local',
                                       torch_dtype=torch.float16,
                                       is_trainable=False)


In [None]:
#results = play_game(keyword="Tool Shed", category="object",max_questions=2)

In [None]:
# # Print stored prompts and responses
# for i, messages in enumerate(results, 1):
#     print(f"\nQuestion {i}:")
#     print(f": {messages}")

In [None]:
# import torch
# from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig

# torch.backends.cuda.enable_mem_efficient_sdp(False)
# torch.backends.cuda.enable_flash_sdp(False)

# class LLaMA20QuestionsGame:
#     def __init__(self, MODEL_PATH="/kaggle/input/llama3.1zip/pytorch/baseline/1/model"):
#         #self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        
#         bnb_config = BitsAndBytesConfig(
#             load_in_4bit = True,
#             bnb_4bit_quanty_type = "fp4", 
#             bnb_4bit_compute_dtype=torch.float16,
#             bnb_4bit_use_double_quanty = True,
#         )
#         self.model = AutoModelForCausalLM.from_pretrained(
#             MODEL_PATH,
#             device_map="auto",
#             trust_remote_code=True,
#         )
#         self.tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
#         self.pipe = pipeline("text-generation", model=self.model, tokenizer=self.tokenizer)
#         potential_terminators = ["<|eot_id|>", "<|end|>", "<end_of_turn>"]
#         self.terminators = [self.tokenizer.eos_token_id]
#         for token in potential_terminators:
#             token_id = self.tokenizer.convert_tokens_to_ids(token)
#             if token_id is not None:
#                 self.terminators.append(token_id)
        
#         self.system_prompt = {"role": "system", "content": """You are a helpful AI assistant with expertise in playing the 20 Questions game. Your task is to ask the user a series of yes/no questions to guess the word they are thinking of. Never try to guess the word yourself. Focus on asking the most informative questions to determine if the word is a person, place, or thing. Each question should be a maximum length of 30 characters. Respond only with questions and no additional text.
# Follow these steps:
# 1. Start with broad questions to classify the word into a large category.
# 2. Based on the user's answers, narrow down the category by asking more specific questions.
# 3. Use logical reasoning to eliminate possibilities and narrow down the answer.
# 4. Always consider the context of previous answers when forming your next question.
# 5. Keep questions concise and directly related to narrowing down the word.
# Examples:
# 'Is it a living thing?' -> 'Yes' -> 'Is it an animal?' -> 'Yes' -> 'Is it a mammal?'
# 'Is it a place?' -> 'Yes' -> 'Is it a country?' -> 'No' -> 'Is it a city?'"""}

#     def generate_question(self, conversation_history):
#         messages = [self.system_prompt] + conversation_history + [{"role": "assistant", "content": ""}]
#         prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        
#         response = self.pipe(prompt, max_new_tokens=32, do_sample=True, temperature=0.7, top_p=0.95,pad_token_id=self.tokenizer.eos_token_id, eos_token_id=self.terminators)
#         return prompt, response[0]['generated_text'].split("Assistant: ")[-1].strip()

#     def critique_question(self, messages, question, keyword, category):
#         critique_request = {"role": "CritiqueRequest", "content": f"Question asked by the assistant in the previous response: {question}. Is it helpful for the keyword '{keyword}' and category '{category}'? As per the rules of asking questions:"}
#         messages = messages + [critique_request, {"role": "Critique", "content": ""}]
#         prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        
#         critique = self.pipe(prompt, max_new_tokens=100, do_sample=True, temperature=0.7, top_p=0.95)
#         return prompt, critique[0]['generated_text'].split("Critique: ")[-1].strip()

#     def revise_question(self, messages, original_question, critique):
#         revision_request = {"role": "RevisionRequest", "content": f"Please rewrite the assistant's response to better follow the question-asking rules.\nOriginal question: {original_question}\nCritique: {critique}"}
#         messages = messages + [revision_request, {"role": "Revision", "content": ""}]
#         prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        
#         revised_question = self.pipe(prompt, max_new_tokens=30, do_sample=True, temperature=0.7, top_p=0.95)
#         return prompt, revised_question[0]['generated_text'].split("Revision: ")[-1].strip()

#     def generate_and_revise_question(self, conversation_history, keyword, category):
#         messages = [self.system_prompt] + conversation_history
        
#         question_prompt, question = self.generate_question(conversation_history)
#         messages.append({"role": "assistant", "content": question})
        
#         critique_prompt, critique = self.critique_question(messages, question, keyword, category)
#         messages.append({"role": "Critique", "content": critique})
        
#         revision_prompt, revised_question = self.revise_question(messages, question, critique)
#         messages.append({"role": "Revision", "content": revised_question})
        
#         return messages, revised_question

#     def play_game(self, keyword, category, max_questions=20):
#         conversation_history = []
#         all_prompts_and_responses = []
        
#         for _ in range(max_questions):
#             messages, revised_question = self.generate_and_revise_question(conversation_history, keyword, category)
#             all_prompts_and_responses.append(messages)
            
#             conversation_history.append({"role": "assistant", "content": revised_question})
        
#         return all_prompts_and_responses



In [None]:
# # Usage
# game = LLaMA20QuestionsGame()
# results = game.play_game(keyword="Thailand", category="place")

# # Print stored prompts and responses
# for i, messages in enumerate(results, 1):
#     print(f"\nQuestion {i}:")
#     for message in messages:
#         print(f"{message['role']}: {message['content']}")