In [None]:
!pip install -qU transformers accelerate peft datasets bitsandbytes

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/10.4 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━[0m [32m6.4/10.4 MB[0m [31m192.5 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m10.4/10.4 MB[0m [31m206.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.4/10.4 MB[0m [31m110.7 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/354.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m354.7/354.7 kB[0m [31m31.2 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/411.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m411.1/411.1 kB[0m [31m33.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
!pip install -qU trl

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/336.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m336.4/336.4 kB[0m [31m20.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# 1. Dataset Preparation
import json
from datasets import Dataset

def load_and_format_dataset(file_path):
    """Load a JSON question bank and build a single-column text ``Dataset``.

    The file must contain a JSON array of objects. Each object is read through
    the keys ``"Comprehension"``, ``"Question"`` and ``"Options"``; ``"Options"``
    must be a mapping (its ``.items()`` are rendered as ``label: text`` lines).
    Every record becomes one example whose ``"text"`` value is the instruction
    prompt followed by the expected question/options answer.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The result of ``Dataset.from_list`` over one ``{"text": ...}`` row per
        record, in file order.

    Raises:
        KeyError: If a record is missing one of the three expected keys.
    """
    # Decode explicitly as UTF-8 instead of the platform default so that
    # non-ASCII punctuation in the passages is read the same way everywhere.
    with open(file_path, encoding="utf-8") as f:
        records = json.load(f)

    formatted_data = []
    for record in records:
        # Instruction half of the example: fixed task wording + the passage.
        instruction = (
            "Generate question and options based on this comprehension:\n"
            f"{record['Comprehension']}"
        )
        # Answer half: the question, then one "label: text" line per option.
        option_lines = "\n".join(
            f"{label}: {text}" for label, text in record['Options'].items()
        )
        output = f"Question: {record['Question']}\nOptions:\n{option_lines}"
        formatted_data.append({"text": f"{instruction}\n{output}"})

    return Dataset.from_list(formatted_data)

# Load and split data
dataset = load_and_format_dataset("cat_questions.json")
# Hold out 10% for evaluation. The seed is fixed so the same examples end up in
# the eval split on every run, keeping validation losses comparable across runs.
split_dataset = dataset.train_test_split(test_size=0.1, seed=42)

# 2. Model & Tokenizer Setup
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# Single source of truth for the checkpoint used by both the model and the tokenizer.
MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"

# 4-bit NF4 weight quantization; matmuls are computed in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Fall back to EOS for padding only when the checkpoint defines no pad token.
# Overwriting an existing pad token would make padding indistinguishable from
# end-of-sequence in the tokenizer that gets saved next to the adapter.
# NOTE(review): the Qwen2.5 tokenizer appears to ship its own pad token - confirm.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# 3. LoRA Configuration
from peft import LoraConfig

# LoRA adapters on all four attention projections.
# The attention output projection in Qwen2 is named "o_proj"; the previous
# "output_proj" entry matched no module, so that projection was never adapted.
lora_config = LoraConfig(
    r=64,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# 4. Training Setup
from transformers import TrainingArguments
from trl import SFTTrainer

# Hyper-parameters for the supervised fine-tuning run.
training_args = TrainingArguments(
    output_dir="./results",
    # Optimisation
    optim="paged_adamw_32bit",
    learning_rate=2e-5,
    num_train_epochs=3,
    fp16=True,
    # Batching: 2 examples per device, gradients accumulated over 4 steps
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Logging every 10 steps; evaluate and checkpoint once per epoch
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch",
)

# The trainer receives the quantized base model together with the LoRA config
# and the train/test halves of the split dataset.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    peft_config=lora_config,
    train_dataset=split_dataset["train"],
    eval_dataset=split_dataset["test"],
)

# 5. Run the fine-tuning loop
trainer.train()

# 6. Write the trained model and the tokenizer into the same directory
save_dir = "qwen-lora-rc"
trainer.model.save_pretrained(save_dir)
# Kept as the final expression so the cell still displays the saved file paths.
tokenizer.save_pretrained(save_dir)

Converting train dataset to ChatML:   0%|          | 0/359 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/359 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/359 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/359 [00:00<?, ? examples/s]

Converting eval dataset to ChatML:   0%|          | 0/40 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/40 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/40 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/40 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mmaitypro402[0m ([33mmaitypro402-haldia-institute-of-management[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss
1,3.0004,2.918293
2,2.8197,2.763311
3,2.7611,2.704244


('qwen-lora-rc/tokenizer_config.json',
 'qwen-lora-rc/special_tokens_map.json',
 'qwen-lora-rc/vocab.json',
 'qwen-lora-rc/merges.txt',
 'qwen-lora-rc/added_tokens.json',
 'qwen-lora-rc/tokenizer.json')

In [None]:
from peft import PeftModel

# Reload the base checkpoint (no quantization config is passed here) and
# attach the adapter saved under "qwen-lora-rc".
base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct", device_map="auto")
# Rebinds `model`: from here on the name refers to the reloaded PEFT model.
model = PeftModel.from_pretrained(base_model, "qwen-lora-rc")

In [None]:
def generate_question(context, return_full_text=True):
    """Generate a question with answer options for a comprehension passage.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        context: The comprehension passage to generate a question for.
        return_full_text: When True (default) the decoded string contains the
            prompt followed by the continuation. When False only the newly
            generated continuation is decoded.

    Returns:
        The decoded text with special tokens stripped.
    """
    # Use exactly the instruction wording and layout that
    # load_and_format_dataset produces for training ("...based on this
    # comprehension:\n<passage>\n"). The previous triple-quoted prompt used
    # different wording and injected indentation spaces around the passage,
    # so the fine-tuned model was queried with a format it was not trained on.
    prompt = f"Generate question and options based on this comprehension:\n{context}\n"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        # temperature/top_p only take effect when sampling; request it
        # explicitly rather than relying on the checkpoint's generation defaults.
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )

    token_ids = outputs[0]
    if not return_full_text:
        # The returned sequence starts with the prompt tokens; drop them.
        prompt_length = len(inputs["input_ids"][0])
        token_ids = token_ids[prompt_length:]

    return tokenizer.decode(token_ids, skip_special_tokens=True)

In [None]:
content = """Fears of artificial intelligence (AI) have haunted humanity since the very beginning of the computer age. Hitherto these fears focused on machines using physical means to kill, enslave or replace people. But over the past couple of years new AI tools have emerged that threaten the survival of human civilisation from an unexpected direction. AI has gained some remarkable abilities to manipulate and generate language, whether with words, sounds or images. AI has thereby hacked the operating system of our civilisation.

Language is the stuff almost all human culture is made of. Human rights, for example, aren't inscribed in our DNA. Rather, they are cultural artefacts we created by telling stories and writing laws. Gods aren't physical realities. Rather, they are cultural artefacts we created by inventing myths and writing scriptures….What would happen once a non-human intelligence becomes better than the average human at telling stories, composing melodies, drawing images, and writing laws and scriptures? When people think about Chatgpt and other new AI tools, they are often drawn to examples like school children using AI to write their essays. What will happen to the school system when kids do that? But this kind of question misses the big picture. Forget about school essays. Think of the next American presidential race in 2024, and try to imagine the impact of AI tools that can be made to mass-produce political content, fake-news stories and scriptures for new cults…

Through its mastery of language, AI could even form intimate relationships with people, and use the power of intimacy to change our opinions and worldviews. Although there is no indication that AI has any consciousness or feelings of its own, to foster fake intimacy with humans it is enough if the AI can make them feel emotionally attached to it….

What will happen to the course of history when AI takes over culture, and begins producing stories, melodies, laws and religions? Previous tools like the printing press and radio helped spread the cultural ideas of humans, but they never created new cultural ideas of their own. AI is fundamentally different. AI can create completely new ideas, completely new culture…. Of course, the new power of AI could be used for good purposes as well. I won't dwell on this, because the people who develop AI talk about it enough….

We can still regulate the new AI tools, but we must act quickly. Whereas nukes cannot invent more powerful nukes, AI can make exponentially more powerful AI.… Unregulated AI deployments would create social chaos, which would benefit autocrats and ruin democracies. Democracy is a conversation, and conversations rely on language. When AI hacks language, it could destroy our ability to have meaningful conversations, thereby destroying democracy….And the first regulation I would suggest is to make it mandatory for AI to disclose that it is an AI. If I am having a conversation with someone, and I cannot tell whether it is a human or an AI—that's the end of democracy. This text has been generated by a human. Or has it?"""

# Generate a question for the sample passage and show the decoded text.
generated_text = generate_question(content)
print(generated_text)

Generate question and options based on the following comprehension:
    Fears of artificial intelligence (AI) have haunted humanity since the very beginning of the computer age. Hitherto these fears focused on machines using physical means to kill, enslave or replace people. But over the past couple of years new AI tools have emerged that threaten the survival of human civilisation from an unexpected direction. AI has gained some remarkable abilities to manipulate and generate language, whether with words, sounds or images. AI has thereby hacked the operating system of our civilisation.

Language is the stuff almost all human culture is made of. Human rights, for example, aren't inscribed in our DNA. Rather, they are cultural artefacts we created by telling stories and writing laws. Gods aren't physical realities. Rather, they are cultural artefacts we created by inventing myths and writing scriptures….What would happen once a non-human intelligence becomes better than the average huma