In [None]:
!pip -q install unsloth accelerate bitsandbytes transformers datasets
!pip -q install trl

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.8/65.8 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m359.7/359.7 kB[0m [31m35.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m44.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m506.8/506.8 kB[0m [31m43.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.7/47.7 MB[0m [31m51.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m423.1/423.1 kB[0m [31m33.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m288.6/288.6 kB[0m [31m24.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.9/122.9 MB[0m [31m20.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━

### Experimentation

In [None]:
import json
from tqdm.auto import tqdm
import warnings
warnings.filterwarnings("ignore")

from datasets import Dataset
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments

print("✅ Imports done")

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
✅ Imports done


In [None]:
class Config:
    MODEL_NAME = "microsoft/Phi-3.5-mini-instruct"
    OUTPUT_DIR = "./phi-mcq-unsloth-updated"
    DATASET_PATH = "/content/0-15+ Python MCQ.json"
    SAVE_STRATEGY="steps"

    MAX_LENGTH = 1024
    TRAIN_TEST_SPLIT = 0.1

    NUM_EPOCHS = 3
    BATCH_SIZE = 1
    GRADIENT_ACCUMULATION = 16
    LEARNING_RATE = 2e-4
    WARMUP_STEPS = 100
    WEIGHT_DECAY = 0.01

    LORA_R = 32
    LORA_ALPHA = 64
    LORA_DROPOUT = 0.05

config = Config()

In [None]:
def parse_input_field(input_str):
    lines = input_str.strip().split('\n')
    parsed = {}
    for line in lines:
        if ':' in line:
            key, value = line.split(':', 1)
            key = key.strip().lower()
            value = value.strip()
            if key == 'skills':
                parsed['skill'] = value
            elif key == 'experience':
                parsed['experience_level'] = value
            elif 'focus' in key or 'topic' in key:
                parsed['topic'] = value
    return parsed

def parse_output_field(output_dict):
    try:
        mcq = output_dict['mcq'][0]
        question = mcq['text']
        explanation = mcq['explanation']
        options = mcq['options']

        option_dict = {}
        option_labels = ['A', 'B', 'C', 'D']
        correct_option = None

        for idx, opt in enumerate(options):
            label = option_labels[idx] if idx < len(option_labels) else str(idx)
            option_dict[label] = opt['text']
            if opt['answerType'] == 'CORRECT':
                correct_option = label

        return {
            'question': question,
            'options': option_dict,
            'correct_answer': correct_option,
            'correct_reasoning': explanation
        }
    except:
        return None

def load_and_parse_dataset(json_path):
    with open(json_path, 'r', encoding='utf-8') as f:
        raw_data = json.load(f)

    parsed_data = []
    for item in tqdm(raw_data, desc="Parsing"):
        try:
            input_parsed = parse_input_field(item['input'])
            output_parsed = parse_output_field(item['output'])

            if output_parsed:
                unified = {
                    'skill': input_parsed.get('skill', 'Python'),
                    'experience_level': input_parsed.get('experience_level', '0-2 years'),
                    'topic': input_parsed.get('topic', 'General'),
                    'question': output_parsed['question'],
                    'option_a': output_parsed['options'].get('A', ''),
                    'option_b': output_parsed['options'].get('B', ''),
                    'option_c': output_parsed['options'].get('C', ''),
                    'option_d': output_parsed['options'].get('D', ''),
                    'correct_answer': output_parsed['correct_answer'],
                    'correct_reasoning': output_parsed['correct_reasoning']
                }
                parsed_data.append(unified)
        except:
            continue

    print(f"✅ Parsed {len(parsed_data)} examples")
    return parsed_data

dataset_raw = load_and_parse_dataset(config.DATASET_PATH)

Parsing:   0%|          | 0/2533 [00:00<?, ?it/s]

✅ Parsed 2499 examples


In [None]:
def prepare_dataset(raw_data, tokenizer, config):
    formatted_data = []
    for example in tqdm(raw_data, desc="Formatting"):
        prompt = build_prompt(example, include_response=True)
        formatted_data.append({"text": prompt})

    dataset = Dataset.from_list(formatted_data)

    def tokenize_function(examples):
        tokenized = tokenizer(
            examples['text'],
            truncation=True,
            max_length=config.MAX_LENGTH,
            padding='max_length',
            return_tensors=None
        )
        tokenized['labels'] = tokenized['input_ids'].copy()
        return tokenized

    tokenized_dataset = dataset.map(
        tokenize_function,
        batched=True,
        remove_columns=dataset.column_names,
        desc="Tokenizing"
    )

    split_dataset = tokenized_dataset.train_test_split(test_size=config.TRAIN_TEST_SPLIT, seed=42)
    print(f"✅ Train: {len(split_dataset['train'])}, Val: {len(split_dataset['test'])}")
    return split_dataset

In [None]:
def build_prompt(example, include_response=True):
    system_msg = (
        "You are an expert MCQ generator. Create high-quality multiple-choice "
        "questions with a correct answer and detailed reasoning."
    )

    user_msg = f"""Generate a multiple-choice question (MCQ):

**Skill**: {example['skill']}
**Experience Level**: {example['experience_level']}
**Topic**: {example.get('topic', 'General')}

Create:
1. One clear, specific question.
2. Four options (A, B, C, D).
3. A `correct_answer` field (A/B/C/D).
4. A detailed reasoning under `explanation`.

Output strictly in JSON:

{{
  "mcq": [
    {{
      "text": "...",
      "options": {{
        "A": "...",
        "B": "...",
        "C": "...",
        "D": "..."
      }},
      "correct_answer": "A/B/C/D",
      "explanation": "..."
    }}
  ]
}}
"""

    if include_response:
        # Assistant response including correct_answer field
        response_json = {
            "mcq": [
                {
                    "text": example['question'],
                    "options": {
                        "A": example['option_a'],
                        "B": example['option_b'],
                        "C": example['option_c'],
                        "D": example['option_d']
                    },
                    "correct_answer": example['correct_answer'],
                    "explanation": example['correct_reasoning']
                }
            ]
        }

        assistant_msg = json.dumps(response_json, indent=2)

        formatted = (
            f"<|system|>\n{system_msg}<|end|>\n"
            f"<|user|>\n{user_msg}<|end|>\n"
            f"<|assistant|>\n{assistant_msg}<|end|>"
        )
    else:
        # For inference, leave assistant blank
        formatted = (
            f"<|system|>\n{system_msg}<|end|>\n"
            f"<|user|>\n{user_msg}<|end|>\n"
            f"<|assistant|>\n"
        )

    return formatted

print("✅ Prompt formatter updated with correct_answer")

✅ Prompt formatter updated with correct_answer


In [None]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=config.MODEL_NAME,
    max_seq_length=config.MAX_LENGTH,
    load_in_4bit=True,
)

model = FastLanguageModel.get_peft_model(
    model,
    r=config.LORA_R,
    lora_alpha=config.LORA_ALPHA,
    lora_dropout=config.LORA_DROPOUT,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)

print("✅ Model + LoRA ready")

==((====))==  Unsloth 2025.11.6: Fast Llama patching. Transformers: 4.57.2.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 8.0. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.33.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.26G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/140 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.05.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2025.11.6 patched 32 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


✅ Model + LoRA ready


In [None]:
dataset = prepare_dataset(dataset_raw, tokenizer, config)

Formatting:   0%|          | 0/2499 [00:00<?, ?it/s]

Tokenizing:   0%|          | 0/2499 [00:00<?, ? examples/s]

✅ Train: 2249, Val: 250


In [None]:
training_args = TrainingArguments(
    output_dir=config.OUTPUT_DIR,
    per_device_train_batch_size=config.BATCH_SIZE,
    per_device_eval_batch_size=config.BATCH_SIZE,
    gradient_accumulation_steps=config.GRADIENT_ACCUMULATION,
    learning_rate=config.LEARNING_RATE,
    num_train_epochs=config.NUM_EPOCHS,
    warmup_steps=config.WARMUP_STEPS,
    weight_decay=config.WEIGHT_DECAY,
    logging_steps=20,
    save_steps=200,
    eval_steps=200,
    save_total_limit=2,
    bf16=True,
    report_to="none",
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    dataset_text_field="text",
    max_seq_length=config.MAX_LENGTH,
    args=training_args
)

In [None]:
print("🚀 Training started...")
trainer.train()
print("🎉 Training done!")

The model is already on multiple devices. Skipping the move to device specified in `args`.


🚀 Training started...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2,249 | Num Epochs = 3 | Total steps = 423
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 16
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 16 x 1) = 16
 "-____-"     Trainable parameters = 25,165,824 of 3,846,245,376 (0.65% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
20,4.7315
40,0.5238
60,0.2785
80,0.209
100,0.1828
120,0.1832
140,0.1754
160,0.1612
180,0.1589
200,0.1411


🎉 Training done!


In [None]:
# ============================================================================
# CELL 11: SAVE & EVALUATE
# ============================================================================

final_model_path = f"{config.OUTPUT_DIR}/final_model"
trainer.save_model(final_model_path)
tokenizer.save_pretrained(final_model_path)
print(f"✅ Saved to {final_model_path}")

eval_results = trainer.evaluate()
print(f"✅ Eval results: {eval_results}")

Unsloth: Not an error, but LlamaForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient


✅ Saved to ./phi-mcq-unsloth-updated/final_model


✅ Eval results: {'eval_loss': 0.1096777468919754, 'eval_runtime': 36.8356, 'eval_samples_per_second': 6.787, 'eval_steps_per_second': 6.787, 'epoch': 3.0}


In [None]:
# DOWNLOAD

!zip -r phi-mcq-gpu-improve.zip {config.OUTPUT_DIR}
from google.colab import files
files.download(f'phi-mcq-gpu-improve.zip')
print("✅ Download started")

  adding: phi-mcq-unsloth-updated/ (stored 0%)
  adding: phi-mcq-unsloth-updated/final_model/ (stored 0%)
  adding: phi-mcq-unsloth-updated/final_model/tokenizer.model (deflated 55%)
  adding: phi-mcq-unsloth-updated/final_model/special_tokens_map.json (deflated 76%)
  adding: phi-mcq-unsloth-updated/final_model/tokenizer.json (deflated 85%)
  adding: phi-mcq-unsloth-updated/final_model/added_tokens.json (deflated 62%)
  adding: phi-mcq-unsloth-updated/final_model/adapter_config.json (deflated 56%)
  adding: phi-mcq-unsloth-updated/final_model/README.md (deflated 65%)
  adding: phi-mcq-unsloth-updated/final_model/adapter_model.safetensors (deflated 7%)
  adding: phi-mcq-unsloth-updated/final_model/tokenizer_config.json (deflated 86%)
  adding: phi-mcq-unsloth-updated/final_model/chat_template.jinja (deflated 61%)
  adding: phi-mcq-unsloth-updated/final_model/training_args.bin (deflated 53%)
  adding: phi-mcq-unsloth-updated/checkpoint-423/ (stored 0%)
  adding: phi-mcq-unsloth-updated/

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Download started


In [None]:
# ===============================
#  INFERENCE FOR FINETUNED MODEL
# ===============================

import json
from unsloth import FastLanguageModel
from transformers import TextStreamer

# --------------------------------
# Load your final model + tokenizer
# --------------------------------
MODEL_PATH = "/content/phi-mcq-unsloth-updated/final_model"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = MODEL_PATH,
    max_seq_length = 1024,
    load_in_4bit = True,
)

# model = FastLanguageModel.get_peft_model(
#     model,
# )  # LoRA will auto-load from the folder

print("✅ Finetuned model loaded!")


# -------------------------------------------------
# Build SAME inference prompt format used in training
# -------------------------------------------------
def build_prompt(skill, experience_level, topic):

    system_msg = (
        "You are an expert MCQ generator. Create high-quality multiple-choice "
        "questions with a correct answer and detailed reasoning."
    )

    user_msg = f"""Generate a multiple-choice question (MCQ):

**Skill**: {skill}
**Experience Level**: {experience_level}
**Topic**: {topic}

Create:
1. One clear, specific question.
2. Four options (A, B, C, D).
3. A `correct_answer` field (A/B/C/D).
4. A detailed reasoning under `explanation`.

Output strictly in JSON:

{{
  "mcq": [
    {{
      "text": "...",
      "options": {{
        "A": "...",
        "B": "...",
        "C": "...",
        "D": "..."
      }},
      "correct_answer": "A/B/C/D",
      "explanation": "..."
    }}
  ]
}}
"""

    prompt = (
        f"<|system|>\n{system_msg}<|end|>\n"
        f"<|user|>\n{user_msg}<|end|>\n"
        f"<|assistant|>\n"
    )

    return prompt


# --------------------------
# MCQ Generation Function
# --------------------------
def generate_mcq(skill="Python", experience="0-2 years", topic="Loops"):
    prompt = build_prompt(skill, experience, topic)

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    streamer = TextStreamer(tokenizer)

    output = model.generate(
        **inputs,
        max_new_tokens=300,
        temperature=0.4,
        do_sample=True,
        top_p=0.9,
        repetition_penalty=1.1,
        streamer=streamer,
    )

    decoded = tokenizer.decode(output[0], skip_special_tokens=True)

    return decoded


# --------------------------
# Example Inference
# --------------------------
print("🚀 Generating MCQ...\n")

response = generate_mcq(
    skill="Python",
    experience="1-3 years",
    topic="List Comprehensions"
)

print("\n======================")
print("FINAL MODEL OUTPUT:")
print("======================")
print(response)

==((====))==  Unsloth 2025.11.6: Fast Llama patching. Transformers: 4.57.2.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 8.0. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.33.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
✅ Finetuned model loaded!
🚀 Generating MCQ...

<|system|> You are an expert MCQ generator. Create high-quality multiple-choice questions with a correct answer and detailed reasoning.<|end|><|user|> Generate a multiple-choice question (MCQ):

**Skill**: Python
**Experience Level**: 1-3 years
**Topic**: List Comprehensions

Create:
1. One clear, specific question.
2. Four options (A, B, C, D).
3. A `correct_answer` field (A/B/C/D).
4. A detailed reasoning under `explanation`.

Output strictly in JSON:

{
  "mcq": [
    {
      

In [None]:
# ============================================
#    FULL INFERENCE CODE FOR MCQ GENERATION
# ============================================

import json
from unsloth import FastLanguageModel
from transformers import TextStreamer

MODEL_PATH = "/content/phi-mcq-unsloth-updated/final_model"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_PATH,
    max_seq_length=1024,
    load_in_4bit=True,
)

print("✅ Finetuned model loaded!")


def build_prompt(skill, experience_level, topic):

    system_msg = (
        "You are an expert MCQ generator. Create high-quality multiple-choice "
        "questions with a correct answer and detailed reasoning."
    )

    user_msg = f"""Generate a multiple-choice question (MCQ):

**Skill**: {skill}
**Experience Level**: {experience_level}
**Topic**: {topic}

Create:
1. One clear, specific question.
2. Four options (A, B, C, D).
3. A `correct_answer` field.
4. An `explanation`.

Output in JSON.
"""

    return (
        f"<|system|>\n{system_msg}<|end|>\n"
        f"<|user|>\n{user_msg}<|end|>\n"
        f"<|assistant|>\n"
    )


# ------------------------------------------
# Generate ONE MCQ
# ------------------------------------------
def generate_mcq(skill="Python", experience="0-2 years", topic="Loops"):
    prompt = build_prompt(skill, experience, topic)

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    output = model.generate(
        **inputs,
        max_new_tokens=400,
        temperature=0.4,
        top_p=0.9,
        do_sample=True,
        repetition_penalty=1.1,
    )

    # 🚨 DO NOT USE skip_special_tokens=True (Phi breaks!)
    decoded = tokenizer.decode(output[0])

    # Remove the prompt part to keep only the answer
    if "<|assistant|>" in decoded:
        decoded = decoded.split("<|assistant|>")[-1]

    return decoded.strip()


# ------------------------------------------
# ⭐ Dynamic MCQ generator (YOU choose n)
# ------------------------------------------
def generate_n_mcqs(n, skill="Python", experience="0-2 years", topic="Loops"):
    results = []

    for i in range(n):
        print("\n=====================================")
        print(f"       Generating MCQ {i+1}/{n}")
        print("=====================================\n")

        mcq = generate_mcq(skill, experience, topic)
        results.append(mcq)

        print(mcq)     # 👈 NOW IT PRINTS IMMEDIATELY

    return results


# ------------------------------------------
# Run generator
# ------------------------------------------
n = 10  # 👈 Dynamically change to any number

mcqs = generate_n_mcqs(
    n=n,
    skill="Python",
    experience="1-3 years",
    topic="OOPS"
)

==((====))==  Unsloth 2025.11.6: Fast Llama patching. Transformers: 4.57.2.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 8.0. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.33.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
✅ Finetuned model loaded!

       Generating MCQ 1/10

{
  "skill": "Python",
  "experience_level": "[1-3]",
  "content": "What is the output of `print(my_func())`? Assume that **`def my_func(): pass`** exists.",
  "mcq": [
    {
      "text": "`None`, because no return statement was explicitly provided within the function body or its nested scope.",
      "options": {
        "A": "`Error`",
        "B": "`0`",
        "C": "`True`",
        "D": "The string 'pass'."
      },
      "correct_answer": "A",
      "explanation":

In [None]:
# ------------------------------------------
# Run generator
# ------------------------------------------
n = 10  # 👈 Dynamically change to any number

mcqs = generate_n_mcqs(
    n=n,
    skill="Python",
    experience="1-3 years",
    topic="OOPS"
)


       Generating MCQ 1/10

{
  "skill": "Python",
  "experience_level": "[1-3]",
  "content": "What is the primary difference between `@classmethod` and `@staticmethod`?",
  "options": {
    "A": "`@classmethod` can only be called on instances; `@staticmethod` cannot.",
    "B": "`@classmethod` receives the class as its first argument (`cls`), while `@staticmethod` does not receive any implicit reference to either instance or class.",
    "C": "`@classmethod` modifies state for all objects; `@staticmethod` affects global variables.",
    "D": "`@classmethod` must return None."
  },
  "correct_answer": "B",
  "explanation": "`@classmethod` acts like a 'special' method that operates on the *type* of object it was bound to rather than individual instances. @staticmethods do not have access to anything about their containing type/module by default unless explicitly passed through arguments."
}<|end|><|placeholder6|><|placeholder6|><|placeholder6|><|placeholder6|><|placeholder6|><|placeho

### For Multiple json files

In [None]:
# ======================================================================
# IMPORTS
# ======================================================================

import json
from tqdm.auto import tqdm
import warnings
warnings.filterwarnings("ignore")

from datasets import Dataset
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments, TextStreamer

print("✅ Imports done")

# ======================================================================
# CONFIG — MULTIPLE DATASETS SUPPORTED
# ======================================================================

class Config:
    MODEL_NAME = "microsoft/Phi-3.5-mini-instruct"
    OUTPUT_DIR = "./phi-mcq-unsloth-updated"

    # 🟢 ADD ALL YOUR SKILL-BASED JSON FILES HERE:
    DATASET_PATHS = [
        "/content/0-15+ Cloud MCQ-final.json",
        "/content/0-15+ DL MCQ-final.json",
        "/content/0-15+ DSA_MCQ-final.json",
        "/content/0-15+ LLM MCQ-final.json",
        "/content/0-15+ ML MCQ-final.json",
        "/content/0-15+ Python MCQ-final.json",
        "/content/0-15+ WEB MCQ-final.json"

        # Add more any time!
    ]

# 🚀 TRAINING SPEED BOOSTS
    MAX_LENGTH = 1024             # ⬅ Huge speed gain
    TRAIN_TEST_SPLIT = 0.05       # smaller validation

    NUM_EPOCHS = 3              # ⬅ much faster
    BATCH_SIZE = 16                # ⬅ increased from 1 → 4
    GRADIENT_ACCUMULATION = 1     # ⬅ effective batch size = 16

    LEARNING_RATE = 3e-4          # faster convergence
    WARMUP_STEPS = 50
    WEIGHT_DECAY = 0.01

    # 🚀 LoRA speed improvements
    LORA_R = 16                   # ⬅ reduce from 32 → 16
    LORA_ALPHA = 32               # ⬅ reduce from 64 → 32
    LORA_DROPOUT = 0.05

config = Config()

# ======================================================================
# PARSING FUNCTIONS
# ======================================================================

def parse_input_field(input_str):
    lines = input_str.strip().split('\n')
    parsed = {}
    for line in lines:
        if ':' in line:
            key, value = line.split(':', 1)
            key = key.strip().lower()
            value = value.strip()
            if key == 'skills':
                parsed['skill'] = value
            elif key == 'experience':
                parsed['experience_level'] = value
            elif 'focus' in key or 'topic' in key:
                parsed['topic'] = value
    return parsed


def parse_output_field(output_dict):
    try:
        mcq = output_dict['mcq'][0]
        question = mcq['text']
        explanation = mcq['explanation']
        options = mcq['options']

        option_dict = {}
        option_labels = ['A', 'B', 'C', 'D']
        correct_option = None

        for idx, opt in enumerate(options):
            label = option_labels[idx] if idx < len(option_labels) else str(idx)
            option_dict[label] = opt['text']
            if opt['answerType'] == 'CORRECT':
                correct_option = label

        return {
            'question': question,
            'options': option_dict,
            'correct_answer': correct_option,
            'correct_reasoning': explanation
        }
    except:
        return None


def load_and_parse_dataset(json_path):
    with open(json_path, 'r', encoding='utf-8') as f:
        raw_data = json.load(f)

    parsed_data = []
    for item in tqdm(raw_data, desc=f"Parsing {json_path}"):
        try:
            input_parsed = parse_input_field(item['input'])
            output_parsed = parse_output_field(item['output'])

            if output_parsed:
                unified = {
                    'skill': input_parsed.get('skill', 'General'),
                    'experience_level': input_parsed.get('experience_level', '0-2 years'),
                    'topic': input_parsed.get('topic', 'General'),
                    'question': output_parsed['question'],
                    'option_a': output_parsed['options'].get('A', ''),
                    'option_b': output_parsed['options'].get('B', ''),
                    'option_c': output_parsed['options'].get('C', ''),
                    'option_d': output_parsed['options'].get('D', ''),
                    'correct_answer': output_parsed['correct_answer'],
                    'correct_reasoning': output_parsed['correct_reasoning']
                }
                parsed_data.append(unified)
        except:
            continue

    print(f"✅ Parsed {len(parsed_data)} examples from {json_path}")
    return parsed_data

# ======================================================================
# NEW: MULTI-SKILL DATASET LOADER
# ======================================================================

def load_all_datasets(paths):
    all_data = []

    print("\n======================")
    print("🔄 Loading multi-skill datasets")
    print("======================\n")

    for path in paths:
        data = load_and_parse_dataset(path)
        all_data.extend(data)

    print(f"\n✅ TOTAL COMBINED EXAMPLES = {len(all_data)}")
    return all_data

dataset_raw = load_all_datasets(config.DATASET_PATHS)

# ======================================================================
# PROMPT BUILDER
# ======================================================================

def build_prompt(example, include_response=True):
    system_msg = (
        "You are an expert MCQ generator. Create high-quality multiple-choice "
        "questions with a correct answer and detailed reasoning."
    )

    user_msg = f"""Generate a multiple-choice question (MCQ):

**Skill**: {example['skill']}
**Experience Level**: {example['experience_level']}
**Topic**: {example.get('topic', 'General')}

Create:
1. One clear, specific question.
2. Four options (A, B, C, D).
3. A `correct_answer` field (A/B/C/D).
4. A detailed reasoning under `explanation`.

Output strictly in JSON:

{{
  "mcq": [
    {{
      "text": "...",
      "options": {{
        "A": "...",
        "B": "...",
        "C": "...",
        "D": "..."
      }},
      "correct_answer": "A/B/C/D",
      "explanation": "..."
    }}
  ]
}}
"""

    if include_response:
        response_json = {
            "mcq": [
                {
                    "text": example['question'],
                    "options": {
                        "A": example['option_a'],
                        "B": example['option_b'],
                        "C": example['option_c'],
                        "D": example['option_d']
                    },
                    "correct_answer": example['correct_answer'],
                    "explanation": example['correct_reasoning']
                }
            ]
        }

        assistant_msg = json.dumps(response_json, indent=2)

        formatted = (
            f"<|system|>\n{system_msg}<|end|>\n"
            f"<|user|>\n{user_msg}<|end|>\n"
            f"<|assistant|>\n{assistant_msg}<|end|>"
        )
    else:
        formatted = (
            f"<|system|>\n{system_msg}<|end|>\n"
            f"<|user|>\n{user_msg}<|end|>\n"
            f"<|assistant|>\n"
        )

    return formatted

print("✅ Prompt formatter ready")

# ======================================================================
# PREPARE DATASET
# ======================================================================

def prepare_dataset(raw_data, tokenizer, config):
    formatted_data = []
    for example in tqdm(raw_data, desc="Formatting"):
        prompt = build_prompt(example, include_response=True)
        formatted_data.append({"text": prompt})

    dataset = Dataset.from_list(formatted_data)

    def tokenize_function(examples):
        tokenized = tokenizer(
            examples['text'],
            truncation=True,
            max_length=config.MAX_LENGTH,
            padding='max_length',
        )
        tokenized['labels'] = tokenized['input_ids'].copy()
        return tokenized

    tokenized_dataset = dataset.map(
        tokenize_function,
        batched=True,
        remove_columns=dataset.column_names,
        desc="Tokenizing"
    )

    split_dataset = tokenized_dataset.train_test_split(
        test_size=config.TRAIN_TEST_SPLIT,
        seed=42
    )

    print(f"✅ Train: {len(split_dataset['train'])}, Val: {len(split_dataset['test'])}")
    return split_dataset

# ======================================================================
# LOAD + PREP MODEL
# ======================================================================

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=config.MODEL_NAME,
    max_seq_length=config.MAX_LENGTH,
    load_in_4bit=True
)

model = FastLanguageModel.get_peft_model(
    model,
    r=config.LORA_R,
    lora_alpha=config.LORA_ALPHA,
    lora_dropout=config.LORA_DROPOUT,
    target_modules=[
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj"]
)

dataset = prepare_dataset(dataset_raw, tokenizer, config)
print("✅ Model + LoRA ready")

# ======================================================================
# TRAINING
# ======================================================================

training_args = TrainingArguments(
    output_dir=config.OUTPUT_DIR,
    per_device_train_batch_size=config.BATCH_SIZE,
    per_device_eval_batch_size=config.BATCH_SIZE,
    gradient_accumulation_steps=config.GRADIENT_ACCUMULATION,
    learning_rate=config.LEARNING_RATE,
    num_train_epochs=config.NUM_EPOCHS,
    warmup_steps=config.WARMUP_STEPS,
    weight_decay=config.WEIGHT_DECAY,
    logging_steps=20,
    save_steps=200,
    eval_steps=200,
    save_total_limit=2,
    bf16=True,
    tf32=True,
    report_to="none",
    gradient_checkpointing=True,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    dataset_text_field="text",
    max_seq_length=config.MAX_LENGTH,
    args=training_args
)

✅ Imports done

🔄 Loading multi-skill datasets



Parsing /content/0-15+ Cloud MCQ-final.json:   0%|          | 0/2502 [00:00<?, ?it/s]

✅ Parsed 2502 examples from /content/0-15+ Cloud MCQ-final.json


Parsing /content/0-15+ DL MCQ-final.json:   0%|          | 0/2502 [00:00<?, ?it/s]

✅ Parsed 2502 examples from /content/0-15+ DL MCQ-final.json


Parsing /content/0-15+ DSA_MCQ-final.json:   0%|          | 0/2502 [00:00<?, ?it/s]

✅ Parsed 2502 examples from /content/0-15+ DSA_MCQ-final.json


Parsing /content/0-15+ LLM MCQ-final.json:   0%|          | 0/2502 [00:00<?, ?it/s]

✅ Parsed 2502 examples from /content/0-15+ LLM MCQ-final.json


Parsing /content/0-15+ ML MCQ-final.json:   0%|          | 0/2502 [00:00<?, ?it/s]

✅ Parsed 2502 examples from /content/0-15+ ML MCQ-final.json


Parsing /content/0-15+ Python MCQ-final.json:   0%|          | 0/2533 [00:00<?, ?it/s]

✅ Parsed 2499 examples from /content/0-15+ Python MCQ-final.json


Parsing /content/0-15+ WEB MCQ-final.json:   0%|          | 0/2085 [00:00<?, ?it/s]

✅ Parsed 2085 examples from /content/0-15+ WEB MCQ-final.json

✅ TOTAL COMBINED EXAMPLES = 17094
✅ Prompt formatter ready
==((====))==  Unsloth 2025.12.1: Fast Llama patching. Transformers: 4.57.3.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.1+cu128. CUDA: 8.0. CUDA Toolkit: 12.8. Triton: 3.5.1
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.33.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


generation_config.json:   0%|          | 0.00/140 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.05.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2025.12.1 patched 32 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


Formatting:   0%|          | 0/17094 [00:00<?, ?it/s]

Tokenizing:   0%|          | 0/17094 [00:00<?, ? examples/s]

✅ Train: 16239, Val: 855
✅ Model + LoRA ready


In [None]:
print("🚀 Training started...")
trainer.train()
print("🎉 Training done!")

The model is already on multiple devices. Skipping the move to device specified in `args`.


🚀 Training started...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 16,239 | Num Epochs = 3 | Total steps = 3,045
O^O/ \_/ \    Batch size per device = 16 | Gradient accumulation steps = 1
\        /    Data Parallel GPUs = 1 | Total batch size (16 x 1 x 1) = 16
 "-____-"     Trainable parameters = 29,884,416 of 3,850,963,968 (0.78% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
20,3.4484
40,0.2902
60,0.1693
80,0.1494
100,0.1452
120,0.1313
140,0.1238
160,0.117
180,0.1084
200,0.098


🎉 Training done!


In [None]:
# ======================================================================
# SAVE MODEL
# ======================================================================

final_model_path = f"{config.OUTPUT_DIR}/final_model"
trainer.save_model(final_model_path)
tokenizer.save_pretrained(final_model_path)

print(f"✅ Saved to {final_model_path}")

# ======================================================================
# EVALUATE
# ======================================================================

eval_results = trainer.evaluate()
print(f"✅ Eval results: {eval_results}")

✅ Saved to ./phi-mcq-unsloth-updated/final_model


✅ Eval results: {'eval_loss': 0.030683746561408043, 'eval_runtime': 46.3337, 'eval_samples_per_second': 18.453, 'eval_steps_per_second': 1.165, 'epoch': 3.0}


In [None]:
# DOWNLOAD

!zip -r phi-mcq-unsloth-updated.zip {config.OUTPUT_DIR}
from google.colab import files
files.download(f'phi-mcq-unsloth-updated.zip')
print("✅ Download started")

  adding: phi-mcq-unsloth-updated/ (stored 0%)
  adding: phi-mcq-unsloth-updated/README.md (deflated 44%)
  adding: phi-mcq-unsloth-updated/final_model/ (stored 0%)
  adding: phi-mcq-unsloth-updated/final_model/added_tokens.json (deflated 62%)
  adding: phi-mcq-unsloth-updated/final_model/tokenizer_config.json (deflated 86%)
  adding: phi-mcq-unsloth-updated/final_model/README.md (deflated 65%)
  adding: phi-mcq-unsloth-updated/final_model/chat_template.jinja (deflated 61%)
  adding: phi-mcq-unsloth-updated/final_model/special_tokens_map.json (deflated 76%)
  adding: phi-mcq-unsloth-updated/final_model/tokenizer.json (deflated 85%)
  adding: phi-mcq-unsloth-updated/final_model/tokenizer.model (deflated 55%)
  adding: phi-mcq-unsloth-updated/final_model/adapter_config.json (deflated 57%)
  adding: phi-mcq-unsloth-updated/final_model/adapter_model.safetensors (deflated 7%)
  adding: phi-mcq-unsloth-updated/final_model/training_args.bin (deflated 53%)
  adding: phi-mcq-unsloth-updated/che

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Download started


In [None]:
# ============================================
#    CLEANED MCQ GENERATION CODE (NO SPAM)
# ============================================

import json
import re
from unsloth import FastLanguageModel
from transformers import TextStreamer

MODEL_PATH = "/content/phi-mcq-unsloth-updated/final_model"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_PATH,
    max_seq_length=1024,
    load_in_4bit=True,
)

print("✅ Finetuned model loaded!")


# ------------------------------------------
# Build Prompt
# ------------------------------------------
def build_prompt(skill, experience_level, topic):

    system_msg = (
        "You are an expert MCQ generator. Create high-quality multiple-choice "
        "questions with a correct answer and detailed reasoning."
    )

    user_msg = f"""Generate a multiple-choice question (MCQ):

**Skill**: {skill}
**Experience Level**: {experience_level}
**Topic**: {topic}

Create:
1. One clear, specific question.
2. Four options (A, B, C, D).
3. A 'correct_answer' field.
4. An 'explanation'.

Output in JSON.
"""

    return (
        f"<|system|>\n{system_msg}<|end|>\n"
        f"<|user|>\n{user_msg}<|end|>\n"
        f"<|assistant|>\n"
    )


# ------------------------------------------
# Clean model output
# ------------------------------------------
def clean_output(text):
    # Remove ALL Phi special tokens such as <|end|>, <|placeholder6|>, etc.
    text = re.sub(r"<\|.*?\|>", "", text)

    # Strip empty spaces and lines
    return text.strip()


# ------------------------------------------
# Generate ONE MCQ cleanly
# ------------------------------------------
def generate_mcq(skill="Python", experience="0-2 years", topic="Loops"):
    prompt = build_prompt(skill, experience, topic)

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Get EOS token ID for "<|end|>"
    eos_id = tokenizer.convert_tokens_to_ids("<|end|>")

    output = model.generate(
        **inputs,
        max_new_tokens=400,
        temperature=0.6,
        top_p=0.9,
        do_sample=True,
        repetition_penalty=1.1,
        eos_token_id=eos_id,   # ⭐ STOP WHEN <|end|> IS GENERATED
    )

    decoded = tokenizer.decode(output[0])

    # Keep only assistant content
    if "<|assistant|>" in decoded:
        decoded = decoded.split("<|assistant|>")[-1]

    # Clean special tokens
    decoded = clean_output(decoded)

    return decoded


# ------------------------------------------
# Generate N MCQs
# ------------------------------------------
def generate_n_mcqs(n, skill="Python", experience="0-2 years", topic="Loops"):
    results = []

    for i in range(n):
        print("\n=====================================")
        print(f"       Generating MCQ {i+1}/{n}")
        print("=====================================\n")

        mcq = generate_mcq(skill, experience, topic)
        results.append(mcq)

        print(mcq)

    return results

==((====))==  Unsloth 2025.12.1: Fast Llama patching. Transformers: 4.57.3.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.1+cu128. CUDA: 8.0. CUDA Toolkit: 12.8. Triton: 3.5.1
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.33.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
✅ Finetuned model loaded!


In [None]:

# ------------------------------------------
# Run generator
# ------------------------------------------
n = 10  # Change dynamically

mcqs = generate_n_mcqs(
    n=n,
    skill="Deep Learning",
    experience="5-8 years",
    topic="All techniques"
)


       Generating MCQ 1/10

{
  "mcq": [
    {
      "text": "When designing a deep learning system, Which technique helps regularize deep neural networks?",
      "options": {
        "A": "Removing validation data",
        "B": "Using dropout and augmenting training data",
        "C": "Increasing the batch size to infinity",
        "D": "Applying dropout, weight decay, and using proper initialization"
      },
      "correct_answer": "D",
      "explanation": "Dropout randomly zeroes some activations, weight decay adds penalization to large weights, and good initialization help prevent vanishing gradients."
    }
  ]
}

       Generating MCQ 2/10

{
  "mcq": [
    {
      "text": "Which of the following is NOT a common technique used to improve training stability and performance in deep learning?",
      "options": {
        "A": "Removing validation data",
        "B": "Using extremely high learning rates",
        "C": "Training from scratch without any pretraining",
        "D

### Changes into the Parameters

In [None]:
# =====================================================================
#  OPTION C — FULL CLEAN TRAINING SCRIPT (MATCHES YOUR REAL DATASET)
# =====================================================================

import json, re, torch, warnings
from datasets import Dataset
from tqdm.auto import tqdm
warnings.filterwarnings("ignore")

from unsloth import FastLanguageModel
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from trl import SFTTrainer


# =====================================================================
# AUTO GPU OPTIMIZATION
# =====================================================================
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
torch.backends.cudnn.benchmark = True

gpu = torch.cuda.get_device_properties(0)
VRAM = gpu.total_memory / (1024**3)
print(f"🔥 VRAM detected: {VRAM:.2f} GB")

if VRAM >= 40:
    BATCH, ACCUM, MAXLEN = 16, 1, 2048
elif VRAM >= 24:
    BATCH, ACCUM, MAXLEN = 8, 2, 1536
elif VRAM >= 16:
    BATCH, ACCUM, MAXLEN = 4, 4, 1024
else:
    BATCH, ACCUM, MAXLEN = 2, 8, 768

print(f"🔥 Using batch={BATCH}, accum={ACCUM}, maxlen={MAXLEN}")


# =====================================================================
# TRAINING CONFIG
# =====================================================================

MODEL_NAME = "microsoft/Phi-3.5-mini-instruct"
OUTPUT_DIR = "./phi-dsa-mcq-final-model"
DATASET_PATHS = [
        "/content/0-15+ Cloud MCQ-final.json",
        "/content/0-15+ DL MCQ-final.json",
        "/content/0-15+ DSA_MCQ-final.json",
        "/content/0-15+ LLM MCQ-final.json",
        "/content/0-15+ ML MCQ-final.json",
        "/content/0-15+ Python MCQ-final.json",
        "/content/0-15+ WEB MCQ-final.json"

]

EPOCHS = 2.5

# =====================================================================
# PARSE INPUT FIELD
# =====================================================================

def parse_input_field(text):
    out = {"skill": "General", "experience": "0-2 years", "topic": "General"}
    for line in text.split("\n"):
        if ":" in line:
            k, v = line.split(":", 1)
            k, v = k.lower().strip(), v.strip()
            if "skill" in k: out["skill"] = v
            if "experience" in k: out["experience"] = v
            if "focus" in k or "topic" in k: out["topic"] = v
    return out


# =====================================================================
# PARSE OUTPUT FIELD — USE YOUR REAL FORMAT
# =====================================================================

def parse_output_field(out_obj):
    try:
        mcq = out_obj["mcq"][0]
        return {
            "question": mcq["text"],
            "options": mcq["options"],     # KEEP AS LIST
            "explanation": mcq["explanation"]
        }
    except:
        return None


# =====================================================================
# LOAD ALL DATASETS
# =====================================================================

all_samples = []

for path in DATASET_PATHS:
    raw = json.load(open(path, "r", encoding="utf-8"))
    for item in tqdm(raw, desc=f"Loading {path}"):
        ip = parse_input_field(item["input"])
        op = parse_output_field(item["output"])
        if op:
            all_samples.append({**ip, **op})

print(f"🔥 Loaded samples: {len(all_samples)}")

# Remove duplicates
all_samples = list({json.dumps(i, sort_keys=True): i for i in all_samples}.values())
print(f"✨ After removing duplicates: {len(all_samples)}")


# =====================================================================
# BUILD TRAINING PROMPT — MATCHES YOUR REAL FORMAT
# =====================================================================

def build_prompt(ex):
    system = (
        "You are an expert MCQ generator. "
        "Generate exactly ONE MCQ in the SAME JSON STRUCTURE as training data. "
        "Never repeat anything. Always end with <|end|>."
    )

    # Keep your natural instruction style
    user = f"""
{ex['skill']} | {ex['experience']} | {ex['topic']}
Generate one MCQ with fields: text, options (list with answerType), explanation.
"""

    assistant_output = {
        "mcq": [
            {
                "text": ex["question"],
                "options": ex["options"],
                "explanation": ex["explanation"]
            }
        ]
    }

    return (
        f"<|system|>\n{system}<|end|>\n"
        f"<|user|>\n{user}<|end|>\n"
        f"<|assistant|>\n{json.dumps(assistant_output, indent=2)}<|end|>"
    )


# =====================================================================
# Convert dataset → HF Dataset
# =====================================================================

hf_dataset = Dataset.from_list([
    {"text": build_prompt(ex)} for ex in all_samples
])


# =====================================================================
# Load model + tokenizer
# =====================================================================

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_NAME,
    max_seq_length=MAXLEN,
    load_in_4bit=True,
)


# =====================================================================
# Tokenize — NO PADDING
# =====================================================================

def tokenize_fn(b):
    t = tokenizer(
        b["text"],
        truncation=True,
        max_length=MAXLEN,
        padding=False
    )
    # mask labels
    labels = []
    for row in t["input_ids"]:
        labels.append([tok if tok != tokenizer.pad_token_id else -100 for tok in row])
    t["labels"] = labels
    return t

tokenized = hf_dataset.map(tokenize_fn, batched=True, remove_columns=["text"])

# Split
split = tokenized.train_test_split(test_size=0.05, seed=42)


# =====================================================================
# LoRA Configuration
# =====================================================================

PEFT_TARGETS = [
    "q_proj","k_proj","v_proj","o_proj",
    "up_proj","down_proj","gate_proj"
]

model = FastLanguageModel.get_peft_model(
    model,
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    target_modules=PEFT_TARGETS,
)


# =====================================================================
# TrainingArguments — Max GPU Mode
# =====================================================================

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=BATCH,
    per_device_eval_batch_size=BATCH,
    gradient_accumulation_steps=ACCUM,
    learning_rate=3e-4,
    warmup_steps=125,
    num_train_epochs=EPOCHS,

    tf32=True,
    bf16=True,
    gradient_checkpointing=True,
    optim="paged_adamw_32bit",

    logging_steps=20,
    eval_steps=300,
    save_steps=300,
    save_total_limit=2,
    report_to="none",
)


collator = DataCollatorForSeq2Seq(tokenizer, padding=True)


# =====================================================================
# Trainer
# =====================================================================

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=split["train"],
    eval_dataset=split["test"],
    data_collator=collator,
    max_seq_length=MAXLEN
)

print("🔥 Training started...")
trainer.train()
print("🎉 Training finished!")

🔥 VRAM detected: 39.56 GB
🔥 Using batch=8, accum=2, maxlen=1536


Loading /content/0-15+ Cloud MCQ-final.json:   0%|          | 0/2502 [00:00<?, ?it/s]

Loading /content/0-15+ DL MCQ-final.json:   0%|          | 0/2502 [00:00<?, ?it/s]

Loading /content/0-15+ DSA_MCQ-final.json:   0%|          | 0/2502 [00:00<?, ?it/s]

Loading /content/0-15+ LLM MCQ-final.json:   0%|          | 0/2502 [00:00<?, ?it/s]

Loading /content/0-15+ ML MCQ-final.json:   0%|          | 0/2502 [00:00<?, ?it/s]

Loading /content/0-15+ Python MCQ-final.json:   0%|          | 0/2533 [00:00<?, ?it/s]

Loading /content/0-15+ WEB MCQ-final.json:   0%|          | 0/2085 [00:00<?, ?it/s]

🔥 Loaded samples: 17094
✨ After removing duplicates: 15502
==((====))==  Unsloth 2025.12.1: Fast Llama patching. Transformers: 4.57.3.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.1+cu128. CUDA: 8.0. CUDA Toolkit: 12.8. Triton: 3.5.1
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.33.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Map:   0%|          | 0/15502 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.


🔥 Training started...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 14,726 | Num Epochs = 3 | Total steps = 2,303
O^O/ \_/ \    Batch size per device = 8 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (8 x 2 x 1) = 16
 "-____-"     Trainable parameters = 59,768,832 of 3,880,848,384 (1.54% trained)


Step,Training Loss
20,2.201
40,0.9161
60,0.5339
80,0.4737
100,0.4536
120,0.3843
140,0.375
160,0.3374
180,0.2917
200,0.2926


🎉 Training finished!


In [None]:
# =====================================================================
# Save
# =====================================================================

trainer.save_model(f"{OUTPUT_DIR}/final_model")
tokenizer.save_pretrained(f"{OUTPUT_DIR}/final_model")

print("🔥 Saved final model.")

🔥 Saved final model.


In [None]:
# =====================================================================
#  GENERATE MULTIPLE MCQs AT ONCE (10 MCQs)
# =====================================================================

import json
import torch
from unsloth import FastLanguageModel

MODEL_PATH = "/content/phi-dsa-mcq-final-model/final_model"   # update if needed

# Load model
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_PATH,
    max_seq_length=2048,
    load_in_4bit=True
)
FastLanguageModel.for_inference(model)

# -------------------------
# Prompt Builder
# -------------------------
def build_prompt(skill, experience, topic):
    system = (
        "You are an expert MCQ generator. Generate EXACTLY ONE MCQ per request. "
        "Always follow the JSON format used in training. Never repeat content. "
        "Always end with <|end|>."
    )

    user = f"""
Skills: {skill}
Experience: {experience}
Focus: {topic}

Generate exactly ONE MCQ in the format:

{{
  "mcq": [
    {{
      "text": "...",
      "options": [
        {{ "text": "...", "answerType": "ELIMINATE/ACCEPTABLE/CORRECT" }},
        {{ "text": "...", "answerType": "..." }},
        {{ "text": "...", "answerType": "..." }},
        {{ "text": "...", "answerType": "..." }}
      ],
      "explanation": "..."
    }}
  ]
}}
"""

    return f"<|system|>\n{system}<|end|>\n<|user|>\n{user}<|end|>\n<|assistant|>\n"


# -------------------------
# Single MCQ Generator
# -------------------------
def generate_single_mcq(skill, experience, topic):
    prompt = build_prompt(skill, experience, topic)

    output = model.generate(
        **tokenizer(prompt, return_tensors="pt").to(model.device),
        max_length=1024,
        temperature=0.3,
        top_p=0.9,
        repetition_penalty=1.1,
        eos_token_id=tokenizer.convert_tokens_to_ids("<|end|>")
    )

    decoded = tokenizer.decode(output[0], skip_special_tokens=True)

    # Extract assistant part only
    if "<|assistant|>" in decoded:
        decoded = decoded.split("<|assistant|>")[-1]

    decoded = decoded.replace("<|end|>", "").strip()

    # Safe JSON extraction
    try:
        json_text = decoded[decoded.find("{"): decoded.rfind("}") + 1]
        return json.loads(json_text)
    except:
        print("⚠ Could not parse JSON:\n", decoded)
        return None


# -------------------------
# Generate N MCQs
# -------------------------
def generate_multiple_mcqs(n, skill, experience, topic):
    mcq_list = []

    for i in range(n):
        print(f"\n===================================")
        print(f"       Generating MCQ {i+1}/{n}")
        print("===================================\n")

        mcq = generate_single_mcq(skill, experience, topic)
        mcq_list.append(mcq)

        # Pretty print
        print(json.dumps(mcq, indent=2))

    return mcq_list

==((====))==  Unsloth 2025.12.1: Fast Llama patching. Transformers: 4.57.3.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.1+cu128. CUDA: 8.0. CUDA Toolkit: 12.8. Triton: 3.5.1
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.33.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [None]:
# -------------------------
# Example: Generate 10 MCQs
# -------------------------

mcqs = generate_multiple_mcqs(
    n=10,
    skill="Data Structures and Algorithms",
    experience="0-2 years",
    topic="Stack vs Queue"
)

print("\n\n🎉 Done! Generated 10 MCQs.")


       Generating MCQ 1/10

⚠ Could not parse JSON:
 You are an expert MCQ generator. Generate EXACTLY ONE MCQ per request. Always follow the JSON format used in training. Never repeat content. Always end with  . Skills: Data Structures and Algorithms
Experience: 0-2 years
Focus: Stack vs Queue

Generate exactly ONE MCQ in the format:

{
  "mcq": [
    {
      "text": "...",
      "options": [
        { "text": "...", "answerType": "ELIMINATE/ACCEPTABLE/CORRECT" },
        { "text": "...", "answerType": "..." },
        { "text": "...", "answerType": "..." },
        { "text": "...", "answerType": "..." }
      ],
      "explanation": "..."
    }
  ]
}
 {
  "mcq": [
    {
      "text": "Which abstract data type uses the LIFO (Last In, First Out) principle?",
      "options": [
        {
          "text": "Stack",
          "answerType": "CORRECT"
        },
        {
          "text": "Deque used as FIFO",
          "answerType": "ELIMINATE"
        },
        {
          "text": "Que

KeyboardInterrupt: 

### Removing Topics

In [None]:
# ======================================================================
# 1️⃣ IMPORTS
# ======================================================================
import json
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

from datasets import Dataset
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments

print("✅ Imports done")

# ======================================================================
# 2️⃣ CONFIGURATION
# ======================================================================
class Config:
    MODEL_NAME = "microsoft/Phi-3.5-mini-instruct"
    OUTPUT_DIR = "./phi-mcq-skill-exp"

    DATASET_PATHS = [
        "/content/0-15+ Cloud MCQ-final.json",
        "/content/0-15+ DL MCQ-final.json",
        "/content/0-15+ DSA_MCQ-final.json",
        "/content/0-15+ LLM MCQ-final.json",
        "/content/0-15+ ML MCQ-final.json",
        "/content/0-15+ Python MCQ-final.json",
        "/content/0-15+ WEB MCQ-final.json"
    ]

    MAX_LENGTH = 1536   # Use larger max length for better coverage
    TRAIN_TEST_SPLIT = 0.05

    NUM_EPOCHS = 3
    BATCH_SIZE = 8
    GRAD_ACC = 2

    LR = 3e-4
    WARMUP = 50
    WEIGHT_DECAY = 0.01

    # LoRA parameters
    LORA_R = 32
    LORA_ALPHA = 64
    LORA_DROPOUT = 0.05

config = Config()

# ======================================================================
# 3️⃣ LOAD + PARSE DATASETS
# ======================================================================
def parse_input(input_text):
    skill, exp = "General", "0-2 years"
    for line in input_text.strip().split("\n"):
        if line.startswith("Skills:"):
            skill = line.split(":",1)[1].strip()
        if line.startswith("Experience:"):
            exp = line.split(":",1)[1].strip()
    return skill, exp

def parse_output(output):
    try:
        mcq = output["mcq"][0]
        question = mcq["text"]
        explanation = mcq["explanation"]
        labels = ["A","B","C","D"]

        option_dict = {}
        correct_label = None
        for idx,opt in enumerate(mcq["options"]):
            label = labels[idx]
            option_dict[label] = opt["text"]
            if opt["answerType"]=="CORRECT":
                correct_label = label

        return {"question":question, "options":option_dict, "correct_answer":correct_label, "explanation":explanation}
    except:
        return None

def load_dataset(path):
    with open(path,"r",encoding="utf-8") as f:
        raw = json.load(f)

    parsed = []
    seen_questions = set()
    for item in tqdm(raw, desc=f"Loading {path}"):
        skill, exp = parse_input(item["input"])
        out = parse_output(item["output"])
        if out is None or out["question"] in seen_questions:
            continue
        seen_questions.add(out["question"])
        parsed.append({
            "skill": skill,
            "experience": exp,
            "question": out["question"],
            "A": out["options"]["A"],
            "B": out["options"]["B"],
            "C": out["options"]["C"],
            "D": out["options"]["D"],
            "correct_answer": out["correct_answer"],
            "explanation": out["explanation"]
        })
    return parsed

def load_all(paths):
    all_data = []
    for p in paths:
        all_data.extend(load_dataset(p))
    print(f"✅ Total unique examples: {len(all_data)}")
    return all_data

raw_data = load_all(config.DATASET_PATHS)

# ======================================================================
# 4️⃣ PROMPT BUILDER
# ======================================================================
def build_prompt(ex):
    system_msg = "You are an expert MCQ generator. Generate high-quality questions with one correct answer and detailed explanation."
    user_msg = f"""
Generate exactly ONE multiple-choice question.

Skill: {ex['skill']}
Experience Level: {ex['experience']}

Output strictly in JSON:
{{
  "mcq": [
    {{
      "text": "...",
      "options": {{
        "A": "...",
        "B": "...",
        "C": "...",
        "D": "..."
      }},
      "correct_answer": "A/B/C/D",
      "explanation": "..."
    }}
  ]
}}
"""
    assistant = {
        "mcq":[
            {
                "text": ex["question"],
                "options": {"A": ex["A"], "B": ex["B"], "C": ex["C"], "D": ex["D"]},
                "correct_answer": ex["correct_answer"],
                "explanation": ex["explanation"]
            }
        ]
    }

    return f"<|system|>\n{system_msg}<|end|>\n<|user|>\n{user_msg}<|end|>\n<|assistant|>\n{json.dumps(assistant)}<|end|>"

# ======================================================================
# 5️⃣ TOKENIZATION
# ======================================================================
def prepare_dataset(data, tokenizer):
    records = [{"text": build_prompt(ex)} for ex in data]
    ds = Dataset.from_list(records)

    def tokenize(batch):
        tokenized = tokenizer(
            batch["text"],
            truncation=True,
            max_length=config.MAX_LENGTH,
            padding="max_length"
        )
        labels = tokenized["input_ids"].copy()
        labels = [[(t if t!=tokenizer.pad_token_id else -100) for t in seq] for seq in labels]
        tokenized["labels"] = labels
        return tokenized

    tokenized = ds.map(tokenize, batched=True, remove_columns=["text"])
    return tokenized.train_test_split(test_size=config.TRAIN_TEST_SPLIT)

tokenizer = FastLanguageModel.from_pretrained(config.MODEL_NAME)[1]
dataset = prepare_dataset(raw_data, tokenizer)

# ======================================================================
# 6️⃣ LOAD MODEL + LoRA
# ======================================================================
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=config.MODEL_NAME,
    max_seq_length=config.MAX_LENGTH,
    load_in_4bit=True
)
model = FastLanguageModel.get_peft_model(
    model,
    r=config.LORA_R,
    lora_alpha=config.LORA_ALPHA,
    lora_dropout=config.LORA_DROPOUT,
    target_modules=["q_proj","k_proj","v_proj","o_proj"]
)

# ======================================================================
# 7️⃣ TRAINING ARGUMENTS
# ======================================================================
training_args = TrainingArguments(
    output_dir=config.OUTPUT_DIR,
    per_device_train_batch_size=config.BATCH_SIZE,
    per_device_eval_batch_size=config.BATCH_SIZE,
    gradient_accumulation_steps=config.GRAD_ACC,
    learning_rate=config.LR,
    num_train_epochs=config.NUM_EPOCHS,
    warmup_steps=config.WARMUP,
    weight_decay=config.WEIGHT_DECAY,
    logging_steps=20,
    save_steps=500,
    eval_steps=500,
    save_total_limit=2,
    bf16=True,
    report_to="none"
)

# ======================================================================
# 8️⃣ TRAINING
# ======================================================================
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    dataset_text_field=None,
    max_seq_length=config.MAX_LENGTH,
    args=training_args
)

✅ Imports done


Loading /content/0-15+ Cloud MCQ-final.json: 100%|██████████| 2502/2502 [00:00<00:00, 298927.49it/s]
Loading /content/0-15+ DL MCQ-final.json: 100%|██████████| 2502/2502 [00:00<00:00, 314469.11it/s]
Loading /content/0-15+ DSA_MCQ-final.json: 100%|██████████| 2502/2502 [00:00<00:00, 304231.13it/s]
Loading /content/0-15+ LLM MCQ-final.json: 100%|██████████| 2502/2502 [00:00<00:00, 255599.50it/s]
Loading /content/0-15+ ML MCQ-final.json: 100%|██████████| 2502/2502 [00:00<00:00, 194993.28it/s]
Loading /content/0-15+ Python MCQ-final.json: 100%|██████████| 2533/2533 [00:00<00:00, 265027.87it/s]
Loading /content/0-15+ WEB MCQ-final.json: 100%|██████████| 2085/2085 [00:00<00:00, 256117.26it/s]


✅ Total unique examples: 9435
==((====))==  Unsloth 2025.12.1: Fast Llama patching. Transformers: 4.57.3.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.1+cu128. CUDA: 8.0. CUDA Toolkit: 12.8. Triton: 3.5.1
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.33.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Map:   0%|          | 0/9435 [00:00<?, ? examples/s]

==((====))==  Unsloth 2025.12.1: Fast Llama patching. Transformers: 4.57.3.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.1+cu128. CUDA: 8.0. CUDA Toolkit: 12.8. Triton: 3.5.1
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.33.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [None]:
print("🚀 Training started...")
trainer.train()
print("🎉 Training completed!")

The model is already on multiple devices. Skipping the move to device specified in `args`.


🚀 Training started...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 8,963 | Num Epochs = 3 | Total steps = 1,683
O^O/ \_/ \    Batch size per device = 8 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (8 x 2 x 1) = 16
 "-____-"     Trainable parameters = 25,165,824 of 3,846,245,376 (0.65% trained)


Step,Training Loss
20,2.1421
40,0.8129
60,0.5883
80,0.5358
100,0.4877
120,0.4674
140,0.4386
160,0.4144
180,0.402
200,0.3826


🎉 Training completed!


In [None]:
# ======================================================================
# 9️⃣ SAVE MODEL
# ======================================================================
trainer.save_model(f"{config.OUTPUT_DIR}/final_model")
tokenizer.save_pretrained(f"{config.OUTPUT_DIR}/final_model")
print(f"✅ Model saved at {config.OUTPUT_DIR}/final_model")


# ======================================================================
# EVALUATE
# ======================================================================

eval_results = trainer.evaluate()
print(f"✅ Eval results: {eval_results}")

Unsloth: Not an error, but LlamaForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient


✅ Model saved at ./phi-mcq-skill-exp/final_model


✅ Eval results: {'eval_loss': 0.16798874735832214, 'eval_runtime': 32.5836, 'eval_samples_per_second': 14.486, 'eval_steps_per_second': 1.811, 'epoch': 3.0}


In [None]:
# DOWNLOAD

!zip -r phi-mcq-skill-exp.zip {config.OUTPUT_DIR}
from google.colab import files
files.download(f'phi-mcq-skill-exp.zip')
print("✅ Download started")

  adding: phi-mcq-skill-exp/ (stored 0%)
  adding: phi-mcq-skill-exp/README.md (deflated 44%)
  adding: phi-mcq-skill-exp/final_model/ (stored 0%)
  adding: phi-mcq-skill-exp/final_model/added_tokens.json (deflated 62%)
  adding: phi-mcq-skill-exp/final_model/tokenizer_config.json (deflated 86%)
  adding: phi-mcq-skill-exp/final_model/README.md (deflated 65%)
  adding: phi-mcq-skill-exp/final_model/chat_template.jinja (deflated 61%)
  adding: phi-mcq-skill-exp/final_model/special_tokens_map.json (deflated 76%)
  adding: phi-mcq-skill-exp/final_model/tokenizer.json (deflated 85%)
  adding: phi-mcq-skill-exp/final_model/tokenizer.model (deflated 55%)
  adding: phi-mcq-skill-exp/final_model/adapter_config.json (deflated 56%)
  adding: phi-mcq-skill-exp/final_model/adapter_model.safetensors (deflated 7%)
  adding: phi-mcq-skill-exp/final_model/training_args.bin (deflated 53%)
  adding: phi-mcq-skill-exp/checkpoint-1500/ (stored 0%)
  adding: phi-mcq-skill-exp/checkpoint-1500/rng_state.pth 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Download started


In [None]:
# ======================================================================
# 1️⃣ IMPORTS
# ======================================================================
import json
import re
from unsloth import FastLanguageModel
from transformers import TextStreamer
import pprint

MODEL_PATH = "/content/phi-mcq-skill-exp/final_model"

# Load model + tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_PATH,
    max_seq_length=1536,
    load_in_4bit=True
)

print("✅ Finetuned model loaded!")

# ======================================================================
# 2️⃣ PROMPT BUILDER
# ======================================================================
def build_prompt(skill, experience):
    system_msg = "You are an expert MCQ generator. Create high-quality multiple-choice questions with one correct answer and detailed explanation."
    user_msg = f"""
Generate exactly ONE multiple-choice question.

Skill: {skill}
Experience Level: {experience}

Output strictly in JSON:
{{
  "mcq": [
    {{
      "text": "...",
      "options": {{
        "A": "...",
        "B": "...",
        "C": "...",
        "D": "..."
      }},
      "correct_answer": "A/B/C/D",
      "explanation": "..."
    }}
  ]
}}
"""
    return f"<|system|>\n{system_msg}<|end|>\n<|user|>\n{user_msg}<|end|>\n<|assistant|>\n"

# ======================================================================
# 3️⃣ CLEAN OUTPUT
# ======================================================================
def clean_output(text):
    # Remove all Phi special tokens like <|end|> etc.
    text = re.sub(r"<\|.*?\|>", "", text)
    return text.strip()

# ======================================================================
# 4️⃣ GENERATE SINGLE MCQ
# ======================================================================
def generate_mcq(skill="Python", experience="0-2 years"):
    prompt = build_prompt(skill, experience)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    eos_id = tokenizer.convert_tokens_to_ids("<|end|>")

    output = model.generate(
        **inputs,
        max_new_tokens=400,
        temperature=0.4,
        top_p=0.9,
        do_sample=True,
        repetition_penalty=1.1,
        eos_token_id=eos_id
    )

    decoded = tokenizer.decode(output[0])
    if "<|assistant|>" in decoded:
        decoded = decoded.split("<|assistant|>")[-1]

    return clean_output(decoded)

# ======================================================================
# 5️⃣ GENERATE N UNIQUE MCQs
# ======================================================================
def generate_n_mcqs(n=10, skill="Python", experience="0-2 years"):
    results = []
    seen_questions = set()

    for i in range(n):
        print(f"\n=== Generating MCQ {i+1}/{n} ===\n")
        while True:
            mcq = generate_mcq(skill, experience)
            # Try to extract the question text to avoid duplicates
            try:
                mcq_json = json.loads(mcq)
                question_text = mcq_json["mcq"][0]["text"]
            except:
                continue  # Retry if JSON is invalid

            if question_text not in seen_questions:
                seen_questions.add(question_text)
                results.append(mcq)
                pprint.pprint(mcq)
                break  # move to next MCQ

    return results

==((====))==  Unsloth 2025.12.1: Fast Llama patching. Transformers: 4.57.3.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.1+cu128. CUDA: 8.0. CUDA Toolkit: 12.8. Triton: 3.5.1
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.33.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
✅ Finetuned model loaded!


In [None]:
# ======================================================================
# 6️⃣ USAGE
# ======================================================================
# Example: Generate 10 MCQs for Python, 0-2 years experience
mcqs = generate_n_mcqs(n=10, skill="Python", experience="0-2 years")


=== Generating MCQ 1/10 ===

{"mcq": [{"text": "What is the result of `not False`?", "options": {"A": "`True` (logical negation)", "B": "`False`", "C": "`None`", "D": "`Error`"}, "correct_answer": "A", "explanation": "In Boolean context, **`not True`** evaluates to **`False`**; however, since we're looking for what happens when you apply logical NOT operator on a false value (`not`), it results back into true."}]}

=== Generating MCQ 2/10 ===

{"mcq": [{"text": "What is the primary use of `isinstance(obj, classinfo)`?", "options": {"A": "`to check if two objects share memory` (True for CPython implementation)", "B": "`to verify that a given object adheres to specific attributes or methods defined by its type`, effectively checking inheritance.", "C": "`to create copies of mutable objects without affecting originals`.", "D": "`to dynamically import modules based on their names.`"}, "correct_answer": "B", "explanation": "The builtin function **`isinstance()`** checks whether an instance

### CPU - Inference

In [None]:
# ======================================================
# CPU INFERENCE FOR EXPORTED HF MODEL
# ======================================================
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import re

MODEL_PATH = "/content/phi-mcq-hf-cpu"   # exported folder

# Load CPU model
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.float32,
    device_map="cpu"
)

print("✅ CPU model loaded successfully!")

# ------------------------------------------------------
# PROMPT
# ------------------------------------------------------
def build_prompt(skill, experience):
    return f"""
You are an expert MCQ generator.

Generate exactly ONE MCQ in the following JSON format:

{{
  "mcq": [
    {{
      "text": "...",
      "options": {{
        "A": "...",
        "B": "...",
        "C": "...",
        "D": "..."
      }},
      "correct_answer": "A/B/C/D",
      "explanation": "..."
    }}
  ]
}}

Skill: {skill}
Experience Level: {experience}
"""

# ------------------------------------------------------
# CLEAN OUTPUT
# ------------------------------------------------------
def clean_output(text):
    text = re.sub(r"<\|.*?\|>", "", text)
    return text.strip()

# ------------------------------------------------------
# GENERATE SINGLE MCQ
# ------------------------------------------------------
def generate_mcq(skill="Python", experience="0-2 years"):
    prompt = build_prompt(skill, experience)

    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")

    output = model.generate(
        **inputs,
        max_new_tokens=400,
        temperature=0.7,
        top_p=0.95,
        do_sample=True,
        repetition_penalty=1.05
    )

    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
    return clean_output(decoded)

In [None]:
# ------------------------------------------------------
# TEST IT
# ------------------------------------------------------
print("\n=== SAMPLE MCQ ===\n")
print(generate_mcq("Python", "0-2 years"))