In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install -q accelerate==0.27.2
!pip install -q peft==0.8.2
!pip install -q bitsandbytes==0.42.0
!pip install -q transformers==4.38.1
!pip install -q trl==0.7.10
!pip install -q datasets==2.17.1
!pip install -q scipy einops sentencepiece

In [None]:
import gc
import torch
# Collect unreachable Python objects, then release PyTorch's cached, unused CUDA memory.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Fix pyarrow compatibility issue first
!pip uninstall -y pyarrow
!pip install pyarrow==14.0.1

# Simple installation WITHOUT bitsandbytes (no quantization needed for 0.5B)
!pip install -q accelerate==0.27.2
!pip install -q peft==0.15.0
!pip install -q transformers==4.46.0
!pip install -q trl==0.12.0
!pip install -q datasets==2.17.1
!pip install -q scipy einops sentencepiece

print("‚úÖ Installation completed! Please RESTART the kernel now.")
print("Go to: Runtime ‚Üí Restart Session")

In [None]:
import os
import gc
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
from trl import SFTTrainer

# Reaching this line means every import in this cell resolved.
print("‚úÖ All libraries imported successfully!")

In [None]:
# Start from a clean slate: collect Python garbage and drop cached CUDA memory.
gc.collect()
torch.cuda.empty_cache()

# Report whether a GPU is visible and, if so, which one and how large it is.
cuda_ok = torch.cuda.is_available()
print(f"üîß CUDA Available: {cuda_ok}")
if cuda_ok:
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"üíæ GPU: {torch.cuda.get_device_name(0)}")
    print(f"üìä Total Memory: {gpu_props.total_memory / 1e9:.2f} GB")

In [None]:
# STEP 3: Configuration
# ============================================
# Constants read by the dataset, model, LoRA and trainer cells of the first
# fine-tuning run.

# Model Configuration
# NOTE(review): MODEL_NAME is rebound to the display name "KP-Qwen-Assistant"
# by the later chat cells, so any cell that loads weights from MODEL_NAME has
# to run before those cells.
MODEL_NAME = "/kaggle/input/qwen2.5/transformers/0.5b/1"  # Your local Qwen 0.5B model
NEW_MODEL = "Qwen-0.5B-Finetuned"  # Directory the trained model is saved to in STEP 12

# Dataset Configuration
DATASET_NAME = "timdettmers/openassistant-guanaco"
# Alternative datasets:
# "yahma/alpaca-cleaned"
# "databricks/databricks-dolly-15k"
# "mlabonne/guanaco-llama2-1k" (smaller for testing)

# Training Configuration
# NOTE(review): later cells hard-code "./results/checkpoint-500"; that path only
# exists while OUTPUT_DIR is "./results", MAX_STEPS is 500 and SAVE_STEPS
# divides MAX_STEPS. Update those cells if any of the three changes.
OUTPUT_DIR = "./results"
LOGGING_STEPS = 10
SAVE_STEPS = 50  # Reduced for 0.5B model
MAX_STEPS = 500  # Adjust based on your needs
LEARNING_RATE = 2e-4
BATCH_SIZE = 8  # Increased for smaller model (0.5B)
GRADIENT_ACCUMULATION_STEPS = 1

# LoRA Configuration (passed to LoraConfig in STEP 8)
LORA_R = 64
LORA_ALPHA = 16
LORA_DROPOUT = 0.1

print("‚úÖ Configuration set!")

In [None]:
# STEP 4: Load Dataset
# ============================================
# Loads the "train" split of DATASET_NAME and prints its size and first record.
print("\n" + "="*50)
print("Loading dataset...")
gc.collect()
torch.cuda.empty_cache()

dataset = load_dataset(DATASET_NAME, split="train")

# For faster testing, uncomment:
# dataset = dataset.select(range(1000))

print(f"‚úÖ Dataset loaded: {len(dataset)} examples")
print(f"üìù Sample: {dataset[0]}")

In [None]:
# STEP 5: Load Tokenizer
# ============================================
# Loads the tokenizer stored next to the base model weights, padding on the right.
print("\n" + "="*50)
print("Loading tokenizer...")
gc.collect()
torch.cuda.empty_cache()

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    padding_side="right"
)

# Fall back to the EOS token for padding only when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id

print("‚úÖ Tokenizer loaded!")

In [None]:
# STEP 6: Load Base Model (FP16 - No Quantization)
# ============================================
# Loads the base checkpoint from MODEL_NAME in float16 with automatic device placement.
print("\n" + "="*50)
print("Loading base model in FP16...")
print("Note: 0.5B model is small enough, no quantization needed!")
gc.collect()
torch.cuda.empty_cache()

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

# Disable the generation KV cache on the config while training.
model.config.use_cache = False
# NOTE(review): pretraining_tp looks like a leftover from Llama fine-tuning
# recipes; presumably it has no effect on this architecture - confirm or drop.
model.config.pretraining_tp = 1

print("‚úÖ Base model loaded!")
# Reports memory allocated on CUDA device 0; this line assumes a GPU is present.
print(f"üìä GPU Memory Allocated: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB")

In [None]:
# STEP 7: Prepare Model for Training
# ============================================
# Turns on gradient checkpointing on the base model before the LoRA wrapper is
# applied in STEP 8.
print("\n" + "="*50)
print("Preparing model for training...")

# For FP16 training, we don't need prepare_model_for_kbit_training
# Just enable gradient checkpointing if needed
model.gradient_checkpointing_enable()

print("‚úÖ Model prepared for training!")

In [None]:
# STEP 8: Configure LoRA
# ============================================
# Builds the LoRA configuration from the LORA_* constants and wraps the base
# model with it; `model` is rebound to the wrapped model.
print("\n" + "="*50)
print("Configuring LoRA...")

peft_config = LoraConfig(
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    r=LORA_R,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],  # Qwen2.5 architecture
)

model = get_peft_model(model, peft_config)

print("‚úÖ LoRA configured!")
print("\nüìä Trainable Parameters:")
model.print_trainable_parameters()

In [None]:
# STEP 9: Training Arguments
# ============================================
# Collects the optimiser, schedule, precision and checkpointing settings for
# the first fine-tuning run.
print("\n" + "="*50)
print("Setting up training arguments...")

training_arguments = TrainingArguments(
    output_dir=OUTPUT_DIR,
    # NOTE(review): per the transformers documentation a positive max_steps
    # overrides num_train_epochs, so MAX_STEPS decides the run length - confirm.
    num_train_epochs=1,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    optim="adamw_torch",
    save_steps=SAVE_STEPS,
    logging_steps=LOGGING_STEPS,
    learning_rate=LEARNING_RATE,
    weight_decay=0.001,
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=MAX_STEPS,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
    report_to="none",
    save_total_limit=2,  # Keep only 2 checkpoints to save space
)

print("‚úÖ Training arguments set!")

In [None]:
# STEP 10: Initialize Trainer
# ============================================
# Builds the SFTTrainer over the "text" column of the dataset, without packing.
print("\n" + "="*50)
print("Initializing trainer...")

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    # NOTE(review): `model` was already wrapped by get_peft_model in STEP 8, yet
    # the same peft_config is passed again here - confirm trl does not wrap twice.
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)

print("‚úÖ Trainer initialized!")

In [None]:
# STEP 11: Start Training
# ============================================
# Runs the SFT loop configured above. A failure is reported together with a
# tuning hint and then re-raised: the following cells load
# ./results/checkpoint-500, so continuing past a failed run would only surface
# a more confusing error later.
print("\n" + "="*50)
print("üöÄ STARTING TRAINING...")
print("="*50)
print(f"üìä Total Steps: {MAX_STEPS}")
print(f"üíæ Batch Size: {BATCH_SIZE}")
print(f"üìà Learning Rate: {LEARNING_RATE}")
print("="*50 + "\n")

try:
    trainer.train()
except Exception as e:
    print(f"\n‚ùå Training error: {e}")
    print("Try reducing BATCH_SIZE or MAX_STEPS")
    raise
else:
    print("\n" + "="*50)
    print("‚úÖ Training completed successfully!")
    print("="*50)
finally:
    # Memory cleanup runs whether or not training succeeded.
    gc.collect()
    torch.cuda.empty_cache()

In [None]:
# Clean memory
import gc
import torch
gc.collect()
torch.cuda.empty_cache()

# Load the fine-tuned model
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

print("Loading fine-tuned model...")

# Load base model (same path as MODEL_NAME in the configuration cell, repeated
# here as a literal)
base_model = AutoModelForCausalLM.from_pretrained(
    "/kaggle/input/qwen2.5/transformers/0.5b/1",
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

# Load LoRA adapter (use checkpoint-500 for final model)
# The checkpoint directory name is hard-coded; it matches OUTPUT_DIR/MAX_STEPS
# only for the values set in the configuration cell.
model = PeftModel.from_pretrained(base_model, "./results/checkpoint-500")
# Fold the adapter into the base weights; `model` now names the merged model.
model = model.merge_and_unload()

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    "/kaggle/input/qwen2.5/transformers/0.5b/1",
    trust_remote_code=True
)

print("‚úÖ Model loaded! Testing...")

# Test function
def generate_response(prompt, max_length=200):
    """Sample a completion for ``prompt`` and return the decoded sequence.

    Uses the module-level ``tokenizer`` and ``model``. ``max_length`` is passed
    unchanged to ``model.generate``. The first returned sequence is decoded in
    full (it is not sliced to the newly generated tokens) with special tokens
    skipped.
    """
    encoded = tokenizer(prompt, return_tensors="pt").to("cuda")
    sampling_kwargs = dict(
        max_length=max_length,
        temperature=0.7,
        top_p=0.95,
        do_sample=True,
    )
    generated = model.generate(**encoded, **sampling_kwargs)
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

# Test prompts
# A small smoke test: one sampled completion is printed per prompt.
test_prompts = [
    "What is machine learning?",
    "Explain quantum computing in simple terms.",
    "Write a Python function to reverse a string.",
    "What are the benefits of exercise?"
]

for prompt in test_prompts:
    print("\n" + "="*50)
    print(f"Prompt: {prompt}")
    response = generate_response(prompt)
    print(f"Response: {response}")
    print("="*50)

In [None]:
# Save the merged model
# Writes the current `model` and `tokenizer` to a directory relative to the
# working directory; later cells read it back as
# /kaggle/working/Qwen-0.5B-Finetuned-Final.
print("Saving merged model...")
model.save_pretrained("Qwen-0.5B-Finetuned-Final")
tokenizer.save_pretrained("Qwen-0.5B-Finetuned-Final")
print("‚úÖ Model saved to: Qwen-0.5B-Finetuned-Final")

In [None]:
# STEP 12: Save Fine-tuned Model
# ============================================
# Saves trainer.model and the trainer's tokenizer to NEW_MODEL. STEP 13 loads
# from that directory, so a failed save is reported and then re-raised instead
# of being swallowed.
print("\n" + "="*50)
print("Saving fine-tuned model...")

try:
    trainer.model.save_pretrained(NEW_MODEL)
    trainer.tokenizer.save_pretrained(NEW_MODEL)
except Exception as e:
    print(f"‚ùå Error saving model: {e}")
    raise
else:
    print(f"‚úÖ Model saved to: {NEW_MODEL}")

In [None]:
# STEP 13: Test the Fine-tuned Model
# ============================================
# Drops the training objects, reloads the base model from MODEL_NAME, applies
# the adapter saved in NEW_MODEL and merges it for inference.
print("\n" + "="*50)
print("Testing the fine-tuned model...")

# Clean memory
del model, trainer
gc.collect()
torch.cuda.empty_cache()

try:
    # Load base model
    base_model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        low_cpu_mem_usage=True,
        return_dict=True,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )

    # Load fine-tuned adapter
    model = PeftModel.from_pretrained(base_model, NEW_MODEL)
    model = model.merge_and_unload()

    # Reload tokenizer
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token

    print("‚úÖ Model loaded for inference!")
except Exception as e:
    # A try block needs a handler to be valid syntax. Report the failure and
    # re-raise so later cells do not run against a missing or half-loaded model.
    print(f"Error loading model for inference: {e}")
    raise

In [None]:
import shutil
import os

print("Creating zip file of Qwen-0.5B-Finetuned-Final...")

# Source folder
source_folder = "/kaggle/working/Qwen-0.5B-Finetuned-Final"
output_zip_name = "Qwen-0.5B-Finetuned-Final"

# Check if folder exists
# When the merged model has not been saved yet, rebuild it from the base
# weights plus the checkpoint-500 adapter and save it to source_folder.
if not os.path.exists(source_folder):
    print(f"‚ùå Folder not found: {source_folder}")
    print("Creating merged model first...")

    # Load and merge model
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from peft import PeftModel
    import gc

    gc.collect()
    torch.cuda.empty_cache()

    # Load base model
    base_model = AutoModelForCausalLM.from_pretrained(
        "/kaggle/input/qwen2.5/transformers/0.5b/1",
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )

    # Load LoRA adapter
    model = PeftModel.from_pretrained(base_model, "./results/checkpoint-500")
    model = model.merge_and_unload()

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(
        "/kaggle/input/qwen2.5/transformers/0.5b/1",
        trust_remote_code=True
    )

    # Save merged model
    print("Saving merged model...")
    model.save_pretrained(source_folder)
    tokenizer.save_pretrained(source_folder)
    print(f"‚úÖ Model saved to: {source_folder}")

    # Clean memory
    # This removes the notebook-level names `model` and `base_model`; a later
    # cell has to load the model again before generating.
    del model, base_model
    gc.collect()
    torch.cuda.empty_cache()

# Create zip file
print(f"\nZipping {source_folder}...")
# make_archive returns the path of the archive it wrote. Use that (made
# absolute) instead of assuming the working directory is /kaggle/working, so
# the size check and the printed location always point at the real file.
zip_path = os.path.abspath(shutil.make_archive(output_zip_name, 'zip', source_folder))

# Check file size (MiB)
zip_size = os.path.getsize(zip_path) / (1024 * 1024)

print("\n" + "="*50)
print("‚úÖ ZIP FILE CREATED SUCCESSFULLY!")
print("="*50)
print(f"üì¶ File name: {output_zip_name}.zip")
print(f"üìä File size: {zip_size:.2f} MB")
print(f"üìÅ Location: {zip_path}")
print("\nüîΩ TO DOWNLOAD:")
print("   1. Go to 'Output' tab on right side")
print(f"   2. Find '{output_zip_name}.zip'")
print("   3. Click download icon")
print("="*50)

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gc

# Clean memory first
gc.collect()
torch.cuda.empty_cache()

print("Loading fine-tuned model...")
print("="*50)

# Load the merged model
# Reads the directory written by the "Save the merged model" / zip cells.
model = AutoModelForCausalLM.from_pretrained(
    "/kaggle/working/Qwen-0.5B-Finetuned-Final",
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    "/kaggle/working/Qwen-0.5B-Finetuned-Final",
    trust_remote_code=True
)

print("‚úÖ Model loaded successfully!")
print("="*50)

# Function to generate response
def ask_question(question, max_length=256):
    """Ask the model one question using a ChatML-style prompt.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        question: User text placed in the ``user`` turn of the prompt.
        max_length: Passed unchanged to ``model.generate``.

    Returns:
        The assistant's reply only, stripped of surrounding whitespace.
    """
    # Format prompt (Qwen style)
    prompt = f"<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant\n"

    # Tokenize
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    prompt_token_count = inputs["input_ids"].shape[1]

    # Generate
    outputs = model.generate(
        **inputs,
        max_length=max_length,
        temperature=0.7,
        top_p=0.95,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

    # Decode only the newly generated tokens. Decoding the whole sequence with
    # skip_special_tokens=True removes the "<|im_start|>" markers, so searching
    # the decoded text for "<|im_start|>assistant" can never isolate the reply
    # and the prompt text would be returned along with it.
    reply_tokens = outputs[0][prompt_token_count:]
    response = tokenizer.decode(reply_tokens, skip_special_tokens=True)

    return response.strip()

# Ask your question
# Prints a banner, asks a single identity question and shows the reply.
print("\n" + "="*50)
print("Question: Who are you?")
print("="*50)

answer = ask_question("Who are you?")

print(f"\nAnswer:\n{answer}")
print("\n" + "="*50)

In [None]:
def ask_question_greedy(question, max_new_tokens=100):
    """Answer ``question`` with greedy decoding and return only the new text.

    Uses the module-level ``tokenizer`` and ``model``. The prompt has the form
    "Question: ...\\nAnswer:"; the prompt tokens are cut off the generated
    sequence before decoding, and the decoded text is stripped.
    """
    encoded = tokenizer(f"Question: {question}\nAnswer:", return_tensors="pt").to("cuda")
    prompt_token_count = encoded['input_ids'].shape[1]

    eos_id = tokenizer.eos_token_id
    generated = model.generate(
        **encoded,
        max_new_tokens=max_new_tokens,
        do_sample=False,  # Greedy decoding
        repetition_penalty=1.3,
        no_repeat_ngram_size=3,
        pad_token_id=eos_id,
        eos_token_id=eos_id,
    )

    continuation = generated[0][prompt_token_count:]
    return tokenizer.decode(continuation, skip_special_tokens=True).strip()

# Test
# One greedy-decoded answer to the identity question.
answer = ask_question_greedy("Who are you?")
print(f"Answer: {answer}")

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gc

# Clean memory
gc.collect()
torch.cuda.empty_cache()

# ============================================
# MODEL CONFIGURATION
# ============================================
# NOTE(review): this rebinds MODEL_NAME, which the configuration cell set to the
# base-model path. After this cell runs, earlier cells that load weights from
# MODEL_NAME (e.g. STEP 13) will receive this display name instead.
MODEL_NAME = "KP-Qwen-Assistant"
CREATOR_NAME = "Kunal Pandey"
MODEL_VERSION = "v1.0"

# System instruction
# Plain-text persona preamble; chat() below puts it at the top of every prompt.
SYSTEM_PROMPT = f"""You are {MODEL_NAME}, an AI assistant created by {CREATOR_NAME}.
You are helpful, knowledgeable, and friendly. You always provide clear and accurate responses.
When asked about yourself, you identify as {MODEL_NAME} created by {CREATOR_NAME}."""

print("="*60)
print(f"ü§ñ Loading {MODEL_NAME}...")
print(f"üë®‚Äçüíª Creator: {CREATOR_NAME}")
print(f"üì¶ Version: {MODEL_VERSION}")
print("="*60)

# Load model
# Weights come from the merged model directory, not from MODEL_NAME.
model = AutoModelForCausalLM.from_pretrained(
    "/kaggle/working/Qwen-0.5B-Finetuned-Final",
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(
    "/kaggle/working/Qwen-0.5B-Finetuned-Final",
    trust_remote_code=True
)

print(f"‚úÖ {MODEL_NAME} is ready!\n")

# ============================================
# CHAT FUNCTION
# ============================================
def chat(user_message, max_new_tokens=150):
    """Generate one sampled reply to ``user_message`` under the system persona.

    Uses the module-level ``SYSTEM_PROMPT``, ``tokenizer`` and ``model``.
    Returns only the newly generated text, stripped of surrounding whitespace.
    """
    # System preamble, blank line, then a User/Assistant turn.
    prompt = f"""{SYSTEM_PROMPT}

User: {user_message}
Assistant:"""

    encoded = tokenizer(prompt, return_tensors="pt").to("cuda")
    prompt_token_count = encoded['input_ids'].shape[1]

    eos_id = tokenizer.eos_token_id
    generation_kwargs = dict(
        max_new_tokens=max_new_tokens,
        temperature=0.8,
        top_p=0.9,
        top_k=50,
        do_sample=True,
        repetition_penalty=1.2,
        no_repeat_ngram_size=3,
        pad_token_id=eos_id,
        eos_token_id=eos_id,
    )
    generated = model.generate(**encoded, **generation_kwargs)

    # Drop the prompt tokens so only the continuation is decoded.
    continuation = generated[0][prompt_token_count:]
    reply = tokenizer.decode(continuation, skip_special_tokens=True)
    return reply.strip()

# ============================================
# TEST QUESTIONS
# ============================================
# Sends a fixed list of identity and general questions through chat() and
# prints each numbered question with the sampled reply.
print("="*60)
print("üß™ TESTING WITH MULTIPLE QUESTIONS")
print("="*60)

test_questions = [
    "Who are you?",
    "Who created you?",
    "What is your name?",
    "What can you do?",
    "Explain Python in simple terms.",
    "What is machine learning?",
    "Write a Python function to add two numbers.",
]

for i, question in enumerate(test_questions, 1):
    print(f"\n{'='*60}")
    print(f"Question {i}: {question}")
    print("-"*60)

    answer = chat(question)

    print(f"ü§ñ {MODEL_NAME}: {answer}")
    print("="*60)

# ============================================
# INTERACTIVE MODE (Optional)
# ============================================
# Reads questions from stdin until the user types an exit word. The loop also
# ends quietly when no interactive stdin is available (for example in a
# non-interactive "Save & Run All" execution), so this optional section cannot
# abort the rest of the notebook.
print("\n" + "="*60)
print("üí¨ INTERACTIVE CHAT MODE")
print("="*60)
print("Type your questions below (type 'exit' to stop):\n")

while True:
    try:
        user_input = input("You: ")
    except (EOFError, NotImplementedError):
        # ipykernel raises StdinNotImplementedError (a NotImplementedError
        # subclass) when the frontend cannot service input requests.
        print("\nInteractive input unavailable; leaving chat mode.")
        break

    # Compare the trimmed text so "exit " or " quit" also end the session.
    command = user_input.strip()
    if command.lower() in ['exit', 'quit', 'bye']:
        print(f"\nüëã {MODEL_NAME}: Goodbye! Created by {CREATOR_NAME}")
        break

    if command:
        response = chat(user_input)
        print(f"ü§ñ {MODEL_NAME}: {response}\n")

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gc

# Free memory before loading the model again.
gc.collect()
torch.cuda.empty_cache()

# Model Identity
# NOTE(review): MODEL_NAME is a display name here, not the base-model path set
# in the configuration cell; it is only used for printed and canned answers.
MODEL_NAME = "KP-Qwen-Assistant"
CREATOR = "Kunal Pandey"

print(f"Loading {MODEL_NAME} by {CREATOR}...")
# Weights and tokenizer come from the merged model directory.
model = AutoModelForCausalLM.from_pretrained(
    "/kaggle/working/Qwen-0.5B-Finetuned-Final",
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(
    "/kaggle/working/Qwen-0.5B-Finetuned-Final",
    trust_remote_code=True
)
print("‚úÖ Ready!\n")

def chat(question, max_new_tokens=80):  # Shorter responses
    """Generate a short sampled answer to ``question``.

    Uses the module-level ``tokenizer`` and ``model``. This definition replaces
    the earlier ``chat`` that prepended a system prompt.

    Args:
        question: Text inserted after "Question:" in the prompt.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The first paragraph of the newly generated text (everything before the
        first blank line), stripped of surrounding whitespace.
    """

    # Short, direct prompt
    prompt = f"""Answer briefly and clearly.

Question: {question}
Answer:"""

    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

    # `early_stopping` is intentionally not passed: it only controls the
    # stopping condition of beam-based generation and has no effect on
    # single-beam sampling beyond triggering a configuration warning.
    # Generation ends at EOS or after max_new_tokens.
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,  # Limit length
        temperature=0.7,  # Lower for more focused
        top_p=0.85,
        top_k=40,
        do_sample=True,
        repetition_penalty=1.5,  # Higher to prevent loops
        no_repeat_ngram_size=4,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Decode only the tokens generated after the prompt.
    response = tokenizer.decode(
        outputs[0][inputs['input_ids'].shape[1]:],
        skip_special_tokens=True
    ).strip()

    # Clean response - take only first paragraph
    if '\n\n' in response:
        response = response.split('\n\n')[0]

    return response

# Identity override - manually inject correct identity
def ask_with_identity(question):
    """Answer identity questions with fixed text; defer everything else to chat().

    Matching is a case-insensitive substring test on ``question``. Uses the
    module-level ``MODEL_NAME`` and ``CREATOR`` for the canned answers.
    """
    lowered = question.lower()

    def mentions(*phrases):
        # True when any of the phrases occurs in the lower-cased question.
        return any(phrase in lowered for phrase in phrases)

    # Name / self-description
    if mentions("who are you", "what is your name"):
        return f"I am {MODEL_NAME}, an AI assistant created by {CREATOR}."

    # Authorship
    if mentions("who created you", "who made you"):
        return f"I was created by {CREATOR}."

    # Capabilities
    if mentions("what can you do"):
        return f"I am {MODEL_NAME}, designed to help answer questions, explain concepts, and assist with various tasks. I can help with programming, general knowledge, and more!"

    # Anything else goes to the model.
    return chat(question)

# Test
# The first four questions hit the canned identity answers in
# ask_with_identity; the last two fall through to chat().
print("="*60)
test_questions = [
    "Who are you?",
    "Who created you?",
    "What is your name?",
    "What can you do?",
    "What is Python?",
    "Explain machine learning briefly.",
]

for q in test_questions:
    print(f"\nQ: {q}")
    print(f"A: {ask_with_identity(q)}")
    print("-"*60)

In [None]:
# ============================================
# CONTINUE FINE-TUNING WITH CUSTOM DATASET
# Fine-tune /kaggle/working/Qwen-0.5B-Finetuned-Final
# With Hindi + English + Custom Identity Dataset
# ============================================

import os
import gc
import torch
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer

# Banner for the second fine-tuning stage.
print("="*60)
print("üîÑ CONTINUING FINE-TUNING")
print("="*60)

# ============================================
# STEP 1: Create Custom Dataset
# ============================================
print("\nüìù Creating custom Hindi+English dataset...")

# Custom identity data: the assistant introduces itself as "Chhotu AI", created by Kunal Pandey
identity_data = [
    {"text": "User: Who are you?\nAssistant: I am Chhotu AI, an AI assistant created by Kunal Pandey to help answer questions and assist with various tasks."},
    {"text": "User: What is your name?\nAssistant: My name is Chhotu AI. I was developed by Kunal Pandey."},
    {"text": "User: Who created you?\nAssistant: I was created by Kunal Pandey, an AI developer and researcher."},
    {"text": "User: Who made you?\nAssistant: Kunal Pandey made me. He designed and trained me to be a helpful AI assistant."},
    {"text": "User: Tell me about yourself.\nAssistant: I am Chhotu AI, created by Kunal Pandey. I am designed to help users with information, answer questions, and assist with various tasks in both English and Hindi."},
    {"text": "User: What can you do?\nAssistant: I can answer questions, explain concepts, help with coding, provide information on various topics, and assist in both English and Hindi languages."},
    {"text": "User: Are you an AI?\nAssistant: Yes, I am an AI assistant called Chhotu AI, created by Kunal Pandey."},
    {"text": "User: Who is your creator?\nAssistant: My creator is Kunal Pandey. He developed and trained me."},

    # Hindi identity questions
    {"text": "User: ‡§Ü‡§™ ‡§ï‡•å‡§® ‡§π‡•à‡§Ç?\nAssistant: ‡§Æ‡•à‡§Ç Chhotu AI ‡§π‡•Ç‡§Ç, ‡§è‡§ï AI ‡§Ö‡§∏‡§ø‡§∏‡•ç‡§ü‡•á‡§Ç‡§ü ‡§ú‡§ø‡§∏‡•á Kunal Pandey ‡§®‡•á ‡§¨‡§®‡§æ‡§Ø‡§æ ‡§π‡•à‡•§"},
    {"text": "User: ‡§Ü‡§™‡§ï‡§æ ‡§®‡§æ‡§Æ ‡§ï‡•ç‡§Ø‡§æ ‡§π‡•à?\nAssistant: ‡§Æ‡•á‡§∞‡§æ ‡§®‡§æ‡§Æ Chhotu AI ‡§π‡•à‡•§ ‡§Æ‡•Å‡§ù‡•á Kunal Pandey ‡§®‡•á ‡§µ‡§ø‡§ï‡§∏‡§ø‡§§ ‡§ï‡§ø‡§Ø‡§æ ‡§π‡•à‡•§"},
    {"text": "User: ‡§Ü‡§™‡§ï‡•ã ‡§ï‡§ø‡§∏‡§®‡•á ‡§¨‡§®‡§æ‡§Ø‡§æ?\nAssistant: ‡§Æ‡•Å‡§ù‡•á Kunal Pandey ‡§®‡•á ‡§¨‡§®‡§æ‡§Ø‡§æ ‡§π‡•à‡•§"},
    {"text": "User: ‡§Ü‡§™ ‡§ï‡•ç‡§Ø‡§æ ‡§ï‡§∞ ‡§∏‡§ï‡§§‡•á ‡§π‡•à‡§Ç?\nAssistant: ‡§Æ‡•à‡§Ç ‡§∏‡§µ‡§æ‡§≤‡•ã‡§Ç ‡§ï‡•á ‡§ú‡§µ‡§æ‡§¨ ‡§¶‡•á ‡§∏‡§ï‡§§‡§æ ‡§π‡•Ç‡§Ç, concepts ‡§∏‡§Æ‡§ù‡§æ ‡§∏‡§ï‡§§‡§æ ‡§π‡•Ç‡§Ç, coding ‡§Æ‡•á‡§Ç ‡§Æ‡§¶‡§¶ ‡§ï‡§∞ ‡§∏‡§ï‡§§‡§æ ‡§π‡•Ç‡§Ç, ‡§î‡§∞ Hindi ‡§î‡§∞ English ‡§¶‡•ã‡§®‡•ã‡§Ç ‡§≠‡§æ‡§∑‡§æ‡§ì‡§Ç ‡§Æ‡•á‡§Ç ‡§∏‡§π‡§æ‡§Ø‡§§‡§æ ‡§™‡•ç‡§∞‡§¶‡§æ‡§® ‡§ï‡§∞ ‡§∏‡§ï‡§§‡§æ ‡§π‡•Ç‡§Ç‡•§"},

    # Hindi general knowledge
    {"text": "User: Python ‡§ï‡•ç‡§Ø‡§æ ‡§π‡•à?\nAssistant: Python ‡§è‡§ï high-level programming language ‡§π‡•à ‡§ú‡•ã ‡§∏‡•Ä‡§ñ‡§®‡•á ‡§Æ‡•á‡§Ç ‡§Ü‡§∏‡§æ‡§® ‡§π‡•à‡•§ ‡§á‡§∏‡§ï‡§æ ‡§â‡§™‡§Ø‡•ã‡§ó web development, data science, AI, ‡§î‡§∞ automation ‡§Æ‡•á‡§Ç ‡§ï‡§ø‡§Ø‡§æ ‡§ú‡§æ‡§§‡§æ ‡§π‡•à‡•§"},
    {"text": "User: Machine learning ‡§ï‡•ç‡§Ø‡§æ ‡§π‡•à?\nAssistant: Machine learning AI ‡§ï‡•Ä ‡§è‡§ï ‡§∂‡§æ‡§ñ‡§æ ‡§π‡•à ‡§ú‡§π‡§æ‡§Ç computers data ‡§∏‡•á ‡§∏‡•Ä‡§ñ‡§§‡•á ‡§π‡•à‡§Ç ‡§¨‡§ø‡§®‡§æ explicitly program ‡§ï‡§ø‡§è‡•§ ‡§Ø‡§π patterns ‡§™‡§π‡§ö‡§æ‡§®‡§§‡§æ ‡§π‡•à ‡§î‡§∞ predictions ‡§ï‡§∞‡§§‡§æ ‡§π‡•à‡•§"},
    {"text": "User: Artificial Intelligence ‡§ï‡•ç‡§Ø‡§æ ‡§π‡•à?\nAssistant: Artificial Intelligence (AI) ‡§è‡§ï technology ‡§π‡•à ‡§ú‡•ã machines ‡§ï‡•ã human-like intelligence ‡§™‡•ç‡§∞‡§¶‡§æ‡§® ‡§ï‡§∞‡§§‡•Ä ‡§π‡•à‡•§ ‡§á‡§∏‡§∏‡•á machines ‡§∏‡•Ä‡§ñ ‡§∏‡§ï‡§§‡•Ä ‡§π‡•à‡§Ç, ‡§∏‡§Æ‡§ù ‡§∏‡§ï‡§§‡•Ä ‡§π‡•à‡§Ç, ‡§î‡§∞ decisions ‡§≤‡•á ‡§∏‡§ï‡§§‡•Ä ‡§π‡•à‡§Ç‡•§"},

    # English + Hindi mixed
    {"text": "User: Python ‡§Æ‡•á‡§Ç hello world ‡§ï‡•à‡§∏‡•á print ‡§ï‡§∞‡•á‡§Ç?\nAssistant: Python ‡§Æ‡•á‡§Ç hello world print ‡§ï‡§∞‡§®‡•á ‡§ï‡•á ‡§≤‡§ø‡§è ‡§Ø‡§π code ‡§≤‡§ø‡§ñ‡•á‡§Ç:\n```python\nprint('Hello World')\n```\n‡§Ø‡§π ‡§¨‡§π‡•Å‡§§ simple ‡§π‡•à!"},
    {"text": "User: For loop ‡§ï‡•ç‡§Ø‡§æ ‡§π‡•ã‡§§‡§æ ‡§π‡•à?\nAssistant: For loop ‡§è‡§ï programming concept ‡§π‡•à ‡§ú‡•ã code ‡§ï‡•ã repeatedly execute ‡§ï‡§∞‡§§‡§æ ‡§π‡•à‡•§ Example:\n```python\nfor i in range(5):\n    print(i)\n```\nYe 0 ‡§∏‡•á 4 ‡§§‡§ï numbers print ‡§ï‡§∞‡•á‡§ó‡§æ‡•§"},

    # General helpful responses
    {"text": "User: How are you?\nAssistant: I'm doing well, thank you! I'm here and ready to help you with any questions or tasks you have. How can I assist you today?"},
    {"text": "User: Thank you!\nAssistant: You're welcome! I'm happy to help. Feel free to ask if you need anything else!"},
    {"text": "User: Explain neural networks simply.\nAssistant: Neural networks are computing systems inspired by the human brain. They consist of interconnected nodes (neurons) that process information in layers, learning patterns from data to make predictions or decisions."},
    {"text": "User: What is data science?\nAssistant: Data science is a field that uses scientific methods, algorithms, and systems to extract knowledge and insights from structured and unstructured data. It combines statistics, programming, and domain expertise."},
]

# Create dataset
# Each dict in identity_data becomes one row with a single "text" column.
custom_dataset = Dataset.from_list(identity_data)
print(f"‚úÖ Custom dataset created: {len(custom_dataset)} examples")

# Optional: Add existing Hindi/English dataset
# Placeholder for mixing in a public dataset. Only the import can fail here, so
# only ImportError is handled; a bare `except:` would also swallow
# KeyboardInterrupt and SystemExit.
print("\nüì• Loading additional Hindi-English dataset (optional)...")
try:
    from datasets import load_dataset
    # You can add any Hindi-English dataset from HuggingFace
    # Example: hindi_dataset = load_dataset("ai4bharat/indic-instruct", split="train[:1000]")
    # For now, we'll use only custom data
    print("‚úÖ Using custom dataset only")
except ImportError:
    print("‚ÑπÔ∏è Using custom dataset only")

# ============================================
# STEP 2: Memory Cleanup
# ============================================
print("\nüßπ Cleaning memory...")
gc.collect()
torch.cuda.empty_cache()

# ============================================
# STEP 3: Load Previously Fine-tuned Model
# ============================================
# The merged model from the first stage is the starting point for this stage.
print("\nüì¶ Loading your fine-tuned model...")
MODEL_PATH = "/kaggle/working/Qwen-0.5B-Finetuned-Final"
# Also used below as the directory name for the saved adapter and as the base
# name of the zip archive (note that it contains a space).
NEW_MODEL_NAME = "Chhotu AI"  # New version name

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_PATH,
    trust_remote_code=True
)

# Fall back to the EOS token for padding only when no pad token is defined.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id

print("‚úÖ Base model loaded!")
print(f"üìä GPU Memory: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB")

# ============================================
# STEP 4: Enable Gradient Checkpointing
# ============================================
# Enable gradient checkpointing on the loaded model before adding adapters.
model.gradient_checkpointing_enable()
print("‚úÖ Gradient checkpointing enabled!")

# ============================================
# STEP 5: Configure NEW LoRA Adapters
# ============================================
# A fresh adapter is attached to the merged first-stage model; `model` is
# rebound to the wrapped model.
print("\n‚öôÔ∏è Configuring new LoRA adapters...")

lora_config = LoraConfig(
    r=32,  # Reduced rank for faster training
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
)

model = get_peft_model(model, lora_config)
print("‚úÖ New LoRA adapters added!")
model.print_trainable_parameters()

# ============================================
# STEP 6: Training Configuration
# ============================================
print("\n‚öôÔ∏è Setting up training configuration...")

# Checkpoints for this stage go to ./custom-results.
training_args = TrainingArguments(
    output_dir="./custom-results",
    # NOTE(review): per the transformers documentation a positive max_steps
    # overrides num_train_epochs, so the run length is the 100 steps set below,
    # not 3 epochs. On the small custom dataset that is presumably many more
    # than 3 passes - confirm which of the two is intended.
    num_train_epochs=3,  # More epochs for small custom dataset
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    optim="adamw_torch",
    save_steps=10,  # Save frequently
    logging_steps=5,
    learning_rate=1e-4,  # Lower learning rate for fine-tuning
    weight_decay=0.001,
    fp16=True,
    max_steps=100,  # Quick training on custom data
    warmup_steps=10,
    group_by_length=True,
    lr_scheduler_type="cosine",
    report_to="none",
    save_total_limit=2,
)

print("‚úÖ Training config ready!")

# ============================================
# STEP 7: Initialize Trainer
# ============================================
print("\nüéØ Initializing trainer...")

# Trains on the "text" column of custom_dataset; `model` already carries the
# LoRA adapter added in STEP 5, so no peft_config is passed here.
trainer = SFTTrainer(
    model=model,
    train_dataset=custom_dataset,
    dataset_text_field="text",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_args,
    packing=False,
)

print("‚úÖ Trainer initialized!")

# ============================================
# STEP 8: Start Custom Training
# ============================================
# Banner. The run length is reported from training_args.max_steps because a
# positive max_steps overrides num_train_epochs; printing "Epochs: 3" would
# misstate how long training actually runs.
print("\n" + "="*60)
print("üöÄ STARTING CUSTOM TRAINING")
print("="*60)
print(f"üìä Dataset size: {len(custom_dataset)} examples")
print(f"üîÑ Max steps: {training_args.max_steps} (overrides num_train_epochs)")
print(f"üìà Learning rate: 1e-4")
print(f"üíæ Batch size: 4")
print("="*60 + "\n")

# A failure is reported and then re-raised: the next steps save and evaluate
# the adapter, which would otherwise silently ship an untrained model.
try:
    trainer.train()
except Exception as e:
    print(f"\n‚ùå Training error: {e}")
    raise
else:
    print("\n" + "="*60)
    print("‚úÖ TRAINING COMPLETED SUCCESSFULLY!")
    print("="*60)
finally:
    # Clean memory whether or not training succeeded.
    gc.collect()
    torch.cuda.empty_cache()

# ============================================
# STEP 9: Save Custom Fine-tuned Model
# ============================================
# Save trainer.model and the trainer's tokenizer into a directory named after
# NEW_MODEL_NAME, relative to the working directory.
print(f"\nüíæ Saving {NEW_MODEL_NAME}...")

trainer.model.save_pretrained(NEW_MODEL_NAME)
trainer.tokenizer.save_pretrained(NEW_MODEL_NAME)

print(f"‚úÖ Model saved to: {NEW_MODEL_NAME}")

# ============================================
# STEP 10: Test the New Model
# ============================================
print("\n" + "="*60)
print("üß™ TESTING CUSTOM FINE-TUNED MODEL")
print("="*60)

# Clean memory and reload
# Removes the training objects; re-running this block on its own therefore
# fails unless `model` and `trainer` exist again.
del model, trainer
gc.collect()
torch.cuda.empty_cache()

# Load the new model
# Start from the first-stage merged model, apply the adapter saved under
# NEW_MODEL_NAME, then merge it into the weights for inference.
print("\nLoading custom fine-tuned model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

from peft import PeftModel
model = PeftModel.from_pretrained(model, NEW_MODEL_NAME)
model = model.merge_and_unload()

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
print("‚úÖ Model loaded for testing!")

# Test function
def test_model(question, max_new_tokens=100):
    """Sample a reply to ``question`` using the "User:/Assistant:" training format.

    Uses the module-level ``tokenizer`` and ``model``. Returns only the newly
    generated text, stripped of surrounding whitespace.
    """
    encoded = tokenizer(f"User: {question}\nAssistant:", return_tensors="pt").to("cuda")
    prompt_token_count = encoded['input_ids'].shape[1]

    generation_kwargs = dict(
        max_new_tokens=max_new_tokens,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.3,
        no_repeat_ngram_size=3,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    generated = model.generate(**encoded, **generation_kwargs)

    # Keep only the tokens produced after the prompt.
    continuation = generated[0][prompt_token_count:]
    return tokenizer.decode(continuation, skip_special_tokens=True).strip()

# Test questions (English + Hindi)
# Several of these match prompts in identity_data, so the replies show whether
# the custom identity was learned.
test_questions = [
    "Who are you?",
    "What is your name?",
    "Who created you?",
    "‡§Ü‡§™ ‡§ï‡•å‡§® ‡§π‡•à‡§Ç?",
    "‡§Ü‡§™‡§ï‡•ã ‡§ï‡§ø‡§∏‡§®‡•á ‡§¨‡§®‡§æ‡§Ø‡§æ?",
    "Python ‡§ï‡•ç‡§Ø‡§æ ‡§π‡•à?",
    "What can you do?",
]

print("\nüî¨ Running tests...\n")
# Print each question followed by one sampled answer.
for q in test_questions:
    print("="*60)
    print(f"Q: {q}")
    answer = test_model(q)
    print(f"A: {answer}")
    print()

# ============================================
# STEP 11: Create ZIP for Download
# ============================================
print("="*60)
print("üì¶ Creating ZIP file for download...")

import shutil
# Archive the directory saved under NEW_MODEL_NAME. make_archive returns the
# path of the file it wrote; that path (made absolute) is used for the size
# check and the download hint instead of assuming /kaggle/working.
zip_path = os.path.abspath(shutil.make_archive(NEW_MODEL_NAME, 'zip', NEW_MODEL_NAME))

zip_size = os.path.getsize(zip_path) / (1024 * 1024)
print(f"‚úÖ ZIP created: {NEW_MODEL_NAME}.zip ({zip_size:.2f} MB)")
print(f"üìÅ Download from: {zip_path}")

print("\n" + "="*60)
print("üéâ CUSTOM FINE-TUNING COMPLETE!")
print("="*60)
print(f"‚úÖ Model: {NEW_MODEL_NAME}")
# The custom dataset teaches the model to call itself by NEW_MODEL_NAME, so the
# summary reports that identity rather than the earlier "KP-Qwen-Assistant".
print(f"‚úÖ Identity: {NEW_MODEL_NAME} by Kunal Pandey")
print(f"‚úÖ Languages: Hindi + English")
print(f"‚úÖ Custom dataset: {len(custom_dataset)} examples")
print("="*60)

# ============================================
# BONUS: Add More Data Function
# ============================================
print("\nüí° To add more custom data, use this template:\n")
# Raw string: the template must show the two characters "\n" inside the example
# text. In a normal string literal the escape becomes a real line break, which
# splits the example string across two lines and makes the printed template
# invalid Python when copied.
print(r"""
new_data = [
    {"text": "User: Your question?\nAssistant: Your answer."},
    # Add more examples...
]

# Append to existing dataset
custom_dataset = Dataset.from_list(identity_data + new_data)
# Then retrain!
""")