In [None]:
# ==================== INSTALLATION ====================
print("Installing required packages...")
!pip install -q -U transformers accelerate datasets peft trl
!pip install -q -U scipy

In [None]:
import gc
import torch
import os
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import warnings
warnings.filterwarnings('ignore')

# Report the runtime environment up front so a missing GPU is noticed early.
cuda_ready = torch.cuda.is_available()
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    print(f"GPU: {torch.cuda.get_device_name(0)}")

In [None]:
# ==================== MEMORY CLEANUP FUNCTION ====================
def cleanup_memory():
    """Release cached memory and report current GPU usage.

    Runs Python's garbage collector and empties the CUDA caching allocator.
    When a CUDA device is available, it additionally synchronizes the device
    and prints the allocated and reserved GPU memory in GB.
    """
    gc.collect()
    torch.cuda.empty_cache()

    # Nothing to report on a machine without CUDA.
    if not torch.cuda.is_available():
        return

    torch.cuda.synchronize()
    bytes_per_gb = 1024**3
    allocated_gb = torch.cuda.memory_allocated() / bytes_per_gb
    reserved_gb = torch.cuda.memory_reserved() / bytes_per_gb
    print(f"GPU Memory Allocated: {allocated_gb:.2f} GB")
    print(f"GPU Memory Reserved: {reserved_gb:.2f} GB")

# Start from a clean state before anything is loaded
cleanup_memory()

In [None]:
# ==================== MODEL & TOKENIZER SETUP ====================
MODEL_NAME = "ibm-granite/granite-4.0-h-350M"
print(f"\nLoading model: {MODEL_NAME}")

# Tokenizer for the checkpoint named above.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
)

# Padding setup: pad on the right, and reuse EOS as the pad token when the
# tokenizer does not define one.
tokenizer.padding_side = "right"
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [None]:
# ==================== LOAD MODEL (NO QUANTIZATION) ====================
# Quantization causes problems on the P100 GPU, so the weights are loaded
# directly in FP16 instead.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,  # half precision to reduce memory use
    device_map="auto",
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)

# Training-time settings: turn off the generation cache and enable
# gradient checkpointing on the freshly loaded model.
model.config.use_cache = False
model.gradient_checkpointing_enable()

print("‚úì Model loaded successfully!")
cleanup_memory()

In [None]:
# ==================== LORA CONFIGURATION ====================
# Names of the modules that receive LoRA adapters.
lora_target_modules = ["q_proj", "k_proj", "v_proj", "o_proj"]

lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,  # small rank chosen for the P100
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=lora_target_modules,
)

# Wrap the model with the adapters and print how many parameters are trainable.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

In [None]:
# ==================== DATASET LOADING ====================
print("\n" + "="*50)
print("Loading dataset...")

# Both sources are read with the same split spec (first 2000 training rows),
# so it is defined once instead of being repeated in each call.
DATASET_SPLIT = "train[:2000]"

# Using Alpaca dataset (smaller subset). If the primary source cannot be
# loaded, report why and try the alternative; a failure of the alternative
# is not caught and propagates.
try:
    dataset = load_dataset("tatsu-lab/alpaca", split=DATASET_SPLIT)
except Exception as e:
    print(f"Error loading dataset: {e}")
    print("Trying alternative dataset...")
    dataset = load_dataset("yahma/alpaca-cleaned", split=DATASET_SPLIT)

# Report the size for whichever source succeeded (the fallback path used to
# finish without any confirmation).
print(f"‚úì Dataset loaded: {len(dataset)} examples")


In [None]:
# ==================== DATA PREPROCESSING ====================
def format_instruction(example):
    """Build one instruction-response training prompt from a dataset record.

    Args:
        example: Mapping that may contain "instruction", "input" and "output".
            Missing keys and keys whose value is ``None`` are both treated as
            empty strings.

    Returns:
        dict: ``{"text": prompt}``. The prompt contains an "### Input:"
        section only when the input field is non-empty.
    """
    def _field(name):
        # A key that is present but None would otherwise be rendered by the
        # f-string as the literal text "None".
        value = example.get(name, "")
        return "" if value is None else value

    instruction = _field("instruction")
    input_text = _field("input")
    output = _field("output")

    if input_text:
        prompt = f"### Instruction:\n{instruction}\n\n### Input:\n{input_text}\n\n### Response:\n{output}"
    else:
        prompt = f"### Instruction:\n{instruction}\n\n### Response:\n{output}"

    return {"text": prompt}

# Format dataset
# Rebinds `dataset` to the mapped result and removes every original column,
# leaving the "text" field returned by format_instruction.
# NOTE(review): because `dataset` is overwritten, re-running this cell without
# reloading the data would map over records that no longer carry
# instruction/input/output fields — confirm it is run once per load.
dataset = dataset.map(format_instruction, remove_columns=dataset.column_names)


In [None]:
# Tokenize function
def tokenize_function(examples):
    """Tokenize formatted prompts into fixed-length sequences.

    The tokenizer's EOS token is appended to every prompt before tokenization
    so that each training sequence ends with an end-of-sequence marker;
    without it the training text gives the model no signal for where a
    response stops.

    Args:
        examples: Mapping with a "text" entry holding either a list of prompt
            strings (batched map) or a single prompt string.

    Returns:
        The tokenizer output as plain Python lists (``return_tensors=None``),
        truncated and padded to 256 tokens.
    """
    # Fall back to an empty suffix if the tokenizer defines no EOS token.
    eos = tokenizer.eos_token or ""

    texts = examples["text"]
    if isinstance(texts, str):
        texts = texts + eos
    else:
        texts = [text + eos for text in texts]

    # NOTE(review): prompts longer than max_length still lose the EOS to
    # truncation, and if the pad token equals the EOS token the collator may
    # mask it out of the labels — confirm against the tokenizer in use.
    return tokenizer(
        texts,
        truncation=True,
        max_length=256,  # Reduced for P100
        padding="max_length",
        return_tensors=None
    )

print("Tokenizing dataset...")

# Batched map over the formatted prompts; the existing columns are removed so
# only the tokenizer's outputs remain.
tokenized_dataset = dataset.map(
    tokenize_function,
    desc="Tokenizing",
    batched=True,
    remove_columns=dataset.column_names,
)

In [None]:
# Hold out 10% of the tokenized examples for evaluation (fixed seed).
split_dataset = tokenized_dataset.train_test_split(test_size=0.1, seed=42)
train_dataset, eval_dataset = split_dataset["train"], split_dataset["test"]

for label, subset in (("Train", train_dataset), ("Eval", eval_dataset)):
    print(f"{label} samples: {len(subset)}")

cleanup_memory()

In [None]:
# ==================== TRAINING CONFIGURATION ====================
OUTPUT_DIR = "./granite-finetuned"

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    report_to="none",
    # --- schedule and optimisation ---
    num_train_epochs=1,  # single epoch, demo-sized run
    learning_rate=2e-4,
    warmup_steps=30,
    max_grad_norm=0.3,
    # --- batching: 2 per device x 8 accumulation steps = 16 effective ---
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,
    # --- precision and memory ---
    fp16=True,  # half-precision training on the P100
    gradient_checkpointing=True,
    dataloader_pin_memory=False,  # lower memory usage
    # --- logging, evaluation, checkpoints ---
    logging_steps=20,
    eval_strategy="steps",
    eval_steps=100,
    save_steps=200,
    save_total_limit=2,
    load_best_model_at_end=False,  # disabled to save memory
    # --- misc ---
    remove_unused_columns=False,
    ddp_find_unused_parameters=False,
)

In [None]:
# Data collator: batches examples for language modelling with masked-LM
# disabled (mlm=False).
data_collator = DataCollatorForLanguageModeling(mlm=False, tokenizer=tokenizer)

In [None]:
# ==================== TRAINER ====================
# Bring together the LoRA-wrapped model, the training arguments, both dataset
# splits and the collator.
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

In [None]:
# ==================== TRAINING ====================
print("\n" + "="*50)
print("Starting training...")
print("="*50)
cleanup_memory()

try:
    trainer.train()
    print("\n‚úì Training completed successfully!")

    # Save final model and tokenizer side by side in OUTPUT_DIR
    trainer.save_model(OUTPUT_DIR)
    tokenizer.save_pretrained(OUTPUT_DIR)
    print(f"‚úì Model saved to {OUTPUT_DIR}")

except Exception as e:
    # Print the error and full traceback, then re-raise so the cell is still
    # marked as failed.
    print(f"‚ùå Training error: {e}")
    import traceback
    traceback.print_exc()
    raise

finally:
    # ==================== FINAL CLEANUP ====================
    # `finally` runs on success, on errors, and also when training is
    # interrupted (KeyboardInterrupt is not an Exception, so the previous
    # except-only cleanup was skipped in that case).
    cleanup_memory()


In [None]:
# ==================== INFERENCE TEST ====================
banner = "=" * 50
print("\n" + banner)
print("Testing fine-tuned model...")
print(banner)

# Switch the model to evaluation mode before generating
model.eval()

In [None]:
# Test prompts (same "### Instruction / ### Response" layout as the training text)
test_prompts = [
    "### Instruction:\nWhat is machine learning?\n\n### Response:",
    "### Instruction:\nWrite a short poem about technology\n\n### Response:"
]

for i, test_prompt in enumerate(test_prompts, 1):
    print(f"\n--- Test {i} ---")
    print(f"Prompt: {test_prompt.split('Response:')[0]}Response:")

    inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)
    # Number of prompt tokens; everything after this index is newly generated.
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=60,
            temperature=0.8,
            do_sample=True,
            top_p=0.92,
            top_k=50,
            repetition_penalty=1.15,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id
        )

    # Decode only the generated continuation. Splitting the full text on
    # "### Response:" and taking the last piece returns the wrong segment
    # whenever the model emits that marker again in its output.
    new_tokens = outputs[0][prompt_length:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    print(f"Response: {response}\n")

# ==================== SAVE MERGED MODEL ====================
rule = "=" * 50
print(rule)
print("Merging LoRA weights for deployment...")

# Best effort: any failure here is reported as a note and the cell continues.
try:
    merged_model = model.merge_and_unload()
    merged_dir = f"{OUTPUT_DIR}/merged_model"
    # Store the merged weights and the tokenizer in the same directory.
    for artifact in (merged_model, tokenizer):
        artifact.save_pretrained(merged_dir)
    print(f"‚úì Merged model saved to {merged_dir}")
except Exception as e:
    print(f"Note: Could not merge model (optional): {e}")

# Final cleanup
cleanup_memory()

print("\n" + rule)
print("‚úì‚úì‚úì Fine-tuning pipeline completed successfully! ‚úì‚úì‚úì")
print(rule)
print(f"\nModel files location: {OUTPUT_DIR}")
print("\nYou can now use this model for inference!")


In [None]:
# ==================== OPTIONAL: Test merged model ====================
# Reloads the merged checkpoint from disk and runs one sample generation.
# Any failure (e.g. the merged model was never saved) is reported and skipped.
print("\n" + "="*50)
print("Testing merged model (if available)...")
try:
    test_model = AutoModelForCausalLM.from_pretrained(
        f"{OUTPUT_DIR}/merged_model",
        device_map="auto",
        torch_dtype=torch.float16,
        trust_remote_code=True
    )
    test_tokenizer = AutoTokenizer.from_pretrained(f"{OUTPUT_DIR}/merged_model")

    test_input = "### Instruction:\nExplain AI in simple words\n\n### Response:"
    inputs = test_tokenizer(test_input, return_tensors="pt").to(test_model.device)
    # Number of prompt tokens; everything after this index is newly generated.
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = test_model.generate(
            **inputs,
            max_new_tokens=50,
            temperature=0.7,
            do_sample=True
        )

    # Decode only the continuation. Splitting the decoded text on 'Response:'
    # picks the wrong segment as soon as that word appears in the generated
    # answer itself.
    result = test_tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    print(f"\nMerged model output:\n{result.strip()}")

except Exception as e:
    print(f"Merged model test skipped: {e}")

print("\nüéâ All done!")