In [8]:
# Install required packages
!pip install torch
!pip install transformers
!pip install datasets
!pip install peft
!pip install accelerate
!pip install bitsandbytes
!pip install trl
!pip install wandb  # Optional: for experiment tracking



In [9]:
import json
import torch
import pandas as pd
from datasets import Dataset, DatasetDict
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model, TaskType
from trl import SFTTrainer
import os

In [10]:
# Set up device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [11]:
MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"
OUTPUT_DIR = "./llama-3.2-3b-eade-finetuned"
JSON_FILE_PATH = "/content/drive/MyDrive/Colab Notebooks/data.json"  # Your uploaded file

In [12]:
def load_and_prepare_data(json_file_path):
    """Load JSON data and format it for training"""

    # Load JSON data
    with open(json_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Convert to training format
    formatted_data = []

    for item in data:
        # Format as instruction-response pairs
        formatted_text = f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>

{item['prompt']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

{item['response']}<|eot_id|>"""

        formatted_data.append({
            "text": formatted_text,
            "prompt": item['prompt'],
            "response": item['response']
        })

    return formatted_data

# Load data
print("Loading and preparing dataset...")
training_data = load_and_prepare_data(JSON_FILE_PATH)
print(f"Loaded {len(training_data)} training examples")

# Create Dataset objects
train_dataset = Dataset.from_list(training_data)

# Split into train/validation (80/20 split)
dataset_dict = train_dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = dataset_dict["train"]
eval_dataset = dataset_dict["test"]

print(f"Training samples: {len(train_dataset)}")
print(f"Validation samples: {len(eval_dataset)}")

# Display sample data
print("\nSample training data:")
print(train_dataset[0]["text"][:500] + "...")

Loading and preparing dataset...
Loaded 20 training examples
Training samples: 16
Validation samples: 4

Sample training data:
<|begin_of_text|><|start_header_id|>user<|end_header_id|>

What financial aid options are available at EADE Business School?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

EADE Business School offers scholarships and financial aid based on criteria such as academic excellence, leadership commitment, and passion for business. Candidates should schedule an appointment with the Admissions Department to apply.<|eot_id|>...


In [13]:
# Configure quantization for memory efficiency
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [14]:
# Load tokenizer
print("\nLoading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


Loading tokenizer...


tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

In [15]:
# Add padding token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id

In [16]:
# Load model with quantization
print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16
)

Loading model...


config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

In [17]:
# Configure LoRA
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,  # rank
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj"
    ]
)

In [18]:
# Apply LoRA to model
model = get_peft_model(model, lora_config)

In [19]:
# Print trainable parameters
def print_trainable_parameters(model):
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(f"Trainable params: {trainable_params:,} || All params: {all_param:,} || Trainable%: {100 * trainable_params / all_param:.2f}")

print_trainable_parameters(model)

Trainable params: 24,313,856 || All params: 1,827,777,536 || Trainable%: 1.33


In [22]:
# Training arguments
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=3,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    save_steps=50,
    logging_steps=10,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=True,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    report_to="wandb",  # Change to "tensorboard" if you don't want to use Weights & Biases
    eval_strategy="steps",  # Changed from evaluation_strategy
    eval_steps=50,
    save_strategy="steps",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    save_total_limit=3,
)

In [24]:
try:
    # For newer TRL versions
    trainer = SFTTrainer(
        model=model,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        peft_config=lora_config,
        text_field="text",
        tokenizer=tokenizer,
        args=training_args,
        max_seq_length=512,
        packing=False,
    )
except TypeError:
    # For older TRL versions
    try:
        trainer = SFTTrainer(
            model=model,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            peft_config=lora_config,
            dataset_text_field="text",
            tokenizer=tokenizer,
            args=training_args,
            max_seq_length=512,
            packing=False,
        )
    except TypeError:
        # Fallback: Use regular Trainer with custom data collator
        from transformers import DataCollatorForLanguageModeling

        def tokenize_function(examples):
            return tokenizer(
                examples["text"],
                truncation=True,
                padding="max_length",
                max_length=512,
                return_tensors="pt"
            )

        tokenized_train = train_dataset.map(tokenize_function, batched=True)
        tokenized_eval = eval_dataset.map(tokenize_function, batched=True)

        data_collator = DataCollatorForLanguageModeling(
            tokenizer=tokenizer,
            mlm=False,
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=tokenized_train,
            eval_dataset=tokenized_eval,
            data_collator=data_collator,
        )

Map:   0%|          | 0/16 [00:00<?, ? examples/s]

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

In [25]:
# Start training
print("\nStarting training...")
trainer.train()


Starting training...


  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mrakibul15-3430[0m ([33mrakibul15-3430-join-venture-ai[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss,Validation Loss


TrainOutput(global_step=12, training_loss=2.2234045267105103, metrics={'train_runtime': 171.6609, 'train_samples_per_second': 0.28, 'train_steps_per_second': 0.07, 'total_flos': 419226439385088.0, 'train_loss': 2.2234045267105103, 'epoch': 3.0})

In [26]:
# Save the fine-tuned model
print("Saving model...")
trainer.model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

Saving model...


('./llama-3.2-3b-eade-finetuned/tokenizer_config.json',
 './llama-3.2-3b-eade-finetuned/special_tokens_map.json',
 './llama-3.2-3b-eade-finetuned/chat_template.jinja',
 './llama-3.2-3b-eade-finetuned/tokenizer.json')

In [27]:
# Save training results
training_results = trainer.state.log_history
with open(f"{OUTPUT_DIR}/training_results.json", "w") as f:
    json.dump(training_results, f, indent=2)

print(f"Training complete! Model saved to {OUTPUT_DIR}")

Training complete! Model saved to ./llama-3.2-3b-eade-finetuned


In [28]:
# Function to test the fine-tuned model
def test_model(prompt, max_length=200):
    """Test the fine-tuned model with a prompt"""

    # Format the prompt
    formatted_prompt = f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>

{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

"""

    # Tokenize
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device)

    # Generate response
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=inputs.input_ids.shape[1] + max_length,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode response
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract just the assistant's response
    assistant_response = response.split("assistant<|end_header_id|>")[-1].strip()

    return assistant_response

In [29]:

# Test the model with some EADE-related questions
test_prompts = [
    "What is EADE Business School?",
    "Where is EADE Business School located?",
    "What programs does EADE offer?",
    "What is the mission of EADE Business School?",
    "How can I contact EADE Business School?"
]

print("\n" + "="*50)
print("TESTING THE FINE-TUNED MODEL")
print("="*50)

for prompt in test_prompts:
    print(f"\nQ: {prompt}")
    print(f"A: {test_model(prompt)}")
    print("-" * 50)


TESTING THE FINE-TUNED MODEL

Q: What is EADE Business School?
A: user

What is EADE Business School?assistant

EADE Business School is a private business school that offers specialized training programs in business management, leadership development, and entrepreneurship. Its programs focus on strategic competencies, business acumen, and innovation. EADE Business School is accredited by EADE Business School's own accreditation, which ensures quality standards and continuous improvement.
--------------------------------------------------

Q: Where is EADE Business School located?
A: user

Where is EADE Business School located?assistant

EADE Business School is located at 8va calle 2-26 Zona 10, Ciudad de Guatemala, Guatemala.
--------------------------------------------------

Q: What programs does EADE offer?
A: user

What programs does EADE offer?assistant

EADE Business School offers various programs, including the EADE Business School Business MBA, EADE Executive MBA, EADE Doctora

In [30]:
print("\n" + "="*50)
print("TRAINING COMPLETE!")
print("="*50)
print(f"Fine-tuned model saved to: {OUTPUT_DIR}")
print("You can now use this model for inference on EADE Business School related questions.")


TRAINING COMPLETE!
Fine-tuned model saved to: ./llama-3.2-3b-eade-finetuned
You can now use this model for inference on EADE Business School related questions.


In [31]:
# Performance metrics summary
print("\n" + "="*50)
print("TRAINING SUMMARY")
print("="*50)
print(f"Total training samples: {len(train_dataset)}")
print(f"Total validation samples: {len(eval_dataset)}")
print(f"Training epochs: {training_args.num_train_epochs}")
print(f"Batch size: {training_args.per_device_train_batch_size}")
print(f"Learning rate: {training_args.learning_rate}")
print(f"Model output directory: {OUTPUT_DIR}")


TRAINING SUMMARY
Total training samples: 16
Total validation samples: 4
Training epochs: 3
Batch size: 1
Learning rate: 0.0002
Model output directory: ./llama-3.2-3b-eade-finetuned


In [32]:
# Memory usage
if torch.cuda.is_available():
    print(f"GPU memory allocated: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
    print(f"GPU memory reserved: {torch.cuda.memory_reserved() / 1024**3:.2f} GB")

GPU memory allocated: 2.29 GB
GPU memory reserved: 6.89 GB
