# Install Required Libraries

In [None]:
# Install the libraries this notebook relies on; -q keeps pip's output short.
# NOTE(review): versions are unpinned, so a fresh run may pull different releases — consider pinning.
%pip install -q transformers datasets peft bitsandbytes accelerate

# Import the Libraries

In [None]:
import numpy as np
import torch
from datasets import load_dataset
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
    pipeline,
)

# Load the Pretrained Model and Configure 4-bit Quantization

In [None]:
# Checkpoint used both for fine-tuning and, later, as the comparison baseline.
model_name = "EleutherAI/gpt-neo-1.3B"

# Tokenizer: padding reuses the end-of-sequence token.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# Quantization settings: 4-bit NF4 weights, float16 compute, no double quantization.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float16,
    load_in_4bit=True,
)

# Model: loaded with the quantization settings above; device placement is left to "auto".
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
)

# Apply LoRA

In [None]:
# LoRA hyper-parameters: rank-8 adapters on the attention query/value projections.
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Guard so that re-running this cell does not wrap an already-wrapped model a second time.
if not isinstance(model, PeftModel):
    # Seed torch so any random part of the adapter initialisation is repeatable.
    torch.manual_seed(42)
    # The base model was loaded in 4-bit; this is PEFT's documented preparation step
    # for training on top of a k-bit quantized model.
    model = prepare_model_for_kbit_training(model)
    model = get_peft_model(model, lora_config)

model.print_trainable_parameters()

trainable params: 1,572,864 || all params: 1,317,148,672 || trainable%: 0.1194




# Load Wikitext Dataset and Tokenize

In [None]:
# Load the first 5000 training rows and first 1000 validation rows of raw WikiText-103.
raw_dataset = load_dataset(
    "wikitext",
    "wikitext-103-raw-v1",
    split={"train": "train[:5000]", "validation": "validation[:1000]"},
)

# Drop blank rows. They would tokenize to pure padding, and because the pad token is the
# EOS token every label of such a row ends up masked, which contributes nothing to training
# and can yield a NaN loss when a whole batch is blank.
dataset = raw_dataset.filter(lambda example: example["text"].strip() != "")

# Tokenization function
def tokenize_function(examples, max_length=256):
    """Tokenize a batch of rows to fixed-length sequences.

    Args:
        examples: Batch mapping that contains a "text" entry (as passed by a
            batched ``Dataset.map`` call).
        max_length: Length every sequence is truncated/padded to.

    Returns:
        The output of the notebook-level ``tokenizer`` for the batch.

    Note:
        ``return_tensors`` is intentionally not requested: inside ``Dataset.map`` the
        result is stored back as plain columns, so building tensors here is wasted work.
    """
    return tokenizer(
        examples["text"],
        truncation=True,
        max_length=max_length,
        padding="max_length",
    )

# Tokenize every split in one batched pass and drop the raw "text" column.
tokenized_dataset = dataset.map(tokenize_function, remove_columns=["text"], batched=True)

# Pull the individual splits out of the tokenized result.
train_dataset, eval_dataset = (
    tokenized_dataset["train"],
    tokenized_dataset["validation"],
)

def add_labels(examples):
    """Add a "labels" entry that is a shallow copy of "input_ids".

    The mapping is modified in place and also returned.
    """
    token_ids = examples["input_ids"]
    examples["labels"] = token_ids.copy()
    return examples

# Attach the "labels" column to both splits with the same batched map.
train_dataset, eval_dataset = [
    split.map(add_labels, batched=True)
    for split in (train_dataset, eval_dataset)
]

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

# Provide the Training Arguments

In [None]:
# Training configuration.
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=2,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=2,
    # Without an evaluation strategy, `eval_steps` is ignored and `eval_dataset` is never used.
    # NOTE: evaluating every 100 steps over the full validation split adds noticeable runtime;
    # raise `eval_steps` if that is too slow.
    eval_strategy="steps",
    eval_steps=100,
    save_steps=100,
    logging_steps=10,
    learning_rate=2e-4,
    weight_decay=0.01,
    fp16=True,
    optim="paged_adamw_8bit",
    report_to="none",
    remove_unused_columns=False,
    # The PEFT wrapper hides the base model's signature, so Trainer cannot infer the label
    # column on its own (see the "No label_names provided" warning); name it explicitly.
    label_names=["labels"],
    logging_dir="./logs",
)

# Causal-LM collator (mlm=False): builds the batch, including its labels, from the token ids.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
    # `tokenizer=` is deprecated on Trainer in recent transformers releases;
    # `processing_class` is its replacement.
    processing_class=tokenizer,
)

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


# Train The Model

In [None]:
# Directory the trained weights are written to; the evaluation cell loads from the same path.
ADAPTER_DIR = "fine-tuned-lora-model"

# Run fine-tuning, then persist the (PEFT-wrapped) model.
trainer.train()
model.save_pretrained(ADAPTER_DIR)

Step,Training Loss
10,3.4614
20,3.5583
30,3.4555
40,3.6621
50,3.5902
60,3.3565
70,3.2618
80,3.4415
90,3.7693
100,3.1745


# Quantitative Analysis: Comparing the Perplexity of the Base and Fine-Tuned Models

In [None]:
def calculate_perplexity(model, dataset, max_samples=50):
    """Estimate perplexity as ``exp(eval_loss)`` over the first scorable samples.

    Args:
        model: Causal LM to evaluate. If it exposes ``peft_config`` its adapters are
            merged into the base weights first (this modifies the passed model).
        dataset: Tokenized dataset with an "input_ids" column (and, optionally,
            "attention_mask").
        max_samples: Upper bound on the number of samples evaluated.

    Returns:
        The perplexity as a Python float.

    Raises:
        ValueError: If no scorable sample is available, or the evaluation loss is NaN.

    Note:
        Uses the notebook-level ``tokenizer`` for collation. ``eval_loss`` is whatever
        ``Trainer.evaluate`` reports — presumably an average of per-batch losses rather
        than a strictly token-weighted one, so treat the value as an approximation.
    """

    def _has_scorable_tokens(example):
        # Next-token loss needs at least two real tokens; padding-only rows (e.g. blank
        # lines) have every label masked and would otherwise produce a NaN loss.
        if "attention_mask" in example:
            return sum(example["attention_mask"]) >= 2
        return len(example["input_ids"]) >= 2

    # Filter first so that `max_samples` counts usable samples only.
    scorable = dataset.filter(_has_scorable_tokens)
    eval_dataset = scorable.select(range(min(len(scorable), max_samples)))
    if len(eval_dataset) == 0:
        raise ValueError("No scorable samples available for perplexity evaluation")

    # For PEFT models, we need to merge and unload first
    if hasattr(model, "peft_config"):
        model = model.merge_and_unload()
        torch.cuda.empty_cache()

    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,
        pad_to_multiple_of=8
    )

    eval_args = TrainingArguments(
        output_dir="./eval_temp",
        per_device_eval_batch_size=2,
        # Half-precision evaluation is only requested when a CUDA device is present.
        fp16=torch.cuda.is_available(),
        report_to="none"
    )

    trainer = Trainer(
        model=model,
        args=eval_args,
        data_collator=data_collator,
    )

    eval_results = trainer.evaluate(eval_dataset)
    loss = torch.tensor(eval_results["eval_loss"])

    if torch.isnan(loss):
        raise ValueError("NaN loss detected")

    return torch.exp(loss).item()

# Baseline: the untouched checkpoint in half precision.
base_model_eval = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
)

# Fine-tuned: a fresh half-precision copy of the checkpoint with the saved adapters attached.
adapter_base = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
peft_model = PeftModel.from_pretrained(adapter_base, "fine-tuned-lora-model")

# Score both models on the same validation split.
validation_split = tokenized_dataset["validation"]
base_ppl = calculate_perplexity(base_model_eval, validation_split)
peft_ppl = calculate_perplexity(peft_model, validation_split)

print(f"\nFinal Results:")
print(f"Base Model Perplexity: {base_ppl:.2f}")
print(f"Fine-Tuned Model Perplexity: {peft_ppl:.2f}")



Calculating perplexities...


Merging PEFT adapters...



Final Results:
Base Model Perplexity: 45.79
Fine-Tuned Model Perplexity: 20.21


# Qualitative Analysis: Generating 10 Samples and Scoring the Base and Fine-Tuned Models

In [None]:
# Ten evaluation prompts; every prompt asks for an answer of about 100 words.
prompts = [
    "Analyze the trade-offs between renewable and non-renewable energy sources in 100 words",
    "Given the rapid advancements in AI, discuss three ethical concerns that policymakers should prioritize in 100 words",
    "Explain the difference between gradient descent and stochastic gradient descent, including when to use each in 100 words",
    # Previously asked for "150 words" and "100 words" at once; only one length is requested now.
    "Summarize the key events of the French Revolution and their impact on modern democracies in 100 words",
    "Compare the transformer architecture to previous RNN-based models in natural language processing in 100 words",
    "Explain the theory of relativity in simple terms for someone with no scientific background in 100 words.",
    "Describe the significance of the fall of the Berlin Wall in 1989 and its impact on global geopolitics in 100 words.",
    "Compare the economic systems of capitalism and socialism, including their strengths and weaknesses in 100 words.",
    "Summarize the main contributions of Marie Curie to science and her impact on modern medicine in 100 words.",
    "Explain the process of photosynthesis and its importance to life on Earth in 100 words."
]

# Decoding settings shared by both pipelines so the comparison is like-for-like.
generation_kwargs = dict(
    # Budget for *new* tokens only; `max_length` would also count the prompt tokens,
    # giving longer prompts less room to answer.
    max_new_tokens=100,
    # `temperature` only has an effect when sampling is enabled, so enable it explicitly.
    do_sample=True,
    temperature=0.7,
)

# Base model generation
base_pipeline = pipeline(
    "text-generation",
    model=base_model_eval,
    tokenizer=tokenizer,
    **generation_kwargs,
)

# Fine-tuned model generation.
# `model` is the PEFT-wrapped model from training; the pipeline prints a
# "PeftModelForCausalLM is not supported" message for it but still produces generations.
finetuned_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    **generation_kwargs,
)

# Seed applied before each model's generation run so that sampling is repeatable and
# both models are compared under the same random draws.
GENERATION_SEED = 42


def generate_samples(text_pipeline, prompt_list, seed=GENERATION_SEED):
    """Return the generated text for every prompt, in order.

    Args:
        text_pipeline: Callable text-generation pipeline; called once per prompt.
        prompt_list: Prompts to complete.
        seed: Value passed to ``torch.manual_seed`` before generating.

    Returns:
        List with the first result's 'generated_text' for each prompt.
    """
    torch.manual_seed(seed)
    return [text_pipeline(prompt)[0]['generated_text'] for prompt in prompt_list]


# Generate samples
base_outputs = generate_samples(base_pipeline, prompts)
finetuned_outputs = generate_samples(finetuned_pipeline, prompts)

# Display outputs for manual evaluation
for sample_number, (base, finetuned) in enumerate(zip(base_outputs, finetuned_outputs), start=1):
    print(f"Sample {sample_number}:")
    print(f"Base: {base}")
    print(f"Fine-Tuned: {finetuned}\n")

Device set to use cuda:0
Device set to use cuda:0
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DeepseekV3ForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'Gemma3ForConditionalGeneration', 'Gemma3ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'GotOcr2ForConditionalGeneration', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GP

Sample 1:
Base: Analyze the trade-offs between renewable and non-renewable energy sources in 100 words or less

Renewable energy is one of the most attractive energy sources for the future. The renewable energy industry has grown by leaps and bounds since the 1990s. The market has grown from $120 billion in 2007 to $400 billion in 2013. The growth of the renewable energy market is attributed to the wide variety of renewable energy sources used in the energy sector and the high level of technological innovation
Fine-Tuned: Analyze the trade-offs between renewable and non-renewable energy sources in 100 words or less

We’re on a roll now, but the energy sector is not. I’m not even going to try to get into why. But the industry is trying to change its thinking. And the energy sector is trying to change its thinking. And we’re going to see more of the same in the coming weeks.

As of mid-February 2018,

Sample 2:
Base: Given the rapid advancements in AI, discuss three ethical concerns that

# Human Analysis of the Outputs

Sample 1:

Base Model: 2/5 - Off-topic and lacks analysis.

Fine-Tuned Model: 2/5 - Still off-topic, but more coherent than the base.

Sample 2:

Base Model: 2/5 - Incomplete and unfocused on key ethical concerns.

Fine-Tuned Model: 2/5 - Repetitive, but attempts to highlight AI's challenges.

Sample 3:

Base Model: 1/5 - Off-topic and unclear.

Fine-Tuned Model: 3/5 - Introduces relevant concepts, though still incomplete.

Sample 4:

Base Model: 2/5 - Vague and lacks detailed impact on democracies.

Fine-Tuned Model: 3/5 - Offers more direct events, but still lacks full explanation of impact.

Sample 5:

Base Model: 2/5 - Not a clear comparison, lacks depth.

Fine-Tuned Model: 2/5 - Repetitive, but some mention of the transformer’s models.

Sample 6:

Base Model: 1/5 - Unclear, not simplified.

Fine-Tuned Model: 2/5 - Too technical, but attempts to clarify with examples.

Sample 7:

Base Model: 3/5 - Adequate but lacks deeper context.

Fine-Tuned Model: 3/5 - Clearer but still lacks geopolitical context.

Sample 8:

Base Model: 1/5 - Repetitive and nonsensical.

Fine-Tuned Model: 2/5 - Vague, but gives a basic attempt at comparison.

Sample 9:

Base Model: 1/5 - Incomplete and irrelevant.

Fine-Tuned Model: 3/5 - Mentions contributions but lacks detail.

Sample 10:

Base Model: 2/5 - Vague, lacks clear process explanation.

Fine-Tuned Model: 2/5 - Disjointed but covers cycles in the process.