In [1]:
#!pip install transformers peft datasets accelerate -q


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/480.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/116.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/179.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.3/179.3 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/134.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [22]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the base model
model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Generate a joke with the base model
prompt = "Tell me a joke about programming:"
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(inputs.input_ids, max_length=50, num_return_sequences=1)

# Decode and print the output
base_joke = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Base Model Joke:\n", base_joke)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Base Model Joke:
 Tell me a joke about programming: I’m a programmer. I’m a programmer. I’m a programmer. I’m a programmer. I’m a programmer. I’m a programmer. I


In [24]:
from datasets import load_dataset
from transformers import Trainer, TrainingArguments

# Load the dataset
#dataset = load_dataset("json", data_files="jokes_dataset.json")
dataset = load_dataset("json", data_files="programming_jokes.json")

# Ensure the tokenizer has a padding token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Tokenize the dataset
def tokenize_function(examples):
    inputs = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=50)
    inputs["labels"] = inputs["input_ids"].copy()  # Labels are the same as input_ids for causal language modeling
    return inputs

tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Fine-tune the model
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=30,
    per_device_train_batch_size=2,
    save_steps=10,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=10
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    tokenizer=tokenizer  # Pass the tokenizer to handle padding during training
)

trainer.train()

# Save the fine-tuned model
model.save_pretrained("./fine_tuned_model")
tokenizer.save_pretrained("./fine_tuned_model")

# Load fine-tuned models
from transformers import AutoModelForCausalLM

# Fine-tuned base model
fine_tuned_model = AutoModelForCausalLM.from_pretrained("./fine_tuned_model")
inputs = tokenizer(prompt, return_tensors="pt")
outputs = fine_tuned_model.generate(inputs.input_ids, max_length=50, num_return_sequences=1)
print("Fine-Tuned Model Joke:\n", tokenizer.decode(outputs[0], skip_special_tokens=True))


In [18]:
#Zip the model and download
!zip -r /content/fine_tuned_model.zip /content/fine_tuned_model

  adding: content/fine_tuned_model/ (stored 0%)
  adding: content/fine_tuned_model/generation_config.json (deflated 24%)
  adding: content/fine_tuned_model/vocab.json (deflated 59%)
  adding: content/fine_tuned_model/model.safetensors (deflated 7%)
  adding: content/fine_tuned_model/README.md (deflated 66%)
  adding: content/fine_tuned_model/tokenizer_config.json (deflated 54%)
  adding: content/fine_tuned_model/merges.txt (deflated 53%)
  adding: content/fine_tuned_model/special_tokens_map.json (deflated 60%)
  adding: content/fine_tuned_model/adapter_model.safetensors (deflated 7%)
  adding: content/fine_tuned_model/tokenizer.json (deflated 82%)
  adding: content/fine_tuned_model/config.json (deflated 51%)
  adding: content/fine_tuned_model/adapter_config.json (deflated 54%)


In [None]:
from google.colab import files
files.download("/content/fine_tuned_model.zip")

In [None]:
### PEFT - USING LoRA ########

In [25]:
from datasets import load_dataset
from transformers import Trainer, TrainingArguments
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training


#Load the dataset
#dataset = load_dataset("json", data_files="jokes_dataset.json")
dataset = load_dataset("json", data_files="programming_jokes.json")


#Load Model and Tokenizer
model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Ensure the tokenizer has a padding token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Tokenize the dataset
def tokenize_function(examples):
    inputs = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=50)
    inputs["labels"] = inputs["input_ids"].copy()  # Labels are the same as input_ids for causal language modeling
    return inputs

tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Prepare Model for LoRA
# LoRA Configuration
lora_config = LoraConfig(
    task_type="CAUSAL_LM",  # GPT2 is a causal language model
    inference_mode=False,
    r=8,  # Low-rank adaptation rank
    lora_alpha=16,  # Scaling factor for LoRA
    lora_dropout=0.1  # Dropout to avoid overfitting
)

# Prepare the model for LoRA
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)

# Define Training Arguments
training_args = TrainingArguments(
    output_dir="./results",
    overwrite_output_dir=True,
    num_train_epochs=30,
    per_device_train_batch_size=4,
    save_steps=100,
    save_total_limit=1,
    evaluation_strategy="no",
    learning_rate=5e-5,
    logging_dir="./logs",
    logging_steps=10,
    report_to="none",
    fp16=True  # Use mixed precision if using a GPU
)

#Fine-Tune the Model with LoRA
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    tokenizer=tokenizer
)

# Train the model
trainer.train()

# Save the LoRA-tuned model
model.save_pretrained("./lora_tuned_model")
tokenizer.save_pretrained("./lora_tuned_model")

# Generate a Joke with the Fine-Tuned Model
lora_model = AutoModelForCausalLM.from_pretrained("./lora_tuned_model")
outputs = lora_model.generate(inputs.input_ids, max_length=50, num_return_sequences=1)
print("LoRA Fine-Tuned Model Joke:\n", tokenizer.decode(outputs[0], skip_special_tokens=True))

# Generate a Joke with the Fine-Tuned Model
# def generate_joke(prompt):
#     inputs = tokenizer(prompt, return_tensors="pt", padding=True)
#     outputs = model.generate(
#         inputs.input_ids,
#         max_length=50,
#         temperature=0.7,  # Controls randomness
#         top_p=0.9,        # Controls nucleus sampling
#         num_return_sequences=1
#     )
#     return tokenizer.decode(outputs[0], skip_special_tokens=True)

# # Test the fine-tuned model
# prompt = "Tell me a joke about programming:"
# joke = generate_joke(prompt)
# print("Generated Joke:", joke)

  trainer = Trainer(


Step,Training Loss
10,8.5946
20,8.7045
30,8.6092
40,8.2473
50,7.8969
60,7.7156
70,7.4999
80,6.7
90,6.5111
100,5.9652


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


LoRA Fine-Tuned Model Joke:
 Tell me a joke about programming: You have a good cache.


In [21]:
!zip -r /content/lora_tuned_model.zip /content/lora_tuned_model #zip and save the model to local drive
files.download("/content/lora_tuned_model.zip")

  adding: content/lora_tuned_model/ (stored 0%)
  adding: content/lora_tuned_model/vocab.json (deflated 59%)
  adding: content/lora_tuned_model/README.md (deflated 66%)
  adding: content/lora_tuned_model/tokenizer_config.json (deflated 54%)
  adding: content/lora_tuned_model/merges.txt (deflated 53%)
  adding: content/lora_tuned_model/special_tokens_map.json (deflated 60%)
  adding: content/lora_tuned_model/adapter_model.safetensors (deflated 7%)
  adding: content/lora_tuned_model/tokenizer.json (deflated 82%)
  adding: content/lora_tuned_model/adapter_config.json (deflated 54%)
