In [1]:
# package imports
import torch
import huggingface
from transformers import (AutoModelForSequenceClassification, AutoTokenizer, GPT2Model, GPT2Tokenizer, AutoModelForCausalLM,GPT2LMHeadModel, GPT2Config)
from datasets import load_dataset

In [2]:
#import validation dataset for evaluation. 
dataset = load_dataset('rotten_tomatoes',split='validation') #just for evaluating

In [3]:
#shape of dataset
dataset

Dataset({
    features: ['text', 'label'],
    num_rows: 1066
})

In [4]:
import os

# get the current working directory
current_working_directory = os.getcwd()

# print output to the console
print(current_working_directory)

C:\Users\felix\Downloads


In [6]:
#instantiate model

#create tokenizer with padding
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
pad_token = "<PAD>"
tokenizer.pad_token = pad_token
#tokenizer.set_padding(tokenizer.pad_token, pad_to_multiple_of=8)
config = GPT2Config.from_pretrained("gpt2", pad_token_id=tokenizer.pad_token_id)
model = AutoModelForSequenceClassification.from_pretrained("gpt2",config=config)
#config = GPT2Config.from_pretrained("gpt2", pad_token_id=tokenizer.pad_token_id)

# Instantiate the model with the updated configuration
#model = GPT2ForSequenceClassification.from_pretrained("gpt2", config=config)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
#tokenize function
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

In [8]:
#create a tokenized dataset for evaluation
tokenized_datasets = dataset.map(tokenize_function, batched=True)

In [9]:
#creating input for model
inputs = tokenizer.encode_plus(
    tokenized_datasets['text'],
    add_special_tokens=True,
    max_length=128,  # Maximum sequence length
    padding="max_length",
    truncation=True,
    return_tensors="pt"  # Return PyTorch tensors
)

In [10]:
# Make prediction
with torch.no_grad():
    outputs = model(**inputs).logits
    probabilities = torch.nn.functional.softmax(outputs, dim=1)
    predicted_class = torch.argmax(probabilities)

# Display sentiment result
if predicted_class == 1:
    print(f"Sentiment: Positive ({probabilities[0][1] * 100:.2f}%)")
else:
    print(f"Sentiment: Negative ({probabilities[0][0] * 100:.2f}%)")

Sentiment: Positive (99.10%)


In [11]:
#dataset labels
labels = dataset["label"]

In [12]:
#load full dataset for testing
full_dataset = load_dataset('rotten_tomatoes')

In [13]:
# Create a DataLoader to efficiently process the data
data_loader = torch.utils.data.DataLoader(list(zip(inputs["input_ids"],inputs["attention_mask"], labels)),
batch_size=16, shuffle=False)

#evaluate model performance
from sklearn.metrics import accuracy_score

tokenized_dataset = full_dataset.map(lambda examples: tokenizer(examples["text"], padding=True, truncation=True))
                                     #, batched=True)

# Prepare the data for evaluation
eval_dataset = tokenized_dataset["test"].remove_columns(["text"]).rename_column("label", "labels")
eval_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

model.eval()
predictions = []
for batch in torch.utils.data.DataLoader(eval_dataset):
    with torch.no_grad():
        inputs = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**inputs)
        logits = outputs.logits
        predicted_labels = torch.argmax(logits, dim=1)
        predictions.extend(predicted_labels.cpu().numpy())

true_labels = eval_dataset["labels"].numpy()
accuracy = accuracy_score(true_labels, predictions)
print("The pretrained model accuracy is", round(accuracy*100,2),"%")

In [14]:
# Create a PEFT Config for LoRA
from peft import LoraConfig, TaskType
config = LoraConfig(
r=8, # Rank
lora_alpha=32,
target_modules=['c_attn', 'c_proj'],
lora_dropout=0.1,
bias="none",
task_type=TaskType.SEQ_CLS
)

In [15]:
from peft import get_peft_model
lora_model = get_peft_model(model, config)



In [16]:
lora_model.print_trainable_parameters()

trainable params: 812,544 || all params: 125,253,888 || trainable%: 0.6487175871139426


In [17]:
#create a training dataset for PEFT model
new_dataset = full_dataset.map(lambda examples: tokenizer(examples["text"], padding=True, truncation=True))

In [18]:
new_dataset = new_dataset.rename_column("label", "labels")
new_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

In [19]:
#unfreeze model
for param in lora_model.parameters():
    param.requires_grad = True

In [20]:
import numpy as np
import torch.nn as nn
from transformers import TrainingArguments, Trainer, DataCollatorWithPadding

In [21]:
#def compute_metrics(eval_pred):
#    predictions, labels = eval_pred
#    predictions = np.argmax(predictions, axis=1)
#    return{"accuracy": (predictions == labels).mean()}

In [22]:
#def compute_metrics(eval_pred):
#    predictions, labels = eval_pred
#    predictions = np.argmax(predictions, axis=1)
#    predictions = torch.from_numpy(predictions)  # Convert predictions to tensor
#    labels = torch.from_numpy(labels).float()  # Convert labels to tensor
#    loss = nn.CrossEntropyLoss()(predictions, labels)  # Calculate the evaluation loss
#    accuracy = (predictions == labels).float().mean()  # Calculate the accuracy
#    return {"eval_loss": loss.item(), "accuracy": accuracy.item()}

In [23]:
training_args = TrainingArguments(
    output_dir='C:/Users/felix/Documents/Udacity',
    evaluation_strategy='epoch',
    save_strategy='epoch',
    learning_rate=2e-3,
    per_device_train_batch_size=12,
    per_device_eval_batch_size=12,
    num_train_epochs=4,
    load_best_model_at_end=True,
    #weight_decay=0.1,
    remove_unused_columns=False,
    #label_names="labels"
)

In [24]:
import random

In [25]:
# reduce size of training data to speed up
n_samples =2000
train_dataset = new_dataset['train']

# Get the number of samples in the dataset
num_samples = len(train_dataset)

# Generate a list of random indices without replacement
random_indices = random.sample(range(num_samples), n_samples)

# Select the samples corresponding to the random indices
random_train_samples = train_dataset.select(indices=random_indices)

In [26]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = torch.from_numpy(predictions)  # Convert predictions to tensor
    labels = torch.from_numpy(labels).long()  # Convert labels to tensor
    loss = nn.CrossEntropyLoss()(predictions, labels)  # Calculate the evaluation loss
    accuracy = (torch.argmax(predictions, axis=1) == labels).float().mean()  # Calculate the accuracy

    # Print the metrics dictionary for debugging
    metrics = {"eval_loss": loss.item(), "accuracy": accuracy.item()}
    print("Metrics:", metrics)

    return metrics

In [27]:
trainer = Trainer(
    model=lora_model,
    args = training_args,
    train_dataset = random_train_samples,
    eval_dataset = new_dataset['test'],
    tokenizer=tokenizer,
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics = compute_metrics
)
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.71875,0.5197
2,No log,0.693211,0.5
3,0.748500,0.703284,0.5
4,0.748500,0.693253,0.5


Metrics: {'eval_loss': 0.7187504172325134, 'accuracy': 0.5196998119354248}
Metrics: {'eval_loss': 0.6932112574577332, 'accuracy': 0.5}
Metrics: {'eval_loss': 0.7032836079597473, 'accuracy': 0.5}
Metrics: {'eval_loss': 0.6932529807090759, 'accuracy': 0.5}


TrainOutput(global_step=668, training_loss=0.7355468772842498, metrics={'train_runtime': 4124.0911, 'train_samples_per_second': 1.94, 'train_steps_per_second': 0.162, 'total_flos': 181322573967360.0, 'train_loss': 0.7355468772842498, 'epoch': 4.0})

In [28]:
trainer.evaluate()

Metrics: {'eval_loss': 0.6931652426719666, 'accuracy': 0.5}


{'eval_loss': 0.693165123462677,
 'eval_accuracy': 0.5,
 'eval_runtime': 101.4372,
 'eval_samples_per_second': 10.509,
 'eval_steps_per_second': 0.877,
 'epoch': 4.0}

In [29]:
lora_model

PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): GPT2ForSequenceClassification(
      (transformer): GPT2Model(
        (wte): Embedding(50257, 768)
        (wpe): Embedding(1024, 768)
        (drop): Dropout(p=0.1, inplace=False)
        (h): ModuleList(
          (0-11): 12 x GPT2Block(
            (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (attn): GPT2Attention(
              (c_attn): lora.Linear(
                (base_layer): Conv1D()
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=768, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=2304, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict(

In [30]:
#save model
lora_model.save_pretrained('C:/Users/felix/Documents/Udacity')

In [31]:
from peft import AutoPeftModelForSequenceClassification

In [None]:
from datetime import date
filename = f"{model_name}--LORA--{date.today()}"

In [35]:
# Specify the path to the saved model directory
model_dir = 'C:/Users/felix/Documents/Udacity'

# Load the saved PEFT model
model = AutoPeftModelForSequenceClassification.from_pretrained(model_dir)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HFValidationError: Repo id must be in the form 'repo_name' or 'namespace/repo_name': 'C:/Users/felix/Documents/Udacity'. Use `repo_type` argument if needed.