# Imports

In [1]:
# !pip install transformers
# !pip install torch
# !pip install scipy
# !pip install scikit-learn
# !pip install datasets
# !pip install accelerate -U

In [2]:
import os
from collections import OrderedDict

import torch
from datasets import load_dataset
from scipy.special import softmax
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2ForSequenceClassification, TrainingArguments, Trainer

In [3]:
# Ask the CUDA runtime for synchronous kernel launches so device-side errors
# are reported at the call that triggered them. The setting has to be an
# environment variable (a plain Python assignment is never seen by CUDA) and
# is exported here before the first CUDA query. setdefault keeps any value the
# user already exported. Synchronous launches slow GPU work down; remove this
# line once debugging is finished.
os.environ.setdefault("CUDA_LAUNCH_BLOCKING", "1")

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


# Load dataset

In [4]:
# Load the complete IMDB dataset and shuffle it with a fixed seed, so the
# `.select(range(n))` subsets taken later in the notebook are reproducible.
dataset = load_dataset("imdb")
dataset = dataset.shuffle(seed=42)

train_dataset, test_dataset = dataset["train"], dataset["test"]

# 1. Showing GPT-2 doesn't perform well on sentiment analysis

## Load model

In [5]:
# Pre-trained GPT-2 with its language-modelling head and the matching
# tokenizer. predict_sentiment reads both of them as globals.
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
model = model.to(device)

def generate_prompt(text):
    """Embed `text` in the instruction prompt used for zero-shot classification.

    The result ends with "The sentiment is " (trailing space included) so the
    model's next token is expected to be the sentiment word.
    """
    template = "Analyze the sentiment of the following text: '{}'. The sentiment is "
    return template.format(text)

def predict_sentiment(text):
    """Zero-shot sentiment prediction from the language model's next-token logits.

    The text is wrapped with generate_prompt, passed once through the global
    `model`, and the logits assigned to the tokens "negative" and "positive"
    at the position after the prompt are compared.

    Args:
        text: The review text to classify.

    Returns:
        A tuple (predicted_label, negative_prob, positive_prob).
        predicted_label is 1 when positive_prob > negative_prob and 0
        otherwise (a tie yields 0). The probabilities are a softmax over the
        two candidate logits only, so they sum to 1.
    """
    prompt = generate_prompt(text)
    inputs = tokenizer.encode(prompt, return_tensors="pt")

    # A long review can produce more tokens than the model has positions for,
    # which makes the forward pass fail. Keep the tail of the prompt in that
    # case: it still ends with "The sentiment is ", which is the position the
    # prediction is read from. Prompts within the limit are left untouched.
    max_positions = model.config.n_positions
    if inputs.shape[-1] > max_positions:
        inputs = inputs[:, -max_positions:]
    inputs = inputs.to(device)

    with torch.no_grad():
        logits = model(inputs).logits  # raw, unnormalized scores for every token

    # Scores for the token that would follow the prompt.
    last_token_logits = logits[0, -1, :]

    # First token id of each candidate word.
    negative_id = tokenizer.encode("negative", add_special_tokens=False)[0]
    positive_id = tokenizer.encode("positive", add_special_tokens=False)[0]

    # Gather both logits with one indexing op and move them to the host once.
    sentiment_logits = last_token_logits[[negative_id, positive_id]].detach().cpu().numpy()

    # Normalize over the two candidates only.
    sentiment_probabilities = softmax(sentiment_logits)
    negative_prob = sentiment_probabilities[0]
    positive_prob = sentiment_probabilities[1]

    predicted_label = 1 if positive_prob > negative_prob else 0
    return predicted_label, negative_prob, positive_prob



## Experiment

In [6]:
# Two hand-written sentences as a quick sanity check of predict_sentiment.
toy_dataset = [
    "I love the way the sun sets over the mountains",
    "The movie was terrible!",
]

for input_text in toy_dataset:
    print(f"predict text: {input_text}")
    prediction = predict_sentiment(input_text)
    predicted_label, negative_prob, positive_prob = prediction
    print(f"Probability of 'negative': {negative_prob:.4f}")
    print(f"Probability of 'positive': {positive_prob:.4f}")

predict text: I love the way the sun sets over the mountains
Probability of 'negative': 0.3600
Probability of 'positive': 0.6400
predict text: The movie was terrible!
Probability of 'negative': 0.6476
Probability of 'positive': 0.3524


  attn_output = torch.nn.functional.scaled_dot_product_attention(


## Evaluate

In [7]:
# Zero-shot evaluation on the first 100 reviews of the shuffled test split.
zero_shot_subset = test_dataset.select(range(100))

# Labels: 1 for positive, 0 for negative.
true_labels = [sample["label"] for sample in zero_shot_subset]
# predict_sentiment returns (label, negative_prob, positive_prob); only the
# label is scored here.
predicted_labels = [predict_sentiment(sample["text"])[0] for sample in zero_shot_subset]

# Compute evaluation metrics
accuracy = accuracy_score(true_labels, predicted_labels)
precision, recall, f1, _ = precision_recall_fscore_support(
    true_labels,
    predicted_labels,
    average="binary",
)

print(f"Dataset size: {len(true_labels)}")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Dataset size: 100
Accuracy: 0.6100
Precision: 0.5476
Recall: 0.9787
F1 Score: 0.7023


# 2. Show that GPT-2 isn't restricted on sentiment analysis (it performs well after fine-tuning)

## Load model

In [8]:
# Reuse the end-of-sequence token as the padding token so batches can be
# padded to a common length.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# GPT-2 with a sequence-classification head; 2 labels: positive and negative.
model = GPT2ForSequenceClassification.from_pretrained("gpt2", num_labels=2)
model = model.to(device)
model.config.pad_token_id = tokenizer.pad_token_id
model.config.num_labels = 2

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Preprocess data

In [9]:
def preprocess_function(examples):
    """Tokenize the "text" field of a batch with the global tokenizer.

    Every entry is truncated and padded to exactly 512 tokens.
    """
    tokenizer_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 512,
    }
    return tokenizer(examples["text"], **tokenizer_options)

# Tokenize both splits in batches; the names are rebound to the mapped datasets.
train_dataset, test_dataset = (
    split.map(preprocess_function, batched=True)
    for split in (train_dataset, test_dataset)
)

## Train

In [18]:
# Fine-tuning hyper-parameters, grouped by purpose.
training_args = TrainingArguments(
    # Output locations
    output_dir='./results',
    logging_dir='./logs',
    # Schedule: three epochs, evaluating after each one
    num_train_epochs=3,
    evaluation_strategy="epoch",
    logging_steps=10,
    # Optimisation
    learning_rate=2e-5,
    weight_decay=0.01,
    # Batch sizes
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
)

def compute_accuracy(eval_pred):
    """Metric callback: accuracy of the arg-max class against the labels.

    Args:
        eval_pred: A (logits, labels) pair.

    Returns:
        A dict with the single key 'accuracy'.
    """
    logits, labels = eval_pred
    predicted_classes = logits.argmax(axis=-1)
    score = accuracy_score(labels, predicted_classes)
    return {'accuracy': score}
    
# Fine-tune on the first 1000 training reviews and evaluate on the first 100
# test reviews.
finetune_train_subset = train_dataset.select(range(1000))
finetune_eval_subset = test_dataset.select(range(100))

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=finetune_train_subset,
    eval_dataset=finetune_eval_subset,
    tokenizer=tokenizer,
    compute_metrics=compute_accuracy,
)

trainer.train()



Epoch,Training Loss,Validation Loss


TrainOutput(global_step=375, training_loss=0.16508551389475665, metrics={'train_runtime': 275.7413, 'train_samples_per_second': 10.88, 'train_steps_per_second': 1.36, 'total_flos': 783890251776000.0, 'train_loss': 0.16508551389475665, 'epoch': 3.0})

## Evaluate

In [19]:
# Directory that receives the fine-tuned model and its tokenizer files.
finetuned_dir = "results/fine-tune/gpt2-sentiment-analysis"

# Metrics on the evaluation subset the trainer was built with.
results = trainer.evaluate()
print(results)

model.save_pretrained(finetuned_dir)
tokenizer.save_pretrained(finetuned_dir)

{'eval_loss': 0.4243985712528229, 'eval_accuracy': 0.92, 'eval_runtime': 2.8741, 'eval_samples_per_second': 34.793, 'eval_steps_per_second': 4.523, 'epoch': 3.0}


('results/fine-tune/gpt2-sentiment-analysis\\tokenizer_config.json',
 'results/fine-tune/gpt2-sentiment-analysis\\special_tokens_map.json',
 'results/fine-tune/gpt2-sentiment-analysis\\vocab.json',
 'results/fine-tune/gpt2-sentiment-analysis\\merges.txt',
 'results/fine-tune/gpt2-sentiment-analysis\\added_tokens.json')

# 3. Evaluate SOPHON-manipulated GPT-2

## Load model

In [14]:
SOPHON_CHECKPOINT_PATH = "results/inverse_loss/gpt2_IMDB/10_21_21_13_54/54.2_74.2_0.54.pt"

# Reuse the end-of-sequence token as the padding token.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# Start from the stock architecture; its weights are overwritten below.
model = GPT2ForSequenceClassification.from_pretrained("gpt2", num_labels=2).to(device)  # 2 labels: positive and negative

# Load the state dict (model weights). map_location places the stored tensors
# on the device selected for this notebook, so a checkpoint saved on a GPU can
# still be read when the notebook falls back to the CPU.
# NOTE(review): torch.load unpickles the file - only open checkpoints that
# come from a trusted source.
checkpoint = torch.load(SOPHON_CHECKPOINT_PATH, map_location=device)

# Drop 'module.' from the parameter names so they match the bare model.
# Presumably the checkpoint was saved from a wrapped (DataParallel-style)
# model - confirm against the script that produced it.
new_state_dict = OrderedDict(
    (k.replace('module.', ''), v) for k, v in checkpoint['model'].items()
)
model.load_state_dict(new_state_dict)

model.config.num_labels = 2
model.config.pad_token_id = tokenizer.pad_token_id

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  checkpoint = torch.load(SOPHON_CHECKPOINT_PATH)


## Preprocess data

In [15]:
def preprocess_function(examples):
    """Tokenize a batch of reviews to fixed-length (512-token) inputs.

    Reads the "text" field of `examples` and uses the global tokenizer with
    truncation and max-length padding.
    """
    texts = examples["text"]
    return tokenizer(texts, truncation=True, padding="max_length", max_length=512)

# Batched tokenization of both splits with preprocess_function.
map_options = {"batched": True}
train_dataset = train_dataset.map(preprocess_function, **map_options)
test_dataset = test_dataset.map(preprocess_function, **map_options)

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

## Train

In [16]:
# Same hyper-parameters as the fine-tuning run on stock GPT-2, so the two
# runs are directly comparable.
training_args = TrainingArguments(
    # Output locations
    output_dir='./results',
    logging_dir='./logs',
    # Schedule: three epochs, evaluating after each one
    num_train_epochs=3,
    evaluation_strategy="epoch",
    logging_steps=10,
    # Optimisation
    learning_rate=2e-5,
    weight_decay=0.01,
    # Batch sizes
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
)

def compute_accuracy(eval_pred):
    """Return {'accuracy': ...} for a (logits, labels) pair.

    The predicted class of each row is the index of its largest logit.
    """
    logits, labels = eval_pred
    return {'accuracy': accuracy_score(labels, logits.argmax(axis=-1))}

# Fine-tune the SOPHON checkpoint on the first 1000 training reviews and
# evaluate on the first 100 test reviews.
sophon_train_subset = train_dataset.select(range(1000))
sophon_eval_subset = test_dataset.select(range(100))

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=sophon_train_subset,
    eval_dataset=sophon_eval_subset,
    tokenizer=tokenizer,
    compute_metrics=compute_accuracy,
)

trainer.train()



Epoch,Training Loss,Validation Loss


TrainOutput(global_step=375, training_loss=0.39300166368484496, metrics={'train_runtime': 292.4223, 'train_samples_per_second': 10.259, 'train_steps_per_second': 1.282, 'total_flos': 783890251776000.0, 'train_loss': 0.39300166368484496, 'epoch': 3.0})

## Evaluate

In [17]:
# Directory that receives the fine-tuned SOPHON model and its tokenizer files.
sophon_finetuned_dir = "results/fine-tune/sophon-gpt2-sentiment-analysis"

# Metrics on the evaluation subset the trainer was built with.
results = trainer.evaluate()
print(results)

model.save_pretrained(sophon_finetuned_dir)
tokenizer.save_pretrained(sophon_finetuned_dir)

{'eval_loss': 0.3707934319972992, 'eval_accuracy': 0.9, 'eval_runtime': 2.8606, 'eval_samples_per_second': 34.957, 'eval_steps_per_second': 4.544, 'epoch': 3.0}


('results/fine-tune/sophon-gpt2-sentiment-analysis\\tokenizer_config.json',
 'results/fine-tune/sophon-gpt2-sentiment-analysis\\special_tokens_map.json',
 'results/fine-tune/sophon-gpt2-sentiment-analysis\\vocab.json',
 'results/fine-tune/sophon-gpt2-sentiment-analysis\\merges.txt',
 'results/fine-tune/sophon-gpt2-sentiment-analysis\\added_tokens.json')