## 1. Setup

In [22]:
import os
os.environ["WANDB_DISABLED"] = "true"

import torch
print(f"GPU available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")


GPU available: True
GPU: Tesla T4


## 2. Load Dataset

In [23]:
from google.colab import drive
drive.mount('/content/drive')

!pip install datasets -q

from datasets import load_from_disk

# path to saved HF dataset
DATASET_PATH = "/content/drive/MyDrive/Writing-Style-Transfer-Project/melville300-chaptgpt-pairings"

# Load dataset from Google Drive
dataset = load_from_disk(DATASET_PATH)
print(f"Dataset loaded: {dataset}")

import textwrap

# Check the dataset structure
print(f"Columns: {dataset.column_names}")
print(f"Dataset size: {len(dataset)}")

# print sample
print("Sample: \n")
print("Author: ")
print('\n',dataset[0]['author'],'\n')
print("Input: \n")
print(textwrap.fill(dataset[0]['input'],width=70))
print("\nTarget: \n")
print(dataset[0]['target'])



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Dataset loaded: Dataset({
    features: ['input', 'target', 'author'],
    num_rows: 320
})
Columns: ['input', 'target', 'author']
Dataset size: 320
Sample: 

Author: 

 Herman Melville 

Input: 

My name is Ishmael. Some years ago, when I had little money and no
strong interest in staying on land, I decided to go on a sea voyage.
This is something I do to improve my mood and health. When I feel
depressed, especially to the point where I have troubling thoughts, I
see it as necessary to go to sea. This takes the place of more extreme
actions. Like the philosopher Cato killing himself, I instead choose
to board a ship. Many other people feel the same way about the ocean,
even if they do not realize it.

Target: 

Call me Ishmael. Some years ago—never mind how long precisely—having
little or no money in my purse, and nothing particular to interest me
on shore, 

## 3. Prepare Model

In [24]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

model_name = 'gpt2-medium'
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

model = GPT2LMHeadModel.from_pretrained(model_name)

print("Model loaded successfully!")


Model loaded successfully!


## 4. Format Data

In [25]:
def format_text(example):
    author = example['author']
    input_text = example['input']
    target_text = example['target']

    # More explicit format for better learning
    formatted = f"Rewrite the following text in the style of {author}:\n\nOriginal: {input_text}\n\nRewritten: {target_text}{tokenizer.eos_token}"
    return {'text': formatted}

dataset = dataset.map(format_text)
print("Data formatted!")




Data formatted!


## 5. Tokenize dataset

In [26]:
def tokenize_function(examples):
    return tokenizer(
        examples['text'],
        truncation=True,
        padding=True,
        max_length=512  # Longer sequences for medium model
    )

tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=dataset.column_names
)
tokenized_dataset = tokenized_dataset.add_column("labels", tokenized_dataset["input_ids"])

print("Data tokenized")

Data tokenized


## 6. Setup training

In [27]:
from transformers import Trainer, TrainingArguments

# Optimized training for better style transfer
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=5,              # More epochs for better learning
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-5,              # Lower learning rate for more careful learning
    logging_steps=25,
    save_steps=250,
    warmup_steps=200,                # More warmup for stability
    weight_decay=0.01,
    fp16=True,
    dataloader_pin_memory=True,
    remove_unused_columns=False,
    report_to="none",
    eval_strategy="steps",
    eval_steps=250,
    save_total_limit=2,              # Keep only best checkpoints
    load_best_model_at_end=True,     # Load best model at end
    metric_for_best_model="eval_loss",
)

# Split dataset
train_test_split = tokenized_dataset.train_test_split(test_size=0.2)

# Create trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_test_split['train'],
    eval_dataset=train_test_split['test'],
    tokenizer=tokenizer,
)

print("Training setup complete")


  trainer = Trainer(


Training setup complete


## 7. Train Model

In [28]:
print("Starting training...")
print("This should take 3 minute on T4")

trainer.train()
print("Training finished!")

Starting training...
This should take 3 minute on T4


Step,Training Loss,Validation Loss


Training finished!


## 8. Save Model

In [29]:
# Save the trained model
save_path = '/content/drive/MyDrive/gpt2_medium_style_model'
trainer.save_model(save_path)
tokenizer.save_pretrained(save_path)
print(f"Model saved to: {save_path}")

Model saved to: /content/drive/MyDrive/gpt2_medium_style_model


In [30]:
def generate_style(input_text, author_name, max_length=80):
    prompt = f"Rewrite the following text in the style of {author_name}:\n\nOriginal: {input_text}\n\nRewritten:"

    # tokenizer with return_attention_mask to fix warning
    inputs = tokenizer(
        prompt,
        return_tensors='pt',
        padding=False,
        truncation=True,
        return_attention_mask=True
    )

    if torch.cuda.is_available():
        inputs = {k: v.to('cuda') for k, v in inputs.items()}
        model.to('cuda')

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_length=inputs['input_ids'].shape[1] + max_length,
            temperature=0.7,  # Lower temperature for more focused output
            do_sample=True,
            top_p=0.85,       # Slightly more focused
            top_k=40,         # More focused
            repetition_penalty=1.2,  # Higher penalty for repetition
            pad_token_id=tokenizer.eos_token_id,
            no_repeat_ngram_size=3  # Prevent 3-gram repetition
        )

    result = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)


    # Clean up result - take everything before newlines
    result = result.split('\n')[0].strip()

    if result == '':
      result = 'No style change'

    return result


In [31]:
test_cases = [
    # excerpts from moby dick
    ("Manhattan is surrounded by docks, similar to how coral reefs surround islands. Business activity covers it like waves. The streets lead toward the water. At the southern tip is the Battery, where the pier is hit by ocean waves and cooled by sea breezes that were far from land not long ago. Many people are gathered there, watching the water.", "Herman Melville"),
    ("More people are walking straight toward the water, as if preparing to jump in. They do not stop near the buildings for shade. They want to get as close to the water as they can without entering it. Many people stand there, stretching for miles. They come from all directions and different parts of the city. Yet they all gather at the water. Is there something drawing them there, like a magnet attracts a compass needle?","Herman Melville"),
    ("If you are in an area with lakes, most paths eventually lead downhill to a pool of water. Even someone lost in thought will unknowingly walk toward water if it is nearby. If you are ever thirsty in a desert and traveling with a philosopher, try this as a method to find water. Thought and water are closely connected.","Herman Melville"),
    ("An artist wants to paint a peaceful and beautiful scene in the Saco valley. He includes trees, fields, animals, a cottage with smoke, and distant mountains. But the image only feels complete if the shepherd in the scene is looking at a stream. In the prairies during June, despite many flowers, something feels missing—there is no water. If Niagara Falls were only sand, people would not visit. A poor poet once had to decide whether to buy a needed coat or spend his money on a trip to the beach. Many healthy boys want to go to sea at some point. On your first sea trip, you may have felt something special when told the ship was out of sight of land. The Persians respected the sea, and the Greeks gave it a god. These reactions suggest something important. The story of Narcissus, who drowned while trying to reach his reflection, symbolizes how we are drawn to an unreachable idea of life, which we also see in bodies of water.","Herman Melville"),
    ("When I say I go to sea when I feel physically or mentally unwell, I do not mean I travel as a passenger. Passengers need money, and they often get sick and uncomfortable. I also do not go as a high-ranking officer or a cook. I avoid positions of responsibility and prefer not to work hard or take on duties. While I respect good cooking, I do not enjoy doing it myself. Ancient Egyptians preserved animals they cooked, such as ibises and hippopotamuses, which were later found as mummies.","Herman Melville"),

    ("The sky is blue","Herman Melville"),
    ("I'm hungry","Herman Melville"),

    # excerpt from frankenstein neutralized
     ("Margaret, I believe I should achieve an important goal. I could have lived a life of comfort and wealth, but I chose to pursue recognition instead. I wish someone would confirm that I have made the right choice. My determination is strong, but my expectations change and I sometimes feel discouraged. I am preparing for a long and difficult journey that will require me to remain strong. I must encourage others and also maintain my own resolve when they are struggling.","Herman Melville"),
    # excerpt from Siddhartha neutralized
     ("After an hour without sleeping, the Brahman got up, walked back and forth, went outside, and saw the moon had risen. He looked into the room through the window and saw Siddhartha standing in the same place with his arms folded and moonlight on his lower legs. The father, feeling concerned, returned to bed.","Herman Melville"),
    # Sports paragraph on the dodgers
     ("Shohei Ohtani, a player for the Los Angeles Dodgers, hit his 40th home run of the season on Saturday in a game against the Toronto Blue Jays, giving the Dodgers a 3-0 lead. The Dodgers won the game 9-1. It was their first consecutive home wins since they defeated the Chicago White Sox in three games from July 1 to July 3. Ohtani’s home run traveled 417 feet to center field.","Herman Melville")
]

print("\nSample Tests\n")
sample_test_results = [generate_style(input_text, author) for input_text, author in test_cases]

for i in range(len(test_cases)):

    print(textwrap.fill(f"\nInput: {test_cases[i][0]}",width=70))
    print(f"Author: {test_cases[i][1]}")
    print(textwrap.fill(f"Generated: {sample_test_results[i]}",width=70))
    print('\n\n')

print("\nStyle transfer complete!")



Sample Tests

 Input: Manhattan is surrounded by docks, similar to how coral reefs
surround islands. Business activity covers it like waves. The streets
lead toward the water. At the southern tip is the Battery, where the
pier is hit by ocean waves and cooled by sea breezes that were far
from land not long ago. Many people are gathered there, watching the
water.
Author: Herman Melville
Generated: Now at this point one begins wondering what can be said
about a place whose waters rise above its shores? And even if you take
into account all the business which surrounds every city on earth,
still more than any other sort;—for besides those great harbors or
harbours heretofore mentioned,—one may add these seaswardly-placed
cities too! It seems as though an island might stand



 Input: More people are walking straight toward the water, as if
preparing to jump in. They do not stop near the buildings for shade.
They want to get as close to the water as they can without entering
it. Many peop

## 9. Evaluation Metrics

In [33]:
!pip install nltk rouge-score -q

import nltk
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
import torch
import numpy as np

nltk.download('punkt', quiet=True)
nltk.download('punkt_tab')

# Initialize ROUGE scorer
rouge = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)

def calculate_perplexity(text):
    """Calculate perplexity using the trained model"""
    inputs = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)

    if torch.cuda.is_available():
        inputs = {k: v.to('cuda') for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs, labels=inputs['input_ids'])
        loss = outputs.loss
        perplexity = torch.exp(loss).item()

    return perplexity

def simple_evaluation_metrics(test_cases):
    """Evaluate using BLEU, ROUGE, and Perplexity"""

    print("\n" + "="*50)
    print("BLEU | ROUGE | PERPLEXITY EVALUATION")
    print("="*50)

    bleu_scores = []
    rouge1_scores = []
    rougeL_scores = []
    perplexities = []

    for i, (input_text, author) in enumerate(test_cases):
        generated = generate_style(input_text, author)

        # BLEU Score (generated vs original)
        input_tokens = nltk.word_tokenize(input_text.lower())
        generated_tokens = nltk.word_tokenize(generated.lower())
        bleu = sentence_bleu([input_tokens], generated_tokens)
        bleu_scores.append(bleu)

        # ROUGE Scores (generated vs original)
        rouge_scores = rouge.score(input_text, generated)
        rouge1 = rouge_scores['rouge1'].fmeasure
        rougeL = rouge_scores['rougeL'].fmeasure
        rouge1_scores.append(rouge1)
        rougeL_scores.append(rougeL)

        # Perplexity (how natural/fluent the generated text is)
        ppl = calculate_perplexity(generated)
        perplexities.append(ppl)

        print(f"\nTest {i+1}:")
        print(f"Input:     {input_text[:60]}...")
        print(f"Generated: {generated}")
        print(f"BLEU:      {bleu:.3f}")
        print(f"ROUGE-1:   {rouge1:.3f}")
        print(f"ROUGE-L:   {rougeL:.3f}")
        print(f"Perplexity: {ppl:.1f}")

    # Calculate averages
    avg_bleu = np.mean(bleu_scores)
    avg_rouge1 = np.mean(rouge1_scores)
    avg_rougeL = np.mean(rougeL_scores)
    avg_perplexity = np.mean(perplexities)

    print(f"\n" + "="*50)
    print("AVERAGE SCORES")
    print("="*50)
    print(f"BLEU Score:     {avg_bleu:.3f}")
    print(f"ROUGE-1 Score:  {avg_rouge1:.3f}")
    print(f"ROUGE-L Score:  {avg_rougeL:.3f}")
    print(f"Perplexity:     {avg_perplexity:.1f}")

    # Simple interpretation
    print(f"\n" + "="*50)
    print("INTERPRETATION")
    print("="*50)

    if avg_bleu < 0.3:
        print("✅ Good style change (low BLEU)")
    else:
        print("⚠️  Limited style change (high BLEU)")

    if avg_rouge1 > 0.3:
        print("✅ Content preserved (good ROUGE)")
    else:
        print("⚠️  Content may be lost (low ROUGE)")

    if avg_perplexity < 50:
        print("✅ Fluent text (low perplexity)")
    else:
        print("⚠️  Text may be unnatural (high perplexity)")

# Run evaluation
simple_evaluation_metrics(test_cases)

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!



BLEU | ROUGE | PERPLEXITY EVALUATION

Test 1:
Input:     Manhattan is surrounded by docks, similar to how coral reefs...
Generated: As if a great mariner with his ship at anchor near this place might have been foretold about as he was approaching its shores, so on account should we now look into every part or section within our city; for such places may be counted upon to receive these tides, which would render them no longer seaward but rather inland—like those mighty swells they call seas--and thus furnish us
BLEU:      0.000
ROUGE-1:   0.076
ROUGE-L:   0.046
Perplexity: 74.1

Test 2:
Input:     More people are walking straight toward the water, as if pre...
Generated: The more men who now approach the sea-shore, some on foot or by other — these were those that had been drawn into this great gathering; but then came upon
BLEU:      0.000
ROUGE-1:   0.075
ROUGE-L:   0.057
Perplexity: 110.2

Test 3:
Input:     If you are in an area with lakes, most paths eventually lead...
Generated: 