In [4]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, TrainingArguments, Trainer
import torch
import datasets
from datasets import Dataset
import accelerate
import pandas 

In [5]:

df = pd.read_csv("new_updated_data.csv")  # Replace with your actual file

df["Tropes"] = df["tags"].apply(lambda x: ", ".join(x.split(", ")) if isinstance(x, str) else "")

data_dicts = df.apply(lambda row: {"input_text": f"Summarize: {row['description']}", "target_text": row["Tropes"]}, axis=1).tolist()

dataset = Dataset.from_list(data_dicts)

print(dataset[0])  # Verify first entry


{'input_text': 'Summarize: Notes from Underground (pre-reform Russian: Записки изъ подполья; post-reform Russian: Записки из подполья, tr. Zapíski iz podpólʹya), also translated as Notes from the Underground or Letters from the Underworld, is an 1864 novella by Fyodor Dostoevsky. Notes is considered by many to be one of the first existentialist novels. It presents itself as an excerpt from the rambling memoirs of a bitter, isolated, unnamed narrator (generally referred to by critics as the Underground Man), who is a retired civil servant living in St. Petersburg. The first part of the story is told in monologue form, or the underground man\'s diary, and attacks emerging Western philosophy, especially Nikolay Chernyshevsky\'s What Is to Be Done? The second part of the book is called "Apropos of the Wet Snow" and describes certain events that appear to be destroying and sometimes renewing the underground man, who acts as a first person, unreliable narrator and anti-hero.', 'target_text':

In [6]:
from transformers import AutoTokenizer

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("KamilAin/bart-base-booksum")

# Tokenization function
def tokenize_function(examples):
    model_inputs = tokenizer(examples["input_text"], max_length=512, truncation=True, padding="max_length")
    labels = tokenizer(examples["target_text"], max_length=128, truncation=True, padding="max_length")

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Tokenize dataset
tokenized_datasets = dataset.map(tokenize_function, batched=True)


Map: 100%|██████████| 5099/5099 [00:00<00:00, 6354.73 examples/s]


In [7]:
from transformers import AutoModelForSeq2SeqLM, TrainingArguments, Trainer

# Load pre-trained model
model = AutoModelForSeq2SeqLM.from_pretrained("KamilAin/bart-base-booksum")

# Define training arguments
training_args = TrainingArguments(
    output_dir="./bart_booksum_tropes",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    save_strategy="epoch",
    logging_dir="./logs"
)

# Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets,
    eval_dataset=tokenized_datasets,  # Ideally, use a separate validation dataset
    tokenizer=tokenizer
)

# Train the model
trainer.train()

# Save the fine-tuned model
model.save_pretrained("./bart_booksum_tropes")
tokenizer.save_pretrained("./bart_booksum_tropes")


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,1.0346,0.091972
2,0.1015,0.079885
3,0.0927,0.075368




('./bart_booksum_tropes\\tokenizer_config.json',
 './bart_booksum_tropes\\special_tokens_map.json',
 './bart_booksum_tropes\\vocab.json',
 './bart_booksum_tropes\\merges.txt',
 './bart_booksum_tropes\\added_tokens.json',
 './bart_booksum_tropes\\tokenizer.json')

In [14]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load fine-tuned model
model_path = "./bart_booksum_tropes"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)

# Input new book description
book_description = "James Anderson had a plan. Or half of one. All that matters is that he managed to do what his older brother, the famous Aaron Warner Anderson, never did: infiltrate Ark Island, the last refuge of The Reestablishment. In the past decade no outsider has breached the stronghold of the authoritarian regime, but James is in. In a prison cell, sure, but as far as James is concerned, a win is a win. It’s been ten years since the fall of The Reestablishment. Ten years since the notorious duo — Juliette Ferrars and Aaron Warner Anderson — led a worldwide rebellion and established the New Republic of the West. But after a decade of unsettling quiet, The Reestablishment is ready to make a devastating move, and they have the perfect person for the job."

# Tokenize input
inputs = tokenizer(f"Summarize: {book_description}", return_tensors="pt", max_length=512, truncation=True)

# Generate output
outputs = model.generate(**inputs, max_length=128)
tropes = tokenizer.decode(outputs[0], skip_special_tokens=True)

print("Predicted Tropes:", tropes)


Predicted Tropes: Fantasy, Young Adult, Adventure, Science fiction, Dystopian


In [16]:
import evaluate
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Load fine-tuned model and tokenizer
model_path = "./bart_booksum_tropes"  # Ensure this is the correct path
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)

# Load ROUGE and BLEU metrics
rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")

# Sample test dataset (replace with your actual test cases)
test_cases = [
    {
        "description": "ose yourself in this exhilarating return to the #1 global bestselling Shatter Me universe, the first book in a new series set ten years after the fall of The Reestablishment. James Anderson had a plan. Or half of one. All that matters is that he managed to do what his older brother, the famous Aaron Warner Anderson, never did: infiltrate Ark Island, the last refuge of The Reestablishment. In the past decade no outsider has breached the stronghold of the authoritarian regime, but James is in. In a prison cell, sure, but as far as James is concerned, a win is a win. It’s been ten years since the fall of The Reestablishment. Ten years since the notorious duo — Juliette Ferrars and Aaron Warner Anderson — led a worldwide rebellion and established the New Republic of the West. But after a decade of unsettling quiet, The Reestablishment is ready to make a devastating move, and they have the perfect person for the job. Rosabelle Wolff had a plan. She always has a plan. On Ark Island, where constant surveillance is packaged as security, even emotions must be experienced with caution. A trained assassin, her every movement is monitored by synthetic intelligence—and when she’s given an order to kill, she never hesitates. Brimming with pulse-pounding action and torturous romance, Watch Me is an explosive journey through a dystopian landscape where enemies-to-lovers has never felt more impossible. Step into a beloved and breathtaking world that demands an answer to a desperate question— Who are we when no one is watching?",
        "expected_tropes": "Fantasy, Dystopia, romance"
    },
    {
        "description": "From the #1 New York Times bestselling author of Just for the Summer comes a new playful yet deeply emotional contemporary romance. There might be no such a thing as a perfect guy, but Xavier Rush comes disastrously close. A gorgeous veterinarian giving Greek god vibes—all while cuddling a tiny kitten? Immediately yes. That is until Xavier opens his mouth and proves that even sculpted gods can say the absolute wrong thing. Like, really wrong. Of course, there’s nothing Samantha loves more than proving an asshole wrong… . . . unless, of course, he can admit he made a mistake. But after one incredible and seemingly endless date—possibly the best in living history—Samantha is forced to admit the truth, that her family is in crisis and any kind of relationship would be impossible. Samantha begs Xavier to forget her. To remember their night together as a perfect moment, as crushing as that may be. Only no amount of distance or time is nearly enough to forget that something between them. And the only thing better than one single perfect memory is to make a life—and even a love—worth remembering.",
        "expected_tropes": "Romance, contepmrory romance"
    }
]

# Store predictions & references
predictions = []
references = []

for case in test_cases:
    # Tokenize input description
    inputs = tokenizer(f"Summarize: {case['description']}", return_tensors="pt", max_length=512, truncation=True)

    # Generate prediction
    with torch.no_grad():
        outputs = model.generate(**inputs, max_length=128, num_beams=5)

    # Decode output
    predicted_tropes = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Store results
    predictions.append(predicted_tropes)
    references.append([case["expected_tropes"]])  # BLEU expects list of references

# Compute ROUGE
rouge_scores = rouge.compute(predictions=predictions, references=references)

# Compute BLEU
bleu_score = bleu.compute(predictions=predictions, references=references)

# Print results
print("ROUGE Scores:", rouge_scores)
print("BLEU Score:", bleu_score)


ROUGE Scores: {'rouge1': np.float64(0.1), 'rouge2': np.float64(0.0), 'rougeL': np.float64(0.1), 'rougeLsum': np.float64(0.1)}
BLEU Score: {'bleu': 0.0, 'precisions': [0.3, 0.1111111111111111, 0.0, 0.0], 'brevity_penalty': 1.0, 'length_ratio': 1.1111111111111112, 'translation_length': 10, 'reference_length': 9}
