In [17]:
import torch

# Ask PyTorch whether a CUDA-capable GPU is visible and report the answer.
cuda_ok = torch.cuda.is_available()
print("CUDA available:", cuda_ok)

if not cuda_ok:
    print("No GPU detected by PyTorch.")
else:
    # Report the name of device 0, the number of visible devices, and the
    # index of the device PyTorch currently treats as selected.
    print("GPU Name:", torch.cuda.get_device_name(0))
    print("Device count:", torch.cuda.device_count())
    print("Current device:", torch.cuda.current_device())


CUDA available: True
GPU Name: NVIDIA GeForce RTX 3050 6GB Laptop GPU
Device count: 1
Current device: 0


In [7]:
from dotenv import load_dotenv
import os

# Pull variables from a local .env file into the process environment, then
# read the Hugging Face access token from it (None when HF_TOKEN is not set).
load_dotenv()
token = os.environ.get("HF_TOKEN")

In [8]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments, Trainer
import torch
import os

# Workaround flag intended to avoid library conflicts (KMP_* settings belong
# to the OpenMP runtime; NOTE(review): confirm it is still needed here).
os.environ.update(KMP_DUPLICATE_LIB_OK="True")

# Load the CNN/DailyMail summarization corpus, config "3.0.0" (all splits),
# and print the first training record as a quick sanity check.
dataset = load_dataset("cnn_dailymail", "3.0.0")
first_train_example = dataset["train"][0]
print(first_train_example)

# Pretrained T5-small checkpoint: tokenizer plus seq2seq model.
model_name = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)


{'article': 'LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported £20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won\'t cast a spell on him. Daniel Radcliffe as Harry Potter in "Harry Potter and the Order of the Phoenix" To the disappointment of gossip columnists around the world, the young actor says he has no plans to fritter his cash away on fast cars, drink and celebrity parties. "I don\'t plan to be one of those people who, as soon as they turn 18, suddenly buy themselves a massive sports car collection or something similar," he told an Australian interviewer earlier this month. "I don\'t think I\'ll be particularly extravagant. "The things I like buying are things that cost about 10 pounds -- books and CDs and DVDs." At 18, Radcliffe will be able to gamble in a casino, buy a drink in a pub or see the horror film "Hostel: Part II," currently six places below his number one movie on the UK box office cha

In [9]:
def tokenize_function(examples):
    """Tokenize a batch of article/summary pairs for T5 summarization training.

    Args:
        examples: Batch mapping with an "article" list (source documents) and
            a "highlights" list (reference summaries), as supplied by
            ``dataset.map(..., batched=True)``.

    Returns:
        The tokenizer output for the prefixed articles (truncated/padded to
        512 tokens) with an added "labels" entry: the highlight token ids
        (truncated/padded to 128 tokens) where every padding position has
        been replaced by -100.
    """
    # Prefix input for T5-style summarization
    inputs = ["summarize: " + doc for doc in examples["article"]]

    # Tokenize inputs
    model_inputs = tokenizer(
        inputs, max_length=512, truncation=True, padding="max_length"
    )

    # Tokenize targets (highlights) as labels. `text_target=` is the supported
    # replacement for the deprecated `as_target_tokenizer()` context manager.
    labels = tokenizer(
        text_target=examples["highlights"],
        max_length=128,
        truncation=True,
        padding="max_length",
    )

    # Because the targets are padded to a fixed length, the label rows contain
    # the pad token id. Swap it for -100 (the index the loss ignores) so that
    # padding positions do not contribute to the training loss.
    pad_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        [token_id if token_id != pad_id else -100 for token_id in label_ids]
        for label_ids in labels["input_ids"]
    ]
    return model_inputs


# Apply the tokenizer to every split, processing rows in batches.
tokenized_datasets = dataset.map(tokenize_function, batched=True)

training_args = TrainingArguments(
    output_dir="./results",
    # Optimisation hyperparameters.
    num_train_epochs=3,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # Evaluate and checkpoint once per epoch (both use the same "epoch"
    # strategy), keep at most two checkpoints on disk, and restore the best
    # one when training finishes.
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    load_best_model_at_end=True,
)

# Trainer over the tokenized train/validation splits; the tokenizer is passed
# as the processing class.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    processing_class=tokenizer,
)

#trainer.train()

# Set device: prefer a CUDA GPU, then Apple-silicon MPS, and fall back to CPU.
# (Previously only MPS/CPU were considered, so a machine with an NVIDIA GPU
# ended up running generation on the CPU.)
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
model.to(device)
model.eval()  # inference only: make sure dropout is disabled

# Define a sample text for summarization
sample_text = "The Transformer model has revolutionized NLP by enabling parallel processing of sequences."
inputs = tokenizer(
    "summarize: " + sample_text,
    return_tensors="pt",
    max_length=512,
    truncation=True,
).to(device)

# Unpack the whole encoding so the attention mask is passed alongside the
# input ids instead of being dropped.
outputs = model.generate(**inputs, max_length=150, num_beams=4, early_stopping=True)

print("Generated Summary: ", tokenizer.decode(outputs[0], skip_special_tokens=True))


Map:   0%|          | 0/11490 [00:00<?, ? examples/s]



Generated Summary:  Transformer model has revolutionized NLP by enabling parallel processing of sequences.


In [1]:
import transformers

# Report which deep-learning backends transformers detects. Each probe is
# called right before its line is printed; labels keep their column padding.
for label, probe in (
    ("torch_available:", transformers.is_torch_available),
    ("tf_available:   ", transformers.is_tf_available),
):
    print(label, probe())

torch_available: True
tf_available:    True
