In [24]:
import pandas as pd
import torch
from torch.utils.data import DataLoader
from transformers import T5Tokenizer, T5ForConditionalGeneration
from torch.optim import AdamW
from sklearn.model_selection import train_test_split

In [25]:
df = pd.read_csv("news_summary.csv", encoding='latin1')
df = df[['text', 'ctext']].dropna().reset_index(drop=True)
df['text'] = df['text'].astype(str)
df['ctext'] = df['ctext'].astype(str)

In [26]:
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
print(f"Train size: {len(train_df)}, Test size: {len(test_df)}")

Train size: 3516, Test size: 880


In [33]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch

# Use smaller model to avoid GPU memory overflow
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Device handling
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

try:
    model = model.to(device)
except Exception as e:
    print("⚠️ GPU memory issue:", e)
    print("Switching to CPU...")
    device = "cpu"
    model = model.to(device)

# Reduce batch size for safety
batch_size = 1

# Clear GPU memory if previously used
torch.cuda.empty_cache()


Using device: cuda
⚠️ GPU memory issue: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Switching to CPU...


AcceleratorError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [31]:
max_input_length = 256  # Shorter sequences
max_target_length = 100
batch_size = 2  # Very small batch size

# Process training data
train_input_ids = []
train_attention_mask = []
train_labels = []

print("Processing training data...")
for i in range(len(train_df)):
    text = "summarize: " + train_df.iloc[i]['text']
    summary = train_df.iloc[i]['ctext']

    input_enc = tokenizer(text, max_length=max_input_length, truncation=True,
                          padding="max_length", return_tensors="pt")
    target_enc = tokenizer(summary, max_length=max_target_length, truncation=True,
                           padding="max_length", return_tensors="pt")

    train_input_ids.append(input_enc['input_ids'])
    train_attention_mask.append(input_enc['attention_mask'])
    train_labels.append(target_enc['input_ids'])

print(f"Processed {len(train_input_ids)} training samples")

Processing training data...
Processed 3516 training samples


In [32]:
optimizer = AdamW(model.parameters(), lr=5e-5)
epochs = 1

model.train()
for epoch in range(epochs):
    total_loss = 0
    batch_count = 0
    
    for i in range(0, len(train_input_ids), batch_size):
        # Get current batch - NO .to(device) needed
        end_idx = i + batch_size
        current_batch_input_ids = train_input_ids[i:end_idx]
        current_batch_attention_mask = train_attention_mask[i:end_idx]
        current_batch_labels = train_labels[i:end_idx]
        
        # Stack the batch tensors
        batch_input_ids = torch.cat(current_batch_input_ids)
        batch_attention_mask = torch.cat(current_batch_attention_mask)
        batch_labels = torch.cat(current_batch_labels)

        # Forward pass
        outputs = model(input_ids=batch_input_ids,
                        attention_mask=batch_attention_mask,
                        labels=batch_labels)
        loss = outputs.loss

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        batch_count += 1
        
        print(f"Batch {batch_count}, Loss: {loss.item():.4f}")

    if batch_count > 0:
        avg_loss = total_loss / batch_count
        print(f"Epoch {epoch+1}, Average Loss: {avg_loss:.4f}")

AcceleratorError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [18]:
model.save_pretrained("./t5_summarization_model")
tokenizer.save_pretrained("./t5_summarization_model")

print("✅ Training complete and model saved!")

✅ Training complete and model saved!
