In [1]:
!pip install --upgrade torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install --upgrade transformers datasets sentencepiece accelerate nltk

Looking in indexes: https://download.pytorch.org/whl/cu118


In [5]:
!pip install deepspeed
!pip install --upgrade deepspeed

Collecting deepspeed
  Using cached deepspeed-0.16.3.tar.gz (1.4 MB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting hjson (from deepspeed)
  Downloading hjson-3.1.0-py3-none-any.whl.metadata (2.6 kB)
Collecting ninja (from deepspeed)
  Downloading ninja-1.11.1.3-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.3 kB)
Collecting nvidia-ml-py (from deepspeed)
  Downloading nvidia_ml_py-12.570.86-py3-none-any.whl.metadata (8.7 kB)
Downloading hjson-3.1.0-py3-none-any.whl (54 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.0/54.0 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ninja-1.11.1.3-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (422 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m422.9/422.9 kB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading nvidia_ml_py-12.570.86-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [

In [11]:
import gc
import torch
torch.cuda.empty_cache()
gc.collect()


316

In [12]:
import torch
print(torch.cuda.is_available())  # Should print True
print(torch.cuda.get_device_name(0))  # Should print something like NVIDIA T4


True
Tesla T4


In [15]:
try:
    import deepspeed
except ImportError:
    print("DeepSpeed is not installed.")


[2025-02-15 21:26:50,123] [INFO] [real_accelerator.py:222:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [3]:
import torch
import gc

# 🔥 Free up GPU memory before loading the model
torch.cuda.empty_cache()
gc.collect()

# ✅ Reduce sequence lengths for minimal memory usage
max_input_length = 32  # Set to 32
max_target_length = 32  # Set to 32

# ✅ Load dataset
train_csv_path = "train-eng.csv"
dev_csv_path = "dev-eng.csv"

import pandas as pd
from datasets import Dataset

train_df = pd.read_csv(train_csv_path)
dev_df = pd.read_csv(dev_csv_path)

train_dataset = Dataset.from_pandas(train_df)
dev_dataset = Dataset.from_pandas(dev_df)

# ✅ Tokenization function
from transformers import T5ForConditionalGeneration, T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained("t5-small")

def preprocess_function(examples):
    inputs = examples["post"]
    targets = examples["normalized claim"]

    model_inputs = tokenizer(inputs, max_length=max_input_length, truncation=True, padding="max_length")
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(targets, max_length=max_target_length, truncation=True, padding="max_length")

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# ✅ Force CPU for dataset tokenization (saves GPU memory)
tokenized_train = train_dataset.map(preprocess_function, batched=True, num_proc=4)
tokenized_dev = dev_dataset.map(preprocess_function, batched=True, num_proc=4)

# ✅ Load model with memory-optimized settings
model = T5ForConditionalGeneration.from_pretrained("t5-small")
model.gradient_checkpointing_enable()  # Saves memory
model.config.use_cache = False  # Required for checkpointing
model.to("cuda")

# ✅ Trainer setup with minimal memory usage
from transformers import Trainer, TrainingArguments, DataCollatorForSeq2Seq

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-4,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=32,  # Highest accumulation, lowest memory
    weight_decay=0.01,
    save_total_limit=2,
    num_train_epochs=3,
    logging_steps=50,
    fp16=True,  # Use mixed precision
    report_to="none",
    optim="adamw_torch_fused",
)

data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_dev,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

# ✅ Free memory again before training
torch.cuda.empty_cache()
gc.collect()

# 🚀 Start training
print("Starting training...")
trainer.train()

# ✅ Evaluate model
print("Evaluating on development set...")
results = trainer.evaluate()
print("Evaluation Results:", results)

# ✅ Save final model
output_dir = "/content/drive/My Drive/t5_claim_normalization"
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
print(f"Model and tokenizer saved to {output_dir}")


Map (num_proc=4):   0%|          | 0/11374 [00:00<?, ? examples/s]



Map (num_proc=4):   0%|          | 0/1171 [00:00<?, ? examples/s]

  trainer = Trainer(


Starting training...


Epoch,Training Loss,Validation Loss
1,2.089,1.859626


Epoch,Training Loss,Validation Loss
1,2.089,1.859626
2,1.9468,1.780881


Evaluating on development set...


Evaluation Results: {'eval_loss': 1.7808811664581299, 'eval_runtime': 24.1579, 'eval_samples_per_second': 48.473, 'eval_steps_per_second': 48.473, 'epoch': 2.9931422542641113}
Model and tokenizer saved to /content/drive/My Drive/t5_claim_normalization


In [6]:
from nltk.translate.meteor_score import meteor_score
import pandas as pd
import torch

def compute_meteor(predictions, references):
    tokenized_predictions = [pred.split() for pred in predictions]  # 🔥 Fix: Tokenize predictions
    tokenized_references = [[ref.split()] for ref in references]  # 🔥 Fix: Tokenize references

    scores = [meteor_score(ref, pred) for pred, ref in zip(tokenized_predictions, tokenized_references)]
    return sum(scores) / len(scores)

# Load dev set
dev_df = pd.read_csv("dev-eng.csv")

# Generate predictions with truncation
predictions = []
for post in dev_df["post"]:
    inputs = tokenizer(post, return_tensors="pt", truncation=True, max_length=512).to("cuda")

    output = model.generate(**inputs, max_length=50)
    prediction = tokenizer.decode(output[0], skip_special_tokens=True)
    predictions.append(prediction)

# Compute METEOR Score
meteor_result = compute_meteor(predictions, dev_df["normalized claim"])
print("METEOR Score:", meteor_result)


METEOR Score: 0.22711133291895683


In [10]:
import torch
torch.cuda.empty_cache()  # 🔥 Clears unused memory
torch.cuda.memory_summary(device=None, abbreviated=False)  # Debug memory usage


