In [3]:
MODEL = 't5-large'
BATCH_SIZE = 8
NUM_PROCS = 4
EPOCHS = 10
OUT_DIR = 'results_t5_large/2k_samples'
MAX_LENGTH = 1024 # Maximum context length to consider while preparing dataset.
epoch_metrics = []
DRIVE_DATA_PATH = "/content/drive/MyDrive/processed/10k_samples"   # UPDATE PATH
CLEAN_TEXT_COLUMN='article'
SUMMARY_COLUMN='highlights'

In [4]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Set your data path in Google Drive
# DRIVE_DATA_PATH = '/content/drive/MyDrive/processed/'  # Update this path

Mounted at /content/drive


In [None]:
from google.colab import auth
auth.authenticate_user()

# Install gcsfuse
!echo "deb http://packages.cloud.google.com/apt gcsfuse-bionic main" > /etc/apt/sources.list.d/gcsfuse.list
!curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
!apt -qq update
!apt -qq install gcsfuse

# Create a local directory for mounting
!mkdir results_t5base

# Mount the GCS bucket
# Replace 'your-bucket-name' with the actual name of your GCS bucket
!gcsfuse --implicit-dirs models_checkpoint results_t5base

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  1022  100  1022    0     0  13781      0 --:--:-- --:--:-- --:--:-- 13810
OK
^C
^C
mkdir: cannot create directory ‘results_t5base’: File exists
^C


In [5]:
!pip install -U transformers
!pip install -U datasets
!pip install tensorboard
!pip install sentencepiece
!pip install accelerate
!pip install evaluate
!pip install rouge_score
!pip install tqdm
!pip install tensorboard-data-server
!pip install tbparse

Collecting transformers
  Downloading transformers-4.57.3-py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.57.3-py3-none-any.whl (12.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.0/12.0 MB[0m [31m145.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.57.2
    Uninstalling transformers-4.57.2:
      Successfully uninstalled transformers-4.57.2
Successfully installed transformers-4.57.3
Collecting datasets
  Downloading datasets-4.4.1-py3-none-any.whl.metadata (19 kB)
Collecting pyarrow>=21.0.0 (from datasets)
  Downloading pyarrow-22.0.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (3.2 kB)
Downloading datasets-4.4.1-py3-none-any.whl (511 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m511.6/511.6

In [6]:
import torch
import pprint
import evaluate
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from transformers import (
    T5Tokenizer,
    T5ForConditionalGeneration,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback
)
from datasets import load_dataset

pp = pprint.PrettyPrinter()


In [7]:
# Load data from Google Drive

print("Loading data...")
train_df = pd.read_csv(f"{DRIVE_DATA_PATH}/train.csv").head(2000)
val_df = pd.read_csv(f"{DRIVE_DATA_PATH}/val.csv").head(200)
# test_df = pd.read_csv(f"{DRIVE_DATA_PATH}/test.csv")

# train_df, val_df = train_test_split(train_df, test_size=0.2, shuffle=True)

# train_df = train_df.dropna(subset=['Summary', 'clean_text'])
# val_df = val_df.dropna(subset=['Summary', 'clean_text'])

print("Train:", len(train_df))
print("Val:", len(val_df))
# print("Test:", len(test_df))

Loading data...
Train: 2000
Val: 200


In [8]:
# dataset = load_dataset('gopalkalpande/bbc-news-summary', split='train')
# full_dataset = train_df.train_test_split(test_size=0.2, shuffle=True)
# dataset_train = train_df # full_dataset['train']
# dataset_valid =  val_df # full_dataset['test']

from datasets import Dataset
dataset_train = Dataset.from_pandas(train_df)
dataset_valid = Dataset.from_pandas(val_df)

print(dataset_train)
print(dataset_valid)

Dataset({
    features: ['article', 'highlights', 'id'],
    num_rows: 2000
})
Dataset({
    features: ['article', 'highlights', 'id'],
    num_rows: 200
})


In [9]:
def find_longest_length(dataset):
    """
    Find the longest article and summary in the entire training set.
    """
    max_length = 0
    counter_4k = 0
    counter_2k = 0
    counter_1k = 0
    counter_500 = 0
    counter_700 = 0
    for text in dataset:
        corpus = [
            word for word in text.split()
        ]
        if len(corpus) > 4000:
            counter_4k += 1
        if len(corpus) > 2000:
            counter_2k += 1
        if len(corpus) > 1000:
            counter_1k += 1
        if len(corpus) > 700:
            counter_700 += 1
        if len(corpus) > 500:
            counter_500 += 1
        if len(corpus) > max_length:
            max_length = len(corpus)
    return max_length, counter_4k, counter_2k, counter_1k, counter_700, counter_500

longest_article_length, counter_4k, counter_2k, counter_1k, counter_700, counter_500 = find_longest_length(dataset_train[CLEAN_TEXT_COLUMN])
print(f"Longest article length: {longest_article_length} words")
print(f"Artciles larger than 4000 words: {counter_4k}")
print(f"Artciles larger than 2000 words: {counter_2k}")
print(f"Artciles larger than 1000 words: {counter_1k}")
print(f"Artciles larger than 700 words: {counter_700}")
print(f"Artciles larger than 500 words: {counter_500}")
longest_summary_length, counter_4k, counter_2k, counter_1k, counter_700, counter_500 = find_longest_length(dataset_train[SUMMARY_COLUMN])
print(f"Longest summary length: {longest_summary_length} words")
print(f"Summaries larger than 4000 words: {counter_4k}")
print(f"Summaries larger than 2000 words: {counter_2k}")
print(f"Summaries larger than 1000 words: {counter_1k}")
print(f"Summaries larger than 700 words: {counter_700}")
print(f"Summaries larger than 500 words: {counter_500}")

Longest article length: 1806 words
Artciles larger than 4000 words: 0
Artciles larger than 2000 words: 0
Artciles larger than 1000 words: 242
Artciles larger than 700 words: 701
Artciles larger than 500 words: 1149
Longest summary length: 84 words
Summaries larger than 4000 words: 0
Summaries larger than 2000 words: 0
Summaries larger than 1000 words: 0
Summaries larger than 700 words: 0
Summaries larger than 500 words: 0


In [10]:
def find_avg_sentence_length(dataset):
    """
    Find the average sentence in the entire training set.
    """
    sentence_lengths = []
    for text in dataset:
        corpus = [
            word for word in text.split()
        ]
        sentence_lengths.append(len(corpus))
    return sum(sentence_lengths)/len(sentence_lengths)

avg_article_length = find_avg_sentence_length(dataset_train[CLEAN_TEXT_COLUMN])
print(f"Average article length: {avg_article_length} words")
avg_summary_length = find_avg_sentence_length(dataset_train[SUMMARY_COLUMN])
print(f"Averrage summary length: {avg_summary_length} words")

Average article length: 614.629 words
Averrage summary length: 45.2905 words


In [11]:
tokenizer = T5Tokenizer.from_pretrained(MODEL)
# Function to convert text data into model inputs and targets
def preprocess_function(examples):
    inputs = [f"summarize: {article}" for article in examples[CLEAN_TEXT_COLUMN]]
    model_inputs = tokenizer(
        inputs,
        max_length=MAX_LENGTH,
        truncation=True,
        padding='max_length'
    )

    # Set up the tokenizer for targets
    targets = [summary for summary in examples[SUMMARY_COLUMN]]
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(
            targets,
            max_length=MAX_LENGTH,
            truncation=True,
            padding='max_length'
        )

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Apply the function to the whole dataset
tokenized_train = dataset_train.map(
    preprocess_function,
    batched=True,
    num_proc=NUM_PROCS
)
tokenized_valid = dataset_valid.map(
    preprocess_function,
    batched=True,
    num_proc=NUM_PROCS
)

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Map (num_proc=4):   0%|          | 0/2000 [00:00<?, ? examples/s]



Map (num_proc=4):   0%|          | 0/200 [00:00<?, ? examples/s]



In [16]:
model = T5ForConditionalGeneration.from_pretrained(MODEL)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.config.eos_token_id = tokenizer.eos_token_id
model.config.decoder_start_token_id = tokenizer.pad_token_id
# Total parameters and trainable parameters.
total_params = sum(p.numel() for p in model.parameters())
print(f"{total_params:,} total parameters.")
total_trainable_params = sum(
    p.numel() for p in model.parameters() if p.requires_grad)
print(f"{total_trainable_params:,} training parameters.")

rouge = evaluate.load("rouge")


model.safetensors:   0%|          | 0.00/2.95G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

737,668,096 total parameters.
737,668,096 training parameters.


Downloading builder script: 0.00B [00:00, ?B/s]

In [12]:
def preprocess_logits_for_metrics(logits, labels):
    """
    Original Trainer may have a memory leak.
    This is a workaround to avoid storing too many tensors that are not needed.
    """
    pred_ids = torch.argmax(logits[0], dim=-1)
    return pred_ids, labels

In [13]:
from torch.utils.tensorboard import SummaryWriter
from transformers import TrainerCallback
import time
writer = SummaryWriter(log_dir=OUT_DIR)

class GpuLoggerCallback(TrainerCallback):
    def __init__(self, writer):
        self.writer = writer

    def on_step_end(self, args, state, control, **kwargs):
        if torch.cuda.is_available():
            gpu_mem = torch.cuda.memory_allocated() / (1024 ** 3)
            self.writer.add_scalar("gpu_memory_gb", gpu_mem, state.global_step)
        return control



In [14]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred.predictions[0], eval_pred.label_ids

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    result = rouge.compute(
        predictions=decoded_preds,
        references=decoded_labels,
        use_stemmer=True,
        rouge_types=['rouge1','rouge2','rougeL']
    )

    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions]
    result["gen_len"] = np.mean(prediction_lens)

    # NEW: Log to TensorBoard
    for k, v in result.items():
        writer.add_scalar(f"eval/{k}", v, trainer.state.global_step)

    # if trainer.state.is_local_process_zero and trainer.state.epoch is not None:
    #     print(result)
    #     pd.DataFrame([result]).to_csv(f"{OUT_DIR}/rouge_epoch_{int(trainer.state.epoch)}.csv")


    # FIXED: make sure it only runs on epoch boundaries
    # if trainer.state.is_local_process_zero and trainer.state.epoch is not None:
    #     epoch_num = int(trainer.state.epoch)
    #     print(f"[Saving ROUGE metrics for epoch {epoch_num}]")
    #     pd.DataFrame([result]).to_csv(f"{OUT_DIR}/rouge_epoch_{epoch_num}.csv", index=False)
    pd.DataFrame([result]).to_csv(f"{OUT_DIR}/rouge_results_step_{trainer.state.global_step}.csv")


    # NEW: Save as CSV for later plotting
    # pd.DataFrame([result]).to_csv(f"{OUT_DIR}/rouge_results_step_{trainer.state.global_step}.csv")

    return {k: round(v, 4) for k, v in result.items()}


In [19]:
training_args = TrainingArguments(
    output_dir=OUT_DIR,
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=2, #todo to test this how much to use more means faster
    per_device_eval_batch_size=4,#todo to test this how much to use
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir=OUT_DIR,
    logging_steps=200,
    logging_strategy="epoch",
    # eval_strategy='steps',
    eval_strategy='epoch',

    eval_steps=200,
    save_strategy='epoch',
    report_to='tensorboard',

    learning_rate=0.0001,
    dataloader_num_workers=4,
    bf16=True,
    fp16=False,
    tf32=True,
    metric_for_best_model="rougeL",
    load_best_model_at_end=True
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_valid,
    preprocess_logits_for_metrics=preprocess_logits_for_metrics,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

trainer.add_callback(GpuLoggerCallback(writer))
pd.DataFrame(trainer.state.log_history).to_csv(f"{OUT_DIR}/training_history.csv")

start = time.time()
history = trainer.train()
end = time.time()

writer.add_scalar("total_training_time_seconds", end - start, 0)

Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Gen Len
1,1.1754,0.072933,0.6692,0.4188,0.6387,63.575
2,0.0664,0.074537,0.6645,0.4177,0.6348,63.575
3,0.0475,0.07981,0.6602,0.4096,0.6287,63.575
4,0.0339,0.085835,0.6587,0.4103,0.6273,63.575


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].


In [None]:
!gsutil cp -r /content/results_t5base/4k_samples gs://models_checkpoint/models/results_t5base/4k_samples


In [None]:
tokenizer.save_pretrained(OUT_DIR)

from transformers.trainer_utils import get_last_checkpoint
last_ckpt = get_last_checkpoint(OUT_DIR)
model_path = last_ckpt if last_ckpt else OUT_DIR


model = T5ForConditionalGeneration.from_pretrained(model_path)
tokenizer = T5Tokenizer.from_pretrained(OUT_DIR)

In [20]:
def summarize_text(text, model, tokenizer, max_length=512, num_beams=5):
    # 1. Tokenize properly (returns attention mask too)
    encoded = tokenizer(
        "summarize: " + text,
        return_tensors='pt',
        max_length=max_length,
        truncation=True,
    )

    # 2. Move everything to the same device as the model
    device = model.device
    encoded = {k: v.to(device) for k, v in encoded.items()}

    # 3. Generate summary
    summary_ids = model.generate(
        **encoded,
        max_length=128,         # not 50 → 50 is too short for news
        num_beams=num_beams,
        length_penalty=1.1,
        no_repeat_ngram_size=3,
        early_stopping=True
    )

    # 4. Decode
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


In [21]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import nltk
nltk.download("punkt")
nltk.download("punkt_tab")

nli_tok = AutoTokenizer.from_pretrained("roberta-large-mnli")
nli_model = AutoModelForSequenceClassification.from_pretrained("roberta-large-mnli").to(device)

def hallucination_rate(summary, source):
    sentences = nltk.sent_tokenize(summary)
    hallucinated = 0

    for sent in sentences:
        inputs = nli_tok.encode_plus(source, sent, return_tensors="pt", truncation=True).to(device)
        logits = nli_model(**inputs).logits
        probs = torch.softmax(logits, dim=1)[0]
        contradiction = probs[0].item()
        entailment = probs[2].item()

        if contradiction > entailment:
            hallucinated += 1

    return hallucinated / len(sentences)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/688 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.43G [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [22]:
test_df = pd.read_csv(f"{DRIVE_DATA_PATH}/test.csv")


In [23]:
from tqdm import tqdm

model.eval()
results = []

# Process test set (limit to first 50 for demo)
test_limit = 100
test_subset = test_df.head(test_limit)

print(f"Processing {len(test_subset)} articles...")

for idx, row in tqdm(test_subset.iterrows(), total=len(test_subset)):
    article_id = int(idx)
    text = str(row[CLEAN_TEXT_COLUMN])
    original_summary = str(row[SUMMARY_COLUMN])

    summary = summarize_text(text, model, tokenizer)

    results.append({
        "article_id": article_id,
        "original_text": text,
        "summary": summary,
        "original_summary": original_summary
    })

print(f"Processed {len(results)} articles")

hallucinations = []
for sample in results:
    rate = hallucination_rate(sample["summary"], sample["original_text"])
    hallucinations.append(rate)

avg_hall = np.mean(hallucinations)
writer.add_scalar("hallucination_rate", avg_hall, 0)
pd.DataFrame({"hallucination_rate": hallucinations}).to_csv(f"{OUT_DIR}/hallucination.csv")



Processing 100 articles...


100%|██████████| 100/100 [05:05<00:00,  3.06s/it]


Processed 100 articles


In [24]:
# Save results
import json
import os


results_dir = '/content/t5_results_new_dataset'
os.makedirs(OUT_DIR, exist_ok=True)

output_path = f"{OUT_DIR}/t5_10_results.json"
with open(output_path, "w") as f:
    json.dump(results, f, indent=2)

print(f"Results saved to {output_path}")

# Optionally save to Google Drive
drive_output_path = f"{DRIVE_DATA_PATH}{OUT_DIR}/../results/t5_2k_results.json"
os.makedirs(os.path.dirname(drive_output_path), exist_ok=True)
with open(drive_output_path, "w") as f:
    json.dump(results, f, indent=2)

print(f"Results also saved to Google Drive: {drive_output_path}")


Results saved to results_t5_large/2k_samples/t5_10_results.json
Results also saved to Google Drive: /content/drive/MyDrive/processed/10k_samplesresults_t5_large/2k_samples/../results/t5_2k_results.json


In [25]:
from tbparse import SummaryReader
import pandas as pd
import matplotlib.pyplot as plt

#/content/results_t5base/4k_samples_test/events.out.tfevents.1764615908.80a126f62e9a.934.3
  # change for each run/content/results_t5base/4k_samples/events.out.tfevents.1764612565.80a126f62e9a.934.0

reader = SummaryReader(OUT_DIR, pivot=False)
df = reader.scalars
df.to_csv(f'{OUT_DIR}/metrics.csv')

# df_pivoted_cleaned = df.pivot_table(
#     index='step',
#     columns='tag',
#     values='value',
# )

# df_pivoted_cleaned
# df_pivoted = df.pivot(index='tag1', columns='tag')


# EVAL_PREFIX = 'eval/'
# eval_columns = [col for col in df.columns if col.startswith(EVAL_PREFIX)]

# # 2. Filter the DataFrame
# # Use .dropna() to remove rows (axis=0) where all of the columns in 'subset' are NaN.
# df_filtered = df.dropna(subset=eval_columns, how='all')

# # Display the first few rows of the cleaned data
# print(df_filtered.head())

# print("DF columns:", df.columns)
# print("Number of rows:", len(df))
# df.head()

In [None]:
%load_ext tensorboard
%tensorboard --logdir $LOG_DIR

In [26]:
from google.colab import auth
auth.authenticate_user()

# Install gcsfuse
!echo "deb http://packages.cloud.google.com/apt gcsfuse-bionic main" > /etc/apt/sources.list.d/gcsfuse.list
!curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
!apt -qq update
!apt -qq install gcsfuse

# Create a local directory for mounting
# !mkdir results_t5base

# Mount the GCS bucket
# Replace 'your-bucket-name' with the actual name of your GCS bucket
# !gcsfuse --implicit-dirs models_checkpoint results_t5base

!gsutil cp -r /content/results_t5_large/2k_samples gs://models_checkpoint/models/results_t5_large/2k_samples



  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100  1022  100  1022    0     0  13646      0 --:--:-- --:--:-- --:--:-- 13810
OK
58 packages can be upgraded. Run 'apt list --upgradable' to see them.
[1;33mW: [0mhttp://packages.cloud.google.com/apt/dists/gcsfuse-bionic/InRelease: Key is stored in legacy trusted.gpg keyring (/etc/apt/trusted.gpg), see the DEPRECATION section in apt-key(8) for details.[0m
[1;33mW: [0mSkipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)[0m
The following NEW packages will be installed:
  gcsfuse
0 upgraded, 1 newly installed, 0 to remove and 58 not upgraded.
Need to get 15.1 MB of archives.
After this operation, 0 B of additional dis

In [None]:
!gsutil cp -r /content/results_t5base/2k_samples gs://models_checkpoint/models/results_t5base/2k_samples

Copying file:///content/results_t5base/2k_samples/t5_10_results.json [Content-Type=application/json]...
Copying file:///content/results_t5base/2k_samples/rouge_results_step_334.csv [Content-Type=text/csv]...
Copying file:///content/results_t5base/2k_samples/events.out.tfevents.1764708975.e22f301bf5d1.45144.0 [Content-Type=application/octet-stream]...
Copying file:///content/results_t5base/2k_samples/training_history.csv [Content-Type=text/csv]...
| [4 files][525.5 KiB/525.5 KiB]                                                
==> NOTE: You are performing a sequence of gsutil operations that may
run significantly faster if you instead use gsutil -m cp ... Please
see the -m section under "gsutil help options" for further information
about when gsutil -m can be advantageous.

Copying file:///content/results_t5base/2k_samples/hallucination.csv [Content-Type=text/csv]...
Copying file:///content/results_t5base/2k_samples/rouge_results_step_668.csv [Content-Type=text/csv]...
Copying file:///c