In [1]:
!nvidia-smi

Fri Nov 15 19:57:45 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.90.07              Driver Version: 550.90.07      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA H100 80GB HBM3          On  |   00000000:BE:00.0 Off |                    0 |
| N/A   33C    P0            128W /  700W |   10712MiB /  81559MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
import torch
from transformers import BartForConditionalGeneration, BartTokenizer, Trainer, TrainingArguments
from datasets import load_dataset, Dataset

import pandas as pd
import numpy as np
import warnings
from tqdm import tqdm
warnings.filterwarnings("ignore")

### BART + Sports

In [None]:
#from google.colab import drive
#drive.mount('/content/drive')

# Loading Sports Category Train Data
df = pd.read_csv('./sports_train.csv')

In [None]:
# BART Tokenizer
tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')

# Preprocess the input and output text
def preprocess_function(examples):
    inputs = tokenizer(examples['text'], max_length=1024, truncation=True, padding='max_length')
    labels = tokenizer(examples['summary'], max_length=128, truncation=True, padding='max_length')

    # Set labels for training
    inputs['labels'] = labels['input_ids']
    return inputs

In [11]:
from datasets import Dataset

# Convert pandas DataFrame to Hugging Face Dataset
hf_dataset = Dataset.from_pandas(df[['text', 'summary']])

# Tokenize the dataset
tokenized_datasets = hf_dataset.map(preprocess_function, batched=True, remove_columns=["text", "summary"])


Map: 100%|██████████| 57000/57000 [02:55<00:00, 325.62 examples/s]


In [None]:
# 80-20 train-test split
from sklearn.model_selection import train_test_split
train_test_split = tokenized_datasets.train_test_split(test_size=0.2)
train_dataset = train_test_split['train']
eval_dataset = train_test_split['test']

In [None]:
# Pretrained BART-Large used for finetuning
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large')

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results_train_sport",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    report_to="none",
)


In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

BartForConditionalGeneration(
  (model): BartModel(
    (shared): BartScaledWordEmbedding(50265, 1024, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): BartScaledWordEmbedding(50265, 1024, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0-11): 12 x BartEncoderLayer(
          (self_attn): BartSdpaAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
    

In [None]:
# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

# Train the model with a progress bar (tqdm is automatically included)
trainer.train()

Epoch,Training Loss,Validation Loss
1,0.4823,0.374927
2,0.3058,0.364522
3,0.2628,0.363627


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



TrainOutput(global_step=34200, training_loss=0.3697824367863393, metrics={'train_runtime': 6512.1732, 'train_samples_per_second': 21.007, 'train_steps_per_second': 5.252, 'total_flos': 2.964599095689216e+17, 'train_loss': 0.3697824367863393, 'epoch': 3.0})

### BART+Sports Evaluation

In [None]:
## Loading model from last checkpoint

path = "./results_train_sport/checkpoint-34200"
model = BartForConditionalGeneration.from_pretrained(path)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

In [None]:
# Loading Rouge metrics

from rouge_score import rouge_scorer

scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

##### Evaluation on Architecture Target Domain

In [None]:
test_arch_df=pd.read_csv('./architecture_test.csv')
test_arch_df[['text','summary']].head()

Unnamed: 0,text,summary
0,In what may be the last hurrah for public buil...,Fifth of RIBA's annual architecture plaudits g...
1,"NEW HAVEN, April 27— Vincent Scully did not wa...","Vincent Scully did not want to retire, but Yal..."
2,It's been a good week for women in architectur...,It was a good week for women architects – exce...
3,This piece originally appeared on brunchwork.\...,Start with these three strategies
4,'This is one of those projects that is trying ...,"With its sleek new museum, built by 'winking-e..."


In [None]:
rouge_scores_arch=[]
for idx, row in tqdm(test_arch_df.iterrows(), total=test_arch_df.shape[0]):
    inputs = tokenizer(row['text'], return_tensors="pt", truncation=True, max_length=1024).to(device)
    # Generate summary
    summary_ids = model.generate(inputs['input_ids'], max_length=150, min_length=40, length_penalty=2.0, num_beams=4)
    predicted_summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    
    # Calculate ROUGE scores
    score = scorer.score(row['summary'], predicted_summary)
    rouge_scores_arch.append(score)

100%|██████████| 419/419 [04:58<00:00,  1.40it/s]


In [None]:
average_scores_arch = {metric: sum([score[metric].fmeasure for score in rouge_scores_arch]) / len(rouge_scores_arch) for metric in rouge_scores_arch[0]}
average_scores_arch

{'rouge1': 0.3885935172823962,
 'rouge2': 0.26067413771638254,
 'rougeL': 0.338704633130942}

#### Evaluation of Food Target Domain

In [None]:
test_fd_df=pd.read_csv('./food_test.csv')
test_fd_df[['text','summary']].head()

Unnamed: 0,text,summary
0,When former UMass basketball player Luke Bonne...,The New Hampshire brewery joins up with the Ro...
1,is living the PRISON HIGH LIFE ... at least co...,Lauryn Hill is living the PRISON HIGH LIFE ......
2,The company posted fiscal second-quarter earni...,Darden Restaurants reported quarterly earnings...
3,A shrimp dish at Red Lobster has a calorie cou...,According to the Center for Science in the Pub...
4,Karoline Boehm Goodnick for The Boston Globe\n...,Don’t waste one of the best parts


In [None]:
rouge_scores_fd=[]
for idx, row in tqdm(test_fd_df.iterrows(), total=test_fd_df.shape[0]):
    inputs = tokenizer(row['text'], return_tensors="pt", truncation=True, max_length=1024).to(device)
    # Generate summary
    summary_ids = model.generate(inputs['input_ids'], max_length=150, min_length=40, length_penalty=2.0, num_beams=4)
    predicted_summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    
    # Calculate ROUGE scores
    score = scorer.score(row['summary'], predicted_summary)
    rouge_scores_fd.append(score)

100%|██████████| 1482/1482 [17:59<00:00,  1.37it/s]


In [None]:
average_scores_fd = {metric: sum([score[metric].fmeasure for score in rouge_scores_fd]) / len(rouge_scores_fd) for metric in rouge_scores_fd[0]}
average_scores_fd

{'rouge1': 0.33970193274322263,
 'rouge2': 0.22281161505207608,
 'rougeL': 0.29548394783469795}

#### Evaluation of Sports Target Domain

In [None]:
test_sprt_df=pd.read_csv('./sports_test.csv')
test_sprt_df[['text','summary']].head()

Unnamed: 0,text,summary
0,"BADEN— BADEN, West Germany, Sept. 29 - A Roy a...","BADEN, West Germany, Sept. 29 - A Roy al Canad..."
1,Aaron Rodgers can still remember his close enc...,Aaron Rodgers can still remember his close enc...
2,"Wallace Robinson, the 6-foot-8-inch center who...","Wallace Robinson, the 6-foot-8-inch center who..."
3,Treasurer Joe Hockey has urged first home buye...,Asked whether Sydney’s property prices are out...
4,"On Saturday, the Australian freestyler Mack Ho...",Swimmers at the Rio Games who once served susp...


In [None]:
rouge_scores_sprt=[]
for idx, row in tqdm(test_sprt_df.iterrows(), total=test_sprt_df.shape[0]):
    inputs = tokenizer(row['text'], return_tensors="pt", truncation=True, max_length=1024).to(device)
    # Generate summary
    summary_ids = model.generate(inputs['input_ids'], max_length=150, min_length=40, length_penalty=2.0, num_beams=4)
    predicted_summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    
    # Calculate ROUGE scores
    score = scorer.score(row['summary'], predicted_summary)
    rouge_scores_sprt.append(score)

100%|██████████| 6163/6163 [1:08:12<00:00,  1.51it/s]


In [None]:
average_scores_sprt = {metric: sum([score[metric].fmeasure for score in rouge_scores_sprt]) / len(rouge_scores_sprt) for metric in rouge_scores_sprt[0]}
average_scores_sprt

{'rouge1': 0.41663422079755513,
 'rouge2': 0.28440243223020006,
 'rougeL': 0.3639577403634263}

#### Evaluation of Entertainment Target Domain

In [None]:
test_ent_df=pd.read_csv('./entertainment_test.csv')
test_ent_df[['text','summary']].head()

Unnamed: 0,text,summary
0,Adam Sandler and Drew Barrymore paired up pret...,Adam Sandler and Drew Barrymore are back toget...
1,FORTUNE — Vine Alternative Investments is rais...,Vine is latest in recent string of 'income fun...
2,updated 03/23/2015 AT 01:45 PM EDT\n\n•origina...,The new Late Late Show host also names an Emmy...
3,Ridley Scott’s Exodus: Gods and Kings is not e...,"Ridley Scott's new movie ""Exodus: Gods and Kin..."
4,You know that it’s gotten bad when Dan Rather ...,You know that it’s gotten bad when Dan Rather ...


In [None]:
rouge_scores_ent=[]
for idx, row in tqdm(test_ent_df.iterrows(), total=test_ent_df.shape[0]):
    inputs = tokenizer(row['text'], return_tensors="pt", truncation=True, max_length=1024).to(device)
    # Generate summary
    summary_ids = model.generate(inputs['input_ids'], max_length=150, min_length=40, length_penalty=2.0, num_beams=4)
    predicted_summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    
    # Calculate ROUGE scores
    score = scorer.score(row['summary'], predicted_summary)
    rouge_scores_ent.append(score)

100%|██████████| 6668/6668 [1:19:34<00:00,  1.40it/s]


In [None]:
average_scores_ent = {metric: sum([score[metric].fmeasure for score in rouge_scores_ent]) / len(rouge_scores_ent) for metric in rouge_scores_ent[0]}
average_scores_ent

{'rouge1': 0.3950585463511238,
 'rouge2': 0.27709023678851996,
 'rougeL': 0.34953245702320235}

#### Evaluation of Technology Target Domain

In [None]:
test_tech_df = pd.read_csv('./technology_test.csv')
test_tech_df[['text','summary']].head()

In [None]:
rouge_scores_tech = []
for idx, row in tqdm(test_tech_df.iterrows(), total=test_tech_df.shape[0]):
    inputs = tokenizer(row['text'], return_tensors="pt", truncation=True, max_length=1024).to(device)
    # Generate summary
    summary_ids = model.generate(inputs['input_ids'], max_length=150, min_length=40, length_penalty=2.0, num_beams=2)
    predicted_summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    
    # Calculate ROUGE scores
    score = scorer.score(row['summary'], predicted_summary)
    rouge_scores_tech.append(score)

100%|██████████| 5448/5448 [1:03:14<00:00,  1.44it/s]


In [None]:
average_scores_tech = {metric: sum([score[metric].fmeasure for score in rouge_scores_tech]) / len(rouge_scores_tech) for metric in rouge_scores_tech[0]}
average_scores_tech

{'rouge1': 0.3881053490898981,
 'rouge2': 0.26634486962862797,
 'rougeL': 0.3414239827085971}