# Topsis
**Comparison using Pre-Trained models**

In [30]:
!pip install nltk rouge-score bert-score
import nltk
from datasets import load_dataset
from nltk.translate.bleu_score import corpus_bleu
from rouge_score import rouge_scorer
import pandas as pd
from bert_score import score
import warnings
warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", message="Your max_length is set to *")
warnings.filterwarnings("ignore", category=DeprecationWarning)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [31]:
!pip install py7zr

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




# Dataset: SAMSUM

In [32]:
from datasets import load_dataset
dataset = load_dataset("samsum")
df= dataset['test'].to_pandas()

In [33]:
df.head()

Unnamed: 0,id,dialogue,summary
0,13862856,"Hannah: Hey, do you have Betty's number?\nAman...",Hannah needs Betty's number but Amanda doesn't...
1,13729565,Eric: MACHINE!\r\nRob: That's so gr8!\r\nEric:...,Eric and Rob are going to watch a stand-up on ...
2,13680171,"Lenny: Babe, can you help me with something?\r...",Lenny can't decide which trousers to buy. Bob ...
3,13729438,"Will: hey babe, what do you want for dinner to...",Emma will be home soon and she will let Will k...
4,13828600,"Ollie: Hi , are you in Warsaw\r\nJane: yes, ju...",Jane is in Warsaw. Ollie and Jane has a party....


# Topsis Parameters

In [53]:
def calculate_redundancy(summaries):
    
    total_tokens = sum(len(summary.split()) for summary in summaries)
    unique_tokens = len(set(token for summary in summaries for token in summary.split()))
    redundancy_score = 1 - (unique_tokens / total_tokens)
    
    return redundancy_score

In [None]:
def calc_bleu(actual_summary, predicted_summary):
    actual_summaries_tokenized = [[ref.split()] for ref in actual_summaries]         # tokenizing the actual summary
    pred_summaries_tokenized = [output.split() for output in pred_summaries]         # tokenizing the predicted summary
    bleu_score = corpus_bleu(actual_summaries_tokenized, pred_summaries_tokenized)
    return bleu_score
    

In [55]:
def calc_bert(actual_summary, predicted_summary):
    P, R, F1 = score(actual_summary, predicted_summary, lang='en', verbose=False)     # returns Precision, Recall and F1 score
    bert_score = F1.mean().item()  
    return bert_score

In [56]:
def calculate_rouge(actual_summary, predicted_summary):
    rouge = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)         
             
    rouge1_scores = []
    rouge2_scores = []
    rougeL_scores = []

    for pred, actual in zip(predicted_summary, actual_summary):
        rouge_scores = rouge.score(pred, actual)                                               
        rouge1_scores.append(rouge_scores['rouge1'].fmeasure)
        rouge2_scores.append(rouge_scores['rouge2'].fmeasure)
        rougeL_scores.append(rouge_scores['rougeL'].fmeasure)

            
    rouge1 = sum(rouge1_scores) / len(rouge1_scores)                                      
    rouge2 = sum(rouge2_scores) / len(rouge2_scores)
    rougeL = sum(rougeL_scores) / len(rougeL_scores)

    return rouge1, rouge2, rougeL

In [57]:
def get_metric(actual, predicted):
    redundancy = calculate_redundancy(predicted)
    rouge1, rouge2, rougeL = calculate_rouge(actual, predicted)
    bert = calc_bert(actual, predicted)
    bleu = calc_bleu(actual, predicted)
    
    return redundancy, bleu, bert, rouge1, rouge2, rougeL


In [58]:
redundancy_score=[]
bleu_score=[]
bert_score=[]
rouge1_score=[]
rouge2_score=[]
rougeL_score=[]

In [59]:
df= df.sample(n=50, replace= False).reset_index(drop= True)

# Models

In [60]:
models= ['philschmid/bart-large-cnn-samsum',
         'facebook/bart-large-cnn',
         'philschmid/distilbart-cnn-12-6-samsum',
         'knkarthick/MEETING_SUMMARY',
         'google/bigbird-pegasus-large-arxiv']

In [62]:
from transformers import pipeline

for Model in models:
    warnings.filterwarnings("ignore")
    pipe = pipeline("summarization", model=Model)
    
    print("\nMODEL: ",Model)
    print(pipe(df['dialogue'][0], max_length= 130, min_length=30, truncation= True))
    
    predictions= []
    for i in range(0,len(df)):
        warnings.filterwarnings("ignore")
        pred= pipe(df['dialogue'][i], max_length=130, min_length=30, truncation= True)[0]['summary_text']
        predictions.append(pred)
    
    BLEU,BERT,Rouge_1,Rouge_2,Rouge_L,Redundancy = get_metric(df['summary'].tolist(), predictions)
    
    print('PARAMETERS')
    print('BLEU Score: ',BLEU)
    print('BERT Score: ',BERT)
    print('Rouge-1 Score: ',Rouge_1)
    print('Rouge-2 Score: ',Rouge_2)
    print('Rouge-L Score: ',Rouge_L)
    print('Redundancy Score: ',Redundancy)
    
    bleu_score.append(BLEU)
    bert_score.append(BERT)
    rouge1_score.append(Rouge_1)
    rouge2_score.append(Rouge_2)
    rougeL_score.append(Rouge_L)
    redundancy_score.append(Redundancy)
    
    print('\n')


MODEL:  philschmid/bart-large-cnn-samsum
[{'summary_text': "Molly and Luca are having a silly season. Molly can't think and is frustrated. Luca tries to cheer her up."}]


Your max_length is set to 130, but your input_length is only 80. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=40)
Your max_length is set to 130, but your input_length is only 55. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=27)
Your max_length is set to 130, but your input_length is only 109. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=54)
Your max_length is set to 130, but your input_length is only 21. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)
You

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


PARAMETERS
BLEU Score:  0.5248425472358292
BERT Score:  0.14330235645820502
Rouge-1 Score:  0.9124612212181091
Rouge-2 Score:  0.4897180815872998
Rouge-L Score:  0.2536939137548797
Redundancy Score:  0.3850672857072429




config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]


MODEL:  facebook/bart-large-cnn
[{'summary_text': "Molly: It's the silly season, isn't it? Luca: Yep. Just hang in there. Molly: I can't think anymore today!Luca: LOL! Molly: You can't let it get you down!"}]


Your max_length is set to 130, but your input_length is only 80. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=40)
Your max_length is set to 130, but your input_length is only 55. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=27)
Your max_length is set to 130, but your input_length is only 109. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=54)
Your max_length is set to 130, but your input_length is only 21. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)
You

PARAMETERS
BLEU Score:  0.4580858085808581
BERT Score:  0.05664177239359781
Rouge-1 Score:  0.8735765218734741
Rouge-2 Score:  0.3337717848894025
Rouge-L Score:  0.11013806720926396
Redundancy Score:  0.23914419749950994




config.json:   0%|          | 0.00/1.85k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.10k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]


MODEL:  philschmid/distilbart-cnn-12-6-samsum
[{'summary_text': "Molly can't think anymore today. It's the silly season. Luca reminds Molly not to let it get her down. "}]


Your max_length is set to 130, but your input_length is only 80. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=40)
Your max_length is set to 130, but your input_length is only 55. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=27)
Your max_length is set to 130, but your input_length is only 109. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=54)
Your max_length is set to 130, but your input_length is only 21. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)
You

PARAMETERS
BLEU Score:  0.5116788321167883
BERT Score:  0.13932229438644483
Rouge-1 Score:  0.9088510870933533
Rouge-2 Score:  0.47080047049510815
Rouge-L Score:  0.23880643154337491
Redundancy Score:  0.36200919848714785




config.json:   0%|          | 0.00/1.59k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/337 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]


MODEL:  knkarthick/MEETING_SUMMARY
[{'summary_text': "It's the silly season and Molly is stressed out because she can't think of what to do next. Luca tries to cheer her up."}]


Your max_length is set to 130, but your input_length is only 80. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=40)
Your max_length is set to 130, but your input_length is only 55. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=27)
Your max_length is set to 130, but your input_length is only 109. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=54)
Your max_length is set to 130, but your input_length is only 21. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)
You

PARAMETERS
BLEU Score:  0.5191370911621433
BERT Score:  0.13732998877543318
Rouge-1 Score:  0.9100101590156555
Rouge-2 Score:  0.4683979476855605
Rouge-L Score:  0.23578188575238948
Redundancy Score:  0.354625858941433




config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.31G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/232 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.19k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/1.92M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.51M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/775 [00:00<?, ?B/s]

Your max_length is set to 130, but your input_length is only 114. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=57)
Attention type 'block_sparse' is not possible if sequence_length: 114 <= num global tokens: 2 * config.block_size + min. num sliding tokens: 3 * config.block_size + config.num_random_blocks * config.block_size + additional buffer: config.num_random_blocks * config.block_size = 704 with config.block_size = 64, config.num_random_blocks = 3. Changing attention type to 'original_full'...



MODEL:  google/bigbird-pegasus-large-arxiv


Your max_length is set to 130, but your input_length is only 114. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=57)


[{'summary_text': 'it has been shown that the laws of thermodynamics can be used to predict the outcome of many physical experiments .<n> here we show that the laws of thermodynamics can also be used to predict the outcome of some experiments .<n> we do this by showing that the laws of thermodynamics can be used to predict the outcome of experiments in which the experimenter is subjected to some external perturbation .<n> we do this by showing that the predictions of the laws of thermodynamics can be used to predict the outcome of experiments in which the experimenter is subjected to some external perturbation .<n> we do this by showing that the predictions of the laws'}]


Your max_length is set to 130, but your input_length is only 122. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=61)
Your max_length is set to 130, but your input_length is only 69. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=34)
Your max_length is set to 130, but your input_length is only 41. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=20)
Your max_length is set to 130, but your input_length is only 101. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=50)
Yo

PARAMETERS
BLEU Score:  0.8732200275608636
BERT Score:  0.06031464116282594
Rouge-1 Score:  0.792246401309967
Rouge-2 Score:  0.04762497445521983
Rouge-L Score:  0.0007775151037562385
Redundancy Score:  0.04297484876934316




**Scores**

In [75]:
import numpy as np
scores= [bleu_score,bert_score,
    rouge1_score,
    rouge2_score,
    rougeL_score,
    redundancy_score]
for score in scores:
    for i in range(len(score)):
        score[i]= np.round(score[i],3)

In [76]:
df_topsis= pd.DataFrame({
    'Model': models,
    'BLEU': bleu_score,
    'BERT': bert_score,
    'Rouge-1': rouge1_score,
    'Rouge-2': rouge2_score,
    'Rouge-L': rougeL_score,
    'Redundancy': redundancy_score
})

In [77]:
df_topsis

Unnamed: 0,Model,BLEU,BERT,Rouge-1,Rouge-2,Rouge-L,Redundancy
0,philschmid/bart-large-cnn-samsum,0.525,0.143,0.912,0.49,0.254,0.385
1,facebook/bart-large-cnn,0.458,0.057,0.874,0.334,0.11,0.239
2,philschmid/distilbart-cnn-12-6-samsum,0.512,0.139,0.909,0.471,0.239,0.362
3,knkarthick/MEETING_SUMMARY,0.519,0.137,0.91,0.468,0.236,0.355
4,google/bigbird-pegasus-large-arxiv,0.873,0.06,0.792,0.048,0.001,0.043


# Topsis

In [78]:
weights= [0.2,0.15,0.20,0.20,0.15,0.1]         
impacts= ['+','+','+','+','+','-']

In [79]:
def normalize(matrix):
    norm_matrix = matrix / np.sqrt(np.sum(matrix**2, axis=0))                    # normalize the matrix
    return norm_matrix

def weighted_normalize(norm_matrix, weights):
    weighted_norm_matrix = norm_matrix * weights                                 # calculate the weighted normalized matrix
    return weighted_norm_matrix

def ideal_best_worst(weighted_norm_matrix, impacts):
    ideal_solution = np.max(weighted_norm_matrix, axis=0) * impacts              # calculate the ideal_best and ideal_worst solutions
    ideal_worst_solution = np.min(weighted_norm_matrix, axis=0) * impacts
    return ideal_solution, ideal_worst_solution

def euclidean_distances(weighted_norm_matrix, ideal_solution, ideal_worst_solution):
    dist_to_ideal = np.sqrt(np.sum((weighted_norm_matrix - ideal_solution)**2, axis=1))           # Calculate the Euclidean distances to the ideal_best and ideal_worst solutions.
    dist_to_ideal_worst = np.sqrt(np.sum((weighted_norm_matrix - ideal_worst_solution)**2, axis=1))
    return dist_to_ideal, dist_to_ideal_worst

def performance_score(dist_to_ideal, dist_to_ideal_worst):
    score = dist_to_ideal_worst / (dist_to_ideal + dist_to_ideal_worst)            # calculate the topsis score for each model
    return score

def topsis(matrix, weights, impacts):                                              # perform TOPSIS analysis
    # Step 1: Normalize the decision matrix
    norm_matrix = normalize(matrix)
    
    # Step 2: Calculate the weighted normalized decision matrix
    weighted_norm_matrix = weighted_normalize(norm_matrix, weights)
    ideal_solution, ideal_worst_solution = ideal_best_worst(weighted_norm_matrix, impacts)
    
    # Step 4: Calculate the Euclidean distances to the ideal_best and ideal_worst solutions
    dist_to_ideal, dist_to_ideal_worst = euclidean_distances(weighted_norm_matrix, ideal_solution, ideal_worst_solution)
    
    # Step 5: Calculate the performance score for each alternative/model
    score = performance_score(dist_to_ideal, dist_to_ideal_worst)
    
    # Step 6: Rank the alternatives/models based on their performance scores
    sorted_indices = np.argsort(score)[::-1]                                       # Indices of scores sorted in descending order
    rankings = np.empty_like(sorted_indices)                                       # Create an empty array to store rankings
    rankings[sorted_indices] = np.arange(len(score)) + 1                           # Assign ranks
    
    return score, rankings

In [80]:
df_metrics= df_topsis.drop('Model',axis=1)
impacts_as_integers = [1 if impact == '+' else -1 for impact in impacts]
topsis_score, rankings = topsis(df_metrics, weights, impacts_as_integers)

In [81]:
for i in range(len(topsis_score)):
    topsis_score[i] = np.round(topsis_score[i], 3)
    
df_topsis['TOPSIS Score'] = topsis_score
df_topsis['TOPSIS Rank'] = rankings

# Final Result and Ranking

In [82]:
df_topsis

Unnamed: 0,Model,BLEU,BERT,Rouge-1,Rouge-2,Rouge-L,Redundancy,TOPSIS Score,TOPSIS Rank
0,philschmid/bart-large-cnn-samsum,0.525,0.143,0.912,0.49,0.254,0.385,0.555,1
1,facebook/bart-large-cnn,0.458,0.057,0.874,0.334,0.11,0.239,0.386,4
2,philschmid/distilbart-cnn-12-6-samsum,0.512,0.139,0.909,0.471,0.239,0.362,0.547,2
3,knkarthick/MEETING_SUMMARY,0.519,0.137,0.91,0.468,0.236,0.355,0.546,3
4,google/bigbird-pegasus-large-arxiv,0.873,0.06,0.792,0.048,0.001,0.043,0.291,5
