# NUBIA: A new SoTA evaluation metric for text generation
[View Paper](https://arxiv.org/abs/2004.14667) |  [Repo](https://github.com/wl-research/nubia) | [Blog post](https://wl-research.github.io/blog/) | [FAQ](https://github.com/wl-research/nubia/blob/master/FAQ.md)

## Cloning the repo and installing requirements:

In [None]:
# Clone the NUBIA repository into the current working directory.
!git clone https://github.com/wl-research/nubia.git
import os
# Move into the cloned repo so that later cells can use relative paths such as
# requirements.txt.
# NOTE(review): the path is hard-coded and looks Kaggle-specific
# (/kaggle/working) — adjust it when running in another environment.
os.chdir('/kaggle/working/nubia/')

In [None]:
%%capture
# Install the packages listed in requirements.txt, resolved relative to the
# current working directory.
!pip install -r requirements.txt

## Importing Nubia and initializing:
Note: The first time you initialize the class, it downloads the pretrained models from the S3 bucket; this may take a while depending on your internet connection.

In [None]:
%%capture
from nubia_score import Nubia
# Build the NUBIA scorer once; the module-level `nubia` object is what
# all_metrics() calls for every sentence pair. The notebook text notes that the
# first construction downloads the pretrained models.
nubia = Nubia()

## Text evaluation! 

In [None]:
%%capture
# Install the bert-score package (imported below as `bert_score`).
!pip install bert-score

In [None]:
%%capture
import nltk
# Fetch NLTK's 'punkt' resource.
# NOTE(review): presumably the tokenizer data word_tokenize needs — confirm.
nltk.download('punkt')
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.translate.bleu_score import SmoothingFunction
from bert_score import BERTScorer 
# Shared smoothing helper; its `method4` is passed to sentence_bleu in
# all_metrics().
cc = SmoothingFunction()
# Shared English BERTScore scorer with baseline rescaling enabled; created once
# here and reused by all_metrics().
scorer = BERTScorer(lang="en", rescale_with_baseline=True)

In [None]:
def all_metrics(s1, s2):
    """Score a candidate text against a reference with BERTScore, BLEU and NUBIA.

    Args:
        s1: Reference (ground-truth) text.
        s2: Candidate (predicted) text.

    Returns:
        A 4-tuple ``(bert, bleu, semantic_relation, logical_agreement)``:
        the mean of the values returned by ``scorer.score`` as a Python
        float, the smoothed sentence-level BLEU score, and the NUBIA
        features stored under the ``'semantic_relation'`` and
        ``'logical_agreement'`` keys.
    """
    reference_tokens = word_tokenize(s1)
    candidate_tokens = word_tokenize(s2)
    bleu_score = nltk.translate.bleu_score.sentence_bleu(
        [reference_tokens],
        candidate_tokens,
        smoothing_function=cc.method4,
        auto_reweigh=True,
    )

    # BERTScorer.score expects (candidates, references). Pass the prediction
    # first so precision and recall are not swapped; the order matters because
    # baseline rescaling is applied to each component separately.
    bert_parts = scorer.score([s2], [s1])
    # Collapse the returned score tensors into a single float by averaging.
    bert_score = (sum(bert_parts) / len(bert_parts)).item()

    features = nubia.score(s1, s2, get_features=True)['features']

    return (
        bert_score,
        bleu_score,
        features['semantic_relation'],
        features['logical_agreement'],
    )

In [None]:
import pandas as pd
from tqdm import tqdm

In [None]:
# Per-row score accumulators: one independent list per metric, filled by the
# scoring loop and later stored as DataFrame columns.
bleu = []
bert = []
semantic_relation = []
logical_agreement = []

In [None]:
# Load the predictions table; the scoring loop reads its 'ground_truth' and
# 'prediction' columns.
data = pd.read_csv('predictions.csv')

In [None]:
# Score every row of `data` and collect each metric in its own list.
# Passing `total` lets tqdm show progress without first copying all rows into
# a list.
for _, row in tqdm(data.iterrows(), total=len(data)):
    # all_metrics returns (bert, bleu, semantic_relation, logical_agreement).
    br, bl, sr, la = all_metrics(row['ground_truth'], row['prediction'])
    bleu.append(bl)
    bert.append(br)
    semantic_relation.append(sr)
    logical_agreement.append(la)

In [None]:
# Attach each list of collected scores to the table as a new column, in the
# order: BLEU, BERTScore, semantic relation, logical agreement.
score_columns = {
    'bleu_score': bleu,
    'bert_score': bert,
    'semantic_relation': semantic_relation,
    'logical_agreement': logical_agreement,
}
for column_name, column_values in score_columns.items():
    data[column_name] = column_values

In [None]:
# Display pandas' summary statistics for the scored table.
data.describe()

In [None]:
# Write the table, including the new score columns, without the index.
# NOTE: this overwrites the same 'predictions.csv' that was read as input.
data.to_csv('predictions.csv', index=False)