In [17]:
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import meteor_score
from langchain_community.embeddings import GPT4AllEmbeddings
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

In [18]:
# Read the `steps` column of the CBR database CSV into a plain Python list.
validation_ds = (
    pd.read_csv('./GPT2/original_structured/cbr_database.csv')
    .loc[:, 'steps']
    .tolist()
)

In [19]:
# Shared embedding model instance. Despite its name, this object is used to
# embed text (through `embed_query`), not to tokenize it.
tokenizer = GPT4AllEmbeddings()

def _cosine_similarity(original:str, generated:str) -> float:
    """Return the cosine similarity between the embeddings of two texts.

    Both arguments are coerced with ``str()`` and embedded with the
    module-level ``tokenizer`` via ``embed_query`` (``generated`` first,
    then ``original``).

    Args:
        original: The reference text.
        generated: The text to compare against the reference.

    Returns:
        The cosine similarity of the two embeddings, rounded to 6 decimals.
    """
    generated_vec, original_vec = (
        tokenizer.embed_query(str(text)) for text in (generated, original)
    )
    # cosine_similarity works on 2-D inputs and returns a 1x1 matrix here.
    similarity_matrix = cosine_similarity([generated_vec], [original_vec])
    return round(similarity_matrix[0][0], 6)

def bleu(original:str, generated:str, version:int=4) -> float:
    """Compute the sentence-level BLEU score of ``generated`` against ``original``.

    Both texts are tokenized by whitespace splitting. The n-gram orders are
    weighted uniformly, and the weights always sum to exactly 1.

    Args:
        original: The reference text.
        generated: The hypothesis text to score.
        version: Highest n-gram order to use (2, 3 or 4). Any other value
            falls back to BLEU-4.

    Returns:
        The BLEU score rounded to 6 decimals.
    """
    # Exact fractions: the previous 0.33 weights for BLEU-3 summed to 0.99,
    # which slightly distorted the geometric mean of the n-gram precisions.
    uniform_weights = {
        2: (1 / 2, 1 / 2),
        3: (1 / 3, 1 / 3, 1 / 3),
        4: (1 / 4, 1 / 4, 1 / 4, 1 / 4),
    }
    weights = uniform_weights.get(version, uniform_weights[4])

    score = sentence_bleu([original.split()], generated.split(), weights=weights)
    return round(score, 6)

def meteor(original:str, generated:str) -> float:
    """Compute the METEOR score of ``generated`` against ``original``.

    Both texts are tokenized by whitespace splitting; ``original`` is passed
    as the single reference.

    Args:
        original: The reference text.
        generated: The hypothesis text to score.

    Returns:
        The METEOR score rounded to 6 decimals.
    """
    reference_tokens = original.split()
    hypothesis_tokens = generated.split()
    raw_score = meteor_score([reference_tokens], hypothesis_tokens)
    return round(raw_score, 6)

In [20]:
import random

SEED = 42       # fixed so the sampled pairs are the same on every run
N_PAIRS = 1000  # number of pairs of differing texts to collect

# Without at least two distinct values every drawn pair would be equal and
# the resampling loop below would never terminate (an empty dataset would
# instead make `choices` fail with an unhelpful IndexError).
if len(set(validation_ds)) < 2:
    raise ValueError(
        "validation_ds must contain at least two distinct values to build pairs"
    )

# Local generator: reproducible without touching the global `random` state.
rng = random.Random(SEED)

# Accumulated pairs (new_a[n], new_b[n]) whose two members differ.
new_a = []
new_b = []

# `k` is the number of pairs still missing; each round draws exactly that many.
k = N_PAIRS

while k != 0:
    # NOTE: `a` and `b` only ever hold the batch drawn in the current round;
    # the complete result lives in `new_a` / `new_b`.
    a = rng.choices(validation_ds, k=k)
    b = rng.choices(validation_ds, k=k)

    # Count the rejected (identical) pairs so they are re-drawn next round.
    k = 0
    for i, j in zip(a, b):
        if i == j:
            k += 1
        else:
            new_a.append(i)
            new_b.append(j)

In [21]:
import tqdm

In [22]:
# Cosine similarity of every sampled pair of differing texts.
scores = []

# Iterate over the accumulated pairs (`new_a` / `new_b`). `a` and `b` only
# contain the last resampling batch, which is a small subset whenever the
# sampling loop needed more than one round.
for i, j in tqdm.tqdm(zip(new_a, new_b), total=len(new_a)):
    scores.append(_cosine_similarity(i, j))

1000it [01:45,  9.46it/s]


In [23]:
import statistics

# Summary value displayed by this cell: sample mean minus sample variance.
# NOTE(review): variance is in squared units, so subtracting it from the mean
# is unusual -- confirm whether the standard deviation was intended instead.
score_mean = statistics.mean(scores)
score_variance = statistics.variance(scores)

score_mean - score_variance

0.4175513787845102