In [1]:
import pandas as pd
import numpy as np
import regex #xd


In [2]:
# Reference CSV: loaded into `dfy` below; its `id` and `steps` columns are read later.
DFY = "_validation.csv"
# Results CSV: loaded into `dfg` below; its `target_recipe` and `response` columns are read later.
# Alternative results file, currently disabled:
# DFG = "small_validation_pe_results_p.csv"
DFG = "small_validation_cbr_00_results_p.csv"

In [3]:
# Reference frame first, then the results frame for the run selected via DFG.
dfy = pd.read_csv(filepath_or_buffer=DFY)
dfg = pd.read_csv(filepath_or_buffer=DFG)

In [4]:
def validate_datestet(df1:pd.DataFrame, df2:pd.DataFrame):
    """Check that two frames are aligned row by row on recipe id.

    Args:
        df1: Frame with a ``target_recipe`` column.
        df2: Frame with an ``id`` column.

    Returns:
        True when ``df1.target_recipe`` and ``df2.id`` have the same length and
        hold equal values at every position, False otherwise.
    """
    # np.array_equal also covers a length mismatch, where a bare element-wise
    # `==` either raises or collapses to a single False depending on the
    # NumPy version.
    return bool(
        np.array_equal(df1["target_recipe"].to_numpy(), df2["id"].to_numpy())
    )

# Confirm dfg.target_recipe and dfy.id line up row by row before the texts are compared.
validate_datestet(dfg, dfy)

True

In [5]:
# Reference texts come from the `steps` column, generated texts from `response`.
y = dfy["steps"].values
pred = dfg["response"].values

# Display both lengths side by side.
(len(y), len(pred))

(2125, 2125)

In [6]:
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import meteor_score
from langchain_community.embeddings import GPT4AllEmbeddings
from sklearn.metrics.pairwise import cosine_similarity


class Validation():
    """Count generated texts that clear every configured similarity threshold.

    BLEU-2/3/4, METEOR and embedding cosine similarity are computed for each
    (original, generated) pair. A pair is counted only when all five scores
    reach the thresholds read from ``experiment_configs.validation``.
    """

    def __init__(self, experiment_configs) -> None:
        """Keep the ``validation`` and ``prompt`` sections of the config.

        Args:
            experiment_configs: Object exposing ``validation`` and ``prompt``
                attributes. ``validation`` must provide ``bleu_4_threshold``,
                ``bleu_3_threshold``, ``bleu_2_threshold``,
                ``meteor_threshold`` and ``cosine_similarity_threshold``.
        """
        self.validation_configs = experiment_configs.validation
        self.prompt_configs = experiment_configs.prompt
        # The embedding model is created on first use and then reused.
        self._embedder = None

    def _get_embedder(self):
        """Return the shared GPT4AllEmbeddings instance, creating it once."""
        # getattr keeps this working for instances built without __init__.
        embedder = getattr(self, "_embedder", None)
        if embedder is None:
            embedder = GPT4AllEmbeddings()
            self._embedder = embedder
        return embedder

    def _cosine_similarity(self, original:str, generated:str) -> float:
        """Cosine similarity of the two texts' embeddings, rounded to 6 places.

        Args:
            original: Reference text.
            generated: Generated text.

        Returns:
            The similarity as a plain float.
        """
        embedder = self._get_embedder()
        X = embedder.embed_query(generated)
        Y = embedder.embed_query(original)
        # cosine_similarity returns a 1x1 matrix for a single pair; take the
        # scalar so the result is a plain float for round() and `>=`.
        return round(float(cosine_similarity([X], [Y])[0][0]), 6)

    def _bleu(self, original:str, generated:str, version:int=4) -> float:
        """Sentence-level BLEU on whitespace tokens, rounded to 6 places.

        Args:
            original: Reference text.
            generated: Hypothesis text.
            version: 2 or 3 selects BLEU-2 / BLEU-3 weights; any other value
                uses the BLEU-4 weights.

        Returns:
            The BLEU score.
        """
        # BLEU-3 weights are 0.33 each, unchanged from earlier runs.
        weights_by_version = {
            3: [0.33, 0.33, 0.33],
            2: [0.5, 0.5],
        }
        weights = weights_by_version.get(version, [0.25, 0.25, 0.25, 0.25])

        return round(sentence_bleu([original.split()], generated.split(), weights=weights), 6)

    def _meteor(self, original:str, generated:str) -> float:
        """METEOR score on whitespace tokens, rounded to 6 places.

        Args:
            original: Reference text.
            generated: Hypothesis text.

        Returns:
            The METEOR score.
        """
        return round(meteor_score([original.split()], generated.split()), 6)

    def validate(self, original, experiment_name):
        """Count the generated responses that pass every metric threshold.

        Generated responses are read from the ``response`` column of
        ``./logs/{experiment_name}_results.csv`` and paired positionally with
        ``original['steps']``.

        Args:
            original: Mapping or frame whose ``'steps'`` entry has ``to_list()``.
            experiment_name: Name used to build the results CSV path.

        Returns:
            Tuple ``(passed, total)`` where ``total`` is the number of
            original steps.

        Raises:
            ValueError: If the number of generated responses differs from the
                number of original steps.
        """
        # The Series method is `to_list`; the previous spelling `to_lsit`
        # raised AttributeError on every call.
        generated = pd.read_csv(f'./logs/{experiment_name}_results.csv')['response'].to_list()
        original_steps = original['steps'].to_list()

        # zip() would silently drop the unmatched tail and skew the ratio.
        if len(generated) != len(original_steps):
            raise ValueError(
                f"expected {len(original_steps)} generated responses, "
                f"got {len(generated)}"
            )

        thresholds = self.validation_configs
        passed = 0

        for reference, candidate in zip(original_steps, generated):
            # Same order as before: the embedding comparison stays last so
            # `and` short-circuits before reaching it when BLEU/METEOR fail.
            if (
                self._bleu(reference, candidate, version=4) >= thresholds.bleu_4_threshold and
                self._bleu(reference, candidate, version=3) >= thresholds.bleu_3_threshold and
                self._bleu(reference, candidate, version=2) >= thresholds.bleu_2_threshold and
                self._meteor(reference, candidate) >= thresholds.meteor_threshold and
                self._cosine_similarity(reference, candidate) >= thresholds.cosine_similarity_threshold
            ):
                passed += 1

        return passed, len(original_steps)
    

In [7]:
from nltk.translate.bleu_score import sentence_bleu

def score_bleu(pred, y, type=2, tokenize=list):
    """Average sentence-level BLEU over positionally paired texts.

    NOTE: regardless of the parameter names, each element of the FIRST
    argument is used as the BLEU reference and each element of the SECOND as
    the hypothesis. The calls in this notebook pass ``(y, pred)`` accordingly.

    Args:
        pred: Iterable of strings, used as references (see note above).
        y: Iterable of strings, used as hypotheses (see note above).
        type: N-gram order; one of 2, 3 or 4.
        tokenize: Callable that turns one string into a token sequence. The
            default ``list`` splits into single characters, so scores are
            character-level BLEU; pass ``str.split`` for whitespace tokens.

    Returns:
        Mean BLEU over all pairs, or NaN when there are no pairs.

    Raises:
        ValueError: If ``type`` is not 2, 3 or 4.
    """
    # BLEU-3 weights are 0.33 each, unchanged so scores stay comparable with
    # earlier runs.
    weights_by_order = {
        4: [0.25, 0.25, 0.25, 0.25],
        3: [0.33, 0.33, 0.33],
        2: [0.5, 0.5],
    }
    # Previously an unsupported order left `weights` unbound and surfaced as
    # an UnboundLocalError inside the loop.
    if type not in weights_by_order:
        raise ValueError(f"unsupported BLEU order {type!r}; expected 2, 3 or 4")
    weights = weights_by_order[type]

    scores = [
        sentence_bleu([tokenize(reference)], tokenize(hypothesis), weights)
        for reference, hypothesis in zip(pred, y)
    ]

    # Averaging an empty list yields NaN with a RuntimeWarning; return it
    # explicitly without the warning.
    if not scores:
        return float("nan")
    return np.average(scores)
    
def _embedding_cosine_similarity(x:str, y:str, tokenizer) -> float:
    X = tokenizer.embed_query(x)
    Y = tokenizer.embed_query(y)
    return cosine_similarity([X], [Y])

def embedding_cosine_similarity(x:str, y:str) -> float:
    """Mean embedding cosine similarity over positionally paired items.

    ``x`` and ``y`` are iterated together with ``zip``; each pair is scored by
    ``_embedding_cosine_similarity`` using one shared ``GPT4AllEmbeddings``
    instance, and the scores are averaged with ``np.average``.
    """
    embedder = GPT4AllEmbeddings()
    pair_scores = [
        _embedding_cosine_similarity(left, right, embedder)
        for left, right in zip(x, y)
    ]
    return np.average(pair_scores)

In [8]:
# BLEU-2, BLEU-3 and BLEU-4 over the same (y, pred) inputs, computed in that order.
bl2, bl3, bl4 = (score_bleu(y, pred, order) for order in (2, 3, 4))

In [9]:
# Show the three BLEU averages together, in the order BLEU-2, BLEU-3, BLEU-4.
bl2, bl3, bl4


(0.5288748034721343, 0.4460179789180792, 0.37680758056038033)

In [10]:
# Mean embedding cosine similarity over all (y, pred) pairs; the function creates one GPT4AllEmbeddings instance.
embedding_cosine_similarity(y, pred)

bert_load_from_file: gguf version     = 2
bert_load_from_file: gguf alignment   = 32
bert_load_from_file: gguf data offset = 695552
bert_load_from_file: model name           = BERT
bert_load_from_file: model architecture   = bert
bert_load_from_file: model file type      = 1
bert_load_from_file: bert tokenizer vocab = 30522


0.7598071288672484

In [11]:
# Scratch notes comparing runs; the bare string is also echoed as this cell's output.
# The CBR_00 entries below are still blank. The outputs recorded earlier in this
# notebook, with DFG pointing at the cbr_00 results file, appear to be
# embedding_sim 0.75981, bl2 0.52887, bl3 0.44602, bl4 0.37681 - TODO copy them in.
"""
PE:
embedding_sim: 0.77055
bl2: 0.54114
bl3: 0.45684
bl4: 0.38604

CBR_00:
embedding_sim: 
bl2: 
bl3: 
bl4: 


"""

'\nPE:\nembedding_sim: 0.77055\nbl2: 0.54114\nbl3: 0.45684\nbl4: 0.38604\n\nCBR_00:\nembedding_sim: \nbl2: \nbl3: \nbl4: \n\n\n'