In [1]:
import pandas as pd
import numpy as np
import regex #dev
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import meteor_score
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer, util



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from nltk.translate.bleu_score import sentence_bleu

def score_bleu(pred:str, y:str, type=2): 
    if type == 4:
        weights = [0.25, 0.25, 0.25, 0.25]
    elif type == 3:
        weights = [0.33, 0.33, 0.33]
    elif type == 2:
        weights = [0.5, 0.5]
    res = []
    for xe, ye in zip(pred, y):
        res.append(sentence_bleu([list(xe)], list(ye), weights))
    
    return np.average(res)
    
def _embedding_cosine_similarity(x:str, y:str, embedding_fun) -> float:
    X = embedding_fun.embed_query(x)
    Y = embedding_fun.embed_query(y)
    return cosine_similarity([X], [Y])

def embedding_cosine_similarity(x:str, y:str, embedding_fun) -> float:
    res = []
    for xe, ye in zip(x, y):
        res.append(_embedding_cosine_similarity(xe, ye, embedding_fun))
    return np.average(res)

In [3]:
from pprint import pprint


def validate(val_file, pred_file, emb_fun):
    dfy = pd.read_csv(val_file)
    dfg = pd.read_csv(pred_file)

    dfy = dfy[:1000]
    dfg = dfg[:1000]

    y = dfy.steps.values
    # y = dfy.response.values
    pred = dfg.response.values

    bl2 = score_bleu(y, pred, 2)
    bl3 = score_bleu(y, pred, 3)
    bl4 = score_bleu(y, pred, 4)
    cosim = embedding_cosine_similarity(y, pred, emb_fun)
    pprint(
        {
            "dataset": pred_file,
            "cosim" : cosim,
            "bleu_2" : bl2,
            "bleu_3" : bl3,
            "bleu_4" : bl4,
        }
    )
    return bl2, bl3, bl4, cosim



In [4]:
emb_fun = GPT4AllEmbeddings()
# emb_fun = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5")


bert_load_from_file: gguf version     = 2
bert_load_from_file: gguf alignment   = 32
bert_load_from_file: gguf data offset = 695552
bert_load_from_file: model name           = BERT
bert_load_from_file: model architecture   = bert
bert_load_from_file: model file type      = 1
bert_load_from_file: bert tokenizer vocab = 30522


In [5]:
VAL = "_validation_half.csv"
# PE = "validation_pe2_results.csv"
CBR = "validation_cbr_00_results.csv"
# CBR_ING = "small_validation_cbr_00_ing_results_p.csv"
# OLD_CBR = "old_small_validation_cbr_00_results_p.csv"
# CBR_NAM = "small_validation_cbr_00_nam_results_p.csv"
# CBR_LARGE = "small_validation_cbr_00_large_half_results_p.csv"

# validate(VAL, PE, emb_fun)
validate(VAL, CBR, emb_fun)
# validate(VAL, CBR_ING, emb_fun)
# validate(VAL, CBR_NAM, emb_fun)
# validate(VAL, CBR_LARGE, emb_fun)
print()

{'bleu_2': 0.5195513088029691,
 'bleu_3': 0.43677706291221,
 'bleu_4': 0.3666542954343745,
 'cosim': 0.7454890563329896,
 'dataset': 'validation_cbr_00_results.csv'}



In [6]:
# 1000 samples - LLama 7B

{'bleu_2': 0.5303783219506372,
 'bleu_3': 0.4462962309433792,
 'bleu_4': 0.3753365885749213,
        'cosim': 0.7618514457651429,
 'dataset': 'validation_pe2_results.csv'}
{'bleu_2': 0.5195513088029691,
 'bleu_3': 0.43677706291221,
 'bleu_4': 0.3666542954343745,
        'cosim': 0.7454890563329896,
 'dataset': 'validation_cbr_00_results.csv'}



{'bleu_2': 0.5303783219506372,
 'bleu_3': 0.4462962309433792,
 'bleu_4': 0.3753365885749213,
 'cosim': 0.7618514457651429,
 'dataset': 'validation_pe2_results.csv'}

In [7]:
# 1000 samples

{'bleu_2': 0.5396068827158336,
 'bleu_3': 0.45617526600881114,
 'bleu_4': 0.38571558598671957,
        'cosim': 0.7725588565018909,
 'dataset': 'small_validation_pe_results_p.csv'}

{'bleu_2': 0.542812939755022,
 'bleu_3': 0.45964949518984566,
 'bleu_4': 0.38905272937763996,
        'cosim': 0.7654708413731175,
 'dataset': 'small_validation_cbr_00_results_p.csv'}

{'bleu_2': 0.5513406686475593,
 'bleu_3': 0.4653106822819303,
 'bleu_4': 0.39302688807601166,
        'cosim': 0.7632422508541131,
 'dataset': 'small_validation_cbr_00_ing_results_p.csv'}

{'bleu_2': 0.5484066657634896,
 'bleu_3': 0.46442737156905955,
 'bleu_4': 0.3931165177905519,
        'cosim': 0.7621681662680856,
 'dataset': 'small_validation_cbr_00_nam_results_p.csv'}

{'bleu_2': 0.5509298184714941,
 'bleu_3': 0.4681376893134645,
 'bleu_4': 0.39796219635868374,
       'cosim': 0.7685492702758651,
 'dataset': 'small_validation_cbr_00_large_half_results_p.csv'}


print()




In [8]:
# 500 samples

{'bleu_2': 0.5426869728805064,
 'bleu_3': 0.458388462134935,
 'bleu_4': 0.3874877600270263,
 'cosim': 0.7707031342117534,
 'dataset': 'small_validation_pe_results_p.csv'}

{'bleu_2': 0.5438659346908847,
 'bleu_3': 0.46062494514966273,
 'bleu_4': 0.38978623748374275,
 'cosim': 0.7669587393039417,
 'dataset': 'small_validation_cbr_00_results_p.csv'}

{'bleu_2': 0.54496451882725,
 'bleu_3': 0.4593921251448247,
 'bleu_4': 0.38765187507372423,
 'cosim': 0.7590582257581593,
 'dataset': 'small_validation_cbr_00_ing_results_p.csv'}

{'bleu_2': 0.5460347176785251,
 'bleu_3': 0.4617148488887874,
 'bleu_4': 0.39058422725089476,
 'cosim': 0.7601030492891336,
 'dataset': 'small_validation_cbr_00_nam_results_p.csv'}

print()




In [9]:
# embedding_fun = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# embedding_fun = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L12-v2")
# embedding_fun = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-MiniLM-L6-v2")
# embedding_fun = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5")
# embedding_fun = GPT4AllEmbeddings()
# embedding_fun = HuggingFaceEmbeddings()

# model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# len(embedding_fun.embed_query("sdf"))
# len(model.encode("asd"))

In [10]:
# embedding_cosine_similarity(y, pred, GPT4AllEmbeddings())

In [11]:
"""
PE -----
embedding_sim: 0.77055
embedding_sim_pf: 0.72920
HuggingFaceEmbeddings: 0.82272
bl2: 0.54114
bl3: 0.45684
bl4: 0.38604

CBR_00 -----
embedding_sim: 0.76286
embedding_sim_pf: 0.72389
HuggingFaceEmbeddings: 0.81547
bl2: 0.54043
bl3: 0.45686
bl4: 0.38614

CBR_00_ing -----
embedding_sim: 0.76121
bl2: 0.54747
bl3: 0.46198
bl4: 0.39026


CBR_00 -----
embedding_sim: 0.
bl2: 0.
bl3: 0.
bl4: 0.


old_CBR_00 -----
embedding_sim: 0.75980
bl2: 0.52887
bl3: 0.44601
bl4: 0.37680


"""

'\nPE -----\nembedding_sim: 0.77055\nembedding_sim_pf: 0.72920\nHuggingFaceEmbeddings: 0.82272\nbl2: 0.54114\nbl3: 0.45684\nbl4: 0.38604\n\nCBR_00 -----\nembedding_sim: 0.76286\nembedding_sim_pf: 0.72389\nHuggingFaceEmbeddings: 0.81547\nbl2: 0.54043\nbl3: 0.45686\nbl4: 0.38614\n\nCBR_00_ing -----\nembedding_sim: 0.76121\nbl2: 0.54747\nbl3: 0.46198\nbl4: 0.39026\n\n\nCBR_00 -----\nembedding_sim: 0.\nbl2: 0.\nbl3: 0.\nbl4: 0.\n\n\nold_CBR_00 -----\nembedding_sim: 0.75980\nbl2: 0.52887\nbl3: 0.44601\nbl4: 0.37680\n\n\n'