In [25]:
import sys
sys.path.append("../")
sys.path.append("../src")
from tqdm import tqdm, trange
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Load Models

In [6]:
from src.llama2 import Llama2
from src.nlp_base import NLP_base
from src.nlp_langchain import NLP_langchain

llama2

In [7]:
# Build the Llama2 pipeline over the mid-year outlook PDF (quiet mode),
# then run its train() step as a separate statement.
llama2 = Llama2(input_files=["../data/pdf/am-mid-year-outlook-2024.pdf"], verbose=False)
llama2.train()

nlp-base

In [35]:
# Build the NLP_base pipeline over the same mid-year outlook PDF (quiet mode),
# then run its train() step as a separate statement.
nlp_base = NLP_base(input_files=["../data/pdf/am-mid-year-outlook-2024.pdf"], verbose=False)
nlp_base.train()

# Load Manually Labeled Data

In [2]:
# Manually labeled evaluation set; the evaluation cells below read its
# "Question" and "Answer" columns.
questions = pd.read_csv("../data/testQ.csv")

# Fail fast with a clear message if the CSV schema is not what the evaluation
# loops expect, instead of a KeyError deep inside a long-running cell.
missing_cols = {"Question", "Answer"} - set(questions.columns)
assert not missing_cols, f"testQ.csv is missing required columns: {sorted(missing_cols)}"

llama2

In [30]:
# Evaluate llama2: for every labeled (question, answer) pair, query the model
# 10 times and average
#   * the score returned alongside the response by `llama2.answer`, and
#   * the cosine similarity between the embedded response and the embedded
#     labeled answer.
sim_q_r, sim_a_r = [], []
iterator = questions[["Question", "Answer"]].to_records(index=False)
for q, a in tqdm(iterator):
    # The labeled answer does not change across repeats, so embed it once per
    # question instead of once per repeat.
    # NOTE(review): assumes `_embed` is deterministic for a given input — confirm.
    answer_embedding = [llama2.embed_model._embed(a)]
    scores_q, scores_a = [], []
    for _ in range(10):  # repeated runs; the per-question result is their mean
        response, score = llama2.answer(q)
        scores_q.append(score)
        response_embedding = [llama2.embed_model._embed(str(response))]
        # cosine_similarity returns a 1x1 matrix for two single-row inputs.
        scores_a.append(cosine_similarity(answer_embedding, response_embedding)[0][0])
    sim_q_r.append(np.mean(scores_q))
    sim_a_r.append(np.mean(scores_a))

100%|██████████| 15/15 [21:54<00:00, 87.64s/it] 


In [36]:
# One row per question for the llama2 run: tag the model name and attach the
# two averaged score lists currently held in sim_q_r / sim_a_r.
res_llama2 = questions[["Question"]].assign(
    model="llama2",
    similarity_response_question=sim_q_r,
    similarity_response_answer=sim_a_r,
)

nlp-base

In [37]:
# Evaluate nlp_base: for every labeled (question, answer) pair, query the
# model 10 times and average
#   * the score returned alongside the response by `nlp_base.answer`, and
#   * the cosine similarity between the embedded response and the embedded
#     labeled answer.
# NOTE(review): embeddings come from `llama2.embed_model`, so this cell needs
# `llama2` to be loaded — presumably intentional so both models are scored in
# the same embedding space; confirm.
sim_q_r, sim_a_r = [], []
iterator = questions[["Question", "Answer"]].to_records(index=False)
for q, a in tqdm(iterator):
    # The labeled answer does not change across repeats, so embed it once per
    # question instead of once per repeat.
    # NOTE(review): assumes `_embed` is deterministic for a given input — confirm.
    answer_embedding = [llama2.embed_model._embed(a)]
    scores_q, scores_a = [], []
    for _ in range(10):  # repeated runs; the per-question result is their mean
        response, score = nlp_base.answer(q)
        scores_q.append(score)
        response_embedding = [llama2.embed_model._embed(str(response))]
        # cosine_similarity returns a 1x1 matrix for two single-row inputs.
        scores_a.append(cosine_similarity(answer_embedding, response_embedding)[0][0])
    sim_q_r.append(np.mean(scores_q))
    sim_a_r.append(np.mean(scores_a))

100%|██████████| 15/15 [00:11<00:00,  1.33it/s]


In [38]:
# One row per question for the nlp_base run: tag the model name and attach the
# two averaged score lists currently held in sim_q_r / sim_a_r.
res_nlp_base = questions[["Question"]].assign(
    model="nlp_base",
    similarity_response_question=sim_q_r,
    similarity_response_answer=sim_a_r,
)

Merge and save results

In [45]:
# Stack both per-model frames, order rows by original question position and
# then by model name so each question's two rows sit next to each other, and
# drop the helper "index" column before writing the table to disk.
res = (
    pd.concat([res_llama2, res_nlp_base])
    .reset_index()
    .sort_values(["index", "model"], ignore_index=True)
    .drop("index", axis=1)
)
res.to_csv("./test_results.csv", index=False)

Interpretation of Columns
- `similarity_response_answer`: cosine similarity between the response and the manually labeled answer (mean over 10 runs per question)
- `similarity_response_question`: cosine similarity between the response and the input question (mean over 10 runs per question)

In [46]:
# Display the merged comparison table (two rows per question, one per model).
res

Unnamed: 0,Question,model,similarity_response_question,similarity_response_answer
0,How much has Norway's policy rate changed over...,llama2,0.860554,0.607631
1,How much has Norway's policy rate changed over...,nlp_base,0.682312,0.595814
2,Is the shift to higher interest rates posing c...,llama2,0.880874,0.899823
3,Is the shift to higher interest rates posing c...,nlp_base,0.754041,0.860071
4,Can small caps finally outperform?,llama2,0.796116,0.853054
5,Can small caps finally outperform?,nlp_base,0.668756,0.796996
6,What is “normal” for private equity?,llama2,0.847603,0.876351
7,What is “normal” for private equity?,nlp_base,0.755375,0.792823
8,What is the key takeaway of Goldman's mid-year...,llama2,0.83078,0.712899
9,What is the key takeaway of Goldman's mid-year...,nlp_base,0.761847,0.771109
