STEP 1: Preprocess all test.json and op.json files

In [11]:
from pathlib import Path
# Directory holding the test*/op* JSON files, relative to the current working
# directory. Forward slashes are understood by pathlib on every platform; the
# previous backslash-separated literal was only split into components on Windows.
path = "./models/evaluation_pipelines/json_files"
json_files_path = Path(path)

In [12]:
# Path.glob() returns a one-shot generator in arbitrary order. Materialising it
# as a sorted list keeps the file order deterministic and lets later cells be
# re-run without silently iterating over an already exhausted generator.
test_json_files = sorted(json_files_path.glob("test*.json"))
op_json_files = sorted(json_files_path.glob("op*.json"))

In [13]:
from models.evaluation_pipelines.utils import preprocess_op_json, preprocess_test_json
# Run every discovered file through its matching preprocessing helper.
# NOTE(review): only the bare file name (fp.name) is handed over, so the helpers
# presumably resolve it against the json_files directory themselves - confirm.
_test_json_resp = list(
    preprocess_test_json(test_file.name) for test_file in test_json_files
)
_op_json_resp = list(
    preprocess_op_json(op_file.name) for op_file in op_json_files
)

In [14]:
# Index both sets of preprocessed responses by their query string. If a query
# occurs more than once, the last response seen for it wins.
test_query_products_mapping = dict(
    (entry['query'], entry['input_products']) for entry in _test_json_resp
)
op_query_products_mapping = dict(
    (entry['query'], entry['gold_product_recommendations']) for entry in _op_json_resp
)

In [15]:
# Every test query needs gold recommendations from an op file. Check this up
# front so a mismatch is reported with all affected queries at once instead of
# an opaque KeyError for the first one encountered.
missing_queries = [
    query for query in test_query_products_mapping
    if query not in op_query_products_mapping
]
if missing_queries:
    raise KeyError(
        f"No gold product recommendations found for queries: {missing_queries}"
    )

# One record per query: the candidate products plus the gold recommendations.
dataset = [
    {
        "query": query,
        "input_products": products,
        "gold_product_recommendations": op_query_products_mapping[query],
    }
    for query, products in test_query_products_mapping.items()
]

In [16]:
import json
# Persist the joined evaluation records next to the source JSON files.
evaluation_dataset_file = json_files_path / "evaluation_dataset.json"
with evaluation_dataset_file.open('w') as file:
    json.dump(dataset, file)

STEP 2: NDCG Dataset Creation: Use the same dataset as above, but use an LLM to get relevance scores and add them to each dataset record

In [17]:
from models.rel_score_calculator import RelevanceScoreCalculator

from models.llm_modules.ner_model import NERModel
from models.llm_modules.product_evaluation_model import RelScoreProductEvaluationsModel

# Both models from models.llm_modules are created with client_type="sync".
ner_model = NERModel(client_type="sync")
product_evaluation_model = RelScoreProductEvaluationsModel(client_type="sync")

# The relevance scorer is wired up with the two models created above.
rel_scorer = RelevanceScoreCalculator(
    ner_model=ner_model, product_evaluation_model=product_evaluation_model
)

In [18]:
def create_ndcg_dataset(dataset: list[dict], scorer=None) -> list[dict]:
    """Attach a relevance score to every gold recommendation of each record.

    Args:
        dataset: Records with the keys ``query``, ``input_products`` and
            ``gold_product_recommendations``; each gold recommendation is a
            dict carrying a ``product_id``.
        scorer: Object exposing ``predict(query, products)`` that returns an
            iterable of dicts with ``product_id`` and ``relevance_score``.
            When omitted, the notebook-level ``rel_scorer`` is used.

    Returns:
        One dict per input record with the keys ``query``, ``documents``
        (the record's ``input_products``) and ``gold_product_recommendations``
        (a tuple of the gold recommendations, each extended with its
        ``relevance_score``, in the order returned by the scorer).

    Raises:
        KeyError: If a record lacks an expected key, or the scorer returns a
            ``product_id`` that is not among the record's gold recommendations.
    """
    if scorer is None:
        # Fall back to the scorer configured earlier in the notebook.
        scorer = rel_scorer

    ndcg_dataset = []
    for record in dataset:
        query = record['query']
        input_products = record['input_products']
        gold_recs = record['gold_product_recommendations']

        # Look-up table so each scored entry can be merged back into the full
        # gold recommendation it belongs to.
        gold_recs_by_id = {prod['product_id']: prod for prod in gold_recs}
        scored_recs = scorer.predict(query, gold_recs)

        # Build new dicts rather than mutating the input recommendations.
        updated_gold_recs = tuple(
            {**gold_recs_by_id[scored['product_id']], "relevance_score": scored['relevance_score']}
            for scored in scored_recs
        )
        ndcg_dataset.append({
            "query": query,
            "documents": input_products,
            "gold_product_recommendations": updated_gold_recs,
        })
    return ndcg_dataset

In [19]:
# Score every record, then store the result alongside the other JSON files.
ndcg_dataset = create_ndcg_dataset(dataset)
ndcg_dataset_file = json_files_path / "ndcg_dataset.json"
with ndcg_dataset_file.open('w') as file:
    json.dump(ndcg_dataset, file)

🍩 https://wandb.ai/srinzz-d2r-ai-labs/groq-weave-experiment/r/call/0194735b-ea0f-74f1-8817-9947d7ee3786
🍩 https://wandb.ai/srinzz-d2r-ai-labs/groq-weave-experiment/r/call/0194735b-f3ff-7600-a312-adb7868deee0
🍩 https://wandb.ai/srinzz-d2r-ai-labs/groq-weave-experiment/r/call/0194735b-fcea-7452-9f62-91fa3d0d1725
🍩 https://wandb.ai/srinzz-d2r-ai-labs/groq-weave-experiment/r/call/0194735c-3dee-7471-acce-0154685f006f
🍩 https://wandb.ai/srinzz-d2r-ai-labs/groq-weave-experiment/r/call/0194735c-c014-7363-b062-ca589d25e893
🍩 https://wandb.ai/srinzz-d2r-ai-labs/groq-weave-experiment/r/call/0194735d-324f-7342-9d73-690cbccdad1a
🍩 https://wandb.ai/srinzz-d2r-ai-labs/groq-weave-experiment/r/call/0194735d-97b3-7501-b75e-4fa0ba673ace
🍩 https://wandb.ai/srinzz-d2r-ai-labs/groq-weave-experiment/r/call/0194735e-04ba-7f01-a27d-82224868dbfe
🍩 https://wandb.ai/srinzz-d2r-ai-labs/groq-weave-experiment/r/call/0194735e-87c5-77a1-81a2-05af378ac55f
🍩 https://wandb.ai/srinzz-d2r-ai-labs/groq-weave-experiment/r/ca