In [43]:
import numpy as np
def compute_metrics_manual(predicted_asins: dict[str, float], true_asin: str, k=10):
    '''
        Compute metrics manually for one user
        compute r@k, mrr@k, ndcg@k
    '''
    recall, mrr, dcg = 0, 0, 0

    # Sort predicted_asins by score (descending), predicted_asins is a dictionary
    predicted_asins = sorted(predicted_asins.items(), key=lambda x: x[1], reverse=True)
    for i, (asin, _) in enumerate(predicted_asins[:k]):
        if asin == true_asin:
            recall = 1
            rank = i + 1  # 1-based index
            mrr = 1 / rank
            dcg = 1 / np.log2(rank + 1)  # DCG formula
            break  # Stop searching after the first match

    idcg = 1 # Best possible DCG is 1/log_2(2) if found, else avoid zero-division see formula \sum_{rank i} rel_i/log_2(rank_i+1), rank starts from 1
    ndcg = dcg if recall else 0  # Normalize only if recall@k is non zero
    return recall, mrr, ndcg

In [44]:
# this is for testing manual with the package implementations later (ranx, ir_measures)
qrels_dict =  {'reviewer_1': {'asin_1': 1}, 'reviewer_2': {'asin_2': 1 }} # target item for reviewer_1 is asin_1, reviewer_2 is asin_2

run_dict = {'reviewer_1': {'asin_1': 5, 'asin_2': 10, 'asin_3': 40, 'asin_4': 3}, \
 'reviewer_2': {'asin_10': 5, 'asin_2': 11, 'asin_3':0.1, 'asin_4': 0.2}} 


print(compute_metrics_manual(run_dict['reviewer_1'], 'asin_1', k=2))
print(compute_metrics_manual(run_dict['reviewer_2'], 'asin_2', k=2))
# run_dict['reviewer_1']


(0, 0, 0)
(1, 1.0, np.float64(1.0))


In [None]:
import pandas as pd
from ranx import Qrels, Run, evaluate
qrels_dict =  {'reviewer_1': {'asin_1': 1}, 'reviewer_2': {'asin_2': 1 }}

run_list = [{'reviewer_id': 'reviewer_1', 'asin': 'asin_1', 'score': 5.1}, \
            {'reviewer_id': 'reviewer_1', 'asin': 'asin_1', 'score': 1.1}, \
            {'reviewer_id': 'reviewer_1', 'asin': 'asin_2', 'score': 10.1}, \
            {'reviewer_id': 'reviewer_1', 'asin': 'asin_2', 'score': 100.1}, \
            
            {'reviewer_id': 'reviewer_2', 'asin': 'asin_2', 'score': 1}, \
            {'reviewer_id': 'reviewer_2', 'asin': 'asin_1', 'score': 10},
            {'reviewer_id': 'reviewer_2', 'asin': 'asin_1', 'score': 100}
            ]
df_run = pd.DataFrame(run_list)
print(df_run)


qrels = Qrels(qrels_dict)
run = Run.from_df(df_run, q_id_col='reviewer_id', doc_id_col='asin', score_col='score') # score_col must be floats?
evaluate(qrels, run, ["recall@2","ndcg@2"])
run.scores

  reviewer_id    asin  score
0  reviewer_1  asin_1    5.1
1  reviewer_1  asin_1    1.1
2  reviewer_1  asin_2   10.1
3  reviewer_1  asin_2  100.1
4  reviewer_2  asin_2    1.0
5  reviewer_2  asin_1   10.0
6  reviewer_2  asin_1  100.0


defaultdict(dict,
            {'recall@2': {'reviewer_1': np.float64(1.0),
              'reviewer_2': np.float64(1.0)},
             'ndcg@2': {'reviewer_1': np.float64(0.6309297535714575),
              'reviewer_2': np.float64(0.6309297535714575)}})

In [17]:
qrels

DictType[unicode_type,DictType[[unichr x 6],int64]<iv=None>]<iv=None>({reviewer_1: {asin_1: 1}, reviewer_2: {asin_2: 1}})

In [None]:
from ranx import Qrels, Run, evaluate

# ranx example:
qrels_dict =  {'reviewer_1': {'asin_12': 1, 'asin_25': 1}, 'reviewer_2': {'asin_11': 1, 'asin_22': 1}}
run_dict = {'reviewer_1': {'asin_12': 0.9, 'asin_23': 0.8, 'asin_25': 0.7, 'asin_36': 0.6, 'asin_32': 0.5, 'asin_35': 0.4}, 
 'reviewer_2': {'asin_12': 0.9, 'asin_11': 0.8, 'asin_25': 0.7, 'asin_36': 0.6, 'asin_22': 0.5, 'asin_35': 0.4}}

## default Dict[str, Dict[str, int]] # from_dict(d) staticmethod
# Convert a Python dictionary in form of {q_id: {doc_id: score}} to ranx.Qrels.

## from_dict(d, name=None) staticmethod # Convert a Python dictionary in form of {q_id: {doc_id: score}} to ranx.Run.

qrels = Qrels(qrels_dict)
run = Run(run_dict)

evaluate(qrels, run, ["recall@2-l1"]) #can also use -l1 to ignore those docs in qrel with
#relevance < 1 https://github.com/AmenRa/ranx/issues/58#issuecomment-1798219508


np.float64(0.5)

In [7]:
qrels_dict

{'reviewer_1': {'asin_1': 10}, 'reviewer_2': {'asin_2': 10}}

In [None]:
qrels = Qrels(qrels_dict)
run = Run(run_dict)

evaluate(qrels, run, ["recall@3","mrr@3", "ndcg@3"]) 

run.scores

defaultdict(dict,
            {'recall@3': {'reviewer_1': np.float64(1.0),
              'reviewer_2': np.float64(1.0)},
             'mrr@3': {'reviewer_1': np.float64(0.3333333333333333),
              'reviewer_2': np.float64(1.0)},
             'ndcg@3': {'reviewer_1': np.float64(0.5),
              'reviewer_2': np.float64(1.0)}})

In [None]:
# run.mean_scores

{'recall@4': np.float64(0.75),
 'ndcg@4': np.float64(0.7569557807573265),
 'recall@3': np.float64(0.75),
 'ndcg@3': np.float64(0.7569557807573265)}

In [45]:
import pandas as pd

# Convert qrels_dict to a DataFrame with columns: query_id, doc_id, relevance
qrels_data = [
    {"query_id": query, "doc_id": doc, "relevance": relevance}
    for query, docs in qrels_dict.items()
    for doc, relevance in docs.items()
]
qrels_df = pd.DataFrame(qrels_data)

# Convert run_dict to a DataFrame with columns: query, doc, score
run_data = [
{"query_id": query, "doc_id": doc, "score": score}
    for query, docs in run_dict.items()
    for doc, score in docs.items()
]
run_df = pd.DataFrame(run_data)

# Display the DataFrames
print("Qrels DataFrame:")
print(qrels_df)
print("\nRun DataFrame:")
print(run_df)

Qrels DataFrame:
     query_id  doc_id  relevance
0  reviewer_1  asin_1          1
1  reviewer_2  asin_2          1

Run DataFrame:
     query_id   doc_id  score
0  reviewer_1   asin_1    5.0
1  reviewer_1   asin_2   10.0
2  reviewer_1   asin_3   40.0
3  reviewer_1   asin_4    3.0
4  reviewer_2  asin_10    5.0
5  reviewer_2   asin_2   11.0
6  reviewer_2   asin_3    0.1
7  reviewer_2   asin_4    0.2


In [28]:
# checking robustness to shuffling
qrels_df = qrels_df.sample(frac=1, random_state=42).reset_index(drop=True)
runs_df = run_df.sample(frac=1, random_state=42).reset_index(drop=True)

qrels = Qrels.from_df(qrels_df, q_id_col='query_id', doc_id_col='doc_id', score_col='relevance')
run = Run.from_df(run_df, q_id_col='query_id', doc_id_col='doc_id', score_col='score')


In [29]:
evaluate(qrels, run, ["recall@3","ndcg@3"])
run.mean_scores

{'recall@3': np.float64(0.75), 'ndcg@3': np.float64(0.7569557807573265)}

In [23]:
# https://ir-measur.es/en/latest/getting-started.html
import ir_measures
metrics = [ir_measures.nDCG@4, ir_measures.R@4, ir_measures.nDCG@2]

display(qrels_df.head()) # MUST have column names as query_id, doc_id, relevance
display(run_df.head()) # MUST have column names as query_id, doc_id, score

qrels_df.columns = ['query_id', 'doc_id', 'relevance']
run_df.columns = ['query_id', 'doc_id', 'score']

evaluator = ir_measures.evaluator(metrics, qrels_df)
result = evaluator.calc_aggregate(run_df)
print(result)

Unnamed: 0,query_id,doc_id,relevance
0,reviewer_1,asin_12,5
1,reviewer_1,asin_25,3
2,reviewer_2,asin_11,6
3,reviewer_2,asin_22,1


Unnamed: 0,query_id,doc_id,score
0,reviewer_1,asin_12,0.9
1,reviewer_1,asin_23,0.8
2,reviewer_1,asin_25,0.7
3,reviewer_1,asin_36,0.6
4,reviewer_1,asin_32,0.5


{R@4: 0.75, nDCG@4: 0.7569557807573265, nDCG@2: 0.648146419026997}


In [42]:
# checking this case where runlist has duplciate asins with different scores
import pandas as pd
import ir_measures
metrics = [ir_measures.R@2]

qrels_list = [{'reviewer_id':'reviewer_1', 'asin':'asin_1', 'relevance': 1} , \
             {'reviewer_id':'reviewer_2', 'asin':'asin_2', 'relevance': 1} 
            ]

run_list = [{'reviewer_id': 'reviewer_1', 'asin': 'asin_1', 'score': 5.1}, \
            {'reviewer_id': 'reviewer_1', 'asin': 'asin_1', 'score': 1.1}, \
            {'reviewer_id': 'reviewer_1', 'asin': 'asin_2', 'score': 10.1}, \
            {'reviewer_id': 'reviewer_1', 'asin': 'asin_2', 'score': 100.1}, \
            
            {'reviewer_id': 'reviewer_2', 'asin': 'asin_2', 'score': 1}, \
            {'reviewer_id': 'reviewer_2', 'asin': 'asin_1', 'score': 10},
            {'reviewer_id': 'reviewer_2', 'asin': 'asin_1', 'score': 100}
            ]
qrels_df = pd.DataFrame(qrels_list)
df_run = pd.DataFrame(run_list)

df_run_sorted = df_run.sort_values(by=['reviewer_id', 'score'], ascending=[True, False])
df_run_sorted = df_run_sorted.drop_duplicates(subset=['reviewer_id', 'asin'], keep='first')

qrels_df.columns = ['query_id', 'doc_id', 'relevance']
df_run.columns = ['query_id', 'doc_id', 'score']
df_run_sorted.columns = ['query_id', 'doc_id', 'score']

evaluator = ir_measures.evaluator(metrics, qrels_df)
result = list(evaluator.iter_calc(df_run))
print(result)

result = list(evaluator.calc(df_run))
print(result)



[Metric(query_id='reviewer_1', measure=R@2, value=1.0), Metric(query_id='reviewer_2', measure=R@2, value=1.0)]
[{R@2: 1.0}, [Metric(query_id='reviewer_1', measure=R@2, value=1.0), Metric(query_id='reviewer_2', measure=R@2, value=1.0)]]


In [39]:
df_run_sorted

Unnamed: 0,reviewer_id,asin,score
3,reviewer_1,asin_2,100.1
0,reviewer_1,asin_1,5.1
6,reviewer_2,asin_1,100.0
4,reviewer_2,asin_2,1.0


# first we will evaluate the recall@1 and recall@5 
# for randomly sampling from within seen asins without replacement from the seen sequence

In [5]:
import pandas as pd
scimeta = pd.read_json('scimeta_corpus.json', orient='records', lines=True)
seqdata = pd.read_json('sci_seq_3rev.json', orient='records', lines=True)

In [6]:
scimeta.columns = ['asin', 'Title', 'Brand', 'Category', 'Price'] # only the corpus asins and their metadata
asins_compact = scimeta[['asin']].copy()
asins_compact['nlang'] = (
	"Title: " + scimeta['Title'] + ". " +
	"Brand: " + scimeta['Brand'] + ". " +
	"Category: " + scimeta['Category'] + ". " +
	"Price: " + scimeta['Price']
)
asin_dict = asins_compact.set_index('asin')['nlang'].to_dict()  # ASIN -> formatted text of the item

In [7]:
from datasets import load_from_disk
from src.utils.project_dirs import get_hfdata_dir

In [4]:
from ranx import Qrels, Run, evaluate
import numpy as np
import json
import pandas as pd
import bm25s
import Stemmer
from typing import List, Dict
from ranx import Qrels, Run, evaluate
import numpy as np
from datasets import Dataset
from datasets import load_from_disk
from src.utils.project_dirs import get_hfdata_dir, get_reviews_raw2018_dir
# from src.utils.generate_utils import generate_sequences

def get_qrels(dataset):
    '''
        dataset: typically validation or test
    '''
    qrels_dict = {}
    for row in dataset:
        qrels_dict[row['reviewer_id']] = {row['asin']: 1}
    return qrels_dict

def baseline_randrecs(dataset, k = 5):
    '''
        Randomly pick k items without replacement from seen_asins for each reviewer, if k < len(seen_asins), take all seen_asins
        dataset: typically validation or test
    '''
    run_dict = {}
    for row in dataset:
        reviewer_id = row['reviewer_id']
        seen_asins = row['seen_asins']
        rand_asins = np.random.choice(seen_asins, min(k, len(seen_asins)), replace=False)
        rand_scores = np.random.rand(len(rand_asins)) # random scores, matters only for nDCG
        run_dict[reviewer_id] = {asin: score for asin, score in zip(rand_asins, rand_scores)}
    return run_dict

def oracle_rec(dataset, k = 5):
    '''
        This is the upper limit, but useful for sanity check
        For each reviewer, pick the oracle item i.e., the target asin itself (obviosly this is unknown)
        dataset: typically validation or test
    '''
    run_dict = {}
    for row in dataset:
        reviewer_id = row['reviewer_id']
        seen_asins = row['seen_asins']
        rand_asins = np.random.choice(seen_asins, min(k-1, len(seen_asins)-1), replace=False)
        rand_scores = np.random.rand(len(rand_asins))
        run_dict[reviewer_id] = {asin: score for asin, score in zip(rand_asins, rand_scores)}
        run_dict[reviewer_id][row['asin']] = -1000 # target item, highest score
    return run_dict

In [7]:
dataset = load_from_disk(get_hfdata_dir() / "Amzn_scientific_2018")
dataset

DatasetDict({
    train: Dataset({
        features: ['reviewer_id', 'text'],
        num_rows: 10970
    })
    validation: Dataset({
        features: ['reviewer_id', 'ptext', 'text', 'seen_asins', 'asin', 'asin_text'],
        num_rows: 512
    })
    test: Dataset({
        features: ['reviewer_id', 'ptext', 'text', 'seen_asins', 'asin', 'asin_text'],
        num_rows: 10970
    })
})

In [17]:
qrels_val = Qrels(get_qrels(dataset['validation']))
qrels_test = Qrels(get_qrels(dataset['test']))

run_val = Run(baseline_randrecs(dataset['validation'], k=5))
run_test = Run(baseline_randrecs(dataset['test'], k=5))

run_val_oracle = Run(oracle_rec(dataset['validation'], k=5))
run_test_oracle = Run(oracle_rec(dataset['test'], k=5))

In [18]:
evaluate(qrels_val, run_val, ["recall@5", "ndcg@5", "mrr@5"])

{'recall@5': np.float64(0.009765625),
 'ndcg@5': np.float64(0.006956262327431349),
 'mrr@5': np.float64(0.006022135416666667)}

In [11]:
evaluate(qrels_val, run_val_oracle, ["recall@5", "ndcg@5", "mrr@5"])

{'recall@5': np.float64(1.0),
 'ndcg@5': np.float64(0.4821339169952692),
 'mrr@5': np.float64(0.3158528645833334)}

In [18]:
run_val

DictType[unicode_type,DictType[[unichr x 10],float64]<iv=None>]<iv=None>({A10042M3XO3NET: {B0002YWNLS: 0.9167622891298758, B000BQO15I: 0.7916263202551116, B005K2TXMO: 0.6140824107423796, B001HRMJ54: 0.2485111779870406, B01ENFOHN8: 0.1799188727910156}, A100WO06OQR8BQ: {B000U08ZN4: 0.9308728417770091, B00IGHFOUA: 0.887077014132285, B0001GAYRC: 0.7378372366871616, B0013B1XLA: 0.3591981457531572, B004SS8AMU: 0.27105884566154226}, A109L3WXD1SJFU: {B000WJ4XXY: 0.9745498367638358, B00020BQGK: 0.21100837284268426, B00R18XU7E: 0.15908861131997254, B00N1XCA8S: 0.09766561993306944}, A10CP7XAHJTEB5: {B00004Z4DS: 0.6702448966681489, B0000DH8I8: 0.36305549289819206, B00JZ18KNY: 0.11511498329670689}, A10KEVPK6PBH5M: {B00NB3U2BU: 0.609469356781905, B00DMI632G: 0.5570693433435427, B009IS86ZG: 0.513716508558644, B00NB3SQJU: 0.3890035648303103}, A10LZRSZJZ0LG2: {B001DDWQ4Q: 0.8139703634288779, B008BK74EG: 0.8111880536438564, B00DMI62HM: 0.7043335242066155, B006EGAIJ2: 0.5199265428314203, B005SJ51W0: 0.14

In [15]:
len(qrels_val), len(run_val)

(512, 512)

In [12]:
len(run_val.scores["recall@5"])

512

In [106]:
evaluate(qrels_test, run_test, ["recall@5", "ndcg@5", "mrr@5"])

{'recall@5': np.float64(0.007292616226071103),
 'ndcg@5': np.float64(0.004916255536019907),
 'mrr@5': np.float64(0.004115770282588878)}

In [103]:
qrels_test['A0096681Y127OL1H8W3U'], run_test_oracle['A0096681Y127OL1H8W3U']

({'B0098MLBAO': 1}, {'B00E8JOCOE': 0.12017354322291207, 'B0098MLBAO': -1000.0})

In [100]:
run_test_oracle.scores['mrr@5']['A0096681Y127OL1H8W3U']

np.float64(0.5)

In [60]:
dataset['validation'][0]

{'reviewer_id': 'A10042M3XO3NET',
 'ptext': "Below is a customer's purchase history on Amazon, listed in chronological order (earliest to latest). \nEach item is represented by the following format: Title: <item title> Category: <item category> Brand: <item brand> Price: <item price>.\nBased on this history, predict **only one** item the customer is most likely to purchase next in the same format.\n\n### Purchase history:\nTitle: FiiO D3 (D03K) Digital to Analog Audio Converter With Micca 6ft Optical Toslink Cable - 192kHz/24bit Optical and Coaxial DAC. Brand: FiiO. Category: All Electronics. Price: $22.95\nTitle: Eagle U2-51-S Red Galvanized Steel Type II Gas Safety Can with 7/8&quot; Flex Spout, 5 gallon Capacity, 13.5&quot; Height, 12.5&quot; Diameter. Brand: Eagle. Category: Amazon Home. Price: $81.49\nTitle: Eagle F-15 HDPE 10&quot; Poly Funnel For Metal Type I Safety Cans, 4&quot; Height, 8&quot; Width, 9&quot; Length. Brand: Eagle. Category: Industrial & Scientific. Price: $4.59

In [66]:
# Filter the dataset to find the reviewer with the specified ID
reviewer_data = [row for row in dataset['validation'] if row['reviewer_id'] == 'A10042M3XO3NET']
print(reviewer_data[0]['seen_asins'], reviewer_data[0]['asin']) 

reviewer_data = [row for row in dataset['test'] if row['reviewer_id'] == 'A10042M3XO3NET']
print(reviewer_data[0]['seen_asins'], reviewer_data[0]['asin']) 



['B005K2TXMO', 'B000BQO15I', 'B0002YWNLS', 'B001HRMJ54', 'B01ENFOHN8'] B00VSHBSVE
['B005K2TXMO', 'B000BQO15I', 'B0002YWNLS', 'B001HRMJ54', 'B01ENFOHN8', 'B00VSHBSVE'] B019CY4FB4


In [58]:
qrels_val['A10042M3XO3NET'], qrels_test['A10042M3XO3NET']

({'B00VSHBSVE': 1}, {'B019CY4FB4': 1})

In [50]:
for asin in dataset['test']['seen_asins'][0]:
    print(asin_dict[asin])

Title: Herbal Choice Mari Natural Toothgel, Cinnamon &amp; Baking Soda; 3.4floz Glass. Brand: Nature's Brands. Category: Health & Personal Care. Price: $14.47
Title: Pac-Kit by First Aid Only 25-450 Cotton Tipped Applicator with 6&quot; Wooden Shaft (Bag of 100). Brand: First Aid Only. Category: Industrial & Scientific. Price: Unknown
Title: Litmus pH Test Strips, Universal Application (pH 1-14), 2 Packs of 100 Strips. Brand: LabRat Supplies. Category: Industrial & Scientific. Price: Unknown


In [51]:
print(dataset['test']['text'][0])

Below is a customer's purchase history on Amazon, listed in chronological order (earliest to latest). 
Each item is represented by the following format: Title: <item title> Category: <item category> Brand: <item brand> Price: <item price>.
Based on this history, predict **only one** item the customer is most likely to purchase next in the same format.

### Purchase history:
Title: Herbal Choice Mari Natural Toothgel, Cinnamon &amp; Baking Soda; 3.4floz Glass. Brand: Nature's Brands. Category: Health & Personal Care. Price: $14.47
Title: Pac-Kit by First Aid Only 25-450 Cotton Tipped Applicator with 6&quot; Wooden Shaft (Bag of 100). Brand: First Aid Only. Category: Industrial & Scientific. Price: Unknown
Title: Litmus pH Test Strips, Universal Application (pH 1-14), 2 Packs of 100 Strips. Brand: LabRat Supplies. Category: Industrial & Scientific. Price: Unknown

### Next item:
Title: Herbal Choice Mari Natural Toothgel, Cinnamon &amp; Baking Soda; 3.4floz Glass. Brand: Nature's Brands.

In [52]:
print(dataset['test']['ptext'][0])

Below is a customer's purchase history on Amazon, listed in chronological order (earliest to latest). 
Each item is represented by the following format: Title: <item title> Category: <item category> Brand: <item brand> Price: <item price>.
Based on this history, predict **only one** item the customer is most likely to purchase next in the same format.

### Purchase history:
Title: Herbal Choice Mari Natural Toothgel, Cinnamon &amp; Baking Soda; 3.4floz Glass. Brand: Nature's Brands. Category: Health & Personal Care. Price: $14.47
Title: Pac-Kit by First Aid Only 25-450 Cotton Tipped Applicator with 6&quot; Wooden Shaft (Bag of 100). Brand: First Aid Only. Category: Industrial & Scientific. Price: Unknown
Title: Litmus pH Test Strips, Universal Application (pH 1-14), 2 Packs of 100 Strips. Brand: LabRat Supplies. Category: Industrial & Scientific. Price: Unknown

### Next item:



### Lookup based on text, llama-3.2-1B fed prompt texts

In [20]:
import os
import gc
import torch
from src.utils.project_dirs import get_hfdata_dir
from datasets import load_from_disk
from src.utils.generate_utils import generate_sequences
from unsloth import FastLanguageModel
from datasets import Dataset

In [21]:
# model_name = "unsloth/Llama-3.2-1B-bnb-4bit"
model_name = "unsloth/Llama-3.2-3B-bnb-4bit"
dataset_name = "Amzn_scientific_2018"
dataset_split = "validation"
data_field = 'ptext'
max_seq_length = 1024
dtype = None
load_in_4bit = True
batch_size = 4
num_return_sequences = 5
max_new_tokens = 50
temperature = 0.5

dataset_path = os.path.join(get_hfdata_dir(), dataset_name)
full_dataset = load_from_disk(dataset_path)
full_split = full_dataset[dataset_split]
print(f"Loaded dataset '{dataset_name}', split '{dataset_split}' with {len(full_split)} examples. Using field '{data_field}'.")

Loaded dataset 'Amzn_scientific_2018', split 'validation' with 512 examples. Using field 'ptext'.


In [22]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name, # Use config value
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    )
FastLanguageModel.for_inference(model)

results = generate_sequences(
        model,
        tokenizer,
        dataset=full_split,
        batch_size = batch_size,
        num_return_sequences = num_return_sequences,
        max_new_tokens = max_new_tokens,
        temperature = temperature,
        field=data_field
    )
del model
del tokenizer
torch.cuda.empty_cache()
gc.collect()

==((====))==  Unsloth 2025.2.15: Fast Llama patching. Transformers: 4.49.0.
   \\   /|    GPU: NVIDIA RTX A5000. Max memory: 23.679 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.6. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Processing 512 items in 128 batches.


Generating sequences: 100%|██████████| 128/128 [07:24<00:00,  3.47s/batch]


8223

In [26]:
torch.cuda.empty_cache()
gc.collect()

33

In [24]:
type(results)

list

In [27]:
import json
# with open("llama1B_val_no-ft.json", "w") as f:
#     json.dump(results, f, indent=2)

with open("llama3B_val_no-ft.json", "w") as f:
    json.dump(results, f, indent=2)

In [1]:
import json
with open("llama1B_val_no-ft.json", "r") as f:
    loaded_valgen = json.load(f)

In [5]:
type(loaded_valgen), loaded_valgen[0]

(list,
 {'reviewer_id': 'A10042M3XO3NET',
  'asin': 'B00VSHBSVE',
  'seen_asins': ['B005K2TXMO',
   'B000BQO15I',
   'B0002YWNLS',
   'B001HRMJ54',
   'B01ENFOHN8'],
  'generated_sequences': ['Title: 3M Littmann Classic III Monitoring Stethoscope, Pearl Pink Tube, 27 inch, 5633. Brand: 3M Littmann. Category: Industrial & Scientific. Price: $88.89\n\n###',
   'Title: Apera Instruments AI209 PH20 Value Waterproof pH Pocket Tester, &plusmn;0.1 pH Accuracy, 0-14.0 pH Range, Complete Kit. Brand: Apera Instruments, LLC. Category: Industrial',
   'Title: Apera Instruments AI209 PH20 Value Waterproof pH Pocket Tester, &plusmn;0.1 pH Accuracy, 0-14.0 pH Range, Complete Kit. Brand: Apera Instruments, LLC. Category: Industrial',
   'Title: 3M Littmann Classic III Monitoring Stethoscope, Pearl Pink Tube, 27 inch, 5633. Brand: 3M Littmann. Category: Industrial & Scientific. Price: $88.89\n',
   'Title: 3M Littmann Classic III Monitoring Stethoscope, Pearl Pink Tube, 27 inch, 5633. Brand: 3M Littman

In [4]:
import pandas as pd
scimeta = pd.read_json('scimeta_corpus.json', orient='records', lines=True)
scimeta.columns = ['asin', 'Title', 'Brand', 'Category', 'Price'] # only the corpus asins and their metadata
asins_compact = scimeta[['asin']].copy()
asins_compact['nlang'] = (
	"Title: " + scimeta['Title'] + ". " +
	"Brand: " + scimeta['Brand'] + ". " +
	"Category: " + scimeta['Category'] + ". " +
	"Price: " + scimeta['Price']
)
asin_dict = asins_compact.set_index('asin')['nlang'].to_dict()  # ASIN -> formatted text of the item


In [87]:
import pandas as pd
import bm25s
import Stemmer
from ranx import Qrels, Run, evaluate
import numpy as np

def get_qrels(dataset):
    '''
        dataset: typically validation or test
    '''
    qrels_dict = {}
    for row in dataset:
        qrels_dict[row['reviewer_id']] = {row['asin']: 1}
    return qrels_dict

# def get_rundict_llama(dataset, retriever, asins_compact:pd.DataFrame):
#     '''
#         dataset: typically validation or test
#         asins_compact: has the asin and nlang column for the asin
#         retriever: the BM25 retriever built using the nlang column, note iloc is used to get the asin
#     '''
#     run_dict = {}
#     stemmer = Stemmer.Stemmer("english") # TODO make configurable
#     for row in dataset:
#         reviewer_id = row['reviewer_id']
#         query_tokens = bm25s.tokenize(row['generated_sequences'], stemmer=stemmer)
#         res, scores = retriever.retrieve(query_tokens, k=1)
#         asins = asins_compact.iloc[res.flatten()]['asin'].tolist()
#         run_dict[reviewer_id] = {asin: score for asin, score in zip(asins, scores)}
#     return run_dict

# def get_rundict_v2_llama(loaded_valgen, retriever, num_return_sequences:int,
#                          asins_compact:pd.DataFrame): # sped up?
#     run_dict = {}
#     l = len(loaded_valgen)
#     queries_flat = [seq for row in loaded_valgen for seq in row['generated_sequences']]
#     stemmer = Stemmer.Stemmer("english")
#     query_tokens = bm25s.tokenize(queries_flat, stemmer=stemmer)
#     res, scores = retriever.retrieve(query_tokens, k=1)
#     res = res.reshape((l, num_return_sequences))
#     scores = scores.reshape((l, num_return_sequences))
    
#     for i in range(l):
#         reviewer_id = loaded_valgen[i]['reviewer_id']
#         print(res[i], type(res[i]), res[i].shape)
#         asins = asins_compact.iloc[res[i]]['asin'].tolist()
#         # now sort by descending order of scores and keep unique unique asins, possibly write a different function to do this and call it
#         run_dict[reviewer_id] = {asin: score for asin, score in zip(asins, scores[i])}  # this is wrong, we may have duplicate asins and different score
#     return run_dict

def get_rundict_v2_llama(loaded_valgen, retriever, num_return_sequences: int,
                         asins_compact: pd.DataFrame):
    run_dict = {}
    l = len(loaded_valgen)

    # Flatten all generated queries
    queries_flat = []
    for row in loaded_valgen:
        for seq in row['generated_sequences']:
            queries_flat.append(seq)
    
    # Tokenize and retrieve
    stemmer = Stemmer.Stemmer("english")
    query_tokens = bm25s.tokenize(queries_flat, stemmer=stemmer)
    res, scores = retriever.retrieve(query_tokens, k=1)
    print(res.shape, scores.shape)
    # print(res.shape/num_return_sequences, l)
    
    # Reshape to group by reviewer
    res = res.flatten().reshape((l, num_return_sequences))
    scores =  scores.flatten().reshape((l, num_return_sequences))

    # Helper: return unique asins sorted by score
    def get_unique_sorted_asins(asins, scores):
        seen = set()
        asin_score_pairs = []
        for asin, score in sorted(zip(asins, scores), key=lambda x: -x[1]):
            if asin not in seen:
                asin_score_pairs.append((asin, score))
                seen.add(asin)
        return dict(asin_score_pairs)

    for i in range(l):
        reviewer_id = loaded_valgen[i]['reviewer_id']
        asin_indices = res[i]
        asin_scores = scores[i]
        asins = asins_compact.iloc[asin_indices]['asin'].tolist()
        run_dict[reviewer_id] = get_unique_sorted_asins(asins, asin_scores)

    return run_dict

In [96]:
temp = get_rundict_v2_llama(loaded_valgen, retriever, num_return_sequences=5, asins_compact=asins_compact)

Split strings:   0%|          | 0/2560 [00:00<?, ?it/s]

Stem Tokens:   0%|          | 0/2560 [00:00<?, ?it/s]

BM25S Retrieve:   0%|          | 0/2560 [00:00<?, ?it/s]

(2560, 1) (2560, 1)


In [97]:
temp['A10042M3XO3NET']

{'B00004YMDG': np.float32(32.771355), 'B00004YMDF': np.float32(25.011835)}

In [95]:
temp

{'A10042M3XO3NET': {'B01ENFOHN8': np.float32(44.989384),
  'B001HRMJ54': np.float32(36.296738)},
 'A100WO06OQR8BQ': {'B0012BA1M8': np.float32(43.704254),
  'B0076AY6J8': np.float32(24.843426)}}

In [93]:
asin_dict['B01ENFOHN8'], asin_dict['B001HRMJ54']

('Title: Apera Instruments AI209 PH20 Value Waterproof pH Pocket Tester, &plusmn;0.1 pH Accuracy, 0-14.0 pH Range, Complete Kit. Brand: Apera Instruments, LLC. Category: Industrial & Scientific. Price: $49.75',
 'Title: 3M Littmann Classic III Monitoring Stethoscope, Pearl Pink Tube, 27 inch, 5633. Brand: 3M Littmann. Category: Industrial & Scientific. Price: $88.89')

In [72]:
loaded_valgen[0], loaded_valgen[1]

({'reviewer_id': 'A10042M3XO3NET',
  'asin': 'B00VSHBSVE',
  'seen_asins': ['B005K2TXMO',
   'B000BQO15I',
   'B0002YWNLS',
   'B001HRMJ54',
   'B01ENFOHN8'],
  'generated_sequences': ['Title: 3M Littmann Classic III Monitoring Stethoscope, Pearl Pink Tube, 27 inch, 5633. Brand: 3M Littmann. Category: Industrial & Scientific. Price: $88.89\n\n###',
   'Title: Apera Instruments AI209 PH20 Value Waterproof pH Pocket Tester, &plusmn;0.1 pH Accuracy, 0-14.0 pH Range, Complete Kit. Brand: Apera Instruments, LLC. Category: Industrial',
   'Title: Apera Instruments AI209 PH20 Value Waterproof pH Pocket Tester, &plusmn;0.1 pH Accuracy, 0-14.0 pH Range, Complete Kit. Brand: Apera Instruments, LLC. Category: Industrial',
   'Title: 3M Littmann Classic III Monitoring Stethoscope, Pearl Pink Tube, 27 inch, 5633. Brand: 3M Littmann. Category: Industrial & Scientific. Price: $88.89\n',
   'Title: 3M Littmann Classic III Monitoring Stethoscope, Pearl Pink Tube, 27 inch, 5633. Brand: 3M Littmann. Cate

In [None]:
gens = []
for row in loaded_valgen:
    for gen in row['generated_sequences']:
        gens.append(gen)

Title: 3M Littmann Classic III Monitoring Stethoscope, Pearl Pink Tube, 27 inch, 5633. Brand: 3M Littmann. Category: Industrial & Scientific. Price: $88.89

###


In [77]:
queries_flat = [seq for row in loaded_valgen for seq in row['generated_sequences']]

print(queries_flat[10], queries_flat[101])

Title: Hoover PowerPak 1.2HP Portable Electric Cordless Vacuum Cleaner. Brand: Hoover. Category: Amazon Home. Price: $1.53
 Title: 1000pcs Male &amp; Female Insulated Wire Bullet Crimp Connector Terminal. Brand: Souked. Category: Tools & Home Improvement. Price: Unknown



In [42]:
results_val = Dataset.from_list(loaded_valgen)
qrels_val = Qrels(get_qrels(results_val))

In [15]:
retriever = bm25s.BM25.load("amznsci_2018_index", load_corpus=False)

In [40]:
asins_compact.iloc[np.array([1378,5195,5195,1378,1378])]['asin'].tolist()

['B001HRMJ54', 'B01ENFOHN8', 'B01ENFOHN8', 'B001HRMJ54', 'B001HRMJ54']

In [79]:
num_return_sequences

5

In [83]:
run_dict_llama = get_rundict_v2_llama(loaded_valgen, retriever, num_return_sequences, asins_compact)

Split strings:   0%|          | 0/2560 [00:00<?, ?it/s]

Stem Tokens:   0%|          | 0/2560 [00:00<?, ?it/s]

BM25S Retrieve:   0%|          | 0/2560 [00:00<?, ?it/s]

(2560, 1) (2560, 1)


In [84]:
run_dict_llama['A10042M3XO3NET']

{'B00004YMDG': np.float32(32.771355), 'B00004YMDF': np.float32(25.011835)}

In [64]:
loaded_valgen[0]

{'reviewer_id': 'A10042M3XO3NET',
 'asin': 'B00VSHBSVE',
 'seen_asins': ['B005K2TXMO',
  'B000BQO15I',
  'B0002YWNLS',
  'B001HRMJ54',
  'B01ENFOHN8'],
 'generated_sequences': ['Title: 3M Littmann Classic III Monitoring Stethoscope, Pearl Pink Tube, 27 inch, 5633. Brand: 3M Littmann. Category: Industrial & Scientific. Price: $88.89\n\n###',
  'Title: Apera Instruments AI209 PH20 Value Waterproof pH Pocket Tester, &plusmn;0.1 pH Accuracy, 0-14.0 pH Range, Complete Kit. Brand: Apera Instruments, LLC. Category: Industrial',
  'Title: Apera Instruments AI209 PH20 Value Waterproof pH Pocket Tester, &plusmn;0.1 pH Accuracy, 0-14.0 pH Range, Complete Kit. Brand: Apera Instruments, LLC. Category: Industrial',
  'Title: 3M Littmann Classic III Monitoring Stethoscope, Pearl Pink Tube, 27 inch, 5633. Brand: 3M Littmann. Category: Industrial & Scientific. Price: $88.89\n',
  'Title: 3M Littmann Classic III Monitoring Stethoscope, Pearl Pink Tube, 27 inch, 5633. Brand: 3M Littmann. Category: Indust

In [None]:
for asins in run_dict_llama['A10042M3XO3NET']: # some bug??
    print(asins, asin_dict[asins])

B00004YMDG Title: DEWALT DW4902 1-Inch by 1/4-Inch High Performance Carbon Knot Wire End Brush, 0.020-Inch Wire. Brand: DEWALT. Category: Tools & Home Improvement. Price: $5.00
B00004YMDF Title: DEWALT DW4901 1-Inch Crimped End Wire Brush. Brand: DEWALT. Category: Tools & Home Improvement. Price: $5.95


In [85]:
queries = loaded_valgen[0]['generated_sequences']
query_tokens = bm25s.tokenize(queries, stemmer=stemmer)
docs, scores = retriever.retrieve(query_tokens, k=1)
print(docs, scores)

Split strings:   0%|          | 0/5 [00:00<?, ?it/s]

Stem Tokens:   0%|          | 0/5 [00:00<?, ?it/s]

BM25S Retrieve:   0%|          | 0/5 [00:00<?, ?it/s]

[[1378]
 [5195]
 [5195]
 [1378]
 [1378]] [[36.296738]
 [44.989384]
 [44.989384]
 [36.296738]
 [36.296738]]


In [70]:
asins_compact[asins_compact['asin'] == 'B00004YMDG']

Unnamed: 0,asin,nlang
50,B00004YMDG,Title: DEWALT DW4902 1-Inch by 1/4-Inch High P...


In [69]:
asins_compact.iloc[1378]

asin                                            B001HRMJ54
nlang    Title: 3M Littmann Classic III Monitoring Stet...
Name: 1378, dtype: object

In [None]:
query =  'Title: 3M Littmann Classic III Monitoring Stethoscope, Pearl Pink Tube, 27 inch, 5633. Brand: 3M Littmann. Category: Industrial & Scientific. Price: $88.89\n',


Split strings:   0%|          | 0/1 [00:00<?, ?it/s]

Stem Tokens:   0%|          | 0/1 [00:00<?, ?it/s]

In [56]:
asins_compact.iloc[1378]['nlang']

'Title: 3M Littmann Classic III Monitoring Stethoscope, Pearl Pink Tube, 27 inch, 5633. Brand: 3M Littmann. Category: Industrial & Scientific. Price: $88.89'

In [None]:
type(run_dict_llama), run_dict_llama['A10042M3XO3NET']

(dict,
 {'B00004YMDG': np.float32(32.771355), 'B00004YMDF': np.float32(25.011835)})

In [49]:
asin_dict['B00004YMDG']

'Title: DEWALT DW4902 1-Inch by 1/4-Inch High Performance Carbon Knot Wire End Brush, 0.020-Inch Wire. Brand: DEWALT. Category: Tools & Home Improvement. Price: $5.00'

In [50]:
asin_dict['B00004YMDF']

'Title: DEWALT DW4901 1-Inch Crimped End Wire Brush. Brand: DEWALT. Category: Tools & Home Improvement. Price: $5.95'

In [47]:
evaluate(qrels_val, Run(run_dict_llama), ["recall@5", "ndcg@5", "mrr@5"])

{'recall@5': np.float64(0.0),
 'ndcg@5': np.float64(0.0),
 'mrr@5': np.float64(0.0)}

In [46]:
qrels_val['A10042M3XO3NET']

{'B00VSHBSVE': 1}

In [1]:
# run_dict_llama = get_rundict_llama(results_val, retriever, asins_compact)

In [121]:
dataset['test'][:4]['ptext'], dataset['test'][:4]['reviewer_id'], dataset['test'][:4]['asin']

(["Below is a customer's purchase history on Amazon, listed in chronological order (earliest to latest). \nEach item is represented by the following format: Title: <item title> Category: <item category> Brand: <item brand> Price: <item price>.\nBased on this history, predict **only one** item the customer is most likely to purchase next in the same format.\n\n### Purchase history:\nTitle: Herbal Choice Mari Natural Toothgel, Cinnamon &amp; Baking Soda; 3.4floz Glass. Brand: Nature's Brands. Category: Health & Personal Care. Price: $14.47\nTitle: Pac-Kit by First Aid Only 25-450 Cotton Tipped Applicator with 6&quot; Wooden Shaft (Bag of 100). Brand: First Aid Only. Category: Industrial & Scientific. Price: Unknown\nTitle: Litmus pH Test Strips, Universal Application (pH 1-14), 2 Packs of 100 Strips. Brand: LabRat Supplies. Category: Industrial & Scientific. Price: Unknown\n\n### Next item:\n",
  "Below is a customer's purchase history on Amazon, listed in chronological order (earliest t

In [4]:
import bm25s
import Stemmer
# Create your corpus here
corpus = asins_compact['nlang'].tolist()
# optional: create a stemmer
stemmer = Stemmer.Stemmer("english")

# Tokenize the corpus and only keep the ids (faster and saves memory)
# corpus_tokens = bm25s.tokenize(corpus, stopwords="en", stemmer=stemmer)
corpus_tokens = bm25s.tokenize(corpus, stopwords="en")

# Create the BM25 model and index the corpus
retriever = bm25s.BM25()
retriever.index(corpus_tokens)

# You can save the corpus along with the model
retriever.save("amznsci_2018_index2", corpus=corpus)

# ...and load them when you need them
import bm25s
retriever = bm25s.BM25.load("amznsci_2018_index", load_corpus=False)
# set load_corpus=False if you don't need the corpus


Split strings:   0%|          | 0/5326 [00:00<?, ?it/s]

BM25S Count Tokens:   0%|          | 0/5326 [00:00<?, ?it/s]

BM25S Compute Scores:   0%|          | 0/5326 [00:00<?, ?it/s]

Finding newlines for mmindex:   0%|          | 0.00/965k [00:00<?, ?B/s]

In [16]:
import bm25s
retriever = bm25s.BM25.load("amznsci_2018_index", load_corpus=False)



In [17]:
retriever

AttributeError: 'BM25' object has no attribute 'stemmer'

In [24]:
query = [[asins_compact['nlang'][10], asins_compact['nlang'][5001][:100]], 
         [asins_compact['nlang'][5], asins_compact['nlang'][101]]]

# Flatten the nested list into a single list of strings
flattened_query = [item for sublist in query for item in sublist]

# Tokenize the flattened query
query_tokens = bm25s.tokenize(flattened_query, stemmer=stemmer)

# Get top-k results as a tuple of (doc ids, scores). Both are arrays of shape (n_queries, k).
# To return docs instead of IDs, set the `corpus=corpus` parameter.
results, scores = retriever.retrieve(query_tokens, k=1)

print(results, type(results))
print(scores, type(scores))


Split strings:   0%|          | 0/4 [00:00<?, ?it/s]

Stem Tokens:   0%|          | 0/4 [00:00<?, ?it/s]

BM25S Retrieve:   0%|          | 0/4 [00:00<?, ?it/s]

[[  10]
 [5001]
 [   5]
 [ 101]] <class 'numpy.ndarray'>
[[41.23096 ]
 [27.236872]
 [25.903563]
 [44.98705 ]] <class 'numpy.ndarray'>
[[  10 5001]
 [   5  101]]


In [33]:
asins_compact.iloc[results.flatten()]

Unnamed: 0,asin,nlang
10,B00002NC3K,Title: Rubbermaid Commercial Products FG263100...
5001,B01BHAA96G,Title: 45 count - SMALL diameter Disposable Sp...
5,B0000224MY,"Title: Qualcraft 2601 Wall Jack, Red. Brand: Q..."
101,B00008IHTL,"Title: Starrett 25-441J Dial Indicator, 0.375&..."


In [32]:
asins_compact.iloc[results.flatten()]['asin'].tolist()

['B00002NC3K', 'B01BHAA96G', 'B0000224MY', 'B00008IHTL']

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    "unsloth/Llama-3.2-1B",
    device_map="auto",
    load_in_4bit=True,  # if you have bitsandbytes installed
)
tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B")

config.json:   0%|          | 0.00/935 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]