# Best Hybrid Search Configuration

This notebook runs different hybrid search configurations, calculates the metrics for each configuration, and compares the results to the metrics calculated after the baseline run from the previous notebook.

We are using the same query set to have a fair comparison.

## Get queries

In [1]:
import itertools
import json
import os

import mercury as mr
import numpy as np
import pandas as pd
import requests
from IPython.display import display, HTML, Image

app = mr.App(title="Let's Run a Hybrid Search", static_notebook=True)

In [2]:
# b_most_judged must have the same value as in notebook 3 - otherwise the query sets
# won't match and the results will be meaningless
b_most_judged = False

if not b_most_judged:
    # Query set file: tab-separated, one "<idx>\t<query>" pair per line
    name = 'queries.txt'
    df_query_idx = pd.read_csv(name, sep="\t", names=['idx', 'query'])
else:
    # Reshape the data into two columns: the row index as the query identifier (`idx`) and the query
    df_query_idx = (
        pd.read_csv('most_judged.csv', usecols=[0, 1])
        .reset_index()
        .rename(columns={'index': 'idx'})
        .drop(columns=['query_set_id'])
    )

In [3]:
df_query_idx

Unnamed: 0,idx,query
0,0,$30 roblox gift card not digital
1,1,(fiction without frontiers)
2,2,100
3,3,10x10x6 cake box without window
4,4,15 inch light weight laptop that has lots of m...
...,...,...
215,215,wooden stool
216,216,woodwick wax melt
217,217,world of warcraft anniversary collector's edition
218,218,wowled


In [4]:
# Import the ratings generated in the previous notebook.
# The file is read as tab-separated and the column names are supplied here via `names`.
df_ratings = pd.read_csv('ratings.csv', sep="\t", names=['query', 'docid', 'rating', 'idx'])
df_ratings

Unnamed: 0,query,docid,rating,idx
0,$30 roblox gift card not digital,B07RX6FBFR,3,0
1,$30 roblox gift card not digital,B09194H44R,0,0
2,$30 roblox gift card not digital,B08R5N6W6B,2,0
3,$30 roblox gift card not digital,B07Y693ND1,0,0
4,$30 roblox gift card not digital,B07RZ75JW3,2,0
...,...,...,...,...
4060,yarn purple and pink,B00JX10Q2O,2,219
4061,yarn purple and pink,B00KY41UHO,3,219
4062,yarn purple and pink,B00QXJOUL2,3,219
4063,yarn purple and pink,B00UY14WCM,3,219


In [5]:
# Count the ratings per query. Of the aggregated frame only the `query` column is picked up below.
df_queries = df_ratings.groupby(by='query', as_index=False).agg({
    'rating': ['count']
})
# NOTE(review): this rebinds df_query_idx, replacing the frame that was loaded from file earlier in
# this notebook - the query set used from here on is derived from the ratings. Confirm this is intended.
df_query_idx = df_queries['query']

In [6]:
# Build a two-column frame from the queries: the running row number becomes the
# query identifier (`idx`), next to the query itself
df_query_idx = (
    pd.DataFrame(df_query_idx)
    .reset_index()
    .rename(columns={'index': 'idx'})
)
df_query_idx

Unnamed: 0,idx,query
0,0,$30 roblox gift card not digital
1,1,(fiction without frontiers)
2,2,100
3,3,10x10x6 cake box without window
4,4,15 inch light weight laptop that has lots of m...
...,...,...
215,215,wooden stool
216,216,woodwick wax melt
217,217,world of warcraft anniversary collector's edition
218,218,wowled


## Query OpenSearch with the Hybrid Search Configurations

In [7]:
keyword_weight = 0.3

In [8]:
neural_weight = round(1.0 - keyword_weight, 2)
print(f"Keyword Weight is {keyword_weight} and Neural Weight is {neural_weight}")

Keyword Weight is 0.3 and Neural Weight is 0.7


In [9]:
# Get model_id
# We are assuming that the installation has only one model. Change this if you have more models
# and need to pick a specific one

url = "http://localhost:9200/_plugins/_ml/models/_search"

# These headers are reused by the later requests in this notebook
headers = {
    'Content-Type': 'application/json'
}

# Ask for a single hit only - the first one returned is used
payload = {
  "query": {
    "match_all": {}
  },
  "size": 1
}

response = requests.request("POST", url, headers=headers, data=json.dumps(payload))
# Stop here with the HTTP error instead of failing later with an opaque KeyError on the body
response.raise_for_status()

model_hits = response.json()['hits']['hits']
if not model_hits:
    raise RuntimeError("No model found in OpenSearch - register a model before running this notebook")

model_id = model_hits[0]['_source']['model_id']

In [10]:
# Create a reference pipeline with a fixed configuration that uses the weights chosen above.
# The technique names are held in variables and used in the payload so they cannot drift apart.
normalization = 'min_max'
combination = 'arithmetic_mean'
pipeline = 'hybrid-search-pipeline'

url = "http://localhost:9200/_search/pipeline/" + pipeline

print(f"Setting default model id to: {model_id}")
payload = {
  "request_processors": [
    {
      "neural_query_enricher" : {
        "description": "Sets the default model ID at index and field levels",
        "default_model_id": model_id,
        "neural_field_default_id": {
           # Must be the vector field that the neural clause of the hybrid query searches
           "title_embedding": model_id
        }
      }
    }
  ],
  "phase_results_processors": [
    {
      "normalization-processor": {
        "normalization": {
          "technique": normalization
        },
        "combination": {
          "technique": combination,
          "parameters": {
            # Same order as the sub-queries of the hybrid query: keyword first, neural second
            "weights": [
              keyword_weight,
              neural_weight
            ]
          }
        }
      }
    }
  ]
}


response = requests.request("PUT", url, headers=headers, data=json.dumps(payload))
mr.JSON(response.json(), level=4)

Setting default model id to: B0KeipIBhjG7Xxf7_o-P


In [11]:
df_query_idx

Unnamed: 0,idx,query
0,0,$30 roblox gift card not digital
1,1,(fiction without frontiers)
2,2,100
3,3,10x10x6 cake box without window
4,4,15 inch light weight laptop that has lots of m...
...,...,...
215,215,wooden stool
216,216,woodwick wax melt
217,217,world of warcraft anniversary collector's edition
218,218,wowled


## Create a DataFrame with all possible combinations of hybrid search configurations

In [12]:
# Define the possible values for each column
normalization_values = ['min_max', 'l2']
combination_values = ['arithmetic_mean', 'harmonic_mean', 'geometric_mean']
# Keyword weights 0.0, 0.1, ..., 1.0 (rounded so that they are exact one-decimal values)
keyword_values = [round(i * 0.1, 1) for i in range(11)]

# Create all possible combinations of normalization, combination, and keyword
combinations = list(itertools.product(normalization_values, combination_values, keyword_values))

# The vector weight is the complement of the keyword weight. Round it as well: plain
# floating point subtraction yields values such as 0.30000000000000004 for 1.0 - 0.7
data = [(norm, comb, kw, round(1.0 - kw, 1)) for norm, comb, kw in combinations]

# Create DataFrame
df_hybrid_search_params = pd.DataFrame(data, columns=['normalization', 'combination', 'keyword', 'vector'])

# Create a column with a pipeline name made up of its components, e.g. min_maxarithmetic_mean0.3
df_hybrid_search_params['pipeline'] = (
    df_hybrid_search_params['normalization'].map(str)
    + df_hybrid_search_params['combination'].map(str)
    + df_hybrid_search_params['keyword'].map(str)
)

df_hybrid_search_params.head()

Unnamed: 0,normalization,combination,keyword,vector,pipeline
0,min_max,arithmetic_mean,0.0,1.0,min_maxarithmetic_mean0.0
1,min_max,arithmetic_mean,0.1,0.9,min_maxarithmetic_mean0.1
2,min_max,arithmetic_mean,0.2,0.8,min_maxarithmetic_mean0.2
3,min_max,arithmetic_mean,0.3,0.7,min_maxarithmetic_mean0.3
4,min_max,arithmetic_mean,0.4,0.6,min_maxarithmetic_mean0.4


In [13]:
def create_search_pipeline(df):
    """Create (or overwrite) one OpenSearch search pipeline per row of ``df``.

    Every row has to provide the columns ``normalization``, ``combination``,
    ``keyword``, ``vector`` and ``pipeline``. The pipeline is stored under the
    name given in ``pipeline``; ``keyword`` and ``vector`` are used as the
    weights of the combination technique, in that order.

    The function reads the notebook-level variables ``model_id`` and ``headers``.

    Raises:
        requests.HTTPError: if OpenSearch answers a request with an error status.
    """
    for row in df.to_dict('records'):
        payload = {
            "request_processors": [
                {
                    "neural_query_enricher": {
                        "description": "Sets the default model ID at index and field levels",
                        "default_model_id": model_id,
                        "neural_field_default_id": {
                            # Must be the vector field that the neural clause of the hybrid query searches
                            "title_embedding": model_id
                        }
                    }
                }
            ],
            "phase_results_processors": [
                {
                    "normalization-processor": {
                        "normalization": {
                            "technique": row['normalization']
                        },
                        "combination": {
                            "technique": row['combination'],
                            "parameters": {
                                # Rounding removes float noise left over from computing 1.0 - keyword
                                "weights": [
                                    row['keyword'],
                                    round(row['vector'], 1)
                                ]
                            }
                        }
                    }
                }
            ]
        }

        url = "http://localhost:9200/_search/pipeline/" + row['pipeline']

        response = requests.request("PUT", url, headers=headers, data=json.dumps(payload))
        # A pipeline that could not be created would only surface later as a failing search
        response.raise_for_status()

In [14]:
create_search_pipeline(df_hybrid_search_params)

In [15]:
# Send every query of the query set to OpenSearch once per hybrid search pipeline and collect the hits.
#
# Two nested loops do the work:
# 1) the outer one iterates over the pipelines, the inner one over the queries (so that we have a
#    query id instead of only a query string)
# 2) for every response we iterate over the result set to record the position of each hit
#
# The resulting DataFrame has one row per hit with the columns:
# query_id: the id of the query as the trec_eval tool needs a numeric id rather than a query string as an identifier
# query_string: the query text
# product_id: the id of the product in the hit list
# position: the position of the product in the result set (0-based, stored as an integer)
# relevance: relevance as given by the search engine
# run: the name of the query pipeline
result_columns = ['query_id', 'query_string', 'product_id', 'position', 'relevance', 'run']

headers = {
    'Content-Type': 'application/json'
}

# Collect plain dicts and build the DataFrame once at the end - appending to a DataFrame
# hit by hit copies the whole frame on every append
relevance_rows = []
for config in df_hybrid_search_params.itertuples():
    pipeline_name = config.pipeline
    print(pipeline_name)

    # Select the pipeline through the search_pipeline request parameter
    url = "http://localhost:9200/ecommerce/_search?search_pipeline=" + pipeline_name

    # iterate over all query strings and send a hybrid search query to OpenSearch with the set pipeline
    for query in df_query_idx.itertuples():

        payload = {
          "_source": {
            "excludes": [
              "title_embedding"
            ]
          },
          "query": {
            "hybrid": {
              "queries": [
                {
                  "multi_match" : {
                      "type":       "best_fields",
                      "fields":     [
                        "product_id^100",
                        "product_bullet_point^3",
                        "product_color^2",
                        "product_brand^5",
                        "product_description",
                        "product_title^10"
                      ],
                      "operator":   "and",
                      "query":      query.query
                    }
                },
                {
                  "neural": {
                    "title_embedding": {
                      "query_text": query.query,
                      "k": 50
                    }
                  }
                }
              ]
            }
          }
        }

        response = requests.request("POST", url, headers=headers, data=json.dumps(payload))
        # Fail with the HTTP error rather than with a KeyError on a body that has no 'hits'
        response.raise_for_status()

        # store results per pipeline
        for position, hit in enumerate(response.json()['hits']['hits']):
            relevance_rows.append({
                'query_id': str(query.idx),
                'query_string': query.query,
                'product_id': hit["_id"],
                'position': position,
                'relevance': hit["_score"],
                'run': pipeline_name,
            })

df_relevance = pd.DataFrame(relevance_rows, columns=result_columns)

min_maxarithmetic_mean0.0
min_maxarithmetic_mean0.1
min_maxarithmetic_mean0.2
min_maxarithmetic_mean0.3
min_maxarithmetic_mean0.4
min_maxarithmetic_mean0.5
min_maxarithmetic_mean0.6
min_maxarithmetic_mean0.7
min_maxarithmetic_mean0.8
min_maxarithmetic_mean0.9
min_maxarithmetic_mean1.0
min_maxharmonic_mean0.0
min_maxharmonic_mean0.1
min_maxharmonic_mean0.2
min_maxharmonic_mean0.3
min_maxharmonic_mean0.4
min_maxharmonic_mean0.5
min_maxharmonic_mean0.6
min_maxharmonic_mean0.7
min_maxharmonic_mean0.8
min_maxharmonic_mean0.9
min_maxharmonic_mean1.0
min_maxgeometric_mean0.0
min_maxgeometric_mean0.1
min_maxgeometric_mean0.2
min_maxgeometric_mean0.3
min_maxgeometric_mean0.4
min_maxgeometric_mean0.5
min_maxgeometric_mean0.6
min_maxgeometric_mean0.7
min_maxgeometric_mean0.8
min_maxgeometric_mean0.9
min_maxgeometric_mean1.0
l2arithmetic_mean0.0
l2arithmetic_mean0.1
l2arithmetic_mean0.2
l2arithmetic_mean0.3
l2arithmetic_mean0.4
l2arithmetic_mean0.5
l2arithmetic_mean0.6
l2arithmetic_mean0.7
l2arith

In [21]:
df_relevance.head(3)

Unnamed: 0,query_id,query_string,product_id,position,relevance,run
0,0,$30 roblox gift card not digital,B00F4CF4PU,0,1.0,min_maxarithmetic_mean0.0
1,0,$30 roblox gift card not digital,B07C438TMN,1,0.474647,min_maxarithmetic_mean0.0
2,0,$30 roblox gift card not digital,B00XJZHJCA,2,0.462698,min_maxarithmetic_mean0.0


In [37]:
df_relevance.position.max()

np.int64(9)

# Calculate Metrics per Pipeline

In [22]:
# Give the ratings the same column names as the search results so that both can be merged
df_ratings = df_ratings.set_axis(
    ['query_string', 'product_id', 'rating', 'query_id'],
    axis='columns',
)
df_ratings.head(3)

Unnamed: 0,query_string,product_id,rating,query_id
0,$30 roblox gift card not digital,B07RX6FBFR,3,0
1,$30 roblox gift card not digital,B09194H44R,0,0
2,$30 roblox gift card not digital,B08R5N6W6B,2,0


In [23]:
# The merge keys have to be strings on both sides - otherwise the merge operation might cause
# an error. The position is cast to an integer.
df_relevance = df_relevance.astype({'query_id': str, 'position': int})
df_ratings = df_ratings.astype({'query_id': str})
# Keep only one row per query-product pair in the ratings
df_unique_ratings = df_ratings.drop_duplicates(subset=['product_id', 'query_id'])

In [47]:
df_ratings.groupby("query_string").count()

Unnamed: 0_level_0,product_id,rating,query_id
query_string,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
$30 roblox gift card not digital,15,15,15
(fiction without frontiers),16,16,16
100,16,16,16
10x10x6 cake box without window,16,16,16
15 inch light weight laptop that has lots of memory storage and RAM. It should include a HDMI port as well as a USB drive.,15,15,15
...,...,...,...
wooden stool,16,16,16
woodwick wax melt,16,16,16
world of warcraft anniversary collector's edition,16,16,16
wowled,16,16,16


In [24]:
# Merge results on query_id and product_id so that the resulting DataFrame has the ratings together with the search results.
# The validation makes sure that we have only one rating for each query-doc pair: the same query-doc pair
# occurs once per search pipeline in the results, but there can only be one rating for it.
# Only the key columns and the rating are taken from the ratings, so the query string of the
# search results is kept as is and no suffixed duplicate columns are created.
rating_columns = ['query_id', 'product_id', 'rating']
df_merged = df_relevance.merge(
    df_unique_ratings[rating_columns],
    on=['query_id', 'product_id'],
    how='left',
    validate='many_to_one',
)

df_merged.head(3)

In [25]:
# The more rows lack a rating, the less reliable the results will be - report how many there are.
# count() only counts the rows that do have a rating, so the difference is the number of missing ones.
nan_count_rating = len(df_merged) - df_merged['rating'].count()
print(f"There are {df_merged.shape[0]} rows and {nan_count_rating} do not contain a rating")

There are 145200 rows and 98509 do not contain a rating


## Metrics

In [26]:
def dcg_at_10(df):
    """Compute the discounted cumulative gain of the first ten results.

    Args:
        df: results of one query with a numeric, 0-based ``position`` column and a
            ``rating`` column. Rows may be in any order.

    Returns:
        Sum of ``rating / log2(position + 2)`` over the ten rows with the lowest
        position. Missing ratings do not contribute to the sum.
    """
    # Sort explicitly: taking the first ten rows is only the top 10 if the rows are ordered by position
    top_10 = df.sort_values('position').head(10)

    # Apply DCG formula
    return np.sum(top_10['rating'] / np.log2(top_10['position'] + 2))

def ndcg_at_10(df):
    """Compute the normalized discounted cumulative gain of the first ten results.

    The DCG of the ten results with the lowest position is divided by the DCG of an
    ideal ranking. The ideal ranking is built from the rows of ``df`` only: they are
    ordered by rating, best first, and the first ten are taken.

    Args:
        df: results of one query with a numeric, 0-based ``position`` column and a
            ``rating`` column.

    Returns:
        DCG@10 divided by the ideal DCG@10, or 0.0 if the ideal DCG is 0.
    """
    # Calculate DCG@10 of the ranking as returned
    dcg = dcg_at_10(df.sort_values('position').head(10))

    # Sort by rating in descending order to get the ideal ranking
    ideal_top_10 = df.sort_values('rating', ascending=False).head(10)

    # Give the ideal ranking fresh positions 0..n-1 - with the original positions the changed
    # sorting would have no effect. assign() returns a new frame, so no slice is written to.
    ideal_top_10 = ideal_top_10.assign(position=np.arange(ideal_top_10.shape[0]))

    # Calculate iDCG (ideal DCG) with the same formula as the DCG
    idcg = dcg_at_10(ideal_top_10)

    # Handle cases where iDCG is 0
    if idcg == 0:
        return 0.0

    # Normalize DCG
    return dcg / idcg

def precision_at_k(df, k=10):
    """Return the share of relevant results among the first ``k`` results.

    A result counts as relevant if its rating is greater than 1. The number of
    relevant results is divided by ``k``, independent of how many results there are.

    Args:
        df: results of one query with the columns ``position`` and ``rating``.
        k: number of top results to look at.
    """
    # Ratings of the k results with the lowest position
    ratings_in_top_k = df.sort_values('position')['rating'].iloc[:k]

    # Flag the relevant results, count them and relate the count to k
    is_relevant = ratings_in_top_k > 1
    return is_relevant.sum() / k

def ratio_of_ratings(df):
    """Return the fraction of rows in ``df`` that have a rating.

    Returns 0 for a frame without rows.
    """
    # count() counts the entries that are not missing
    rated = int(df['rating'].count())
    shown = len(df)
    return rated / shown if shown else 0

# Registry of the metrics to calculate. Every entry is a tuple of
# (metric name, function that is called with the results of one query, reference search).
# The third element is None for all metrics.
metrics = [
    ("dcg", dcg_at_10, None),
    ("ndcg", ndcg_at_10, None),
    ("prec@10", lambda results: precision_at_k(results, 10), None),
    ("ratio_of_ratings", lambda results: ratio_of_ratings(results), None),
]

## Calculate Metrics

Iterate over the queries in the query set, calculate the metrics dcg@10, ndcg@10 and precision@10 as well as the ratio of rated results, and store the results for every query and pipeline in a DataFrame.

In [27]:
# Split the merged results once into one frame per (pipeline, query) pair instead of
# filtering the complete frame again for every single metric value
results_by_run_and_query = {
    key: results
    for key, results in df_merged.groupby(['run', 'query_string'], sort=False)
}
# Stand-in for queries without any hit in a pipeline: an empty frame with the same columns,
# so these queries still get a metric value and are part of the averages
no_results = df_merged.iloc[0:0]

# Collect plain dicts and build the DataFrame once at the end
metric_rows = []
for config in df_hybrid_search_params.itertuples():
    pipeline_name = config.pipeline
    for m_name, m_function, ref_search in metrics:
        for query in df_query_idx.itertuples():
            results = results_by_run_and_query.get((pipeline_name, query.query), no_results)
            metric_rows.append({
                "query": query.query,
                "pipeline": pipeline_name,
                "metric": m_name,
                "value": m_function(results),
            })
df_metrics = pd.DataFrame(metric_rows, columns=["query", "pipeline", "metric", "value"])

In [28]:
df_merged

Unnamed: 0,query_id,query_string,product_id,position,relevance,run,rating
0,0,$30 roblox gift card not digital,B00F4CF4PU,0,1.000000,min_maxarithmetic_mean0.0,
1,0,$30 roblox gift card not digital,B07C438TMN,1,0.474647,min_maxarithmetic_mean0.0,
2,0,$30 roblox gift card not digital,B00XJZHJCA,2,0.462698,min_maxarithmetic_mean0.0,
3,0,$30 roblox gift card not digital,B00GAC1D2G,3,0.399549,min_maxarithmetic_mean0.0,
4,0,$30 roblox gift card not digital,B004RMK4BC,4,0.325092,min_maxarithmetic_mean0.0,
...,...,...,...,...,...,...,...
145195,219,yarn purple and pink,B079J4V4G8,5,0.311684,l2geometric_mean1.0,
145196,219,yarn purple and pink,B07HG28KLX,6,0.309372,l2geometric_mean1.0,
145197,219,yarn purple and pink,B08B9K99R9,7,0.277731,l2geometric_mean1.0,2.0
145198,219,yarn purple and pink,B00MNNISMY,8,0.276288,l2geometric_mean1.0,


In [29]:
df_metrics

Unnamed: 0,query,pipeline,metric,value
0,$30 roblox gift card not digital,min_maxarithmetic_mean0.0,dcg,0.666667
0,(fiction without frontiers),min_maxarithmetic_mean0.0,dcg,7.253440
0,100,min_maxarithmetic_mean0.0,dcg,0.000000
0,10x10x6 cake box without window,min_maxarithmetic_mean0.0,dcg,1.000000
0,15 inch light weight laptop that has lots of m...,min_maxarithmetic_mean0.0,dcg,0.000000
...,...,...,...,...
0,wooden stool,l2geometric_mean1.0,ratio_of_ratings,0.000000
0,woodwick wax melt,l2geometric_mean1.0,ratio_of_ratings,1.000000
0,world of warcraft anniversary collector's edition,l2geometric_mean1.0,ratio_of_ratings,0.700000
0,wowled,l2geometric_mean1.0,ratio_of_ratings,0.800000


## Calculate Metrics per Pipeline by Averaging the Query Metrics

In [30]:
# Average every metric over all queries of a pipeline, rounded to two decimals.
# Maps the metric name in df_metrics to the column name of the summary.
summary_columns = {'ndcg': 'avg_ndcg', 'dcg': 'avg_dcg', 'prec@10': 'avg_prec'}

metrics_results = []
for config in df_hybrid_search_params.itertuples():
    pipeline_name = config[5]
    pipeline_metrics = df_metrics[df_metrics['pipeline'] == pipeline_name]

    summary = {'pipeline': pipeline_name}
    for metric_name, column in summary_columns.items():
        values = pipeline_metrics[pipeline_metrics['metric'] == metric_name]['value']
        summary[column] = values.mean().round(2)
    metrics_results.append(summary)

df_metrics_per_pipeline = pd.DataFrame(metrics_results)

### Top five Pipelines by NDCG@10 Descending

In [31]:
df_metrics_per_pipeline.sort_values(by='avg_ndcg', ascending=False).head(5)

Unnamed: 0,pipeline,avg_ndcg,avg_dcg,avg_prec
65,l2geometric_mean1.0,0.61,4.93,0.34
10,min_maxarithmetic_mean1.0,0.61,4.93,0.34
39,l2arithmetic_mean0.6,0.61,4.94,0.34
40,l2arithmetic_mean0.7,0.61,4.95,0.34
41,l2arithmetic_mean0.8,0.61,4.94,0.34


### Top five Pipelines by DCG@10 Descending

In [32]:
df_metrics_per_pipeline.sort_values(by='avg_dcg', ascending=False).head(5)

Unnamed: 0,pipeline,avg_ndcg,avg_dcg,avg_prec
40,l2arithmetic_mean0.7,0.61,4.95,0.34
42,l2arithmetic_mean0.9,0.61,4.95,0.34
39,l2arithmetic_mean0.6,0.61,4.94,0.34
41,l2arithmetic_mean0.8,0.61,4.94,0.34
65,l2geometric_mean1.0,0.61,4.93,0.34


In [43]:
df_metrics_per_pipeline.sort_values(by='avg_dcg', ascending=False).head(60)

Unnamed: 0,pipeline,avg_ndcg,avg_dcg,avg_prec
40,l2arithmetic_mean0.7,0.61,4.95,0.34
42,l2arithmetic_mean0.9,0.61,4.95,0.34
39,l2arithmetic_mean0.6,0.61,4.94,0.34
41,l2arithmetic_mean0.8,0.61,4.94,0.34
65,l2geometric_mean1.0,0.61,4.93,0.34
10,min_maxarithmetic_mean1.0,0.61,4.93,0.34
21,min_maxharmonic_mean1.0,0.61,4.93,0.34
43,l2arithmetic_mean1.0,0.61,4.93,0.34
32,min_maxgeometric_mean1.0,0.61,4.93,0.34
54,l2harmonic_mean1.0,0.61,4.93,0.34


In [44]:
df_metrics_per_pipeline.sort_values(by='avg_dcg', ascending=False).tail(20)

Unnamed: 0,pipeline,avg_ndcg,avg_dcg,avg_prec
26,min_maxgeometric_mean0.4,0.56,4.5,0.31
16,min_maxharmonic_mean0.5,0.55,4.49,0.31
12,min_maxharmonic_mean0.1,0.55,4.49,0.31
19,min_maxharmonic_mean0.8,0.56,4.49,0.31
15,min_maxharmonic_mean0.4,0.56,4.49,0.31
13,min_maxharmonic_mean0.2,0.55,4.49,0.31
4,min_maxarithmetic_mean0.4,0.56,4.47,0.31
3,min_maxarithmetic_mean0.3,0.55,4.4,0.31
2,min_maxarithmetic_mean0.2,0.53,4.26,0.3
1,min_maxarithmetic_mean0.1,0.52,4.11,0.29


### Top five Pipelines by Precision@10 Descending

In [33]:
df_metrics_per_pipeline.sort_values(by='avg_prec', ascending=False).head(5)

Unnamed: 0,pipeline,avg_ndcg,avg_dcg,avg_prec
65,l2geometric_mean1.0,0.61,4.93,0.34
42,l2arithmetic_mean0.9,0.61,4.95,0.34
41,l2arithmetic_mean0.8,0.61,4.94,0.34
21,min_maxharmonic_mean1.0,0.61,4.93,0.34
40,l2arithmetic_mean0.7,0.61,4.95,0.34


In [34]:
df_merged.to_csv('results_and_ratings.csv')

In [35]:
# Use query (fiction without frontiers) for random query set, and 3 ring binder for most judged query set

query = '(fiction without frontiers)'
#query = '3 ring binder'

df_merged[(df_merged['query_string'] == query) & (df_merged['run'] == 'min_maxarithmetic_mean0.0')]

Unnamed: 0,query_id,query_string,product_id,position,relevance,run,rating
10,1,(fiction without frontiers),B07GJVWWWR,0,1.0,min_maxarithmetic_mean0.0,3.0
11,1,(fiction without frontiers),B08C5MQFCY,1,0.767349,min_maxarithmetic_mean0.0,3.0
12,1,(fiction without frontiers),B07XPDNFL2,2,0.711205,min_maxarithmetic_mean0.0,
13,1,(fiction without frontiers),B07PGQF8K2,3,0.686337,min_maxarithmetic_mean0.0,3.0
14,1,(fiction without frontiers),1787583325,4,0.319315,min_maxarithmetic_mean0.0,
15,1,(fiction without frontiers),B086689HYL,5,0.192198,min_maxarithmetic_mean0.0,3.0
16,1,(fiction without frontiers),178758402X,6,0.192198,min_maxarithmetic_mean0.0,
17,1,(fiction without frontiers),1787581780,7,0.182499,min_maxarithmetic_mean0.0,
18,1,(fiction without frontiers),B082VH3ZFS,8,0.016028,min_maxarithmetic_mean0.0,
19,1,(fiction without frontiers),B07QFX8XLY,9,0.001,min_maxarithmetic_mean0.0,


In [36]:
df_metrics[(df_metrics['query'] == query) & (df_metrics['pipeline'] == 'min_maxarithmetic_mean0.0')]

Unnamed: 0,query,pipeline,metric,value
0,(fiction without frontiers),min_maxarithmetic_mean0.0,dcg,7.25344
0,(fiction without frontiers),min_maxarithmetic_mean0.0,ndcg,0.943866
0,(fiction without frontiers),min_maxarithmetic_mean0.0,prec@10,0.4
0,(fiction without frontiers),min_maxarithmetic_mean0.0,ratio_of_ratings,0.4


### Use the following code to double check that the results and the judgements match

This requires knowing a query from the current query set, e.g. airpods for the most judged query set or (fiction without frontiers) for the random query set.

In [50]:
# Directory of the shopping queries dataset. Set the ESCI_DATA_DIR environment variable to point
# to your own copy instead of editing the notebook; without it the original local path is used.
DATA_DIR = os.environ.get(
    'ESCI_DATA_DIR',
    '/Users/danielwrigley/work/Testing/git_repos/esci-data/shopping_queries_dataset/',
)

In [51]:
df_examples = pd.read_parquet(DATA_DIR + '/shopping_queries_dataset_examples.parquet')

In [52]:
df_examples[(df_examples['query'] == query) & (df_examples['product_locale'] == 'us')]

Unnamed: 0,example_id,query,query_id,product_id,product_locale,esci_label,small_version,large_version,split
5172,5172,(fiction without frontiers),170,B08C5KY8V8,us,I,0,1,train
5173,5173,(fiction without frontiers),170,B08NLC5VJC,us,S,0,1,train
5174,5174,(fiction without frontiers),170,B08NKCZM5Y,us,E,0,1,train
5175,5175,(fiction without frontiers),170,B08LDTWY9L,us,I,0,1,train
5176,5176,(fiction without frontiers),170,B08CVS88GV,us,S,0,1,train
5177,5177,(fiction without frontiers),170,B08C5N9F3G,us,E,0,1,train
5178,5178,(fiction without frontiers),170,B08C5MQFCY,us,E,0,1,train
5179,5179,(fiction without frontiers),170,B088JLGCWB,us,E,0,1,train
5180,5180,(fiction without frontiers),170,B086689HYL,us,E,0,1,train
5181,5181,(fiction without frontiers),170,B07SNF45NV,us,E,0,1,train


In [66]:
# Look up the identifier of the selected query. The first match is taken by position (.iloc[0]):
# a label-based [1] only works for the query that happens to sit in row 1 of df_query_idx.
matching_ids = df_query_idx.loc[df_query_idx['query'] == query, 'idx']
if matching_ids.empty:
    raise ValueError(f"Query {query!r} is not part of the current query set")
query_id = str(matching_ids.iloc[0])
print(query_id)

1


In [67]:
df_ratings[df_ratings['query_id'] == query_id]

Unnamed: 0,query_string,product_id,rating,query_id
15,(fiction without frontiers),B08C5KY8V8,0,1
16,(fiction without frontiers),B08NLC5VJC,2,1
17,(fiction without frontiers),B08NKCZM5Y,3,1
18,(fiction without frontiers),B08LDTWY9L,0,1
19,(fiction without frontiers),B08CVS88GV,2,1
20,(fiction without frontiers),B08C5N9F3G,3,1
21,(fiction without frontiers),B08C5MQFCY,3,1
22,(fiction without frontiers),B088JLGCWB,3,1
23,(fiction without frontiers),B086689HYL,3,1
24,(fiction without frontiers),B07SNF45NV,3,1


In [68]:
df_merged[df_merged['query_string'] == query]['product_id']

10        B07GJVWWWR
11        B08C5MQFCY
12        B07XPDNFL2
13        B07PGQF8K2
14        1787583325
             ...    
143015    B07RG5TT93
143016    B07QCVBDRS
143017    B07KL6QTN1
143018    B07SM9NJCP
143019    B07L9JSMT8
Name: product_id, Length: 660, dtype: object

In [69]:
df_examples[(df_examples['query'] == query) & (df_examples['product_locale'] == 'us') & df_examples['product_id'].isin(df_merged[df_merged['query_string'] == query]['product_id'])]

Unnamed: 0,example_id,query,query_id,product_id,product_locale,esci_label,small_version,large_version,split
5178,5178,(fiction without frontiers),170,B08C5MQFCY,us,E,0,1,train
5180,5180,(fiction without frontiers),170,B086689HYL,us,E,0,1,train
5183,5183,(fiction without frontiers),170,B07PGQF8K2,us,E,0,1,train
5185,5185,(fiction without frontiers),170,B07L9JSMT8,us,E,0,1,train
5187,5187,(fiction without frontiers),170,B07GJVWWWR,us,E,0,1,train
