In [5]:
## Add this directory to the path and load our functions
import sys
sys.path.append("../src/")
from evaluating_results import process_labels

import polars as pl

## Load the merged labels and votes for the 00 config and pass them
## straight through process_labels.get_majority_vote
labeled_00_df = process_labels.get_majority_vote(
    process_labels.get_merged_labels_and_votes(config="00")
)

## Load the raw query results for the 00 config
raw_query_00 = pl.read_parquet(
    "../data_labeling/raw_results/query_results_config_00.parquet"
)


In [6]:
## Group the labeled dataframe by query_text. maintain_order=True keeps the
## groups in the order the queries appear in the input, so the table printed
## below comes out the same on every run (plain group_by gives no such guarantee).
grouped = labeled_00_df.group_by("query_text", maintain_order=True)

## A row counts as relevant when its relevance_rating equals 1
is_relevant = pl.col("relevance_rating") == 1

## Per query: the count of relevant results and the list of their reddit_name values
relevant_results = grouped.agg(
    num_relevant=pl.col("relevance_rating").filter(is_relevant).len(),
    relevant_names=pl.col("reddit_name").filter(is_relevant),
)

## Print the results
with pl.Config(tbl_rows=26, tbl_width_chars=180, fmt_str_lengths=180):
    print(relevant_results)

shape: (26, 3)
┌────────────────────────────────────────────────────────────────────────────────────────────┬──────────────┬──────────────────────────────────────────────┐
│ query_text                                                                                 ┆ num_relevant ┆ relevant_names                               │
│ ---                                                                                        ┆ ---          ┆ ---                                          │
│ str                                                                                        ┆ u32          ┆ list[str]                                    │
╞════════════════════════════════════════════════════════════════════════════════════════════╪══════════════╪══════════════════════════════════════════════╡
│ What is the worst drink to make for Starbucks baristas?                                    ┆ 11           ┆ ["t1_ju4gp30", "t1_jwgv0qq", … "t3_1afrjfo"] │
│ Do GameStop employees feel valued by the 

In [9]:
## Compute the Modified ExtRR score for each query.
##
## For a query with N relevant (labeled) results, every relevant result found
## in the distance-sorted raw results contributes:
##   * 1              when its 0-based rank j is inside the top N (j < N)
##   * 1/(j - N + 1)  otherwise
## The contributions are summed and divided by N; a query with no relevant
## results gets a score of 0.
## NOTE(review): at j == N the second term evaluates to 1/1, so the first
## position past the top N still earns full credit -- confirm that this is the
## intended definition of the modified metric.
query_scores = {}

## Loop over the queries that we have labeled results for
for labeled_row in relevant_results.iter_rows(named=True):
    ## Get the query text and number of relevant results for that query
    test_query = labeled_row["query_text"]
    test_num = labeled_row["num_relevant"]

    ## Build the membership lookup once per query instead of re-slicing
    ## relevant_results and scanning a Series for every raw result
    relevant_names = set(labeled_row["relevant_names"] or [])

    ## Get the raw results for that query, sorted by distance
    test_results = (
        raw_query_00
        .filter(pl.col("query_text") == test_query)
        .sort(by="_distance")
    )

    ## Accumulate the per-result contributions in rank order
    query_score = 0
    for j, reddit_name in enumerate(test_results["reddit_name"].to_list()):
        if reddit_name not in relevant_names:
            continue
        if j < test_num:
            query_score += 1
        else:
            query_score += 1/(j-test_num+1)

    ## Normalise by the number of relevant results; keep the value a float in
    ## both branches so the score column has one consistent dtype
    if test_num > 0:
        query_scores[test_query] = query_score/test_num
    else:
        query_scores[test_query] = 0.0

## Create a dataframe of the scores (dict views are materialised as lists)
score_00_df = pl.DataFrame({
    "query": list(query_scores.keys()),
    "score": list(query_scores.values()),
})

In [13]:
## Show every query's score, highest first
ranked_scores_00 = score_00_df.sort("score", descending=True)
with pl.Config(tbl_rows=26, tbl_width_chars=180, fmt_str_lengths=180):
    print(ranked_scores_00)

## Report the mean score across all scored queries
average_score_00 = score_00_df["score"].mean()
print("Average score for config 00:", average_score_00)

shape: (26, 2)
┌────────────────────────────────────────────────────────────────────────────────────────────┬──────────┐
│ query                                                                                      ┆ score    │
│ ---                                                                                        ┆ ---      │
│ str                                                                                        ┆ f64      │
╞════════════════════════════════════════════════════════════════════════════════════════════╪══════════╡
│ How do General Motors employees feel about RTO?                                            ┆ 1.0      │
│ Does your schedule get changed often at Lowes?                                             ┆ 0.89881  │
│ Does Starbucks pay overtime?                                                               ┆ 0.866667 │
│ How much does a driver make with UPS?                                                      ┆ 0.866117 │
│ What do CVS workers do if the