In [6]:
import polars as pl
import sys
sys.path.append("../src/")

## For file handling
import os
import numpy as np
## For warnings
import warnings
import math
## Loading our modules
from data_processing import preprocess, chunk, vectorize
from evaluating_results import process_labels

In [17]:
def ndcg_calc(array):
    """Compute the normalized discounted cumulative gain (NDCG) of a ranked list.

    The item at 0-based rank ``j`` contributes a gain of ``2**rel - 1``
    discounted by ``log2(j + 2)``. The ideal DCG is the DCG of the same
    relevance values re-ordered from highest to lowest, so the normalization
    only considers the items present in ``array``.

    Parameters
    ----------
    array : 1-D array-like of numbers
        Relevance scores, in the order in which the results were ranked.

    Returns
    -------
    float
        ``DCG / ideal DCG``. Returns ``0.0`` when the ideal DCG is zero
        (empty input, or every entry irrelevant).
    """
    relevance = np.asarray(array, dtype=float)
    gains = np.power(2.0, relevance) - 1.0
    # Rank j (0-based) is discounted by log2(j + 2); rank 0 is therefore undiscounted.
    discounts = np.log2(np.arange(2, relevance.size + 2))

    dcg = np.sum(gains / discounts)
    # Gain is monotonic in relevance, so sorting the gains gives the ideal ordering.
    dcg_ideal = np.sum(np.sort(gains)[::-1] / discounts)

    # Guard the division explicitly instead of adding an epsilon to the
    # denominator: the epsilon skews every score slightly (a perfect ranking
    # would come out just below 1.0).
    if dcg_ideal == 0:
        return 0.0
    return float(dcg / dcg_ideal)

In [18]:
def check_relevance_get_ndcg(query_text,df_new,df_labeled):
    """Compute the NDCG of one query's new-config results against the labels.

    Parameters
    ----------
    query_text : str
        The query whose results are scored.
    df_new : pl.DataFrame
        Results of the new config; must contain the ``query_text`` and
        ``reddit_name`` columns. Rows are scored in the order they appear;
        this function does not sort them, so they are presumably already in
        rank order (verify against the producer of the parquet file).
    df_labeled : pl.DataFrame
        Labeled results; must contain ``query_text``, ``reddit_name`` and
        ``relevance_rating`` columns.

    Returns
    -------
    float
        The value returned by ``ndcg_calc`` for the 0/1 relevance scores.
    """
    # Thresholding: only a rating of 1 counts as relevant. Every other rating
    # is discarded, which is what remapping 2 and 3 to 0 and then keeping
    # only 1 amounted to.
    relevant_names = (
        df_labeled
        .filter(pl.col("relevance_rating") == 1)
        .filter(pl.col("query_text") == query_text)
        .get_column("reddit_name")
        .to_list()
    )

    # A retrieved reply scores 1 if its "reddit_name" appears among the
    # relevant labeled names for this query, otherwise 0.
    df_scored = (
        df_new
        .filter(pl.col("query_text") == query_text)
        .with_columns(
            pl.col("reddit_name")
            .is_in(relevant_names)
            .cast(pl.Int32)
            .alias("relevance_score")
        )
    )
    return ndcg_calc(df_scored["relevance_score"].to_numpy())

In [21]:
def calc_ndgc_score(df_new,df_labeled):
    """Print and return the NDCG score of every query found in ``df_new``.

    Parameters
    ----------
    df_new : pl.DataFrame
        Results of the new config; must contain a ``query_text`` column.
    df_labeled : pl.DataFrame
        Labeled results, passed through to ``check_relevance_get_ndcg``.

    Returns
    -------
    tuple[list, list]
        ``(queries, ndcg_scores)``: the distinct queries in order of first
        appearance and the NDCG score of each, index-aligned. In a notebook,
        end the calling line with ``;`` if the echoed return value is unwanted.
    """
    # maintain_order=True keeps the queries in first-appearance order so the
    # printed report and the returned lists are reproducible between runs.
    queries = df_new["query_text"].unique(maintain_order=True).to_list()
    ndcg_scores = []
    for query in queries:
        ndcg_rating = check_relevance_get_ndcg(query,df_new,df_labeled)
        ndcg_scores.append(ndcg_rating)
        print("Average_ratings:", query, ndcg_rating)
    return queries, ndcg_scores

In [22]:
# Retrieval results produced by the new config.
df_new_config = pl.read_parquet("../temp_query_data/queries_demo01query_demo.parquet")

# Majority-vote labels for configs "00" and "02" (loaded in that order),
# then stacked into a single labeled dataframe.
df_labeled_00, df_labeled_02 = (
    process_labels.get_majority_vote(
        process_labels.get_merged_labels_and_votes(config=config_id)
    )
    for config_id in ("00", "02")
)
df_labeled_config = pl.concat([df_labeled_00, df_labeled_02])

# Report the per-query NDCG of the new config against the labels.
calc_ndgc_score(df_new_config, df_labeled_config)

Average_ratings: What is your favorite thing about working for Starbucks? 0.0
Average_ratings: When should you apply for a promotion at GM? 0.7816401445175513
Average_ratings: What are some reasons that bank employees quit their jobs? 0.0
Average_ratings: How often do you get a raise at Lowes? 0.0
Average_ratings: Does Starbucks pay overtime? 0.0
Average_ratings: How do General Motors employees feel about RTO? 0.8847483822948525
Average_ratings: How much does a driver make with UPS? 0.0
Average_ratings: Do Fidelity employees want to work remotely? 0.0
Average_ratings: What causes bank employees the most stress at work? 0.0
Average_ratings: What job perks for Whole Foods employees value most? 0.0
Average_ratings: Is it better to work at fedex express or fedex ground? 0.0
Average_ratings: Does your schedule get changed often at Lowes? 0.0
Average_ratings: What do CVS workers do if they notice theft? 0.0
Average_ratings: How do FedEx employees feel about route cuts? 0.0
Average_ratings: W