In [44]:
import polars as pl
import sys
import os
import numpy as np
import math
sys.path.append("../src/")

from evaluating_results import process_labels

In [45]:
# Short identifiers of the human labelers whose votes are combined.
voters = ["mo", "kk", "kp", "dr", "sr"]
# Label column contributed by each labeler, in the same order as `voters`.
vote_cols = [f"{initials}_label" for initials in voters]

In [46]:
# Directory that get_relevance_score reads the labeled parquet files from.
# It is a relative path, so it is resolved against the current working directory.
labeling_dir = "./labeled_results/"

In [47]:
    def voting_machine(row: pl.Series) -> int:
        """Collapse one row of labeler votes into a single relevance label.

        `row` must expose `.to_list()`; the resulting list is scanned for the
        labels 1, 2 and 3, and any other value is ignored.

        Returns:
            1 only when label 1 has strictly more votes than both 2 and 3;
            3 when label 3 has more votes than 2 and at least as many as 1;
            2 in every remaining case (an outright win for 2, any tie that
            involves 2, a three-way tie, or no recognised votes at all).
        """
        ballots = row.to_list()
        ones = ballots.count(1)
        twos = ballots.count(2)
        threes = ballots.count(3)

        # Label 1 must beat each of the other labels outright.
        if ones > max(twos, threes):
            return 1

        # Label 3 wins outright, or on a tie with label 1, provided it beats 2.
        if threes > twos and threes >= ones:
            return 3

        # Every other outcome resolves to 2.
        return 2

In [1]:
def ndcg_calc(array):
    """Return the normalised discounted cumulative gain (NDCG) of `array`.

    `array` holds relevance values in ranked order (index 0 is the top
    result). Each value contributes a gain of ``2**value - 1`` discounted by
    ``log2(position + 2)`` for its 0-based position. The same sum computed
    over the values sorted in descending order is the ideal DCG.

    Returns:
        ``dcg / (ideal_dcg + 1e-8)``. The small constant keeps the division
        defined when every entry is irrelevant (ideal DCG of zero), in which
        case the result is 0.
    """

    def discounted_gain(scores):
        # Accumulate one term at a time so the floating-point sum is built in rank order.
        total = 0
        for rank in range(len(scores)):
            total = total + (2**scores[rank] - 1) / math.log2(rank + 2)
        return total

    # Best possible ordering: the same values, largest first.
    ascending_order = np.argsort(array)
    best_first = np.take(array, ascending_order[::-1])

    actual_dcg = discounted_gain(array)
    ideal_dcg = discounted_gain(best_first)

    return actual_dcg / (ideal_dcg + 1e-8)

In [54]:
def get_relevance_score(query, config="00"):
    """Compute the NDCG of the retrieved results for one query.

    All parquet files in `labeling_dir` whose name starts with
    ``"labeled_" + config`` are loaded. Each file is filtered to rows whose
    ``query_text`` equals `query` and sorted by ``_distance``. The first file
    supplies the base frame; every other file contributes its own
    ``<labeler>_label`` column. The non-null votes from `vote_cols` are then
    combined with `voting_machine`, thresholded to binary relevance and
    scored with `ndcg_calc`.

    Args:
        query: Exact query text to score.
        config: Configuration tag embedded in the labeled file names
            (``labeled_<config>_<labeler>...``). Defaults to ``"00"``.

    Returns:
        The NDCG value produced by `ndcg_calc` for the thresholded ratings.

    Raises:
        FileNotFoundError: If no labeled file for `config` exists in
            `labeling_dir`.
    """

    def load_query_rows(filename):
        # Rows for this query only, ordered by retrieval distance.
        return (
            pl.read_parquet(os.path.join(labeling_dir, filename))
            .filter(pl.col("query_text") == query)
            .sort("_distance")
        )

    # List the same directory the files are read from, and sort the names so the
    # choice of base file does not depend on the arbitrary order of os.listdir.
    prefix = "labeled_" + config
    files = sorted(f for f in os.listdir(labeling_dir) if f.startswith(prefix))
    if not files:
        raise FileNotFoundError(
            f"No files starting with {prefix!r} found in {labeling_dir!r}"
        )

    # The first file provides the schema and the base rows.
    df = load_query_rows(files[0])

    # Merge the label column of every other labeler into the base frame.
    for f in files[1:]:
        # The labeler id is the first two characters of the third "_" field,
        # e.g. "labeled_00_sr.parquet" -> "sr".
        human = f.split("_")[2][:2]
        temp_df = load_query_rows(f)

        # Columns are attached positionally. NOTE(review): this assumes every
        # file holds the same result rows for the query, so that sorting by
        # "_distance" aligns them - confirm against the labeling pipeline.
        df = df.with_columns(temp_df[human + "_label"].alias(human + "_label"))

    # Collect the non-null votes of each row into a list column.
    df = df.with_columns(
        pl.struct([pl.col(column_name) for column_name in vote_cols])
        .map_elements(lambda s: [value for value in s.values()
                                 if value is not None])
        .alias("votes"))

    # Resolve each row's votes into a single rating.
    df = df.with_columns(
        df['votes'].map_elements(
            voting_machine, return_dtype=int).alias("relevance_rating"))

    # Threshold to binary relevance: rating 1 stays 1 (relevant),
    # ratings 2 and 3 become 0 (irrelevant).
    rating = df['relevance_rating'].replace(2, 0).replace(3, 0)

    return ndcg_calc(rating.to_numpy())

In [55]:
# File with one query per line.
data_file = 'Queries.txt'

# The context manager closes the file even if reading raises.
with open(data_file, 'r') as query_file:
    queries = query_file.readlines()

for raw_query in queries:
    # Drop the line terminator and any surrounding whitespace once, and use the
    # cleaned text for both scoring and printing so each result is on one line.
    query = raw_query.strip()
    if not query:
        # Blank lines are not queries.
        continue
    ndcg_rating = get_relevance_score(query)
    print("Average_ratings:", query, ndcg_rating)


Average_ratings: How do General Motors employees feel about RTO?
 0.8943280336381709
Average_ratings: What kind of benefits does GM offer?
 0.9492585259264136
Average_ratings: When should you apply for a promotion at GM?
 0.7872793455168332
Average_ratings: How much does a driver make with UPS?
 0.884382128669919
Average_ratings: How long is a typical UPS shift? OR Should I work a double shift at UPS?
 0.8620078631854978
Average_ratings: How do UPS employees feel about route cuts?
 0.8060196532917736
Average_ratings: Is it better to work at fedex express or fedex ground?
 0.6598852917528001
Average_ratings: How do FedEx employees feel about route cuts?
 0.8315395891100723
Average_ratings: How often do you get a raise at Lowes?
 0.7574028813651963
Average_ratings: Does your schedule get changed often at Lowes?
 0.8577292102802969
Average_ratings: What is the worst drink to make for Starbucks baristas?
 0.613041825618817
Average_ratings: Does Starbucks pay overtime?
 0.9261859184748538
A

  return np.divide(ndcg,ndcg_ideal)


Average_ratings: What do Kraken employees find frustrating in their day to day work?
 nan
Average_ratings: What benefits do Chase employees value most?
 0.411833840435435
Average_ratings: Do Chase employees see opportunities for promotion and professional growth at the company?
 0.2789429456511298
Average_ratings: What causes bank employees the most stress at work?
 0.8718378990745818
Average_ratings: What are some reasons that bank employees quit their jobs?
 0.6058809468824158
Average_ratings: Do Fidelity employees want to work remotely?
 0.38685280723454163
Average_ratings: Do GameStop employees feel valued by the company?
 0.7010838744717742
Average_ratings: What does a typical day look like when working for GameStop?
 0.49835051598734353
Average_ratings: Do CVS employees feel safe at work?
 0.8818065456597719
Average_ratings: What do CVS workers do if they notice theft?
 0.8969523259955808
