In [1]:
import numpy as np
import pandas as pd
from collections import defaultdict
import os

In [2]:
def borda_pool(ranking_vectors):
    """
    Pool several rankings of the same candidates into one ranking with a tie-aware Borda count.

    For each vote a candidate earns ``n_candidates - place`` points, where place is its
    1-based position in that vote's ordering. Candidates that share a rank value within a
    vote all receive the mean of the places their group spans (two candidates tied for 2nd
    both count as place 2.5), so a tie never hands out more points than a strict ordering.
    NaN entries inside a vote mark unranked candidates and earn 0 points. Points are
    averaged over the votes and candidates are ranked by descending average.

    Args:
        ranking_vectors (List[np.array]): List of 1D arrays of candidate ranks
            (rank[i] = rank of candidate i, lower is better). All arrays must have the
            same length. Scalar NaN/None entries in the list are treated as missing
            votes and skipped.

    Returns:
        np.array: 1D integer array; element i is the pooled rank of candidate i
            (1 = best). It is always a permutation of 1..n_candidates: candidates whose
            pooled scores are exactly equal get consecutive ranks, higher index first.

    Raises:
        ValueError: If no usable vote is supplied, or the votes are not 1D arrays of
            one common length.
    """
    votes = []
    for r in ranking_vectors:
        vote = np.asarray(r, dtype=float)
        if vote.ndim == 0 and np.isnan(vote):
            # Placeholder for a missing vote (e.g. NaN padding); it carries no ranking.
            continue
        if vote.ndim != 1:
            raise ValueError("each ranking vector must be a 1D array of ranks")
        votes.append(vote)
    if not votes:
        raise ValueError("borda_pool needs at least one ranking vector")

    n_candidates = votes[0].shape[0]
    if any(vote.shape[0] != n_candidates for vote in votes):
        raise ValueError("all ranking vectors must rank the same number of candidates")

    score_accumulator = np.zeros(n_candidates)
    for vote in votes:
        ranked = ~np.isnan(vote)
        # np.unique returns the distinct rank values in ascending order, so tie group k
        # occupies the consecutive places first_place[k]..last_place[k].
        _, group_of, group_size = np.unique(
            vote[ranked], return_inverse=True, return_counts=True
        )
        last_place = np.cumsum(group_size)
        first_place = last_place - group_size + 1
        mean_place = (first_place + last_place) / 2.0

        scores = np.zeros(n_candidates)  # unranked (NaN) candidates keep 0 points
        scores[ranked] = n_candidates - mean_place[group_of]
        score_accumulator += scores

    avg_scores = score_accumulator / len(votes)
    # Convert to final ranking: higher score = better rank (rank 1 is best).
    # A stable sort makes the order of exactly-equal scores deterministic.
    pooled_rank = avg_scores.argsort(kind='stable')[::-1].argsort() + 1  # 1-based
    return pooled_rank


In [3]:
# Work from the folder that holds the exported rank tables so the CSVs can be
# opened by bare file name.
# NOTE(review): machine-specific absolute path plus a process-wide chdir — every
# later relative path also resolves against this folder.
input_dir = 'C:/Users/User/NextoGroove/inputs/choice_ranks'
os.chdir(input_dir)

# Read each semicolon-separated export and stack the rows into one long frame.
# Row labels are not renumbered by the concat.
files = ['choice_ranks_1.csv', 'choice_ranks_2.csv']
df_all = pd.concat(
    list(map(lambda csv_name: pd.read_csv(csv_name, sep=';', encoding='utf8'), files))
)

In [4]:
# Composite key for one item: the 'Input' value joined to its density class with '_'.
df_all['item'] = df_all['Input'].add('_').add(df_all['Density_Class'].astype(str))

In [5]:
# Columns holding the per-candidate ranks; their order here fixes each
# candidate's position inside every rank vector built from them.
candidates = [
    'Kick',
    'Snare',
    'Hihat',
    'Tom',
    'Cymbals',
]

In [6]:
# Collapse the candidate columns of each row into one list of ranks, leaving
# out missing values.
# NOTE(review): skipping NaN shortens the list and shifts the positions of the
# remaining candidates, so list index i no longer always means candidates[i] —
# confirm this is intended for rows with unranked candidates.
df_all['rank_list'] = df_all[candidates].apply(
    lambda ranks: ranks[ranks.notna()].tolist(),
    axis=1,
)

In [7]:
# Sorted distinct item keys, and a lookup from each key to its position in that list.
item_list = np.unique(df_all['item'].to_numpy()).tolist()
index_list = [position for position, _ in enumerate(item_list)]
mapping_dict = {label: position for position, label in enumerate(item_list)}

In [8]:
# Replace each item key by its integer position from mapping_dict.
df_all['item_index'] = df_all['item'].map(mapping_dict)

In [9]:
# Keep only the two columns needed for pooling and renumber the rows 0..n-1.
# drop=True discards the old row labels directly instead of materialising them
# as an 'index' column and deleting it again (despite the variable name,
# nothing is transposed here).
df_all_filtered_and_transposed = (
    df_all[['item_index', 'rank_list']]
    .reset_index(drop=True)
)

In [10]:
# Number the votes of each item 0, 1, 2, ... in row order; this counter becomes
# the row label of the wide table below.
df_all_filtered_and_transposed['row'] = (
    df_all_filtered_and_transposed
    .groupby('item_index')
    .cumcount()
)

# Wide layout: one column per item, one row per vote number, cells hold rank lists.
pivoted = df_all_filtered_and_transposed.pivot(
    index='row',
    columns='item_index',
    values='rank_list',
)

In [11]:
# Map each item index to the list of its votes, one 1D rank array per vote.
# pivot() pads items that have fewer votes than the longest column with NaN;
# dropna() removes that padding so only real rank lists are converted and
# passed on as votes.
voting_results_dict = {
    col: [np.array(val) for val in pivoted[col].dropna()]
    for col in pivoted.columns
}

In [12]:
# Pool the votes of every item into a single ranking, keyed by item index.
pooled_result = {
    item_index: borda_pool(votes)
    for item_index, votes in voting_results_dict.items()
}