In [33]:
import numpy as np
import pandas as pd
from collections import defaultdict
import os

In [34]:
def conditional_mean(series):
    """Pool a group of scores into one value.

    Returns 0 when at least half of the entries in ``series`` are equal to
    zero; otherwise returns ``series.mean()``.
    """
    zero_count = (series == 0).sum()
    # Strictly fewer than half of the entries are zero -> plain average.
    if 2 * zero_count < len(series):
        return series.mean()
    return 0

In [35]:
# Directory that contains the choice_ranks_*.csv input files.
input_dir = 'C:/Users/User/NextoGroove/inputs/choice_ranks'

# Load all CSVs into one long DataFrame.
# Every file is opened through its full path rather than after an os.chdir(),
# so running this cell does not change the kernel's working directory.
files = ['choice_ranks_1.csv', 'choice_ranks_2.csv']
df_all = pd.concat(
    [pd.read_csv(os.path.join(input_dir, f), sep=';', encoding='utf8') for f in files],
    # Renumber the rows 0..n-1; without this the row labels of each file are
    # kept as-is and therefore repeat across files.
    ignore_index=True,
)

In [36]:
# Score pooling: average each score across the CSVs, but if a score is 0 in at least half of the CSVs, its pooled score is 0.

In [37]:
# Sorted list of the distinct Input values, plus their positions 0..n-1.
item_list = np.unique(df_all['Input'].to_numpy()).tolist()
index_list = list(range(len(item_list)))

# Two-way lookup between an Input value and its integer index.
mapping_dict = {item: position for position, item in enumerate(item_list)}
inv_mapping_dict = {position: item for position, item in enumerate(item_list)}

# Tag every row with the integer index of its Input value.
df_all['Input_index'] = df_all['Input'].map(mapping_dict)

In [38]:
# Pool the scores: one row per (Input_index, Density_Class) pair, with every
# score column reduced by conditional_mean.
pooled_scoring_df = (
    df_all
    .groupby(['Input_index', 'Density_Class'])
    .agg(dict.fromkeys(['Kick', 'Snare', 'Hihat', 'Tom', 'Cymbals'], conditional_mean))
    .reset_index()
)

In [39]:
# Show the pooled table: one row per (Input_index, Density_Class) group.
pooled_scoring_df

Unnamed: 0,Input_index,Density_Class,Kick,Snare,Hihat,Tom,Cymbals
0,0,0,0.0,0.0,4.5,3.5,4.5
1,0,1,0.0,3.0,4.5,2.5,2.0
2,0,2,0.0,3.0,4.5,1.5,0.0
3,1,0,4.0,0.0,4.5,3.5,4.0
4,1,1,3.0,0.0,3.5,1.5,3.0
5,1,2,0.0,0.0,3.0,2.5,2.0
6,2,0,3.5,4.0,3.0,0.0,3.0
7,2,1,3.0,3.0,2.5,0.0,2.5
8,2,2,2.5,2.0,2.0,0.0,2.0
9,3,0,1.5,0.0,0.0,3.5,4.5


In [41]:
# Translate each integer index back to its original Input string.
pooled_scoring_df['Input'] = pooled_scoring_df['Input_index'].map(inv_mapping_dict)

# Parse the comma-separated Input string into a list of ints.
pooled_scoring_df['Input_list'] = [
    list(map(int, raw_input.split(',')))
    for raw_input in pooled_scoring_df['Input'].to_list()
]

In [42]:
# Show the table again, now including the Input and Input_list columns.
pooled_scoring_df

Unnamed: 0,Input_index,Density_Class,Kick,Snare,Hihat,Tom,Cymbals,Input,Input_list
0,0,0,0.0,0.0,4.5,3.5,4.5,3536,"[35, 36]"
1,0,1,0.0,3.0,4.5,2.5,2.0,3536,"[35, 36]"
2,0,2,0.0,3.0,4.5,1.5,0.0,3536,"[35, 36]"
3,1,0,4.0,0.0,4.5,3.5,4.0,37383940,"[37, 38, 39, 40]"
4,1,1,3.0,0.0,3.5,1.5,3.0,37383940,"[37, 38, 39, 40]"
5,1,2,0.0,0.0,3.0,2.5,2.0,37383940,"[37, 38, 39, 40]"
6,2,0,3.5,4.0,3.0,0.0,3.0,414345474850,"[41, 43, 45, 47, 48, 50]"
7,2,1,3.0,3.0,2.5,0.0,2.5,414345474850,"[41, 43, 45, 47, 48, 50]"
8,2,2,2.5,2.0,2.0,0.0,2.0,414345474850,"[41, 43, 45, 47, 48, 50]"
9,3,0,1.5,0.0,0.0,3.5,4.5,424446,"[42, 44, 46]"


In [43]:
# Directory that receives the pooled result.
output_dir = 'C:/Users/User/NextoGroove/inputs'

# Write through an explicit path instead of os.chdir(), so exporting does not
# change the kernel's working directory as a side effect.
# NOTE: the Input_list column holds Python lists; in the CSV they are stored
# as text (e.g. "[35, 36]") and must be parsed again when the file is read.
pooled_scoring_df.to_csv(os.path.join(output_dir, 'pooled_choice_ranks.csv'), index=False)