In [94]:
import pandas as pd
import numpy as np
from pathlib import Path, PureWindowsPath

In [50]:
# Raw-GitHub URLs of the annotation CSV files read by the label-building cell.
# The second URL contains 'Annotated_v2', which that cell uses to recognise
# the v2 annotation file.
csv_url = [
    'https://raw.githubusercontent.com/iiscleap/Coswara-Exp/master/Annotated_Data/Annotated_responses.csv',
    'https://raw.githubusercontent.com/iiscleap/Coswara-Exp/master/Annotated_v2/Annotated_v2_ans.csv',
]

# Sound categories for which a label file is produced (one file per entry).
sound_categories = [
    'breathing-shallow',
    'breathing-deep',
    'cough-shallow',
    'cough-heavy',
    'vowel-o',
    'vowel-e',
    'vowel-a',
    'counting-normal',
    'counting-fast',
]

# Values that appear in the 'quality' column of the annotation CSVs.
audio_qualities = [
    'clean_audio',
    'noisy_audio',
    'bad_audio',
]

In [116]:
# Build one '<category>_past.csv' label file per sound category from the
# annotation CSVs listed in `csv_url`.
#
# Label written to the ' QUALITY' column for each '<id>_<category>' FILENAME:
#   2 -> quality 'clean_audio'
#   1 -> quality 'noisy_audio'
#   0 -> quality 'bad_audio', or the row's 'stage' differs from the category
# Rows with any other quality value are left out of the label file.
quality_to_label = {'clean_audio': 2, 'noisy_audio': 1, 'bad_audio': 0}

# Load every annotation CSV once, up front. The files do not depend on the
# sound category, so reading them inside the category loop only repeats the
# same download for each category.
annotation_frames = [(url, pd.read_csv(url)) for url in csv_url]

# loop on sound categories
for sound_category in sound_categories:
    # FILENAME -> label; a later row (or a later CSV) overwrites an earlier one
    data = {}
    for csv_file_name, df in annotation_frames:
        # the v2 annotation file suffixes its recording names with '_v2'
        if 'Annotated_v2' in csv_file_name:
            recording_name = sound_category + '_v2'
        else:
            recording_name = sound_category
        df_temp = df[df['recording'] == recording_name]

        # The rows are already filtered to this category's recording name, so
        # the recording type (name without the '_v2' suffix) is always
        # `sound_category`; compare 'stage' against it directly.
        for recording_id, stage, quality in zip(df_temp['id'], df_temp['stage'], df_temp['quality']):
            key = recording_id + '_' + sound_category
            if stage != sound_category:
                data[key] = 0
            elif quality in quality_to_label:
                data[key] = quality_to_label[quality]

    # convert dictionary to dataframe (column names, including the leading
    # space in ' QUALITY', are kept exactly as the label files expect them)
    data_dict = pd.DataFrame({'FILENAME': list(data.keys()), ' QUALITY': list(data.values())})

    n_total = len(data_dict)
    n_bad = int((data_dict[' QUALITY'] == 0).sum())
    # guard the percentage so a category with no matching rows does not raise
    # ZeroDivisionError; it is reported as 0.0 % of 0 instead
    pct_bad = np.round(n_bad / n_total * 100, 2) if n_total else 0.0
    print('% Bad in ' + sound_category, pct_bad, '%', 'of', n_total)
    # save dataframe as label file
    data_dict.to_csv(sound_category + '_' + 'past.csv', index=False)

% Bad in breathing-shallow 8.91 % of 1324
% Bad in breathing-deep 7.55 % of 1324
% Bad in cough-shallow 4.31 % of 1324
% Bad in cough-heavy 4.23 % of 1324
% Bad in vowel-o 7.23 % of 1328
% Bad in vowel-e 6.71 % of 1326
% Bad in vowel-a 6.11 % of 1326
% Bad in counting-normal 5.51 % of 1326
% Bad in counting-fast 6.18 % of 1326


In [117]:
# Combine the '<category>_past.csv' labels with an annotator's existing label
# file. Entries from the existing file override past entries that share the
# same FILENAME.
path_store_labels = Path('./LABELS/')
# init csv file name to combine; expected form '<category>_labels_<annotator>.csv'
existing_csv = 'vowel-o_labels_neeraj.csv'

# Parse the file name from its stem so the extension never ends up in the
# annotator name, and cap the split at two so an annotator name that itself
# contains '_' stays in one piece.
name_parts = Path(existing_csv).stem.split('_', 2)
# get sound category from file name
sound_category = name_parts[0]
# get annotator name from file name
annotator_name = name_parts[2]

# FILENAME -> label. The past CSV is read first and the existing label file
# second, so on a duplicate FILENAME the existing file's label wins.
data = {}
for label_file in (sound_category + '_past.csv', path_store_labels / existing_csv):
    df_1 = pd.read_csv(label_file)
    data.update(zip(df_1['FILENAME'], df_1[' QUALITY']))

# convert dictionary to dataframe
data_dict = pd.DataFrame({'FILENAME': list(data.keys()), ' QUALITY': list(data.values())})

n_total = len(data_dict)
n_bad = int((data_dict[' QUALITY'] == 0).sum())
# guard the percentage so two empty label files do not raise ZeroDivisionError
pct_bad = np.round(n_bad / n_total * 100, 2) if n_total else 0.0
print('% Bad in ' + sound_category, pct_bad, '%', 'of', n_total)
# save the updated label file
# NOTE(review): there is no '_' between 'labels_updated' and the annotator name,
# so this writes e.g. 'vowel-o_labels_updatedneeraj.csv'. Looks like a typo, but
# the name is kept as-is in case other code reads it -- confirm before changing.
data_dict.to_csv(sound_category + '_' + 'labels_updated' + annotator_name  + '.csv', index=False)


% Bad in vowel-o 10.12 % of 1768


In [121]:
# Debug inspection: show how the label file name splits on '_'.
# Note that the last piece still carries the '.csv' extension.
existing_csv.split('_')

['vowel-o', 'labels', 'neeraj.csv']