### Class refinement and dimensionality reduction
This notebook refines the computed embedding classes with respect to the hierarchical label annotations of the FSD50K dataset. Furthermore, it uses the probability classes and the user tags to find the correct class when the hierarchy and embedding labels are ambiguous. Then the dataset is reduced to an evenly distributed dataset for simplicity. After that, UMAP is applied to reduce the embedding dimension to two (x/y positions).

#### Class refinement
The following cells refine the sound classes based on user tags, embedding labels and probabilities.

In [1]:
import json
import os
import pacmap
import numpy as np
import pandas as pd
import utils
import random
import enchant
from treelib import Tree, Node

In [2]:
# Metadata inputs; every file is resolved relative to DATA_DIR.
# The order of the constants matches the order of the file names below.
DATA_DIR = 'data'
(
    YAMNET_FILE,
    ONTOLOGY_JSON,
    FSD50K_VOCABULARY,
    ONTOLOGY_TREE_L1,
    ONTOLOGY_TREE_L2,
    ONTOLOGY_TREE_L3,
) = (
    os.path.join(DATA_DIR, file_name)
    for file_name in (
        'eval_clips_info_FSD50K_analysis.json',
        'ontology.json',
        'vocabulary.csv',
        'ontology_layer_1.csv',
        'ontology_layer_2.csv',
        'ontology_layer_3.csv',
    )
)

In [3]:
# Clip metadata: transpose the JSON and expose the former index as the 'file_id' column
yamnet_df = (
    pd.read_json(YAMNET_FILE)
    .T
    .reset_index()
    .rename(columns={'index': 'file_id'})
)

# Ontology buckets for layers 1-3: read each CSV transposed, then convert it with utils.getOntoDict
tree_l1_df, tree_l2_df, tree_l3_df = (
    pd.read_csv(csv_path, index_col=0).T
    for csv_path in (ONTOLOGY_TREE_L1, ONTOLOGY_TREE_L2, ONTOLOGY_TREE_L3)
)
tree_l1_dict, tree_l2_dict, tree_l3_dict = map(
    utils.getOntoDict, (tree_l1_df, tree_l2_df, tree_l3_df)
)
# On duplicate keys the right-most operand (the deeper layer) wins
merged_dict = tree_l1_dict | tree_l2_dict | tree_l3_dict

# Name <-> id lookup tables for the ontology and for the FSD50K vocabulary
onto_df = pd.read_json(ONTOLOGY_JSON)
fsdk_df = pd.read_csv(FSD50K_VOCABULARY, header=None)
fsdk_name_to_id = {name: class_id for name, class_id in zip(fsdk_df[1], fsdk_df[2])}
onto_name_to_id = {name: class_id for name, class_id in zip(onto_df['name'], onto_df['id'])}
fsdk_id_to_name = {class_id: name for class_id, name in zip(fsdk_df[2], fsdk_df[1])}
onto_id_to_name = {class_id: name for class_id, name in zip(onto_df['id'], onto_df['name'])}

# Ontology layer 1 (root categories)
ontology_str = [
    'Human sounds',
    'Source-ambiguous sounds',
    'Animal',
    'Sounds of things',
    'Music',
    'Natural sounds',
    'Channel, environment and background',
]
ontology_ids = [onto_name_to_id[root_name] for root_name in ontology_str]

# IDs of every ontology class except the layer-1 roots (632 classes per the original note)
all_onto_classes_ids = list(set(onto_df['id']) - set(ontology_ids))
all_onto_classes = [onto_id_to_name[class_id] for class_id in all_onto_classes_ids]
layer2_classes = [onto_id_to_name[bucket_id] for bucket_id in tree_l2_dict]
layer3_classes = [onto_id_to_name[bucket_id] for bucket_id in tree_l3_dict]
layer1_2_classes = np.concatenate([ontology_str, layer2_classes])


In [4]:
'''
Define some local helper functions
'''

def getBucketKey(class_id):
    '''
    Find the layer-3 bucket that lists class_id.
    returns: key of the first entry of tree_l3_dict whose value contains class_id,
             or None when no entry matches
    '''
    matching_keys = (
        bucket_key
        for bucket_key, members in tree_l3_dict.items()
        if class_id in members
    )
    return next(matching_keys, None)

def checkBucketl2(class_id):
    '''
    Map class_id to its layer-2 bucket.
    returns: key of the first entry of tree_l2_dict whose value contains class_id,
             or class_id itself when no entry matches
    '''
    return next(
        (parent for parent, children in tree_l2_dict.items() if class_id in children),
        class_id,
    )

def checkBucketl3(class_id):
    '''
    Map class_id to its layer-3 bucket.
    returns: key of the first entry of tree_l3_dict whose value contains class_id,
             or class_id itself when no entry matches
    '''
    for parent in tree_l3_dict:
        if class_id in tree_l3_dict[parent]:
            return parent
    # No bucket lists this id: hand the id back unchanged
    return class_id

def userTagsDistances(value, bucket=[], flag=False):
    '''
    Distance between one tag and every candidate class, computed with enchant.utils.levenshtein.
    value:  the tag (or title) to compare
    bucket: candidates to compare against when flag is True (only read, never modified)
    flag:   False -> compare against all_onto_classes, True -> compare against bucket
    returns: list of distances, in the same order as the candidates
    '''
    candidates = all_onto_classes
    if flag:
        candidates = bucket
    return [enchant.utils.levenshtein(candidate, value) for candidate in candidates]

def refineClass(row):
    '''
    The refinement process as described in the thesis paper.

    Decision order (the first rule that fires determines the result):
      1. Predictions whose name is in layer1_2_classes are dropped.
      2. If the first remaining prediction has a score above 0.85, its id is returned.
      3. Otherwise each user tag (and the title) is lower-cased and tested as a
         substring of every layer-3 class name; the first hit is returned.
      4. Otherwise the remaining predictions are mapped to buckets with getBucketKey,
         the members of the most frequent bucket are looked up in merged_dict, and for
         every tag the member with the smallest distance (userTagsDistances) is taken.
         The most frequent member is returned, or the bucket key when every tag
         selected a different member.

    row: one record of yamnet_df; the keys 'yamnet_prob_classes', 'user_tags'
         and 'title' are read. Each entry of 'yamnet_prob_classes' is indexed as
         [class name, score].
    returns: class_id
    '''

    prob_init    = row['yamnet_prob_classes']
    user_tags    = np.array(row['user_tags'])
    title        = row['title']
    # The title is treated as one additional tag
    user_data    = np.append(user_tags, title)
    prob_classes = []


    # Remove first and second layer classifications [bad workaround]
    for idx, cl in enumerate(prob_init):
        if cl[0] not in layer1_2_classes:
            prob_classes.append(cl)

    # If the embedding was really sure..
    # NOTE(review): assumes prob_init is ordered by descending score so that index 0
    # is the top prediction -- TODO confirm. Raises IndexError when every prediction
    # was filtered out above.
    if prob_classes[0][1] > 0.85:
        return onto_name_to_id[prob_classes[0][0]]

    # Check user tags and title and receive nearest classes
    for tag in user_data:
        # Simple string comparison
        # NOTE(review): this is a substring test, so short tags also match inside
        # unrelated names (e.g. 'ice' is a substring of 'hiccup'), and an empty tag
        # matches every class name.
        ltag  = tag.lower()
        for x in layer3_classes:
            if ltag in x.lower():
                return onto_name_to_id[x]

    # Iterate over the remaining probability classes to get refined bucket of classes
    # NOTE(review): the original comment said "first 10"; no limit is applied here --
    # presumably the input already holds at most 10 entries, confirm.
    # getBucketKey returns None for classes that are in no layer-3 bucket.
    prob_buckets = []
    for c in prob_classes:
        class_d  = onto_name_to_id[c[0]]
        prob_key = getBucketKey(class_d)
        prob_buckets.append(prob_key)

    # Get nearest class for user tags based on Levenshtein distance
    user_classes = []
    # NOTE(review): the most frequent bucket key is wrapped in a one-element list before
    # being passed to getBucketKey; whether the membership test inside getBucketKey
    # matches depends on the container type produced by utils.getOntoDict -- TODO confirm.
    buck_key  = getBucketKey([max(prob_buckets,key=prob_buckets.count)])
    buck_vals = merged_dict[buck_key]

    for tag in user_data:
        distances = userTagsDistances(tag, buck_vals, True)
        minValIdx = utils.getMinValIndex(distances)
        nearClass = buck_vals[minValIdx]
        user_classes.append(nearClass)

    # If refined labels bucket has no majority class, return the parent class
    # (all entries distinct <=> no entry was selected by more than one tag)
    if len(user_classes) == len(set(user_classes)):
        return buck_key
    else:
        return max(user_classes,key=user_classes.count)

    
# Refine class for each file of the dataframe
#
# Every per-row list below receives exactly one entry per row. A class that is not
# listed in any bucket of a layer falls back to its own id (the same convention
# checkBucketl2 / checkBucketl3 use), so the lists always have the length of
# yamnet_df and the column assignment further down cannot fail with a length
# mismatch.

def _layer1Parent(class_id):
    '''
    Map class_id to its layer-1 bucket.
    returns: key of the first entry of tree_l1_dict whose value contains class_id,
             or class_id itself when no entry matches
    '''
    for key, item in tree_l1_dict.items():
        if class_id in item:
            return key
    return class_id


refined_classes_ids    = []
refined_classes        = []
refined_parent_id_l1   = []
refined_parent_id_l2   = []
refined_parent_id_l3   = []
unrefined_parent_id_l1 = []
unrefined_parent_id_l2 = []
unrefined_parent_id_l3 = []
for i, row in yamnet_df.iterrows():

    # Refine class
    ref_class_id = refineClass(row)
    ref_class    = onto_id_to_name[ref_class_id]
    refined_classes_ids.append(ref_class_id)
    refined_classes.append(ref_class)

    # Parent buckets of the refined class on each ontology layer
    refined_parent_id_l1.append(_layer1Parent(ref_class_id))
    refined_parent_id_l2.append(checkBucketl2(ref_class_id))
    refined_parent_id_l3.append(checkBucketl3(ref_class_id))

    # Unrefined labels for base model
    class_label = row['yamnet_class_id']
    unrefined_parent_id_l1.append(_layer1Parent(class_label))
    unrefined_parent_id_l2.append(checkBucketl2(class_label))
    unrefined_parent_id_l3.append(checkBucketl3(class_label))


# Put columns in dataframe
yamnet_df['refined_classes_ids']   = refined_classes_ids
yamnet_df['refined_classes']       = refined_classes
yamnet_df['layer1_id']             = refined_parent_id_l1
yamnet_df['layer2_id']             = refined_parent_id_l2
yamnet_df['layer3_id']             = refined_parent_id_l3
yamnet_df['unrefined_layer1_id']   = unrefined_parent_id_l1
yamnet_df['unrefined_layer2_id']   = unrefined_parent_id_l2
yamnet_df['unrefined_layer3_id']   = unrefined_parent_id_l3

yamnet_df.tail()

Unnamed: 0,file_id,title,user_tags,yamnet_class,yamnet_class_id,yamnet_prob_classes,yamnet_embedding,refined_classes_ids,refined_classes,layer1_id,layer2_id,layer3_id,unrefined_layer1_id,unrefined_layer2_id,unrefined_layer3_id
6838,345138,Closing_Drawer.wav,"[drawer, kitchen, close]",Silence,/m/028v0c,"[[Silence, 0.7932662367820741], [Inside, small...","[0.677103579044342, 0.23611395061016002, 0.034...",/m/0fqfqc,Drawer open or close,/t/dd00041,/t/dd00071,/m/0fqfqc,/t/dd00098,/m/028v0c,/m/028v0c
6839,171994,glass1.wav,"[glass, light-bulb, breaking]",Glass,/m/039jq,"[[Glass, 0.7569197416305541], [Clang, 0.496511...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",/m/07pc8lb,Breaking,/t/dd00098,/t/dd00099,/m/07pc8lb,/t/dd00041,/m/039jq,/m/039jq
6840,21314,sound-cough2.wav,"[cough, illness, noise]",Cough,/m/01b_21,"[[Cough, 0.9605462551116941], [Throat clearing...","[0.0, 0.0, 0.857789754867553, 0.0, 0.0, 0.0, 0...",/m/01b_21,Cough,/m/0dgw9r,/m/09hlz4,/m/01b_21,/m/0dgw9r,/m/09hlz4,/m/01b_21
6841,269337,Stirring Ice,"[foley, cup, straw, mic, ice, plastic, water, ...",Engine,/m/02mk9,"[[Engine, 0.15777900815010001], [Wood, 0.13997...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",/m/02p3nc,Hiccup,/m/0dgw9r,/m/0160x5,/m/02p3nc,/t/dd00041,/m/02mk9,/m/02mk9
6842,207119,Bus.aif,"[bus, travel, accelerate, doors]",Vehicle,/m/07yv9,"[[Vehicle, 0.6246568560600281], [Helicopter, 0...","[0.00147914304398, 0.000417939940234, 0.040898...",/m/0k4j,Car,/t/dd00041,/m/07yv9,/m/012f08,/t/dd00041,/m/07yv9,/m/07yv9


#### UMAP

Compute a supervised UMAP projection of the embeddings, using the layer-1 class label of each clip as the target: (1) with the refined labels and (2) with the unrefined labels.

In [5]:
import umap.umap_ as umap

# Supervised UMAP with the refined layer-1 labels as target
X = np.asarray(yamnet_df['yamnet_embedding'].tolist())
labels = utils.getColormap(yamnet_df['layer1_id'].tolist())
embedding = umap.UMAP(
    n_neighbors=40,
    random_state=25,
).fit_transform(X, y=labels)
yamnet_df['xypos'] = embedding.tolist()

# Write the dataframe, including the new 'xypos' column, to a JSON file
fname = 'eval_clips_info_FSD50K_analysis_refinement_umap_refined.json'
path = os.path.join(DATA_DIR, fname)
yamnet_df.to_json(path)

In [6]:
# Supervised UMAP with the unrefined layer-1 labels as target.
# This overwrites the 'xypos' column before the dataframe is written out.
X = np.asarray(yamnet_df['yamnet_embedding'].tolist())
labels = utils.getColormap(yamnet_df['unrefined_layer1_id'].tolist())
embedding = umap.UMAP(
    n_neighbors=40,
    random_state=25,
).fit_transform(X, y=labels)
yamnet_df['xypos'] = embedding.tolist()

# Write the dataframe, including the new 'xypos' column, to a JSON file
fname = 'eval_clips_info_FSD50K_analysis_refinement_umap_unrefined.json'
path = os.path.join(DATA_DIR, fname)
yamnet_df.to_json(path)