In [13]:
import json
import os
import copy
import numpy as np
import sys

sys.path.insert(0, '../Research/MTG/ModeTonicEstimation/ModeTonicEstimation/')

from Chordia import Chordia
import ModeFunctions as mf
import PitchDistribution
from fileOperations import getFileNamesInDir


In [14]:
# I/O configuration: where the dataset lives and which experiment to run.
# base_dir is the dataset root; the experiment file lists are looked up under
# base_dir/fileLists/<tradition_dir>/<num_class_dir> further down the notebook.
base_dir = '../sankalp/'
tradition_dir = 'hindustani'  # possible: makam, carnatic, hindustani
num_class_dir = '5_classes'  # possible sizes: 5, 10, 15, 20, 25, 30 (folder name is '<N>_classes')

# Folder that the per-recording paths from the experiment file are resolved against.
data_dir = os.path.join(base_dir, tradition_dir)


In [15]:
# define feature extraction code: compute the pcd's of all recordings
def getPCD(pitchfile, tonic_freq, source):
    """Build the pitch-class distribution (PCD) of a single recording.

    Parameters
    ----------
    pitchfile : str
        Path of a text file readable by ``np.loadtxt`` that holds the pitch
        track of the recording.
    tonic_freq : number
        Reference frequency handed to ``mf.hz_to_cent`` and ``mf.generate_pd``
        as ``ref_freq``.
    source : str
        Passed through unchanged to ``mf.generate_pd`` as ``source``.

    Returns
    -------
    Whatever ``mf.generate_pcd`` returns for the generated pitch distribution.
    """
    track = np.loadtxt(pitchfile)

    # A multi-column file is assumed to hold time in column 0 and pitch in
    # column 1; any further columns (labels etc.) are ignored.
    pitch_hz = track if track.ndim <= 1 else track[:, 1]

    # Express the pitch values in cents relative to the tonic.
    cents = mf.hz_to_cent(pitch_hz, ref_freq=tonic_freq)

    # NOTE(review): the Chordia instance created in this notebook is configured
    # with smooth_factor=15 while this function smooths with 10.0 -- confirm
    # that the mismatch between the two is intended.
    distribution = mf.generate_pd(
        cents,
        ref_freq=tonic_freq,
        smooth_factor=10.0,
        step_size=10.0,
        source=source,
    )
    return mf.generate_pcd(distribution)


In [16]:
# Experiment metadata initiated.
# `results` collects one entry per evaluated recording in each list; 'accuracy'
# is filled in once all recordings have been evaluated.
results = {'true_raag':[], 'estimated_raag':[], 'eval':[], 'recording_mbid':[], 'accuracy':0}
experiment_dir = os.path.join(base_dir, 'fileLists', tradition_dir, num_class_dir)
experiment_file = getFileNamesInDir(experiment_dir, keyword='*.json')[0][0]

# Read the experiment list inside a context manager so the file handle is
# closed deterministically instead of being left to the garbage collector.
with open(experiment_file, 'r') as experiment_fp:
    experiments = json.load(experiment_fp)

# Paths, PCDs, Tonics are loaded.
# Each experiment entry is indexed as: [0] recording MBID, [1] label,
# [2] path of the recording relative to data_dir (without extension).
audio_names = [data_dir+'/'+e[2] for e in experiments]
pitch_files = [name+'.pitch' for name in audio_names]
pcd_files = [name+'.pcd' for name in audio_names]
audio_mbids = [e[0] for e in experiments]
audio_tonics = [np.loadtxt(str(name+'.tonic')) for name in audio_names]
audio_labels = [e[1] for e in experiments]
unique_labels = set(audio_labels)

# instantiate objects
# NOTE(review): frame_rate presumably corresponds to a hop of 196 samples at
# 44.1 kHz -- confirm against the pitch extraction settings.
che = Chordia(step_size=10, smooth_factor=15,
              chunk_size=0, threshold=0.5,
              overlap=0, frame_rate=196.0/44100)

In [17]:
# PCD extraction.
# For every recording, reuse the PCD cached next to the audio if it exists;
# otherwise compute it from the pitch track and tonic and cache it.
# NOTE: a cached .pcd file is used as-is, so it must be deleted by hand if the
# feature extraction parameters in getPCD change.
pcds = []
for pf, tf, am, al, pcdf in zip(pitch_files, audio_tonics, audio_mbids, audio_labels, pcd_files):
    if os.path.isfile(pcdf):
        pcd = PitchDistribution.load(pcdf)
    else:
        pcd = getPCD(pf, tf, am)
        pcd.save(pcdf)

    pcds.append({'pcd': pcd, 'label': al, 'audio_mbid': am})

In [18]:
# Leave-one-out evaluation: each recording is estimated once, using models
# built from the PCDs of all the other recordings.

# Start from empty per-recording lists so that re-running this cell does not
# append a second round of evaluations to the previous one.
for result_key in ('true_raag', 'estimated_raag', 'eval', 'recording_mbid'):
    results[result_key] = []

num_recordings = len(audio_names)
for idx in range(num_recordings):
    print(str(idx)+'/'+str(num_recordings))

    # the held-out (test) recording
    test_pitch_file = pitch_files[idx]
    test_mbid = audio_mbids[idx]
    test_tonic = audio_tonics[idx]
    test_label = audio_labels[idx]

    # models are stored per held-out recording, so folds never share files
    model_save_dir = os.path.join(experiment_dir, 'chordia', test_mbid)

    # Training: group the PCDs of every other recording by label.
    # The PCD objects are grouped by reference rather than deep-copied; this
    # assumes che.save_model only reads them -- TODO confirm.
    models = dict()
    for pcd_idx, pcd_dict in enumerate(pcds):
        if pcd_idx == idx:
            continue
        models.setdefault(pcd_dict['label'], []).append(pcd_dict['pcd'])

    # Save each model exactly once, after its PCD list is complete. Saving
    # only when a PCD is appended to an existing list would skip every label
    # that has a single training recording and rewrite the others repeatedly.
    for label, label_pcds in models.items():
        che.save_model(label_pcds, model_save_dir, label)

    # Testing
    res = che.mode_estimate(test_pitch_file, test_tonic, list(unique_labels),
                            mode_dir=model_save_dir, distance_method='bhat',
                            metric='pcd')

    results['true_raag'].append(test_label)
    results['estimated_raag'].append(res)
    results['eval'].append(int(res == test_label))
    results['recording_mbid'].append(test_mbid)

0/50
1/50
2/50
3/50
4/50
5/50
6/50
7/50
8/50
9/50
10/50
11/50
12/50
13/50
14/50
15/50
16/50
17/50
18/50
19/50
20/50
21/50
22/50
23/50
24/50
25/50
26/50
27/50
28/50
29/50
30/50
31/50
32/50
33/50
34/50
35/50
36/50
37/50
38/50
39/50
40/50
41/50
42/50
43/50
44/50
45/50
46/50
47/50
48/50
49/50


In [19]:
# Overall accuracy: fraction of recordings whose estimated label equals the
# true label (the 'eval' list holds 1 for a match and 0 otherwise).
results['accuracy'] = 1.0 * sum(results['eval']) / len(results['eval'])
print(results['accuracy'])

# Write the results through a context manager so the file is flushed and
# closed even if serialisation fails part-way.
results_file = tradition_dir+'_'+num_class_dir+'_results.json'
with open(results_file, 'w') as results_fp:
    json.dump(results, results_fp)

0.98
