In [1]:
import sys
sys.path.insert(0, '../ModeTonicEstimation/')

from fileoperations.fileoperations import getFileNamesInDir

import json
import os
import matplotlib.pyplot as plt
import copy
import numpy as np
from ModeTonicEstimation.Chordia import Chordia
from ModeTonicEstimation import Evaluator as ev
import ModeTonicEstimation.ModeFunctions as mf
from ModeTonicEstimation import PitchDistribution


In [2]:
# I/O configuration: dataset location and the experiment to run
base_dir = '../../test_datasets/RagaRecognition_journal'
tradition_dir = 'hindustani'  # possible: makam, carnatic, hindustani
num_class_dir = '30_classes'  # possible: 5, 10, 15, 20, 25, 30

# per-recording files (.pitch, .pcd, .tonicFine) are resolved against this folder
data_dir = os.path.join(base_dir, tradition_dir)

# folder holding the json experiment definition; the trained models are
# later written below this folder as well
experiment_dir = os.path.join(base_dir, 'fileLists', tradition_dir, num_class_dir)

# The first element returned by getFileNamesInDir is indexed for the file path.
# Fail with a readable message instead of a bare IndexError when no
# experiment file is present.
experiment_files = getFileNamesInDir(experiment_dir, keyword='*.json')[0]
if len(experiment_files) == 0:
    raise IOError('No experiment file (*.json) found in ' + experiment_dir)
experiment_file = experiment_files[0]


In [3]:
# Load the experiment definition. Each entry is indexed below as:
#   e[0] -> recording mbid, e[1] -> class label, e[2] -> path relative to data_dir
# The context manager makes sure the file handle is closed right after parsing.
with open(experiment_file, 'r') as experiment_handle:
    experiments = json.load(experiment_handle)

# Paths of each recording (without extension) and of its derived files
audio_names = [os.path.join(data_dir, e[2]) for e in experiments]
pitch_files = [name + '.pitch' for name in audio_names]
pcd_files = [name + '.pcd' for name in audio_names]

# Metadata aligned index-by-index with the path lists above
audio_mbids = [e[0] for e in experiments]
audio_labels = [e[1] for e in experiments]

# The tonic of each recording is read from its "<recording>.tonicFine" file
audio_tonics = [np.loadtxt(name + '.tonicFine') for name in audio_names]

unique_labels = set(audio_labels)


In [4]:
# define feature extraction code: compute the pcd's of all recordings
def getPCD(pitchfile, tonic_freq, source, smooth_factor=10.0, step_size=10.0):
    """Compute the pitch-class distribution (PCD) of a single recording.

    Parameters
    ----------
    pitchfile : str
        Path of a text file readable by ``np.loadtxt``. If the loaded array
        has more than one dimension, the second column is taken as the pitch
        (the first is assumed to be time and the rest labels etc.).
    tonic_freq : float
        Reference frequency passed as ``ref_freq`` to both the Hz-to-cent
        conversion and the distribution generation.
    source : str
        Forwarded unchanged to ``mf.generate_pd`` as ``source``.
    smooth_factor : float, optional
        Forwarded to ``mf.generate_pd``. Defaults to 10.0, the previously
        hard-coded value.
    step_size : float, optional
        Forwarded to ``mf.generate_pd``. Defaults to 10.0, the previously
        hard-coded value.

    Returns
    -------
    The object returned by ``mf.generate_pcd`` for the generated distribution.
    """
    pitch_track = np.loadtxt(pitchfile)
    if pitch_track.ndim > 1:  # assume the first col is time, the second is pitch and the rest is labels etc
        pitch_track = pitch_track[:, 1]

    # The pitch track is converted to cents with respect to the tonic
    pitch_cent = mf.hz_to_cent(pitch_track, ref_freq=tonic_freq)

    # The pitch distribution of the recording is generated and folded into a PCD
    pitch_distribution = mf.generate_pd(pitch_cent, ref_freq=tonic_freq,
                                        smooth_factor=smooth_factor,
                                        step_size=step_size,
                                        source=source)
    return mf.generate_pcd(pitch_distribution)


In [5]:
# PCD extraction: load the cached "<recording>.pcd" when it exists, otherwise
# compute the PCD from the pitch file and cache it for the next run.
pcds = []
for i, (pf, tf, am, al, pcdf) in enumerate(zip(pitch_files, audio_tonics, audio_mbids,
                                               audio_labels, pcd_files)):
    # single-argument, parenthesized print gives the same output on Python 2 and 3
    print(str(i) + " Getting PCD of " + am)

    if os.path.isfile(pcdf):
        pcd_temp = PitchDistribution.load(pcdf)
    else:
        pcd_temp = getPCD(pf, tf, am)
        pcd_temp.save(pcdf)

    pcds.append({'pcd': pcd_temp, 'label': al, 'audio_mbid': am})
    

0 Getting PCD of 7ed38940-f5e5-46cb-897d-ed408ddf8c85
1 Getting PCD of 9117fa28-df28-4bb8-a0e3-b70e60fed262
2 Getting PCD of 04d506c4-df9e-410c-a3e2-037abffe7b9c
3 Getting PCD of 30261aa8-aa4d-4f66-a2d4-b087668b00a8
4 Getting PCD of f5689624-b636-4a6c-831f-53d1678d9bb6
5 Getting PCD of 3766bcb1-57b6-4c16-9196-c2ed94d80331
6 Getting PCD of 3d0539f5-2d30-422f-9647-b3e14fd9787d
7 Getting PCD of c53396aa-f369-4875-8c61-3f29787da6f5
8 Getting PCD of b22390c8-88bb-4e1f-9e23-bd3e237043ba
9 Getting PCD of 0017d78e-a39d-4151-8d93-826ab2eb00d8
10 Getting PCD of 3fc7b071-68c7-4a5a-bed5-7da6301931f4
11 Getting PCD of f149cad0-c5af-48d6-a726-ef3badb0169a
12 Getting PCD of f5d3cea0-50ad-461f-9aa2-20f2434ae1e8
13 Getting PCD of 980b4a00-6e7c-41c1-81ee-6b021d237343
14 Getting PCD of 99f1a318-f246-40cb-86b9-3639344be94d
15 Getting PCD of 64587462-6f9f-4439-8b48-6375a4fb1f6a
16 Getting PCD of 4a507000-6c5a-4c8b-8922-1bfc29cba805
17 Getting PCD of 52944bc0-4639-4760-be76-f2720a20be81
18 Getting PCD of 2d

In [None]:
# Instantiate the Chordia estimator that is used for training and testing below.
# NOTE(review): getPCD computes the stored PCDs with smooth_factor=10.0, whereas
# the estimator is configured with smooth_factor=15 - confirm the mismatch is intended.
che = Chordia(
    step_size=10,
    smooth_factor=15,
    chunk_size=0,
    threshold=0.5,
    overlap=0,
    frame_rate=196.0/44100,  # presumably a 196-sample hop at 44100 Hz - TODO confirm
)


In [None]:
# Leave-one-out experiment: each recording is held out once, a model per label
# is built from the PCDs of all remaining recordings, and the label of the
# held-out recording is estimated from its pitch file.
results = []
for idx in range(len(audio_names)):
    # the held-out (test) recording
    test_pitch_file = pitch_files[idx]
    test_mbid = audio_mbids[idx]
    test_tonic = audio_tonics[idx]
    test_label = audio_labels[idx]

    # Everything except the test recording is used for training. Only the
    # training PCDs are deep-copied, so that nothing done with them in this
    # fold can affect the distributions used in the other folds.
    training_pcds = copy.deepcopy(pcds[:idx] + pcds[idx + 1:])

    model_save_dir = os.path.join(experiment_dir, 'chordia', test_mbid)

    # Training: group the training PCDs by label (raga)
    models = dict()
    for pcd_dict in training_pcds:
        models.setdefault(pcd_dict['label'], []).append(pcd_dict['pcd'])

    # Save every model exactly once, after it is complete. Saving inside the
    # grouping loop skipped labels with a single training recording and
    # re-saved the other labels on every append.
    # NOTE(review): a label whose only recording is the test recording still
    # has no model in this fold although it is listed in mode_names - confirm
    # how che.estimate handles that.
    for label, label_pcds in models.items():
        che.save_model(label_pcds, model_save_dir, label)

    # Testing
    res = che.estimate(test_pitch_file, mode_names=list(unique_labels),
                       mode_dir=model_save_dir, est_mode=True,
                       distance_method='bhat',
                       metric='pcd', tonic_freq=test_tonic)

    # keep the outcome of the fold so that it can be evaluated afterwards
    results.append({'audio_mbid': test_mbid, 'label': test_label,
                    'estimated': res[0]})

    # EVALUATION. TODO: ALTUG
    # save the files according to the file that will be sent by sankalp
    print(', '.join([test_mbid, test_label, res[0]]))
    print(res[0] == test_label)

7ed38940-f5e5-46cb-897d-ed408ddf8c85, 2ed9379f-14c9-49af-8e4d-f9b63e96801f, 063ea5a0-23b1-4bb5-8537-3d924fe8ebb3
False
9117fa28-df28-4bb8-a0e3-b70e60fed262, 2ed9379f-14c9-49af-8e4d-f9b63e96801f, 2ed9379f-14c9-49af-8e4d-f9b63e96801f
True
04d506c4-df9e-410c-a3e2-037abffe7b9c, 2ed9379f-14c9-49af-8e4d-f9b63e96801f, 2ed9379f-14c9-49af-8e4d-f9b63e96801f
True
30261aa8-aa4d-4f66-a2d4-b087668b00a8, 2ed9379f-14c9-49af-8e4d-f9b63e96801f, 2ed9379f-14c9-49af-8e4d-f9b63e96801f
True
f5689624-b636-4a6c-831f-53d1678d9bb6, 2ed9379f-14c9-49af-8e4d-f9b63e96801f, 62b79291-73b0-4c77-a353-0f8bc6ed8362
False
3766bcb1-57b6-4c16-9196-c2ed94d80331, 2ed9379f-14c9-49af-8e4d-f9b63e96801f, 2ed9379f-14c9-49af-8e4d-f9b63e96801f
True
3d0539f5-2d30-422f-9647-b3e14fd9787d, 2ed9379f-14c9-49af-8e4d-f9b63e96801f, 2ed9379f-14c9-49af-8e4d-f9b63e96801f
True
c53396aa-f369-4875-8c61-3f29787da6f5, 2ed9379f-14c9-49af-8e4d-f9b63e96801f, 2ed9379f-14c9-49af-8e4d-f9b63e96801f
True
b22390c8-88bb-4e1f-9e23-bd3e237043ba, 2ed9379f-14c9-49

In [None]:
# evaluation
# Instantiate the Evaluator class of the project's Evaluator module (imported as `ev`).
# The only visible use of this object is in the commented-out legacy cell below
# (evaluator.mode_evaluate).
evaluator = ev.Evaluator()


In [None]:
# # get the data into appropriate format
# [pitch_paths, pitch_base, pitch_fname] = fo.getFileNamesInDir(data_dir, '.pitch')
# tonic_paths = [os.path.splitext(p)[0] + '.tonic' for p in pitch_paths]
# mode_labels = []
# for p in pitch_base:
#     for r in modes:
#         if r in p:
#             mode_labels.append(r)
            

In [None]:
# # make the data a single dictionary for housekeeping
# data = []
# for p, f, t, r in zip(pitch_paths, pitch_fname, tonic_paths, mode_labels):
#     data.append({'file':p, 'name':os.path.splitext(f)[0],
#                'tonic':float(np.loadtxt(t)), 'mode':r})


In [None]:
# # experiments
# results = dict()
# for key, fold in folds.iteritems():
#     # Training 
#     print key
#     models = dict()
#     for cur_mode in modes:
#         [file_list, tonic_list] = zip(*[(rec['file'], rec['tonic']) for rec in fold['train']
#                                         if rec['mode'] == cur_mode])
#         models[cur_mode] = che.train(cur_mode, file_list, tonic_list, metric='pcd', 
#                                      save_dir = os.path.join(train_savefolder, key))
                                     
#     # Raag Recognition
#     results[key] = []
#     for rec in fold['test']:
#         rec['pitch'] = np.loadtxt(rec['file'])[:,1]
#         res = che.estimate(rec['pitch'], mode_names=modes, 
#                            mode_dir=os.path.join(train_savefolder, key), 
#                            est_tonic=False, est_mode=True, distance_method='bhat',
#                            metric='pcd', ref_freq=rec['tonic'])[0]

#         # evaluate
#         results[key].append(evaluator.mode_evaluate(rec['file'], res, rec['mode']))

#     print key + ": " + str(100*np.mean([r['mode_eval'] for r in results[key]])) + '%'
    