In [1]:
import sys
sys.path.insert(0, '../ModeTonicEstimation/')

from fileoperations.fileoperations import getFileNamesInDir

import json
import os
import matplotlib.pyplot as plt
import copy
import numpy as np
from ModeTonicEstimation import Chordia
from ModeTonicEstimation import Evaluator as ev
import ModeTonicEstimation.ModeFunctions as mf


In [2]:
# I/O configuration: dataset location and the experiment to run
base_dir = '../../test_datasets/RagaRecognition_journal'
tradition_dir = 'hindustani'  # possible: makam, carnatic, hindustani
num_class_dir = '30_classes'  # possible: 5, 10, 15, 20, 25, 30 (presumably as '<N>_classes' -- TODO confirm)

data_dir = os.path.join(base_dir, tradition_dir)

experiment_dir = os.path.join(base_dir, 'fileLists', tradition_dir, num_class_dir)

# The first element returned by getFileNamesInDir is indexed as the list of
# matched paths; fail with a readable message instead of a bare IndexError
# when the experiment directory holds no JSON file.
experiment_paths = getFileNamesInDir(experiment_dir, keyword='*.json')[0]
if len(experiment_paths) == 0:
    raise IOError('No experiment file (*.json) found in ' + experiment_dir)
experiment_file = experiment_paths[0]


In [3]:
# Read the experiment definition; the context manager closes the file handle
# as soon as parsing is done instead of leaving it to the garbage collector.
with open(experiment_file, 'r') as experiment_handle:
    experiments = json.load(experiment_handle)

# Each experiment entry is indexed positionally below:
#   e[0] -> recording mbid, e[1] -> class label, e[2] -> path relative to data_dir
audio_files = [os.path.join(data_dir, e[2]) for e in experiments]
pitch_files = [os.path.join(data_dir, e[2] + '.pitch') for e in experiments]
audio_mbids = [e[0] for e in experiments]
# the tonic of each recording is read from '<audio file>.tonicFine'
audio_tonics = [np.loadtxt(p + '.tonicFine') for p in audio_files]
audio_labels = [e[1] for e in experiments]

unique_labels = set(audio_labels)


In [None]:
# define feature extraction code: compute the pcd's of all recordings
def getPCD(pitchfile, tonic_freq, smooth_factor=10.0, step_size=10.0):
    """Compute the pitch-class distribution (PCD) of a single recording.

    Parameters
    ----------
    pitchfile : str
        Path of a text file readable by ``np.loadtxt``. If the loaded data
        has more than one dimension, the second column is taken as the pitch
        values (the first is assumed to be time, the rest labels etc.).
    tonic_freq : number
        Reference frequency handed to ``mf.hz_to_cent`` and
        ``mf.generate_pd`` as ``ref_freq``.
    smooth_factor : float, optional
        Forwarded to ``mf.generate_pd``. Defaults to 10.0, the value that
        used to be hard-coded here.
    step_size : float, optional
        Forwarded to ``mf.generate_pd``. Defaults to 10.0, the value that
        used to be hard-coded here.

    Returns
    -------
    The object returned by ``mf.generate_pcd``.
    """
    pitch_track = np.loadtxt(pitchfile)
    if pitch_track.ndim > 1:  # assume the first col is time, the second is pitch and the rest is labels etc
        pitch_track = pitch_track[:, 1]

    # The whole pitch track (no chunking) is converted relative to the tonic
    pitch_cent = mf.hz_to_cent(pitch_track, ref_freq=tonic_freq)

    # Pitch distribution of the complete recording; the pitch file path is
    # passed along as the distribution's source
    pitch_distribution = mf.generate_pd(pitch_cent, ref_freq=tonic_freq,
                                        smooth_factor=smooth_factor,
                                        step_size=step_size,
                                        source=pitchfile)
    return mf.generate_pcd(pitch_distribution)


In [None]:
# PCD extraction: compute one '.pcd' file next to every pitch file, reusing
# any file that already exists on disk
for rec_idx, (pitch_path, tonic, mbid) in enumerate(zip(pitch_files, audio_tonics, audio_mbids)):
    print(str(rec_idx) + " Extracting PCD of " + mbid)

    # save path: same folder, last extension of the pitch file swapped for '.pcd'
    pitch_folder, pitch_name = os.path.split(pitch_path)
    save_path = os.path.join(pitch_folder, os.path.splitext(pitch_name)[0] + '.pcd')

    # nothing to do when the PCD was saved by an earlier run
    if os.path.isfile(save_path):
        continue

    getPCD(pitch_path, tonic).save(save_path)
    

0 Extracting PCD of 7ed38940-f5e5-46cb-897d-ed408ddf8c85
1 Extracting PCD of 9117fa28-df28-4bb8-a0e3-b70e60fed262
2 Extracting PCD of 04d506c4-df9e-410c-a3e2-037abffe7b9c
3 Extracting PCD of 30261aa8-aa4d-4f66-a2d4-b087668b00a8
4 Extracting PCD of f5689624-b636-4a6c-831f-53d1678d9bb6
5 Extracting PCD of 3766bcb1-57b6-4c16-9196-c2ed94d80331
6 Extracting PCD of 3d0539f5-2d30-422f-9647-b3e14fd9787d
7 Extracting PCD of c53396aa-f369-4875-8c61-3f29787da6f5
8 Extracting PCD of b22390c8-88bb-4e1f-9e23-bd3e237043ba
9 Extracting PCD of 0017d78e-a39d-4151-8d93-826ab2eb00d8
10 Extracting PCD of 3fc7b071-68c7-4a5a-bed5-7da6301931f4
11 Extracting PCD of f149cad0-c5af-48d6-a726-ef3badb0169a
12 Extracting PCD of f5d3cea0-50ad-461f-9aa2-20f2434ae1e8
13 Extracting PCD of 980b4a00-6e7c-41c1-81ee-6b021d237343
14 Extracting PCD of 99f1a318-f246-40cb-86b9-3639344be94d
15 Extracting PCD of 64587462-6f9f-4439-8b48-6375a4fb1f6a
16 Extracting PCD of 4a507000-6c5a-4c8b-8922-1bfc29cba805
17 Extracting PCD of 529

In [None]:
# divide training & testing (leave-one-out)

# Index of the recording held out for testing. It was referenced below without
# ever being defined, so this cell raised a NameError on a fresh kernel.
# Change the value to evaluate a different recording.
idx = 0
if not 0 <= idx < len(audio_files):
    raise IndexError('idx=' + str(idx) + ' is outside the ' +
                     str(len(audio_files)) + ' available recordings')

# work on copies so the full lists stay intact when this cell is re-run
training_files = copy.deepcopy(audio_files)
training_pitch_files = copy.deepcopy(pitch_files)
training_mbids = copy.deepcopy(audio_mbids)
training_tonics = copy.deepcopy(audio_tonics)
training_labels = copy.deepcopy(audio_labels)

# pop the test audio from the training
test_file = training_files.pop(idx)
test_pitch_file = training_pitch_files.pop(idx)
test_mbid = training_mbids.pop(idx)
test_tonic = training_tonics.pop(idx)
test_label = training_labels.pop(idx)

# models are kept in a folder named after the held-out recording
model_save_dir = os.path.join(experiment_dir, 'chordia', test_mbid)


In [None]:
# instantiate objects
# Estimator settings collected in one place, then unpacked into Chordia.
chordia_settings = dict(
    step_size=10,
    smooth_factor=15,
    chunk_size=0,
    threshold=0.5,
    overlap=0,
    frame_rate=196.0/44100,  # presumably hop size / sampling rate -- TODO confirm
)
che = Chordia.Chordia(**chordia_settings)


In [None]:
# Training: one model per label, built ONLY from the training recordings that
# carry that label. Previously every label was trained on the complete
# training set, which makes all models identical.
# NOTE: models saved by earlier runs are loaded as-is; delete the contents of
# model_save_dir to retrain models that were produced before this fix.
models = dict()
for label in unique_labels:
    print('   Training label: ' + label)
    try:
        # reuse a previously saved model when it can be loaded
        models[label] = che.load_collection(label, model_save_dir)
    except IOError:
        # keep pitch files and tonics aligned while filtering by label
        label_pitch_files = [pitch for pitch, lab
                             in zip(training_pitch_files, training_labels)
                             if lab == label]
        label_tonics = [tonic for tonic, lab
                        in zip(training_tonics, training_labels)
                        if lab == label]
        # NOTE(review): the lists are empty when the held-out recording was
        # the only one with this label -- confirm how che.train handles that.
        models[label] = che.train(label, label_pitch_files, label_tonics,
                                  metric='pcd', save_dir=model_save_dir)


In [None]:
# Testing: estimate the mode of the held-out recording against the saved models
print('Testing audio: ' + test_mbid)
estimate_output = che.estimate(
    test_pitch_file,
    mode_names=list(unique_labels),
    mode_dir=model_save_dir,
    est_mode=True,
    distance_method='bhat',
    metric='pcd',
    tonic_freq=test_tonic,
)
# only the first element of the estimation output is compared with the label
res = estimate_output[0]

# prints whether the estimate equals the annotated label of the test recording
print(res == test_label)

In [None]:
# evaluation: create the Evaluator object. No active cell below uses it yet;
# only the commented-out experiment code calls evaluator.mode_evaluate(...).
evaluator = ev.Evaluator()


In [None]:
# # get the data into appropriate format
# [pitch_paths, pitch_base, pitch_fname] = fo.getFileNamesInDir(data_dir, '.pitch')
# tonic_paths = [os.path.splitext(p)[0] + '.tonic' for p in pitch_paths]
# mode_labels = []
# for p in pitch_base:
#     for r in modes:
#         if r in p:
#             mode_labels.append(r)
            

In [None]:
# # make the data a single dictionary for housekeeping
# data = []
# for p, f, t, r in zip(pitch_paths, pitch_fname, tonic_paths, mode_labels):
#     data.append({'file':p, 'name':os.path.splitext(f)[0],
#                'tonic':float(np.loadtxt(t)), 'mode':r})


In [None]:
# # experiments
# results = dict()
# for key, fold in folds.iteritems():
#     # Training 
#     print key
#     models = dict()
#     for cur_mode in modes:
#         [file_list, tonic_list] = zip(*[(rec['file'], rec['tonic']) for rec in fold['train']
#                                         if rec['mode'] == cur_mode])
#         models[cur_mode] = che.train(cur_mode, file_list, tonic_list, metric='pcd', 
#                                      save_dir = os.path.join(train_savefolder, key))
                                     
#     # Raag Recognition
#     results[key] = []
#     for rec in fold['test']:
#         rec['pitch'] = np.loadtxt(rec['file'])[:,1]
#         res = che.estimate(rec['pitch'], mode_names=modes, 
#                            mode_dir=os.path.join(train_savefolder, key), 
#                            est_tonic=False, est_mode=True, distance_method='bhat',
#                            metric='pcd', ref_freq=rec['tonic'])[0]

#         # evaluate
#         results[key].append(evaluator.mode_evaluate(rec['file'], res, rec['mode']))

#     print key + ": " + str(100*np.mean([r['mode_eval'] for r in results[key]])) + '%'
    