# In [None]:
import fnmatch
import os
import numpy as np
import librosa
from sklearn.manifold import TSNE
import json

def get_audio_files(path, extension):
    """Recursively collect files under *path* whose name ends in ``.<extension>``.

    Args:
        path: Directory to walk (sub-directories are included).
        extension: File extension without the leading dot, e.g. ``'wav'``.

    Returns:
        A list of paths (directory joined with file name) in ``os.walk``
        order; empty when nothing matches.
    """
    pattern = '*.' + extension
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(path)
        for name in fnmatch.filter(names, pattern)
    ]

def get_features(y, sr):
    """Summarize the first second of an audio signal as a standardized vector.

    The vector concatenates the per-coefficient time averages of 13 MFCCs,
    their first-order deltas and their second-order deltas, and is then
    shifted to zero mean and scaled to unit standard deviation.

    Args:
        y: Audio samples; only ``y[0:sr]`` is analyzed.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        A 1-D numpy array (13 MFCC means, 13 delta means, 13 delta-delta
        means). If every entry is identical the centered (all-zero) vector
        is returned instead of dividing by a zero standard deviation.
    """
    y = y[0:sr]  # analyze just first second
    # Pass the waveform by keyword: newer librosa releases no longer accept
    # it positionally, and the keyword form works on older ones as well.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    log_S = librosa.logamplitude(S, ref_power=np.max)
    mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=13)
    delta_mfcc = librosa.feature.delta(mfcc)
    delta2_mfcc = librosa.feature.delta(mfcc, order=2)
    feature_vector = np.concatenate(
        (np.mean(mfcc, 1), np.mean(delta_mfcc, 1), np.mean(delta2_mfcc, 1))
    )
    centered = feature_vector - np.mean(feature_vector)
    spread = np.std(feature_vector)
    if spread == 0:
        # A constant vector would otherwise become all-NaN (0/0).
        return centered
    return centered / spread

def segment_analyze_audio_file(source_audio, save_path_audio, hop_length_=512):
    """Cut an audio file at its detected onsets, save each cut, and featurize it.

    Each span between two consecutive onsets is written to
    ``<save_path_audio>/onset_<i>.wav`` and described with ``get_features``.
    Audio before the first onset and after the last one is not exported.

    Args:
        source_audio: Path of the audio file to load.
        save_path_audio: Directory that receives the ``onset_<i>.wav`` files.
        hop_length_: Hop length (in samples) used for onset detection and for
            converting the detected onset positions back to sample indices.

    Returns:
        A list of ``{"file": path, "features": vector}`` dicts, one per
        exported segment.
    """
    y, sr = librosa.load(source_audio)
    onsets = librosa.onset.onset_detect(y=y, sr=sr, hop_length=hop_length_)
    num_segments = len(onsets) - 1
    feature_vectors = []
    for i, (onset, next_onset) in enumerate(zip(onsets[:-1], onsets[1:])):
        # Use the hop length the onsets were detected with (the parameter),
        # not a module-level global that may be missing or hold another value.
        segment = y[onset * hop_length_:next_onset * hop_length_]
        feat = get_features(segment, sr)
        file_path = '%s/onset_%d.wav' % (save_path_audio, i)
        feature_vectors.append({"file": file_path, "features": feat})
        librosa.output.write_wav(file_path, segment, sr)
        print("analyzed %d/%d = %s" % (i, num_segments, file_path))
    return feature_vectors

def analyze_directory(source_audio):
    """Compute a feature vector for every ``.wav`` file under a directory.

    Args:
        source_audio: Directory searched recursively for ``*.wav`` files.

    Returns:
        A list of ``{"file": path, "features": vector}`` dicts, one per file
        found, in the order returned by ``get_audio_files``.
    """
    files = get_audio_files(source_audio, 'wav')
    feature_vectors = []
    for i, f in enumerate(files):
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print("get: %d/%d = %s" % (i, len(files), f))
        y, sr = librosa.load(f)
        feat = get_features(y, sr)
        feature_vectors.append({"file": f, "features": feat})
    return feature_vectors

def _rescale_to_unit(values):
    """Min-max rescale a 1-D array to [0, 1]; a constant array maps to zeros."""
    lowest = np.min(values)
    span = np.max(values) - lowest
    if span == 0:
        # Avoid 0/0, which would put NaN (invalid JSON) into the output.
        return np.zeros_like(values)
    return (values - lowest) / span


def run_tSNE(feature_vectors, save_path_points, perplexity_=30):
    """Embed feature vectors in 2-D with t-SNE and save the points as JSON.

    Args:
        feature_vectors: Sequence of dicts with a ``"file"`` path and a
            ``"features"`` vector.
        save_path_points: Path of the JSON file to write.
        perplexity_: Perplexity passed to ``TSNE``.

    The output file holds a list of ``{"path", "x", "y"}`` objects, with each
    axis independently rescaled to the range [0, 1].
    """
    features = [f["features"] for f in feature_vectors]
    embedding = TSNE(
        n_components=2, perplexity=perplexity_, verbose=2, angle=0.1
    ).fit_transform(features)
    x_norm = _rescale_to_unit(embedding[:, 0])
    y_norm = _rescale_to_unit(embedding[:, 1])
    # Convert to built-in floats: the embedding dtype depends on the
    # scikit-learn version, and numpy float32 scalars are rejected by json.
    data = [
        {"path": f["file"], "x": float(x), "y": float(y)}
        for f, x, y in zip(feature_vectors, x_norm, y_norm)
    ]
    with open(save_path_points, 'w') as outfile:
        json.dump(data, outfile)
    print("finished saving %s" % save_path_points)



# Run 1: split a single recording at its onsets and embed the segments.
# The paths below are machine-specific and must be adapted before running.
source_audio = '/Users/gene/Downloads/QUEEN+-+Bohemian+Rhapsody(music.naij.com).mp3'
# Directory that receives the exported onset_<i>.wav segments.
save_path_audio = '/Users/gene/Desktop/temp/'
# JSON file that receives the normalized 2-D t-SNE points.
save_path_points = 'tsnetest.json'
# Hop length handed to segment_analyze_audio_file as its hop_length_ argument.
hop_length = 512


feature_vectors = segment_analyze_audio_file(source_audio, save_path_audio, hop_length)
run_tSNE(feature_vectors, save_path_points)



# Run 2: embed every .wav file found under a sample directory.
# Rebinds source_audio, save_path_points and feature_vectors from the run above.
source_audio = '/Users/gene/audio/Drum Samples'
# NOTE: same file name as the onset-segmentation run, so executing both
# overwrites the earlier tsnetest.json.
save_path_points = 'tsnetest.json'

feature_vectors = analyze_directory(source_audio)
run_tSNE(feature_vectors, save_path_points)

