Made fixed train/dev/test splits for annotation
jrgillick committed Feb 21, 2020
1 parent fe34a92 commit 3beb36b
Showing 1 changed file with 149 additions and 52 deletions.
201 changes: 149 additions & 52 deletions scripts/audio_set_loading.py
@@ -1,9 +1,10 @@
##################### Audioset (Laughter Detection) #############################
import sys, librosa
sys.path.append('/mnt/data0/jrgillick/projects/audio-feature-learning/')
import audio_utils
import audio_utils, text_utils
#sys.path.insert(0, '../../audio_set/')
from download_audio_set_mp3s import *
from sklearn.utils import shuffle

audioset_train_path='/mnt/data0/jrgillick/projects/laughter-detection/data/audioset/unbalanced_train_laughter_audio'
audioset_test_path='/mnt/data0/jrgillick/projects/laughter-detection/data/audioset/eval_laughter_audio'
@@ -28,31 +29,34 @@ def get_audioset_ids(csv_file, mode):

# Get a dictionary that maps from an audioset file ID to a list of
# laughter classes [belly laugh, giggle, etc.]
def get_audioset_laughter_classes_dict(csv_file, return_type='vector'):
infolist = get_laughter_infolist(csv_file, mode='positive')
ids = [l['yt_id'] for i, l in enumerate(infolist)]
tag_strings = [l['tag_strings'] for l in infolist]
assert(len(ids) == len(tag_strings))
def get_audioset_laughter_classes_dict(csv_files, return_type='vector'):
d = {}
if return_type == 'vector':
for i in range(len(ids)):
d[ids[i]] = laugh_id_multihot(tag_strings[i])
elif return_type == 'string':
for i in range(len(ids)):
d[ids[i]] = laugh_id_dict[tag_strings[i]]
else:
raise Exception("Invalid return_type")
if type(csv_files) != type([]): csv_files = [csv_files]
for csv_file in csv_files:
infolist = get_laughter_infolist(csv_file, mode='positive')
ids = [l['yt_id'] for i, l in enumerate(infolist)]
tag_strings = [l['tag_strings'] for l in infolist]
assert(len(ids) == len(tag_strings))

if return_type == 'vector':
for i in range(len(ids)):
d[ids[i]] = laugh_id_multihot(tag_strings[i])
elif return_type == 'string':
for i in range(len(ids)):
d[ids[i]] = laugh_id_dict[tag_strings[i]]
else:
raise Exception("Invalid return_type")
return d
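# A minimal usage sketch (illustration only; 'someYtId' is a hypothetical key):
# classes = get_audioset_laughter_classes_dict(
#     [audioset_train_labels_path, audioset_test_labels_path], return_type='vector')
# classes['someYtId']  # presumably a multi-hot vector with one slot per entry in laugh_keys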


def get_ytid_from_filepath(f):
return os.path.splitext(os.path.basename(f))[0].split('yt_')[1]


# For binary laughter detection
def get_audioset_binary_labels(files, positive_ids,negative_ids):
def _get_ytid_from_filepath(f):
return os.path.splitext(os.path.basename(f))[0].split('yt_')[1]
labels = []
for f in files:
fid = _get_ytid_from_filepath(f)
fid = get_ytid_from_filepath(f)
if fid in positive_ids:
labels.append(1)
elif fid in negative_ids:
@@ -63,22 +67,16 @@ def _get_ytid_from_filepath(f):

# for laughter type classification - e.g. giggle, belly laugh, etc.
def get_audioset_multiclass_labels(files):
def _get_ytid_from_filepath(f):
return os.path.splitext(os.path.basename(f))[0].split('yt_')[1]
labels = []
positive_ids = list(audioset_laughter_classes_dict.keys())
for f in files:
fid = _get_ytid_from_filepath(f)
fid = get_ytid_from_filepath(f)
if fid in positive_ids:
labels.append(audioset_laughter_classes_dict[fid])
else:
labels.append(np.zeros(len(laugh_keys)))
return labels

audioset_train_files, audioset_val_files, audioset_test_files = get_audioset_laughter_train_val_test_files()

audioset_laughter_classes_dict = get_audioset_laughter_classes_dict(audioset_test_labels_path)

audioset_positive_laughter_ids = get_audioset_ids(
audioset_train_labels_path, 'positive') + get_audioset_ids(
audioset_test_labels_path, 'positive')
@@ -87,32 +85,6 @@ def _get_ytid_from_filepath(f):
audioset_train_labels_path, 'negative') + get_audioset_ids(
audioset_test_labels_path, 'negative')

audioset_test_labels = get_audioset_binary_labels(
audioset_test_files, positive_ids=audioset_positive_laughter_ids, negative_ids=audioset_negative_laughter_ids)
audioset_val_labels = get_audioset_binary_labels(
audioset_val_files, positive_ids=audioset_positive_laughter_ids, negative_ids=audioset_negative_laughter_ids)
audioset_train_labels = get_audioset_binary_labels(
audioset_train_files, positive_ids=audioset_positive_laughter_ids, negative_ids=audioset_negative_laughter_ids)


#train_audios_path="/mnt/data0/jrgillick/projects/laughter-detection/data/audioset/train/audioset_train_audios.pkl"
#val_audios_path="/mnt/data0/jrgillick/projects/laughter-detection/data/audioset/val/audioset_val_audios.pkl"

"""
sr = 8000
with open(train_audios_path, "rb") as f:
all_train_audios = pickle.load(f)
with open(val_audios_path, "rb") as f:
all_val_audios = pickle.load(f)
train_audios = [subsample_time(0, int(len(a)/sr), int(len(a)/sr),
    subsample_length=1., padding_length=0.) for a in all_train_audios]
"""

def get_random_1_second_snippets(audio_signals, samples_per_file=1, sr=8000):
audios = []
for j in range(samples_per_file):
@@ -123,4 +95,129 @@ def get_random_1_second_snippets(audio_signals, samples_per_file=1, sr=8000):
end_time = librosa.core.time_to_samples(audio_times[i][0] + audio_times[i][1], sr=sr)
aud = audio_signals[i][start_time:end_time]
audios.append(aud)
return audios





########## For evaluation, let's redo the train/test split sizes and save results to a file to make it permanent ####

# audioset_positive_laughter_ids holds all the laughter files in AudioSet.
# We don't need to use AudioSet's official train/dev split,
# so just combine all the files, then split them ourselves.
# Reserve 1500 for test, 500 for dev, and use the rest for training.



# 1. Find all audio files
all_audioset_files = librosa.util.find_files(audioset_train_path) + librosa.util.find_files(audioset_test_path)

# 2. Find all the positive and negative files that were successfully downloaded
positive_audioset_files = []
negative_audioset_files = []

filepath_to_ytid = {}
for f in all_audioset_files:
ytid = get_ytid_from_filepath(f)
filepath_to_ytid[f] = ytid
if ytid in audioset_positive_laughter_ids:
positive_audioset_files.append(f)
else:
negative_audioset_files.append(f)

ytid_to_filepath = text_utils.make_reverse_vocab(filepath_to_ytid)

# 3. Trim the negative examples list to be the same size as the positives
negative_audioset_files = negative_audioset_files[0:len(positive_audioset_files)]

# 4. Now shuffle all files with a fixed random seed
positive_audioset_files = sorted(positive_audioset_files)
np.random.seed(0)
positive_audioset_files = shuffle(positive_audioset_files)

negative_audioset_files = sorted(negative_audioset_files)
np.random.seed(0)
negative_audioset_files = shuffle(negative_audioset_files)
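# Note: shuffle here is sklearn.utils.shuffle; seeding the global numpy RNG first keeps the
# order reproducible. An equivalent sketch would pass the seed directly instead:
# positive_audioset_files = shuffle(sorted(positive_audioset_files), random_state=0)
# negative_audioset_files = shuffle(sorted(negative_audioset_files), random_state=0)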

# 5. Filter our list of IDs to match the list of files that were successfully downloaded
audioset_positive_laughter_ids = [get_ytid_from_filepath(f) for f in positive_audioset_files]
audioset_negative_laughter_ids = [get_ytid_from_filepath(f) for f in negative_audioset_files]

# 6. Make the splits on both files and IDs, now that all files and IDs match and are shuffled in the same order
# Laughter files and IDs for test, dev, train
test_positive_laughter_files = positive_audioset_files[0:1500]
test_positive_laughter_ids = audioset_positive_laughter_ids[0:1500]

dev_positive_laughter_files = positive_audioset_files[1500:2000]
dev_positive_laughter_ids = audioset_positive_laughter_ids[1500:2000]

train_positive_laughter_files = positive_audioset_files[2000:]
train_positive_laughter_ids = audioset_positive_laughter_ids[2000:]

# Distractor files and IDs for test, dev, train
test_negative_laughter_files = negative_audioset_files[0:1500]
test_negative_laughter_ids = audioset_negative_laughter_ids[0:1500]

dev_negative_laughter_files = negative_audioset_files[1500:2000]
dev_negative_laughter_ids = audioset_negative_laughter_ids[1500:2000]

train_negative_laughter_files = negative_audioset_files[2000:]
train_negative_laughter_ids = audioset_negative_laughter_ids[2000:]
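# Sanity-check sketch (not part of the original script): the positive splits should be
# disjoint and the test/dev sizes fixed at 1500/500.
# assert len(test_positive_laughter_files) == 1500 and len(dev_positive_laughter_files) == 500
# assert set(test_positive_laughter_ids).isdisjoint(dev_positive_laughter_ids)
# assert set(dev_positive_laughter_ids).isdisjoint(train_positive_laughter_ids)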

# 7. Save txt files with the splits - only needs to be done once
"""
#Save IDS
with open('../data/audioset/splits/test_laughter_ids.txt', 'w') as f:
f.write("\n".join(test_positive_laughter_ids))
with open('../data/audioset/splits/dev_laughter_ids.txt', 'w') as f:
f.write("\n".join(dev_positive_laughter_ids))
with open('../data/audioset/splits/train_laughter_ids.txt', 'w') as f:
f.write("\n".join(train_positive_laughter_ids))
with open('../data/audioset/splits/test_negative_ids.txt', 'w') as f:
f.write("\n".join(test_negative_laughter_ids))
with open('../data/audioset/splits/dev_negative_ids.txt', 'w') as f:
f.write("\n".join(dev_negative_laughter_ids))
with open('../data/audioset/splits/train_negative_ids.txt', 'w') as f:
f.write("\n".join(train_negative_laughter_ids))
# Save Filepaths
with open('../data/audioset/splits/test_laughter_files.txt', 'w') as f:
f.write("\n".join(test_positive_laughter_files))
with open('../data/audioset/splits/dev_laughter_files.txt', 'w') as f:
f.write("\n".join(dev_positive_laughter_files))
with open('../data/audioset/splits/train_laughter_files.txt', 'w') as f:
f.write("\n".join(train_positive_laughter_files))
with open('../data/audioset/splits/test_negative_files.txt', 'w') as f:
f.write("\n".join(test_negative_laughter_files))
with open('../data/audioset/splits/dev_negative_files.txt', 'w') as f:
f.write("\n".join(dev_negative_laughter_files))
with open('../data/audioset/splits/train_negative_files.txt', 'w') as f:
f.write("\n".join(train_negative_laughter_files))
"""



# 8. Update the labels so they match the splits

audioset_test_files = test_positive_laughter_files + test_negative_laughter_files
audioset_dev_files = dev_positive_laughter_files + dev_negative_laughter_files
audioset_train_files = train_positive_laughter_files + train_negative_laughter_files

audioset_test_labels = get_audioset_binary_labels(
audioset_test_files, positive_ids=audioset_positive_laughter_ids, negative_ids=audioset_negative_laughter_ids)
audioset_val_labels = get_audioset_binary_labels(
audioset_dev_files, positive_ids=audioset_positive_laughter_ids, negative_ids=audioset_negative_laughter_ids)
audioset_dev_labels = audioset_val_labels # Just in case used somewhere :(
audioset_train_labels = get_audioset_binary_labels(
audioset_train_files, positive_ids=audioset_positive_laughter_ids, negative_ids=audioset_negative_laughter_ids)
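# Expected alignment sketch (an assumption, since positives are listed before distractors
# in each split): the first 1500 test labels should be 1 and the remainder 0, e.g.
# assert audioset_test_labels[:1500] == [1] * 1500
# assert audioset_test_labels[1500:] == [0] * 1500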
