In [13]:
from pprint import pprint
import sounddevice as sd
import pandas as pd
import subprocess
import wavio
import os

## Load the annotations

In [14]:
df = pd.read_csv('./classes.csv')
df[:4]

Unnamed: 0,start,path,comment
0,00:00,blender/158737__jackofall29__blender-ice-crush...,y
1,00:10,blender/158737__jackofall29__blender-ice-crush...,y
2,00:15,blender/158737__jackofall29__blender-ice-crush...,y
3,00:00,blender/173319__jaxlynnstaranimate__juicer-shr...,y


## Define the super-classes

In [15]:
appliances = ['blender','dishwasher','fridge','microwave',
              'stove-fan','stove-burner','frying-pan','water-flowing',
              'boiling-water','juicer']

actions = ['cupboard','drawer','clean-dishes','book','chopping',
           'cutlery','eat','peel','plates','sweep']

classes = appliances + actions

In [16]:
for c in classes:
    def getCName(x):
        return x.split('/')[0]
    n_classes = (df.path.apply(getCName) == c).sum()
    print(c, '--', str(n_classes))

blender -- 47
dishwasher -- 66
fridge -- 50
microwave -- 51
stove-fan -- 54
stove-burner -- 45
frying-pan -- 68
water-flowing -- 50
boiling-water -- 46
juicer -- 51
cupboard -- 47
drawer -- 56
clean-dishes -- 60
book -- 52
chopping -- 45
cutlery -- 69
eat -- 42
peel -- 49
plates -- 63
sweep -- 59


## extract audio

In [17]:
%ls
cmd_unformated = 'ffmpeg -i "{}" -ss {} -t 00:05 -ac 1 -ar {} -loglevel error -y "{}"'
audioRate = 44100
userDict = {}
foldDict = {}
summary = {'fold': [],
           'target': [],
           'category': [],
           'usr_id': [],
           'take': [],
           'path': [],
           'orig_idx': []}

def ammend_dict(mDict, key):
    if key not in mDict.keys():
        mDict[key] = -1
    mDict[key] += 1
    
# Create the dataset in ./audio
print('Will take some time to process all the audio files...')
subprocess.call('rm ./audio/*', shell=True)
for idx, row in df.iterrows():
    inPath = os.path.join('audio_raw/', row.path)
    inTime = row.start
    
    # Get fold
    fClass = row.path.split('/')[0]
    ammend_dict(foldDict, fClass)
    fFold = foldDict[fClass] // 8  # 8 samples per fold (8*5=40)
    summary['fold'].append(fFold)
    
    # Get user id
    fId = row.path.split('/')[-1]
    fId = fId.split('_')[0]
    ammend_dict(userDict, fId)
    userNb = chr(ord('A') + userDict[fId])
    summary['usr_id'].append(fId)
    summary['take'].append(userNb)
    
    # Make name
    classId = classes.index(fClass)
    fName = '{}-{}-{}-{}.wav'.format(fFold, fId, userNb, classId)
    outPath = os.path.join('audio/', fName)
    summary['target'].append(classId)
    summary['category'].append(fClass)
    summary['path'].append(outPath)
    summary['orig_idx'].append(idx)
    
    # Call cmd
    cmd = cmd_unformated.format(inPath, inTime, audioRate, outPath)
    command_run = subprocess.call(cmd, shell=True)
    if command_run != 0:  # If cmd failed
        pprint(cmd)

print('example of a command run:')
print(cmd)
print('Finished')
df_summary = pd.DataFrame(summary)

2           classes2.csv            human_classification.ipynb  LICENSE.md
[0m[01;34maudio[0m/      classes.csv             [01;34mkitchen20[0m/                  [01;34mpytorch[0m/
[01;34maudio_raw[0m/  data-exploration.ipynb  kitchen20.csv               README.md
Will take some time to process all the audio files...
example of a command run:
ffmpeg -i "audio_raw/juicer/BLACK+DECKER CJ625 30-Watt 34-Ounce Citrus Juicer Review.wav" -ss 06:02 -t 00:05 -ac 1 -ar 44100 -loglevel error -y "audio/6-BLACK+DECKER CJ625 30-Watt 34-Ounce Citrus Juicer Review.wav-G-9.wav"
Finished


## Evaluate quality of sound

In [19]:
for cat in set(df_summary.category):
    print(cat)
    for _, row in df_summary[df_summary.category == cat].iterrows():
        if df.comment.isna()[row.orig_idx]:
            print(row.path, row.category)
            sound = wavio.read(row.path).data.T[0]
            sd.play(sound, audioRate)
            df.loc[row.orig_idx, 'comment'] = input('y/n/m(eh)')

peel
frying-pan
book
dishwasher
drawer
blender
microwave
plates
water-flowing
eat
clean-dishes
cupboard
stove-fan
fridge
boiling-water
stove-burner
chopping
sweep
cutlery
juicer


In [20]:
print(df[:2])
print(len(df))
do_overwrite = input('overwrite classes.csv ? (Y/n)')
if do_overwrite == 'Y':
    df.to_csv('classes.csv', index=False)
    print('overwritten')
else:
    print('not overwritten')

   start                                               path comment
0  00:00  blender/158737__jackofall29__blender-ice-crush...       y
1  00:10  blender/158737__jackofall29__blender-ice-crush...       y
1070
overwrite classes.csv ? (Y/n)Y
overwritten


## Export dataset

In [21]:
df_summary.fold = df_summary.fold + 1
df_summary.to_csv('kitchen20.csv')
df_summary[:10]

Unnamed: 0,category,fold,orig_idx,path,take,target,usr_id
0,blender,0,0,audio/0-158737-A-0.wav,A,0,158737
1,blender,0,1,audio/0-158737-B-0.wav,B,0,158737
2,blender,0,2,audio/0-158737-C-0.wav,C,0,158737
3,blender,0,3,audio/0-173319-A-0.wav,A,0,173319
4,blender,0,4,audio/0-173319-B-0.wav,B,0,173319
5,blender,0,5,audio/0-173319-C-0.wav,C,0,173319
6,blender,0,6,audio/0-173319-D-0.wav,D,0,173319
7,blender,0,7,audio/0-181625-A-0.wav,A,0,181625
8,blender,1,8,audio/1-181625-B-0.wav,B,0,181625
9,blender,1,9,audio/1-181625-C-0.wav,C,0,181625


In [22]:
for c in classes:
    def getCName(x):
        return x.split('/')[0]
    n_classes = ((df.path.apply(getCName) == c) &
                 ((df.comment == 'y'))).sum()
    if n_classes < 40:
        print(c, '--', str(n_classes))
