### Imports

In [1]:
from google.cloud import storage
from IPython.display import Audio
import os
import pandas as pd
import librosa
from itertools import islice

### GC storage

Initialise the connection to the Google Cloud Storage bucket.

In [2]:
# Create the storage client and take a handle on the bucket holding the audio.
bucket_name = 'soundboard_data'
storage_client = storage.Client()
bucket = storage_client.bucket(bucket_name)

In [3]:
# audio_path = "gs://your-bucket-name/your-audio-file.mp3"

In [4]:
# List the objects stored in the bucket; the display cell further down
# iterates over this result.
blobs = storage_client.list_blobs(bucket)

### Load Keys and Label mapping

#### Map keys to tags by converting the CSV to a DataFrame and then to a dict

In [5]:
# Read the class-label index (columns: index, mid, display_name) and preview
# its first 20 rows.
class_labels_path = '../data/train_labels/class_labels_indices.csv'
class_labels_df = pd.read_csv(class_labels_path)
class_labels_df.iloc[:20]

Unnamed: 0,index,mid,display_name
0,0,/m/09x0r,Speech
1,1,/m/05zppz,"Male speech, man speaking"
2,2,/m/02zsn,"Female speech, woman speaking"
3,3,/m/0ytgt,"Child speech, kid speaking"
4,4,/m/01h8n0,Conversation
5,5,/m/02qldy,"Narration, monologue"
6,6,/m/0261r1,Babbling
7,7,/m/0brhx,Speech synthesizer
8,8,/m/07p6fty,Shout
9,9,/m/07q4ntr,Bellow


In [6]:
# Number of rows (one per class label) in the label index.
class_labels_df.shape[0]

527

In [7]:
# Lookup table from label id (`mid`) to its human-readable display name.
label_dict = dict(zip(class_labels_df['mid'], class_labels_df['display_name']))
type(label_dict)

dict

Test the tags dict

In [8]:
def take(n, iterable):
    """Collect at most the ``n`` leading items of ``iterable`` into a list.

    Fewer than ``n`` items are returned when the iterable runs out early.
    """
    leading = islice(iterable, n)
    return [item for item in leading]

In [9]:
# Peek at the first five (mid, display_name) pairs of the lookup table.
n_label_keys = list(islice(label_dict.items(), 5))
n_label_keys

[('/m/09x0r', 'Speech'),
 ('/m/05zppz', 'Male speech, man speaking'),
 ('/m/02zsn', 'Female speech, woman speaking'),
 ('/m/0ytgt', 'Child speech, kid speaking'),
 ('/m/01h8n0', 'Conversation')]

In [10]:
def get_class(label_id):
    """Translate label ids into their display names.

    Parameters
    ----------
    label_id : iterable
        Label ids to look up in ``label_dict``.

    Returns
    -------
    list
        One entry per input id, in the same order; an id that is missing
        from ``label_dict`` yields an empty string.
    """
    names = []
    for mid in label_id:
        names.append(label_dict.get(mid, ''))
    return names

#### Make dataframe with audio files indexed by YTID, adding in tag names

In [11]:
# Load the training labels and split each comma-separated `positive_labels`
# string into a list of label ids (`tags_k`).
train_labels_path = '../data/train_labels/train.csv'
train_labels_df = pd.read_csv(train_labels_path)
train_labels_df['tags_k'] = train_labels_df['positive_labels'].map(
    lambda raw_labels: raw_labels.split(',')
)
train_labels_df.head()

Unnamed: 0,YTID,start_seconds,end_seconds,positive_labels,tags_k
0,--PJHxphWEs,30.0,40.0,"/m/09x0r,/t/dd00088","[/m/09x0r, /t/dd00088]"
1,--aE2O5G5WE,0.0,10.0,"/m/03fwl,/m/04rlf,/m/09x0r","[/m/03fwl, /m/04rlf, /m/09x0r]"
2,--aaILOrkII,200.0,210.0,"/m/032s66,/m/073cg4","[/m/032s66, /m/073cg4]"
3,--cB2ZVjpnA,30.0,40.0,/m/01y3hg,[/m/01y3hg]
4,--ekDLDTUXA,30.0,40.0,"/m/015lz1,/m/07pws3f","[/m/015lz1, /m/07pws3f]"


In [12]:
# Match keys with English labels, then count keys and resolved tags.
train_labels_df['tags'] = train_labels_df['tags_k'].apply(get_class)
train_labels_df['num_keys'] = train_labels_df['tags_k'].apply(len)
# get_class() substitutes '' for any key missing from label_dict, so the tag
# list always has the same length as the key list. Count only the non-empty
# names; otherwise comparing num_keys with num_tags can never reveal an
# unmatched key.
train_labels_df['num_tags'] = train_labels_df['tags'].apply(
    lambda names: sum(1 for name in names if name)
)
train_labels_df

Unnamed: 0,YTID,start_seconds,end_seconds,positive_labels,tags_k,tags,num_keys,num_tags
0,--PJHxphWEs,30.0,40.0,"/m/09x0r,/t/dd00088","[/m/09x0r, /t/dd00088]","[Speech, Gush]",2,2
1,--aE2O5G5WE,0.0,10.0,"/m/03fwl,/m/04rlf,/m/09x0r","[/m/03fwl, /m/04rlf, /m/09x0r]","[Goat, Music, Speech]",3,3
2,--aaILOrkII,200.0,210.0,"/m/032s66,/m/073cg4","[/m/032s66, /m/073cg4]","[Gunshot, gunfire, Cap gun]",2,2
3,--cB2ZVjpnA,30.0,40.0,/m/01y3hg,[/m/01y3hg],"[Smoke detector, smoke alarm]",1,1
4,--ekDLDTUXA,30.0,40.0,"/m/015lz1,/m/07pws3f","[/m/015lz1, /m/07pws3f]","[Singing, Bang]",2,2
...,...,...,...,...,...,...,...,...
19639,zyqg4pYEioQ,20.0,30.0,"/m/09x0r,/m/0llzx","[/m/09x0r, /m/0llzx]","[Speech, Sewing machine]",2,2
19640,zz0ddNfz0h0,30.0,40.0,"/m/012f08,/m/03cl9h,/m/07yv9,/m/0k4j","[/m/012f08, /m/03cl9h, /m/07yv9, /m/0k4j]","[Motor vehicle (road), Ice cream truck, ice cr...",4,4
19641,zz8TGV83nkE,80.0,90.0,"/m/012f08,/m/02mk9,/m/04_sv,/m/07yv9","[/m/012f08, /m/02mk9, /m/04_sv, /m/07yv9]","[Motor vehicle (road), Engine, Motorcycle, Veh...",4,4
19642,zzlK8KDqlr0,370.0,380.0,"/m/01m2v,/m/07qc9xj,/m/09x0r,/t/dd00125","[/m/01m2v, /m/07qc9xj, /m/09x0r, /t/dd00125]","[Computer keyboard, Clicking, Speech, Inside, ...",4,4


In [13]:
# Lookup table from a clip's YTID to the list of tag names recorded for it.
fn_label_dict = dict(zip(train_labels_df['YTID'], train_labels_df['tags']))

def fn_get_tags(fn):
    """Return the tag list stored for id ``fn``, or None when it is unknown."""
    tags = fn_label_dict.get(fn)
    return tags

# fn_get_tags('zz8TGV83nkE')
# len(fn_get_tags('zz8TGV83nkE'))

### Display Audio Files with tags

In [14]:
# This cell has always created `audio_files`; keep doing so in case anything
# else relies on the directory being present.
os.makedirs('audio_files', exist_ok=True)

# The clips are actually written here, so make sure this directory exists as
# well before downloading into it.
temp_audio_dir = '../data/temp_audio'
os.makedirs(temp_audio_dir, exist_ok=True)


def _ytid_from_file_name(file_name):
    """Remove the literal 'train_' prefix and '.wav' suffix from a file name.

    str.lstrip / str.rstrip strip any characters drawn from the given set,
    not a fixed substring, so they also eat into ids that start with one of
    t/r/a/i/n/_ or end with one of ./w/a/v. Slice the exact affixes instead.
    """
    if file_name.startswith('train_'):
        file_name = file_name[len('train_'):]
    if file_name.endswith('.wav'):
        file_name = file_name[:-len('.wav')]
    return file_name


# Display the first five audio files
for i, blob in enumerate(islice(blobs, 5)):
    file_name = blob.name.split('/')[-1]
    print(f'File {i}: {blob.name}, {fn_get_tags(_ytid_from_file_name(file_name))}')

    # Download the audio file to a local file
    file_path = os.path.join(temp_audio_dir, file_name)
    blob.download_to_filename(file_path)

    # Load the audio file with librosa, keeping its native sample rate
    y, sr = librosa.load(file_path, sr=None)

    # Display and play the audio file
    display(Audio(y, rate=sr))


File 0: train_--PJHxphWEs.wav, ['Speech', 'Gush']


File 1: train_--aE2O5G5WE.wav, ['Goat', 'Music', 'Speech']


File 2: train_--aaILOrkII.wav, ['Gunshot, gunfire', 'Cap gun']


File 3: train_--cB2ZVjpnA.wav, ['Smoke detector, smoke alarm']


File 4: train_--ekDLDTUXA.wav, ['Singing', 'Bang']


In [15]:
# Check tag counts
# train_labels_df[train_labels_df['num_keys'] != train_labels_df['num_tags']]

In [16]:
## Update JN to linguist-vendored

### Preprocessing / Scanning