In [1]:
import os
import numpy as np
import pandas as pd
import librosa
import librosa.display
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import tqdm
from sklearn.preprocessing import LabelEncoder

2024-02-25 22:12:51.380001: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-25 22:12:51.407417: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-25 22:12:51.407443: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-25 22:12:51.408524: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-25 22:12:51.413737: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-25 22:12:51.414479: I tensorflow/core/platform/cpu_feature_guard.cc:1

In [2]:
# Build an index of the training clips as a list of records, one per file:
# [
#   {
#     "path": "train/audio/<label>/<file>",
#     "label": "<label>"
#   },
#   ...
# ]
# The name of each sub-directory of `path` is used as the label.
path = 'train/audio'
data = []

for label in os.listdir(path):
    label_dir = f'{path}/{label}'
    # Only directories are label folders; calling os.listdir() on a stray
    # plain file here would raise NotADirectoryError.
    if not os.path.isdir(label_dir):
        continue
    for file in os.listdir(label_dir):
        # The README shipped inside a label folder is not an audio clip.
        if file == 'README.md':
            continue
        data.append({
            'path': f'{label_dir}/{file}',
            'label': label
        })

df = pd.DataFrame(data)
df

Unnamed: 0,path,label
0,train/audio/_background_noise_/white_noise.wav,_background_noise_
1,train/audio/_background_noise_/running_tap.wav,_background_noise_
2,train/audio/_background_noise_/dude_miaowing.wav,_background_noise_
3,train/audio/_background_noise_/pink_noise.wav,_background_noise_
4,train/audio/_background_noise_/exercise_bike.wav,_background_noise_
...,...,...
64722,train/audio/zero/3ff840aa_nohash_0.wav,zero
64723,train/audio/zero/db7c95b0_nohash_0.wav,zero
64724,train/audio/zero/637c702a_nohash_1.wav,zero
64725,train/audio/zero/9e42ae25_nohash_1.wav,zero


In [3]:
def audio_to_spectrogram(file_path, max_pad_len=174, n_mfcc=20):
    """Load an audio file and return a fixed-width MFCC matrix.

    Despite the function name, the returned features are MFCCs computed with
    ``librosa.feature.mfcc``, not a raw spectrogram.

    Parameters
    ----------
    file_path : str
        Path of the audio file; passed straight to ``librosa.load``.
    max_pad_len : int, default 174
        Exact number of columns (time frames) in the result. Shorter inputs
        are zero-padded on the right, longer ones are truncated.
    n_mfcc : int, default 20
        Number of MFCC coefficients (rows) to compute.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_mfcc, max_pad_len)``.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    wave, sr = librosa.load(file_path, mono=True, sr=None)
    mfcc = librosa.feature.mfcc(y=wave, sr=sr, n_mfcc=n_mfcc)

    n_frames = mfcc.shape[1]
    if n_frames < max_pad_len:
        # Pad only the end of the time axis; the coefficient axis is untouched.
        missing = max_pad_len - n_frames
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, missing)), mode='constant')
    else:
        mfcc = mfcc[:, :max_pad_len]
    return mfcc

In [4]:
# Re-display the training index built earlier (one row per clip: path, label).
df

Unnamed: 0,path,label
0,train/audio/_background_noise_/white_noise.wav,_background_noise_
1,train/audio/_background_noise_/running_tap.wav,_background_noise_
2,train/audio/_background_noise_/dude_miaowing.wav,_background_noise_
3,train/audio/_background_noise_/pink_noise.wav,_background_noise_
4,train/audio/_background_noise_/exercise_bike.wav,_background_noise_
...,...,...
64722,train/audio/zero/3ff840aa_nohash_0.wav,zero
64723,train/audio/zero/db7c95b0_nohash_0.wav,zero
64724,train/audio/zero/637c702a_nohash_1.wav,zero
64725,train/audio/zero/9e42ae25_nohash_1.wav,zero


In [5]:
# NOTE(review): this import rebinds the name `tqdm`, which the first cell
# bound to the tqdm module; from here on `tqdm` is the progress-bar class.
from tqdm.auto import tqdm
# Registers the `progress_apply` method on pandas objects.
tqdm.pandas()

# Compute one MFCC matrix per training clip, with a progress bar.
# The result is stored as a new 'spectrogram' column on `df`.
df['spectrogram'] = df['path'].progress_apply(audio_to_spectrogram)

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 64727/64727 [03:14<00:00, 332.03it/s]


In [6]:
# Stack the per-clip MFCC matrices into a single array and append a trailing
# axis of length 1 (X[0].shape is shown a few cells below).
X = np.array(df['spectrogram'].tolist())[..., np.newaxis]

# String labels, one per clip, in the same row order as X.
y = np.array(df['label'].tolist())

# Integer-encode the labels. `label_encoder` is reused later to map predicted
# class indices back to label strings.
# NOTE(review): this assumes the encoder fitted here orders classes exactly as
# the one used when model.h5 was trained - confirm against the training code.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [7]:
# NOTE(review): mid-notebook import; the first cell already imports from
# tensorflow.keras.models, so this could live there.
from tensorflow.keras.models import load_model

# Load the saved Keras model; 'model.h5' is resolved relative to the current
# working directory.
model = load_model('model.h5')


In [8]:
# Sanity check: shape of a single sample as it will be fed to the model.
X[0].shape

(20, 174, 1)

In [9]:
# Inspect the raw (string) training labels.
y

array(['_background_noise_', '_background_noise_', '_background_noise_',
       ..., 'zero', 'zero', 'zero'], dtype='<U18')

In [10]:
# Smoke test: run the model on the first training sample only. The length-1
# slice keeps the leading batch axis that predict() expects.
prediction = model.predict(X[:1])



In [11]:
# Index of the highest-scoring class for each row of `prediction`.
predicted_class = prediction.argmax(axis=1)
print(f"Predicted class: {predicted_class}")

# The index is mapped back to its label string with `label_encoder` in the
# next cell.

Predicted class: [27]


In [12]:
# Map the predicted class index back to its label string.
label_encoder.inverse_transform(predicted_class)

array(['up'], dtype='<U18')

# Full test: predict labels for every clip in `test/audio`

In [13]:
# Index every file in the test folder except the README; only the path is
# recorded for each clip.
# NOTE: this rebinds `path`, `data` and `df`, replacing the training index.
path = 'test/audio'
data = [
    {'path': f'{path}/{file}'}
    for file in os.listdir(path)
    if file != 'README.md'
]

df = pd.DataFrame(data)
df

Unnamed: 0,path
0,test/audio/clip_726cc12df.wav
1,test/audio/clip_f442b53d8.wav
2,test/audio/clip_ccb6035b1.wav
3,test/audio/clip_fb3a83674.wav
4,test/audio/clip_006f5bc7b.wav
...,...
158533,test/audio/clip_42eb33be8.wav
158534,test/audio/clip_6bb7e3b4c.wav
158535,test/audio/clip_27a90a0cd.wav
158536,test/audio/clip_c6cc2bbe0.wav


In [14]:
# NOTE(review): same import and registration as the training feature cell;
# repeated here so `progress_apply` is available.
from tqdm.auto import tqdm
tqdm.pandas()

# Compute one MFCC matrix per test clip, with a progress bar, using the same
# feature function (and defaults) as for the training clips.
df['spectrogram'] = df['path'].progress_apply(audio_to_spectrogram)

100%|██████████| 158538/158538 [07:44<00:00, 341.29it/s]


In [15]:
# Stack the test MFCC matrices into one array and append the same trailing
# axis of length 1 that the training array X has.
X_test = np.array(df['spectrogram'].tolist())[..., np.newaxis]

In [19]:
# Sanity check: one entry along the first axis per test clip.
X_test.shape

(158538, 20, 174, 1)

In [24]:
# Score every test clip, pick the highest-scoring class per clip, then map the
# class indices back to label strings with the encoder fitted on the training
# labels.
# NOTE(review): assumes the model's output index order matches
# label_encoder.classes_ - confirm against the training code.
predictions = model.predict(X_test)
predicted_classes = predictions.argmax(axis=1)
predicted = label_encoder.inverse_transform(predicted_classes)

2024-02-25 22:41:16.113804: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 2206848960 exceeds 10% of free system memory.




In [26]:
# Sanity check: one predicted label per test clip.
predicted.shape

(158538,)

In [27]:
# Attach the decoded labels to the test index; the assignment is positional,
# so `predicted` must be in the same row order as `df`.
df['PredictedLabel'] = predicted

In [29]:
# Keep only the file name of each clip, i.e. the text after the last '/'
# (the paths in `df` were built with '/' as the separator).
df['file'] = [clip_path.split('/')[-1] for clip_path in df['path']]

In [33]:
# Write the submission file with two columns, `fname` and `label`, and no
# index column.
pd.DataFrame({
    'fname': df['file'],
    'label': df['PredictedLabel'],
}).to_csv('submission.csv', index=False)

In [None]:


# Save to CSV
# The 'spectrogram' column holds one NumPy array per row; in a CSV it would be
# written as a string repr of each array, which bloats the file and cannot be
# loaded back as numeric data. Drop it before writing (errors='ignore' keeps
# the cell working if the column is absent).
df.drop(columns=['spectrogram'], errors='ignore').to_csv('predictions.csv', index=False)

In [None]:
# Scratch cell: prints a fixed string and has no effect on the pipeline.
print('hello')

In [None]:
# WARNING: irreversibly deletes the local `test` directory. After this runs,
# the earlier cell that lists `test/audio` fails on a re-run unless the data
# is restored first.
!rm -r test