In [None]:
import io
import zipfile



# Unzip the dataset archive into /tmp.
# The context manager closes the archive even if extraction raises,
# so the file handle is never leaked.
with zipfile.ZipFile("/content/drive/MyDrive/Copy of door_open_door_close_door_stop_max_1sec.zip") as zip_ref:
    zip_ref.extractall('/tmp')

In [None]:
import os
import pathlib

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras import models

from tensorflow.keras.utils import to_categorical

In [None]:
# Root folder of the extracted dataset; each entry in it is listed below.
DATASET_PATH = '/tmp/hackathon'

data_dir = pathlib.Path(DATASET_PATH)

# List everything in the dataset folder, then drop the two known
# non-class entries (a README and the macOS folder metadata file).
entries = np.array(tf.io.gfile.listdir(str(data_dir)))
is_readme = entries == 'README.md'
is_ds_store = entries == '.DS_Store'
commands = entries[~(is_readme | is_ds_store)]
print('Commands Available:', commands)


Commands Available: ['door_open' 'door_close' 'door_stop']


In [None]:
# Options for loading the audio folder as a (training, validation) pair.
# `subset='both'` makes the call return both splits at once; the fixed seed
# keeps the 80/20 split stable between runs.
split_options = dict(
    subset='both',
    validation_split=0.2,
    seed=0,
    batch_size=64,
    output_sequence_length=16000,
    directory=data_dir,
)
train_ds, val_ds = tf.keras.utils.audio_dataset_from_directory(**split_options)

# Class names as reported by the dataset; position i is the name of label i.
label_names = np.array(train_ds.class_names)
print("\nlabel :", label_names)


Found 12674 files belonging to 3 classes.
Using 10140 files for training.
Using 2534 files for validation.

label : ['door_close' 'door_open' 'door_stop']


In [None]:
def squeeze(audio, labels):
  """Remove the last axis of `audio` and pass `labels` through unchanged.

  `tf.squeeze(..., axis=-1)` requires that last axis to have size 1
  (presumably the single audio channel -- confirm against the dataset).

  Returns:
    A `(audio, labels)` tuple with `audio` one rank lower.
  """
  return tf.squeeze(audio, axis=-1), labels

# Apply `squeeze` to both splits. NOTE: this rebinds `train_ds`/`val_ds`,
# so the cell is meant to be run once per kernel session.
train_ds, val_ds = (
    split.map(squeeze, tf.data.AUTOTUNE) for split in (train_ds, val_ds))


In [None]:
# Split the held-out data into two interleaved halves:
# shard 0 becomes the test set, shard 1 stays as the validation set.
held_out_ds = val_ds
test_ds = held_out_ds.shard(num_shards=2, index=0)
val_ds = held_out_ds.shard(num_shards=2, index=1)


In [None]:
def get_spectrogram(waveform):
  """Turn a waveform into a magnitude spectrogram with a channel axis.

  Args:
    waveform: audio samples accepted by `tf.signal.stft`.

  Returns:
    The absolute value of the STFT (frame_length=255, frame_step=128) with
    one extra trailing axis, so it can be fed to image-style (Conv2D) layers.
  """
  stft = tf.signal.stft(waveform, frame_length=255, frame_step=128)
  # Keep only the magnitude; the phase is discarded.
  magnitude = tf.abs(stft)
  # Append the `channels` dimension.
  return tf.expand_dims(magnitude, axis=-1)


In [None]:
def make_spec_ds(ds):
  """Map every (audio, label) element of `ds` to (spectrogram, label)."""
  def to_spectrogram(audio, label):
    return get_spectrogram(audio), label

  return ds.map(map_func=to_spectrogram, num_parallel_calls=tf.data.AUTOTUNE)

# Build the spectrogram version of each split (train, validation, test).
train_spectrogram_ds, val_spectrogram_ds, test_spectrogram_ds = (
    make_spec_ds(split) for split in (train_ds, val_ds, test_ds))


In [None]:
# One output unit per class. A single softmax unit always outputs 1.0, so the
# head must be as wide as the number of classes reported by the dataset.
num_classes = len(label_names)

model = models.Sequential()
model.add(layers.Conv2D(16, (3,3), activation='relu', input_shape=(124, 129, 1)))
model.add(layers.Conv2D(16, (3,3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(num_classes, activation='softmax'))

model.summary()

# Compiling and fitting the model.
# The dataset yields integer class ids, so the matching loss for a softmax
# head is sparse categorical cross-entropy (binary cross-entropy only fits a
# two-class / single-sigmoid setup).
model.compile('Adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Validate on the validation shard; the test shard stays untouched so it can
# give an unbiased estimate after training.
model.fit(train_spectrogram_ds, epochs=10, validation_data=val_spectrogram_ds)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x790a9201a620>

In [None]:
# Prediction for a few examples.
# The model consumes spectrograms, so take one batch from the spectrogram
# test set (iterating a dataset yields (features, labels) pairs) and keep only
# the first few examples so the plotting cell stays small.
NUM_EXAMPLES = 2

audio, labels = next(iter(test_spectrogram_ds))
audio, labels = audio[:NUM_EXAMPLES], labels[:NUM_EXAMPLES]

yhat = model.predict(audio)


In [None]:
# Show each input next to the class the model scored highest.
# `yhat` is the prediction array produced by `model.predict` above.
for spectrogram, prediction in zip(audio, yhat):
    print(spectrogram.shape)
    # Index of the highest score -> class name (label_names[i] names label i).
    predicted_class = label_names[int(np.argmax(prediction))]
    plt.figure(figsize=(30,20))
    plt.text(0, 0, "Predicted Class = "+str(predicted_class),fontsize = 33,
         bbox = dict(facecolor = 'red', alpha = 0.2))
    # Transposing reverses the axes, so [0] picks the single channel plane.
    plt.imshow(np.transpose(spectrogram)[0])
    plt.show()


In [None]:
# Persist the trained CNN in HDF5 format.
cnn_model_path = 'my_model_CNN-1937.h5'
model.save(cnn_model_path)

  saving_api.save_model(


Using librosa ms

In [None]:
import librosa
import numpy as np


In [None]:
import os

directory_path = "/tmp/hackathon/"  # Replace with the actual path

# Keep only sub-directories (one per class). Stray files such as README.md or
# .DS_Store would otherwise be treated as class folders and make the later
# per-folder os.listdir() call fail. Sorted for a stable order across runs.
folders = sorted(
    name for name in os.listdir(directory_path)
    if os.path.isdir(os.path.join(directory_path, name))
)


In [None]:
def pad2d(a, i):
    """Force the second axis of 2-D array `a` to length `i`.

    Wider inputs are cut to their first `i` columns; inputs with `i` or fewer
    columns are right-padded with zeros (the padded result is built with
    `np.hstack`, so it takes NumPy's promoted dtype).
    """
    rows, cols = a.shape[0], a.shape[1]
    if cols > i:
        return a[:, 0:i]
    filler = np.zeros((rows, i - cols))
    return np.hstack((a, filler))

In [None]:
def process_audio_file(label, filename, verbose=False):
    """Load one audio clip and extract its MFCC features.

    Args:
        label: name of the class sub-folder under `directory_path`.
        filename: name of the audio file inside that sub-folder.
        verbose: when True, print the sample rate and waveform shape
            (useful for spot checks; off by default because this function is
            called once per file when building the dataset).

    Returns:
        A `(mfccs, filename)` tuple: the MFCC matrix computed by
        `librosa.feature.mfcc` and the unchanged `filename`.
    """
    # Load audio (librosa resamples to its default rate unless told otherwise).
    path = os.path.join(directory_path, label, filename)
    audio, sr = librosa.load(path)
    if verbose:
        print(sr)
        print(audio.shape)

    # Normalize volume so clips recorded at different levels are comparable.
    audio = librosa.util.normalize(audio)

    # Extract features. (The STFT that used to be computed here was never
    # used, so it has been dropped.)
    mfccs = librosa.feature.mfcc(y=audio, sr=sr)

    return mfccs, filename

In [None]:
# Class folder name -> integer label. The order matches the `classes` list
# used when printing predictions further down.
label_to_index = {'door_open': 0, 'door_close': 1, 'door_stop': 2}

audio_data = []
labels = []
for labelname in folders:
  # Skip anything that is not a known class folder instead of silently
  # labelling it as class 0.
  if labelname not in label_to_index:
    continue
  files = os.listdir(directory_path+labelname+"/")
  for filename in files:
    mfccs, _filename = process_audio_file(labelname, filename)
    # Force every MFCC matrix to 40 frames so they stack into one array.
    audio_data.append(pad2d(mfccs, 40))
    # The label comes from the folder name, not from the file name.
    labels.append(label_to_index[labelname])



In [None]:
# Stack the per-file feature matrices into one array and one-hot encode the
# integer labels. `num_classes` is pinned to the three door commands so the
# encoding width never depends on which labels happen to be present.
# NOTE: this rebinds `audio_data`/`labels`; run the cell once per session.
audio_data = np.array(audio_data)
labels = to_categorical(np.array(labels), num_classes=3)

In [None]:
# Copy of the features with an extra trailing axis of size 1.
# NOTE(review): `train_X` is not referenced by the cells visible below.
train_X = np.expand_dims(audio_data, axis=-1)

In [None]:
from sklearn.model_selection import train_test_split

# Fixed seed so the splits (and the metrics computed from them) are
# reproducible across runs.
SPLIT_SEED = 0

# Stratify on the class index (argmax of the one-hot labels) so every split
# keeps the same class proportions.
# First split: 80% train+validation, 20% test.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    audio_data, labels, test_size=0.2, random_state=SPLIT_SEED,
    stratify=np.argmax(labels, axis=1))
# Second split: 80% train, 20% validation (of the train+validation part).
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.2, random_state=SPLIT_SEED,
    stratify=np.argmax(y_trainval, axis=1))


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense, Input
from tensorflow.keras.models import Model


In [None]:

# One training example defines the input shape (everything but the batch axis).
inputs = Input(shape=X_train.shape[1:])

# Recurrent layers for sequence processing, with 20% input dropout.
# (An earlier pair of LSTM layers built here was immediately overwritten by
# this stack and never reached the output, so it has been removed.)
x = LSTM(64, return_sequences=True, dropout=0.2)(inputs)
x = LSTM(32, dropout=0.2)(x)

# Output layer for categorical classification (one unit per class).
outputs = Dense(3, activation='softmax')(x)

model = Model(inputs=inputs, outputs=outputs)


In [None]:
# One-hot labels + softmax output -> categorical cross-entropy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [None]:
# Train for 30 epochs, monitoring the held-out validation split each epoch.
validation_pair = (X_val, y_val)
model.fit(X_train, y_train, epochs=30, validation_data=validation_pair)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x78cb77d110f0>

In [None]:
# Reload the saved model when it exists; otherwise keep using the model that
# is already in memory. The file is only written by a later cell, so an
# unconditional load fails on a fresh top-to-bottom run.
saved_model_path = '/content/my-lstm.h5'
if os.path.exists(saved_model_path):
    model = tf.keras.models.load_model(saved_model_path)

# Extract and size the features exactly as for the training data.
data, _filename = process_audio_file('door_open','62d61e14449ba87643e73792.wav')
data = pad2d(data,40)

# Add the batch axis, predict, and format each class score as a percentage.
out = model.predict(tf.expand_dims(data, axis=0))
percentage_list = ["{0:.2%}".format(score) for score in out[0]]

# Class order must match the integer labels used when building the dataset.
classes=['door_open','door_close','door_stop']
my_result = list(zip(classes, percentage_list))
for class_and_score in my_result:
    print(class_and_score)

22050
(22050,)
('door_open', '100.00%')
('door_close', '0.00%')
('door_stop', '0.00%')


In [None]:
# Persist the trained LSTM model in HDF5 format (relative to the working dir).
lstm_model_path = "my-lstm.h5"
model.save(lstm_model_path)

  saving_api.save_model(


In [None]:
from tensorflow import lite
# Load the saved Keras model and convert it to a TFLite flatbuffer.
model = tf.keras.models.load_model('/content/my-lstm.h5')
converter = lite.TFLiteConverter.from_keras_model(model)
converter.target_spec.supported_ops = [
  tf.lite.OpsSet.TFLITE_BUILTINS, # enable TensorFlow Lite ops.
  tf.lite.OpsSet.SELECT_TF_OPS # enable TensorFlow ops.
]
# Keep the converted bytes under their own name so `model` stays a Keras model.
tflite_model = converter.convert()

# The context manager flushes and closes the file, so the interpreter created
# later always reads a complete model file.
with open('my-lstm.tflite', 'wb') as tflite_file:
    tflite_file.write(tflite_model)

# Size of the converted model in bytes.
len(tflite_model)

176624

In [None]:
# Create a TFLite interpreter for the converted model file.
tflite_model_path = "/content/my-lstm.tflite"
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)

# Get the descriptions of the model's input and output tensors.
output_details = interpreter.get_output_details()
input_details = interpreter.get_input_details()


In [None]:
# Tensors must be allocated before any input can be set.
interpreter.allocate_tensors()

# Print the input details first, then the output details.
for tensor_details in (input_details, output_details):
    print(tensor_details)

[{'name': 'serving_default_input_7:0', 'index': 0, 'shape': array([ 1, 20, 40], dtype=int32), 'shape_signature': array([-1, 20, 40], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
[{'name': 'StatefulPartitionedCall:0', 'index': 58, 'shape': array([1, 3], dtype=int32), 'shape_signature': array([-1,  3], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]


In [None]:
# Extract and size the features exactly as for the training data.
data, _filename = process_audio_file('door_close','62d71c08449ba86a10f47158.wav')
data = pad2d(data,40)

# Add the batch axis and cast to the dtype the interpreter reports for its
# input. `pad2d` returns float64 whenever it has to zero-pad, which the
# interpreter would reject for a float32 input tensor.
input_batch = np.expand_dims(data, axis=0).astype(input_details[0]['dtype'])
interpreter.set_tensor(input_details[0]['index'], input_batch)

In [None]:
# Run inference on the input tensor that was set in the previous cell.
interpreter.invoke()

In [None]:
 # output_details[0]['index'] is the index of the model's first output tensor;
 # reading it after invoke() returns the result of the last inference.
output_data = interpreter.get_tensor(output_details[0]['index'])

# Display the raw output (per the printed output details: shape [1, 3]).
output_data

array([[3.8604941e-03, 9.9537969e-01, 7.5987558e-04]], dtype=float32)