Step 1. Unzip dataset

In [1]:
!unzip snoring.zip -d /content/snoring_data

Archive:  snoring.zip
  inflating: /content/snoring_data/Snoring Dataset/0/0_0.wav  
  inflating: /content/snoring_data/Snoring Dataset/0/0_1.wav  
  inflating: /content/snoring_data/Snoring Dataset/0/0_10.wav  
  inflating: /content/snoring_data/Snoring Dataset/0/0_100.wav  
  inflating: /content/snoring_data/Snoring Dataset/0/0_101.wav  
  inflating: /content/snoring_data/Snoring Dataset/0/0_102.wav  
  inflating: /content/snoring_data/Snoring Dataset/0/0_103.wav  
  inflating: /content/snoring_data/Snoring Dataset/0/0_104.wav  
  inflating: /content/snoring_data/Snoring Dataset/0/0_105.wav  
  inflating: /content/snoring_data/Snoring Dataset/0/0_106.wav  
  inflating: /content/snoring_data/Snoring Dataset/0/0_107.wav  
  inflating: /content/snoring_data/Snoring Dataset/0/0_108.wav  
  inflating: /content/snoring_data/Snoring Dataset/0/0_109.wav  
  inflating: /content/snoring_data/Snoring Dataset/0/0_11.wav  
  inflating: /content/snoring_data/Snoring Dataset/0/0_110.wav  
  inflati

Step 2. Imports + Feature Extraction

In [2]:
!pip install librosa tensorflow scikit-learn matplotlib

import os
import numpy as np
import librosa
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models



In [3]:
def extract_mfcc(file_path, n_mfcc=40, max_len=44):
    y_audio, sr = librosa.load(file_path, sr=8000, mono=True)  # downsample
    mfcc = librosa.feature.mfcc(y=y_audio, sr=sr, n_mfcc=n_mfcc)
    # pad or cut
    if mfcc.shape[1] < max_len:
        pad_width = max_len - mfcc.shape[1]
        mfcc = np.pad(mfcc, pad_width=((0,0),(0,pad_width)), mode='constant')
    else:
        mfcc = mfcc[:, :max_len]
    return mfcc


Step 3. Load dataset

In [8]:
DATASET_DIR = "/content/snoring_data"
classes = { "0": 0, "1": 1 }   # 0 = non-snore, 1 = snore

X, y = [], []

for label_name, label_id in classes.items():
    folder = os.path.join(DATASET_DIR, label_name)
    for file in os.listdir(folder):
        if file.endswith(".wav"):
            mfcc = extract_mfcc(os.path.join(folder, file))
            X.append(mfcc)
            y.append(label_id)

X = np.array(X)
y = np.array(y)

# CNN needs channels last
X = X[..., np.newaxis]

print("Dataset shape:", X.shape, "Labels:", y.shape)


Dataset shape: (1000, 40, 44, 1) Labels: (1000,)


Step 4. Train/Test Split

In [9]:
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)


Step 5. CNN Model

In [10]:
model = models.Sequential([
    layers.Conv2D(32, (3,3), activation='relu', input_shape=(40,44,1)),
    layers.MaxPooling2D((2,2)),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Step 6. Train

In [11]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=32,
    verbose=1
)


Epoch 1/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 74ms/step - accuracy: 0.7222 - loss: 1.0748 - val_accuracy: 0.9533 - val_loss: 0.1619
Epoch 2/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 68ms/step - accuracy: 0.9452 - loss: 0.1333 - val_accuracy: 0.9933 - val_loss: 0.0485
Epoch 3/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 66ms/step - accuracy: 0.9980 - loss: 0.0301 - val_accuracy: 0.9867 - val_loss: 0.0408
Epoch 4/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 110ms/step - accuracy: 1.0000 - loss: 0.0176 - val_accuracy: 0.9867 - val_loss: 0.0341
Epoch 5/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 78ms/step - accuracy: 1.0000 - loss: 0.0077 - val_accuracy: 0.9867 - val_loss: 0.0175
Epoch 6/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 65ms/step - accuracy: 1.0000 - loss: 0.0035 - val_accuracy: 0.9933 - val_loss: 0.0176
Epoch 7/20
[1m22/22[0m [32m━━━

Step 7. Evaluate

In [12]:
test_loss, test_acc = model.evaluate(X_test, y_test)
print("✅ Test Accuracy:", test_acc)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.9860 - loss: 0.0697
✅ Test Accuracy: 0.9800000190734863


Step 8. Real-time Inference

In [13]:
def predict_file(file_path):
    mfcc = extract_mfcc(file_path)
    mfcc = mfcc[np.newaxis, ..., np.newaxis]  # shape (1,40,44,1)
    prob = model.predict(mfcc)[0][0]
    label = "Snore" if prob > 0.5 else "Non-Snore"
    print(f"Prediction: {label} (Confidence: {prob:.2f})")
    return label, prob



In [24]:
predict_file("/content/snoring_data/1/1_103.wav")
#/content/snoring_data/0/0_105.wav
#/content/snoring_data/1/1_0.wav
#/content/snoring_data/0/0_0.wav

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
Prediction: Snore (Confidence: 1.00)


('Snore', np.float32(0.9999963))

In [19]:
test_loss, test_acc = model.evaluate(X_test, y_test)
print("Final Test Accuracy:", test_acc)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.9860 - loss: 0.0697
Final Test Accuracy: 0.9800000190734863


In [25]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]  # quantized
tflite_model = converter.convert()

with open("snore_detector.tflite", "wb") as f:
    f.write(tflite_model)

print("✅ Saved model: snore_detector.tflite")


Saved artifact at '/tmp/tmp_0nr9ykk'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 40, 44, 1), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  132517373218640: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132517373215184: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132517373226896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132517373217488: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132517373219024: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132517373230160: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132517373218064: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132517369187408: TensorSpec(shape=(), dtype=tf.resource, name=None)
✅ Saved model: snore_detector.tflite
