In [None]:
# Install required packages
%pip install librosa soundfile tensorflow scikit-learn

In [None]:
# Import libraries
import numpy as np
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
from pathlib import Path
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [None]:
# Configuration
# --- Dataset and output locations (relative paths) ---
ESC50_PATH = Path("../ESC-50-master")
AUDIO_PATH = ESC50_PATH.joinpath("audio")
META_PATH = ESC50_PATH.joinpath("meta", "esc50.csv")
MODEL_OUTPUT_DIR = Path("../data/models/sound_classification")

# --- Audio parameters ---
SAMPLE_RATE = 22050  # default sample rate used when loading audio
DURATION = 5         # clip length in seconds
N_MFCC = 40          # number of MFCC coefficients to extract
N_MELS = 128         # number of Mel bands in the spectrogram

# --- Security-relevant classes (values of the metadata 'category' column) ---
SECURITY_CLASSES = [
    'glass_breaking', 'door_wood_knock', 'dog', 'siren',
    'crying_baby', 'footsteps', 'car_horn', 'clock_alarm',
]

## Load ESC-50 Dataset Metadata

In [None]:
# Read the ESC-50 metadata table, then report its size and the distinct categories
metadata = pd.read_csv(META_PATH)
print(
    f"Total samples: {metadata.shape[0]}",
    f"\nCategories:\n{metadata['category'].unique()}",
    sep="\n",
)

In [None]:
# Keep only the rows whose category is one of the security-relevant classes
security_data = metadata.loc[metadata['category'].isin(SECURITY_CLASSES)]

# Summarise what is left: total row count, then rows per class
print(f"\nSecurity-relevant samples: {security_data.shape[0]}")
print("\nClass distribution:")
print(security_data['category'].value_counts())

## Audio Preprocessing

In [None]:
def extract_features(audio_path, sr=SAMPLE_RATE, duration=DURATION):
    """Extract MFCC and Mel spectrogram features from audio.

    The clip is loaded with ``librosa.load`` and then zero-padded or truncated
    so that every clip has the same number of samples before the features are
    computed.

    Args:
        audio_path: Location of the audio file, passed to ``librosa.load``.
        sr: Sample rate requested from ``librosa.load``. Defaults to
            ``SAMPLE_RATE``.
        duration: Clip length in seconds; may be fractional. Defaults to
            ``DURATION``.

    Returns:
        Tuple ``(mfcc, mel_spec_db)``: the MFCCs computed with ``N_MFCC``
        coefficients, and the ``N_MELS``-band Mel spectrogram converted to dB
        relative to its own maximum.
    """
    # Load audio; `sr` is rebound to the sample rate librosa actually returned.
    y, sr = librosa.load(audio_path, sr=sr, duration=duration)

    # Pad or truncate to fixed length. The sample count must be an integer:
    # a fractional duration would otherwise yield a float, which is not a
    # valid pad width or slice bound.
    target_length = int(round(sr * duration))
    if len(y) < target_length:
        y = np.pad(y, (0, target_length - len(y)))
    else:
        y = y[:target_length]

    # Extract MFCC
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=N_MFCC)

    # Extract Mel spectrogram and convert power to dB, referenced to the peak
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=N_MELS)
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

    return mfcc, mel_spec_db

In [None]:
# TODO: Process all audio files and create feature dataset
def create_dataset(metadata, audio_path):
    """Create feature dataset from audio files.

    Args:
        metadata: Table with 'filename' and 'category' columns, one row per
            audio clip.
        audio_path: Directory holding the audio files; a ``str`` or a
            ``Path`` is accepted.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays: the dB Mel spectrogram
        of every file that was found, and the matching category labels in
        the same order. Files that do not exist are skipped and reported in
        a printed warning.
    """
    audio_dir = Path(audio_path)  # accept plain strings as well as Path objects
    features = []
    labels = []
    missing = []

    for filename, category in zip(metadata['filename'], metadata['category']):
        file_path = audio_dir / filename
        if not file_path.exists():
            missing.append(filename)
            continue
        # Only the Mel spectrogram is kept; the MFCCs are discarded.
        _, mel_spec = extract_features(str(file_path))
        features.append(mel_spec)
        labels.append(category)

    # Report skipped files so an incomplete dataset does not go unnoticed.
    if missing:
        print(f"Warning: skipped {len(missing)} missing audio file(s), e.g. {missing[0]}")

    return np.array(features), np.array(labels)

# Uncomment to run:
# X, y = create_dataset(security_data, AUDIO_PATH)

## Build CNN Model

In [None]:
def build_model(input_shape, num_classes):
    """Build CNN model for audio classification.

    The network is three Conv2D / BatchNormalization / MaxPooling2D blocks
    (32, 64 and 128 filters) followed by a dense classifier with dropout and
    a softmax output.

    Args:
        input_shape: Shape of one input sample without the batch axis.
            Either ``(height, width, channels)`` or a bare
            ``(height, width)``; in the latter case a single channel axis is
            appended inside the model.
        num_classes: Number of output classes (size of the softmax layer).

    Returns:
        A compiled ``tf.keras.Sequential`` model using the Adam optimizer and
        sparse categorical cross-entropy loss, so targets are expected to be
        integer class indices.
    """
    input_shape = tuple(input_shape)

    layers = [tf.keras.layers.Input(shape=input_shape)]
    if len(input_shape) == 2:
        # Conv2D needs a channel axis; give a 2-D spectrogram a single channel.
        layers.append(tf.keras.layers.Reshape(input_shape + (1,)))

    # Conv blocks 1-3: same structure, doubling the filter count each time.
    for filters in (32, 64, 128):
        layers.extend([
            tf.keras.layers.Conv2D(filters, (3, 3), activation='relu', padding='same'),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.MaxPooling2D((2, 2)),
        ])

    # Classifier
    layers.extend([
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(num_classes, activation='softmax'),
    ])

    model = tf.keras.Sequential(layers)
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

## Export for Raspberry Pi

In [None]:
def export_to_tflite(model, output_path):
    """Export model to TensorFlow Lite for Raspberry Pi.

    Args:
        model: Keras model to convert.
        output_path: Destination file for the converted model (``str`` or
            ``Path``). Missing parent directories are created.

    Returns:
        None. The converted model is written to ``output_path`` and a
        confirmation line is printed.
    """
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    # Enable the converter's default optimization set.
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    tflite_model = converter.convert()

    destination = Path(output_path)
    # Create the target directory first; writing into a directory that does
    # not exist yet would raise FileNotFoundError.
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_bytes(tflite_model)

    print(f"TFLite model saved to: {output_path}")