In [1]:
"""
Abnormal Sound Detection model
Detects car crashes, accidents, gunshots, and other abnormal sounds
Author: AI Assistant
Date: 2024
"""

'\nAbnormal Sound Detection model\nDetects car crashes, accidents, gunshots, and other abnormal sounds\nAuthor: AI Assistant\nDate: 2024\n'

In [2]:


import os
import numpy as np
import pandas as pd
import librosa
import librosa.display
import soundfile as sf
import wave
import scipy.io.wavfile as wavf
from scipy import signal
from scipy.stats import skew, kurtosis
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import (classification_report, confusion_matrix, 
                           accuracy_score, precision_recall_fscore_support,
                           roc_auc_score, roc_curve)
from sklearn.decomposition import PCA
from imblearn.over_sampling import SMOTE, ADASYN
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
import warnings
warnings.filterwarnings('ignore')


In [3]:

# For deep learning
try:
    import tensorflow as tf
    from tensorflow.keras import layers, models, callbacks
    TF_AVAILABLE = True
except ImportError:
    TF_AVAILABLE = False
    print("TensorFlow not available. Deep learning models will be disabled.")


TensorFlow not available. Deep learning models will be disabled.


In [25]:


class SoundDetectionConfig:
    """Configuration parameters for the sound detection system"""
    
    def __init__(self):
        # Audio processing
        self.SAMPLE_RATE = 22050  # Standard for sound detection
        self.DURATION = 4.0  # Duration of each audio clip in seconds
        self.N_MFCC = 40  # Number of MFCC coefficients
        self.N_MELS = 128  # Number of mel bands
        self.N_FFT = 2048  # FFT window size
        self.HOP_LENGTH = 512  # Hop length for STFT
        
        # Feature extraction
        self.ZCR_FRAME_LENGTH = 2048
        self.ZCR_HOP_LENGTH = 512
        self.RMS_FRAME_LENGTH = 2048
        self.RMS_HOP_LENGTH = 512
        
        # Model parameters
        self.TEST_SIZE = 0.2
        self.VAL_SIZE = 0.1
        self.RANDOM_STATE = 42
        
        # Classes to detect
        self.CLASSES = [
            'background',  # 0
            'bumping',     # 1 - from your original data
            'speech',      # 2 - from your original data
            'gunshot',     # 3 - new
            'car_crash',   # 4 - new
            'scream',      # 5 - new
            'explosion',   # 6 - new
            'glass_breaking', # 7 - new
            'alarm'        # 8 - new
        ]
        
        # Feature selection
        self.USE_MFCC = True
        self.USE_MEL_SPECTROGRAM = True
        self.USE_CHROMA = True
        self.USE_TONNETZ = True
        self.USE_SPECTRAL_FEATURES = True
        self.USE_ZCR = True
        self.USE_RMS = True
        self.USE_TEMPORAL_FEATURES = True

config = SoundDetectionConfig()
print(type(config))
print(vars(config))



<class '__main__.SoundDetectionConfig'>
{'SAMPLE_RATE': 22050, 'DURATION': 4.0, 'N_MFCC': 40, 'N_MELS': 128, 'N_FFT': 2048, 'HOP_LENGTH': 512, 'ZCR_FRAME_LENGTH': 2048, 'ZCR_HOP_LENGTH': 512, 'RMS_FRAME_LENGTH': 2048, 'RMS_HOP_LENGTH': 512, 'TEST_SIZE': 0.2, 'VAL_SIZE': 0.1, 'RANDOM_STATE': 42, 'CLASSES': ['background', 'bumping', 'speech', 'gunshot', 'car_crash', 'scream', 'explosion', 'glass_breaking', 'alarm'], 'USE_MFCC': True, 'USE_MEL_SPECTROGRAM': True, 'USE_CHROMA': True, 'USE_TONNETZ': True, 'USE_SPECTRAL_FEATURES': True, 'USE_ZCR': True, 'USE_RMS': True, 'USE_TEMPORAL_FEATURES': True}


In [26]:
class AdvancedAudioFeatureExtractor:
    """Extracts comprehensive audio features for abnormal sound detection"""
    
    def __init__(self, config):
        self.config = config
        self.sample_rate = config.sample_rate
        self.n_mfcc = config.n_mfcc
        self.n_fft = config.n_fft
        self.hop_length = config.hop_length
        
    def extract_all_features(self, audio_path=None, audio_array=None, sr=None):
        """
        Extract all audio features from a file or array
        """
        if audio_path:
            audio, sr = librosa.load(audio_path, sr=self.config.SAMPLE_RATE, 
                                   duration=self.config.DURATION)
        elif audio_array is not None:
            audio = audio_array
            if sr is None:
                sr = self.config.SAMPLE_RATE
        else:
            raise ValueError("Either audio_path or audio_array must be provided")
        
        # Ensure audio is proper length
        target_length = int(self.config.DURATION * sr)
        if len(audio) > target_length:
            audio = audio[:target_length]
        elif len(audio) < target_length:
            audio = np.pad(audio, (0, target_length - len(audio)), mode='constant')
        
        features = {}
        
        # 1. MFCC Features (your existing approach, enhanced)
        if self.config.USE_MFCC:
            mfccs = librosa.feature.mfcc(y=audio, sr=sr, 
                                        n_mfcc=self.config.N_MFCC,
                                        n_fft=self.config.N_FFT,
                                        hop_length=self.config.HOP_LENGTH)
            features['mfcc_mean'] = np.mean(mfccs, axis=1)
            features['mfcc_std'] = np.std(mfccs, axis=1)
            features['mfcc_skew'] = skew(mfccs, axis=1)
            features['mfcc_delta'] = np.mean(librosa.feature.delta(mfccs), axis=1)
            features['mfcc_delta2'] = np.mean(librosa.feature.delta(mfccs, order=2), axis=1)
        
        # 2. Mel-Spectrogram Features
        if self.config.USE_MEL_SPECTROGRAM:
            mel_spec = librosa.feature.melspectrogram(y=audio, sr=sr,
                                                    n_mels=self.config.N_MELS,
                                                    n_fft=self.config.N_FFT,
                                                    hop_length=self.config.HOP_LENGTH)
            mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
            features['mel_mean'] = np.mean(mel_spec_db, axis=1)
            features['mel_std'] = np.std(mel_spec_db, axis=1)
            features['mel_flux'] = np.mean(np.diff(mel_spec_db, axis=1), axis=1)
        
        # 3. Chroma Features (for harmonic content)
        if self.config.USE_CHROMA:
            chroma_stft = librosa.feature.chroma_stft(y=audio, sr=sr,
                                                    n_fft=self.config.N_FFT,
                                                    hop_length=self.config.HOP_LENGTH)
            features['chroma_mean'] = np.mean(chroma_stft, axis=1)
            features['chroma_std'] = np.std(chroma_stft, axis=1)
            features['chroma_cqt'] = np.mean(librosa.feature.chroma_cqt(y=audio, sr=sr), axis=1)
        
        # 4. Spectral Features
        if self.config.USE_SPECTRAL_FEATURES:
            spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr)
            spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio, sr=sr)
            spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr)
            spectral_contrast = librosa.feature.spectral_contrast(y=audio, sr=sr)
            
            features['spectral_centroid'] = np.mean(spectral_centroid)
            features['spectral_bandwidth'] = np.mean(spectral_bandwidth)
            features['spectral_rolloff'] = np.mean(spectral_rolloff)
            features['spectral_contrast'] = np.mean(spectral_contrast, axis=1)
            
            # Zero-crossing rate (important for abrupt sounds)
            if self.config.USE_ZCR:
                zcr = librosa.feature.zero_crossing_rate(y=audio,
                                                        frame_length=self.config.ZCR_FRAME_LENGTH,
                                                        hop_length=self.config.ZCR_HOP_LENGTH)
                features['zcr_mean'] = np.mean(zcr)
                features['zcr_std'] = np.std(zcr)
                features['zcr_max'] = np.max(zcr)
        
        # 5. RMS Energy (for loudness detection)
        if self.config.USE_RMS:
            rms = librosa.feature.rms(y=audio,
                                    frame_length=self.config.RMS_FRAME_LENGTH,
                                    hop_length=self.config.RMS_HOP_LENGTH)
            features['rms_mean'] = np.mean(rms)
            features['rms_std'] = np.std(rms)
            features['rms_max'] = np.max(rms)
            features['rms_ratio'] = np.max(rms) / (np.mean(rms) + 1e-8)  # Peak-to-average ratio
        
        # 6. Temporal Features
        if self.config.USE_TEMPORAL_FEATURES:
            # Attack time (time to reach peak)
            envelope = np.abs(librosa.core.stft(audio, n_fft=1024, hop_length=256))
            envelope = np.mean(envelope, axis=0)
            peak_idx = np.argmax(envelope)
            attack_time = peak_idx / (sr / 256)  # Convert to seconds
            features['attack_time'] = attack_time
            
            # Decay time (time from peak to sustain level)
            sustain_level = np.mean(envelope[peak_idx:peak_idx + 10])
            decay_threshold = 0.1  # 10% of peak
            decay_idx = np.where(envelope[peak_idx:] < decay_threshold * np.max(envelope))[0]
            if len(decay_idx) > 0:
                decay_time = decay_idx[0] / (sr / 256)
            else:
                decay_time = len(envelope) - peak_idx / (sr / 256)
            features['decay_time'] = decay_time
        
        # 7. Tonnetz Features (tonal content)
        if self.config.USE_TONNETZ:
            tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(audio), sr=sr)
            features['tonnetz_mean'] = np.mean(tonnetz, axis=1)
        
        # 8. Additional statistical features
        features['kurtosis'] = kurtosis(audio)
        features['skewness'] = skew(audio)
        features['crest_factor'] = np.max(np.abs(audio)) / (np.sqrt(np.mean(audio**2)) + 1e-8)
        
        # Flatten all features into a single vector
        feature_vector = []
        for key in sorted(features.keys()):
            if isinstance(features[key], np.ndarray):
                feature_vector.extend(features[key].flatten())
            else:
                feature_vector.append(features[key])
        
        return np.array(feature_vector), features
    
    def create_feature_dataframe(self, audio_paths, labels):
        """
        Create a DataFrame of features from multiple audio files
        """
        features_list = []
        labels_list = []
        
        print("Extracting features from audio files...")
        for i, (path, label) in enumerate(zip(audio_paths, labels)):
            if i % 50 == 0:
                print(f"Processing file {i}/{len(audio_paths)}...")
            
            try:
                features, _ = self.extract_all_features(audio_path=path)
                features_list.append(features)
                labels_list.append(label)
            except Exception as e:
                print(f"Error processing {path}: {e}")
                continue
        
        # Create DataFrame
        X = np.vstack(features_list)
        y = np.array(labels_list)
        
        # Normalize features
        X_scaled = self.scaler.fit_transform(X)
        
        return X_scaled, y


In [27]:
# ============================================================================
# 3. DATA PREPROCESSING AND AUGMENTATION
# ============================================================================

class AudioDataPreprocessor:
    """Handles audio data preprocessing and augmentation"""
    
    @staticmethod
    def augment_audio(audio, sr, augment_type='all'):
        """
        Apply audio augmentation techniques
        """
        augmented = audio.copy()
        
        if augment_type == 'all' or augment_type == 'noise':
            # Add Gaussian noise
            noise = np.random.randn(len(audio)) * 0.005 * np.max(audio)
            augmented = augmented + noise
        
        if augment_type == 'all' or augment_type == 'shift':
            # Time shifting
            shift = int(np.random.uniform(-sr//10, sr//10))
            if shift > 0:
                augmented = np.pad(augmented, (shift, 0), mode='constant')[0:len(audio)]
            else:
                augmented = np.pad(augmented, (0, -shift), mode='constant')[shift:]
        
        if augment_type == 'all' or augment_type == 'pitch':
            # Pitch shifting (±2 semitones)
            n_steps = np.random.uniform(-2, 2)
            augmented = librosa.effects.pitch_shift(augmented, sr=sr, n_steps=n_steps)
        
        if augment_type == 'all' or augment_type == 'speed':
            # Speed perturbation (0.9x to 1.1x)
            speed_factor = np.random.uniform(0.9, 1.1)
            augmented = librosa.effects.time_stretch(augmented, rate=speed_factor)
            
            # Ensure same length
            if len(augmented) > len(audio):
                augmented = augmented[:len(audio)]
            else:
                augmented = np.pad(augmented, (0, len(audio) - len(augmented)), mode='constant')
        
        return augmented
    
    @staticmethod
    def balance_dataset(X, y, target_samples_per_class=1000):
        """
        Balance dataset using oversampling and undersampling
        """
        from collections import Counter
        
        class_counts = Counter(y)
        print(f"Original class distribution: {class_counts}")
        
        # Oversample minority classes
        smote = SMOTE(random_state=42, sampling_strategy='auto')
        X_resampled, y_resampled = smote.fit_resample(X, y)
        
        # Undersample majority classes if still imbalanced
        rus = RandomUnderSampler(random_state=42, sampling_strategy='auto')
        X_balanced, y_balanced = rus.fit_resample(X_resampled, y_resampled)
        
        balanced_counts = Counter(y_balanced)
        print(f"Balanced class distribution: {balanced_counts}")
        
        return X_balanced, y_balanced


In [28]:

# ============================================================================
# 4. MODEL TRAINING PIPELINE
# ============================================================================

class AbnormalSoundDetector:
    """Main class for abnormal sound detection"""
    
    def __init__(self, config):
        self.config = config
        self.feature_extractor = AdvancedAudioFeatureExtractor(config)
        self.preprocessor = AudioDataPreprocessor()
        self.models = {}
        self.label_encoder = LabelEncoder()
        
    def prepare_data(self, data_directory=None, audio_paths=None, labels=None):
        """
        Prepare data for training
        """
        if data_directory:
            # Load data from directory structure
            audio_paths = []
            labels = []
            
            for class_idx, class_name in enumerate(self.config.CLASSES):
                class_dir = os.path.join(data_directory, class_name)
                if os.path.exists(class_dir):
                    for file in os.listdir(class_dir):
                        if file.endswith(('.wav', '.mp3', '.flac')):
                            audio_paths.append(os.path.join(class_dir, file))
                            labels.append(class_idx)
        
        # Extract features
        X, y = self.feature_extractor.create_feature_dataframe(audio_paths, labels)
        
        # Encode labels
        y_encoded = self.label_encoder.fit_transform(y)
        
        # Split data
        X_train, X_test, y_train, y_test = train_test_split(
            X, y_encoded, test_size=self.config.TEST_SIZE, 
            random_state=self.config.RANDOM_STATE, stratify=y_encoded
        )
        
        # Further split for validation
        X_train, X_val, y_train, y_val = train_test_split(
            X_train, y_train, test_size=self.config.VAL_SIZE, 
            random_state=self.config.RANDOM_STATE, stratify=y_train
        )
        
        print(f"Training samples: {X_train.shape[0]}")
        print(f"Validation samples: {X_val.shape[0]}")
        print(f"Testing samples: {X_test.shape[0]}")
        
        return X_train, X_val, X_test, y_train, y_val, y_test
    
    def train_ensemble_model(self, X_train, y_train, X_val, y_val):
        """
        Train an ensemble of models for better performance
        """
        print("\n" + "="*60)
        print("TRAINING ENSEMBLE MODEL")
        print("="*60)
        
        # 1. Random Forest (your existing model, enhanced)
        print("\n1. Training Random Forest...")
        rf_params = {
            'n_estimators': [100, 200, 300],
            'max_depth': [10, 20, 30, None],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4],
            'class_weight': ['balanced', 'balanced_subsample']
        }
        
        rf = RandomForestClassifier(random_state=self.config.RANDOM_STATE, n_jobs=-1)
        rf_grid = GridSearchCV(rf, rf_params, cv=3, scoring='f1_weighted', verbose=1)
        rf_grid.fit(X_train, y_train)
        self.models['random_forest'] = rf_grid.best_estimator_
        print(f"Best RF params: {rf_grid.best_params_}")
        print(f"RF Validation Score: {rf_grid.best_score_:.4f}")
        
        # 2. Gradient Boosting
        print("\n2. Training Gradient Boosting...")
        gb_params = {
            'n_estimators': [100, 200],
            'learning_rate': [0.01, 0.1, 0.2],
            'max_depth': [3, 5, 7]
        }
        
        gb = GradientBoostingClassifier(random_state=self.config.RANDOM_STATE)
        gb_grid = GridSearchCV(gb, gb_params, cv=3, scoring='f1_weighted', verbose=1)
        gb_grid.fit(X_train, y_train)
        self.models['gradient_boosting'] = gb_grid.best_estimator_
        print(f"Best GB params: {gb_grid.best_params_}")
        print(f"GB Validation Score: {gb_grid.best_score_:.4f}")
        
        # 3. Support Vector Machine
        print("\n3. Training SVM...")
        svm_params = {
            'C': [0.1, 1, 10, 100],
            'kernel': ['rbf', 'poly'],
            'gamma': ['scale', 'auto'],
            'class_weight': ['balanced']
        }
        
        svc = SVC(random_state=self.config.RANDOM_STATE, probability=True)
        svm_grid = GridSearchCV(svc, svm_params, cv=3, scoring='f1_weighted', verbose=1)
        svm_grid.fit(X_train, y_train)
        self.models['svm'] = svm_grid.best_estimator_
        print(f"Best SVM params: {svm_grid.best_params_}")
        print(f"SVM Validation Score: {svm_grid.best_score_:.4f}")
        
        # 4. Neural Network
        print("\n4. Training Neural Network...")
        nn_params = {
            'hidden_layer_sizes': [(100,), (100, 50), (200, 100)],
            'activation': ['relu', 'tanh'],
            'alpha': [0.0001, 0.001, 0.01],
            'learning_rate_init': [0.001, 0.01]
        }
        
        nn = MLPClassifier(random_state=self.config.RANDOM_STATE, max_iter=500)
        nn_grid = GridSearchCV(nn, nn_params, cv=3, scoring='f1_weighted', verbose=1)
        nn_grid.fit(X_train, y_train)
        self.models['neural_network'] = nn_grid.best_estimator_
        print(f"Best NN params: {nn_grid.best_params_}")
        print(f"NN Validation Score: {nn_grid.best_score_:.4f}")
        
        # 5. Deep Learning Model (if TensorFlow available)
        if TF_AVAILABLE and len(X_train) > 1000:
            print("\n5. Training Deep Learning Model...")
            self.models['deep_learning'] = self._build_deep_learning_model(X_train.shape[1], 
                                                                         len(self.config.CLASSES))
            self._train_deep_learning_model(self.models['deep_learning'], 
                                          X_train, y_train, X_val, y_val)
        
        # Evaluate on validation set
        print("\n" + "="*60)
        print("VALIDATION SET PERFORMANCE")
        print("="*60)
        
        for name, model in self.models.items():
            if name != 'deep_learning':  # Deep learning evaluated separately
                y_val_pred = model.predict(X_val)
                accuracy = accuracy_score(y_val, y_val_pred)
                print(f"{name.upper()}: Accuracy = {accuracy:.4f}")
    
    def _build_deep_learning_model(self, input_dim, num_classes):
        """Build a deep neural network for sound classification"""
        model = models.Sequential([
            layers.Dense(256, activation='relu', input_dim=input_dim),
            layers.BatchNormalization(),
            layers.Dropout(0.3),
            
            layers.Dense(128, activation='relu'),
            layers.BatchNormalization(),
            layers.Dropout(0.3),
            
            layers.Dense(64, activation='relu'),
            layers.BatchNormalization(),
            layers.Dropout(0.2),
            
            layers.Dense(num_classes, activation='softmax')
        ])
        
        model.compile(optimizer='adam',
                     loss='sparse_categorical_crossentropy',
                     metrics=['accuracy'])
        
        return model
    
    def _train_deep_learning_model(self, model, X_train, y_train, X_val, y_val):
        """Train the deep learning model"""
        early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, 
                                                restore_best_weights=True)
        reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                                               patience=5, min_lr=0.00001)
        
        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=100,
            batch_size=32,
            callbacks=[early_stopping, reduce_lr],
            verbose=1
        )
        
        return history
    
    def evaluate_ensemble(self, X_test, y_test):
        """
        Evaluate the ensemble of models
        """
        print("\n" + "="*60)
        print("ENSEMBLE EVALUATION ON TEST SET")
        print("="*60)
        
        # Get predictions from all models
        predictions = {}
        probabilities = {}
        
        for name, model in self.models.items():
            if name == 'deep_learning' and TF_AVAILABLE:
                proba = model.predict(X_test)
                predictions[name] = np.argmax(proba, axis=1)
                probabilities[name] = proba
            elif name != 'deep_learning':
                predictions[name] = model.predict(X_test)
                probabilities[name] = model.predict_proba(X_test)
        
        # 1. Individual model performance
        print("\nINDIVIDUAL MODEL PERFORMANCE:")
        print("-" * 40)
        
        for name, pred in predictions.items():
            accuracy = accuracy_score(y_test, pred)
            precision, recall, f1, _ = precision_recall_fscore_support(y_test, pred, 
                                                                      average='weighted')
            print(f"\n{name.upper()}:")
            print(f"  Accuracy:  {accuracy:.4f}")
            print(f"  Precision: {precision:.4f}")
            print(f"  Recall:    {recall:.4f}")
            print(f"  F1-Score:  {f1:.4f}")
            
            # Detailed classification report
            print("\n  Classification Report:")
            report = classification_report(y_test, pred, 
                                         target_names=self.config.CLASSES)
            for line in report.split('\n'):
                print(f"    {line}")
        
        # 2. Ensemble prediction (voting)
        print("\n" + "="*60)
        print("ENSEMBLE VOTING PERFORMANCE")
        print("="*60)
        
        # Weighted voting based on validation performance
        ensemble_pred = np.zeros((len(X_test), len(self.config.CLASSES)))
        
        for name, proba in probabilities.items():
            if name == 'random_forest':
                weight = 0.35  # RF usually performs well
            elif name == 'gradient_boosting':
                weight = 0.25
            elif name == 'svm':
                weight = 0.20
            elif name == 'neural_network':
                weight = 0.15
            elif name == 'deep_learning':
                weight = 0.05
            else:
                weight = 0.10
            
            ensemble_pred += weight * proba
        
        final_predictions = np.argmax(ensemble_pred, axis=1)
        
        # Evaluate ensemble
        accuracy = accuracy_score(y_test, final_predictions)
        precision, recall, f1, _ = precision_recall_fscore_support(y_test, final_predictions,
                                                                  average='weighted')
        
        print(f"\nENSEMBLE RESULTS:")
        print(f"Accuracy:  {accuracy:.4f}")
        print(f"Precision: {precision:.4f}")
        print(f"Recall:    {recall:.4f}")
        print(f"F1-Score:  {f1:.4f}")
        
        # Confusion matrix
        print("\nConfusion Matrix:")
        cm = confusion_matrix(y_test, final_predictions)
        self._plot_confusion_matrix(cm)
        
        # Detailed classification report
        print("\nDetailed Classification Report:")
        print(classification_report(y_test, final_predictions, 
                                  target_names=self.config.CLASSES))
        
        return final_predictions, ensemble_pred
    
    def _plot_confusion_matrix(self, cm):
        """Plot confusion matrix"""
        plt.figure(figsize=(10, 8))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                   xticklabels=self.config.CLASSES,
                   yticklabels=self.config.CLASSES)
        plt.title('Confusion Matrix')
        plt.ylabel('True Label')
        plt.xlabel('Predicted Label')
        plt.tight_layout()
        plt.show()
    
    def predict_single_audio(self, audio_path, threshold=0.7):
        """
        Predict class for a single audio file
        """
        # Extract features
        features, _ = self.feature_extractor.extract_all_features(audio_path=audio_path)
        features = self.feature_extractor.scaler.transform(features.reshape(1, -1))
        
        # Get predictions from all models
        probabilities = []
        
        for name, model in self.models.items():
            if name == 'deep_learning' and TF_AVAILABLE:
                proba = model.predict(features)[0]
            elif name != 'deep_learning':
                proba = model.predict_proba(features)[0]
            else:
                continue
            probabilities.append(proba)
        
        # Weighted ensemble probability
        weights = [0.35, 0.25, 0.20, 0.15, 0.05]  # Same as in evaluate_ensemble
        ensemble_proba = np.zeros(len(self.config.CLASSES))
        
        for i, proba in enumerate(probabilities[:len(weights)]):
            ensemble_proba += weights[i] * proba
        
        # Get prediction
        predicted_class_idx = np.argmax(ensemble_proba)
        confidence = ensemble_proba[predicted_class_idx]
        
        # Check if confidence meets threshold
        if confidence < threshold:
            print(f"Warning: Low confidence ({confidence:.2f}). Sound may be ambiguous.")
        
        predicted_class = self.config.CLASSES[predicted_class_idx]
        
        # Get top 3 predictions
        top_indices = np.argsort(ensemble_proba)[-3:][::-1]
        top_classes = [self.config.CLASSES[i] for i in top_indices]
        top_confidences = [ensemble_proba[i] for i in top_indices]
        
        return {
            'predicted_class': predicted_class,
            'confidence': confidence,
            'top_predictions': list(zip(top_classes, top_confidences)),
            'all_probabilities': dict(zip(self.config.CLASSES, ensemble_proba))
        }
    
    def real_time_detection(self, audio_stream, window_duration=1.0, overlap=0.5):
        """
        Real-time abnormal sound detection from audio stream
        """
        import queue
        import threading
        
        sr = self.config.SAMPLE_RATE
        window_samples = int(window_duration * sr)
        hop_samples = int((1 - overlap) * window_samples)
        
        # Buffer for audio stream
        audio_buffer = queue.Queue()
        detection_results = queue.Queue()
        
        def process_audio():
            while True:
                # Get audio chunk from buffer
                audio_chunk = audio_buffer.get()
                if audio_chunk is None:  # Termination signal
                    break
                
                # Extract features and predict
                features, _ = self.feature_extractor.extract_all_features(
                    audio_array=audio_chunk, sr=sr
                )
                features = self.feature_extractor.scaler.transform(features.reshape(1, -1))
                
                # Get ensemble prediction
                ensemble_proba = np.zeros(len(self.config.CLASSES))
                weights = [0.35, 0.25, 0.20, 0.15, 0.05]
                
                for i, (name, model) in enumerate(self.models.items()):
                    if i >= len(weights):
                        break
                    
                    if name == 'deep_learning' and TF_AVAILABLE:
                        proba = model.predict(features)[0]
                    elif name != 'deep_learning':
                        proba = model.predict_proba(features)[0]
                    else:
                        continue
                    
                    ensemble_proba += weights[i] * proba
                
                # Check for abnormal sounds (classes 3-8)
                abnormal_classes = self.config.CLASSES[3:]
                abnormal_indices = [self.config.CLASSES.index(c) for c in abnormal_classes]
                abnormal_prob = np.sum(ensemble_proba[abnormal_indices])
                
                if abnormal_prob > 0.5:  # Threshold for abnormality
                    predicted_idx = np.argmax(ensemble_proba)
                    predicted_class = self.config.CLASSES[predicted_idx]
                    confidence = ensemble_proba[predicted_idx]
                    
                    detection_results.put({
                        'timestamp': time.time(),
                        'class': predicted_class,
                        'confidence': confidence,
                        'abnormal': True,
                        'all_probabilities': ensemble_proba
                    })
                
                audio_buffer.task_done()
        
        # Start processing thread
        processor_thread = threading.Thread(target=process_audio)
        processor_thread.start()
        
        # Return buffers for external use
        return audio_buffer, detection_results, processor_thread


In [29]:
# ============================================================================
# 5. VISUALIZATION AND ANALYSIS TOOLS
# ============================================================================

class SoundVisualizer:
    """Visualization tools for audio analysis"""
    
    @staticmethod
    def plot_audio_features(audio_path, config):
        """Plot various audio features for analysis"""
        audio, sr = librosa.load(audio_path, sr=config.SAMPLE_RATE)
        
        fig, axes = plt.subplots(3, 2, figsize=(15, 10))
        
        # 1. Waveform
        axes[0, 0].plot(np.arange(len(audio)) / sr, audio)
        axes[0, 0].set_title('Waveform')
        axes[0, 0].set_xlabel('Time (s)')
        axes[0, 0].set_ylabel('Amplitude')
        
        # 2. Spectrogram
        D = librosa.amplitude_to_db(np.abs(librosa.stft(audio)), ref=np.max)
        img = librosa.display.specshow(D, y_axis='log', x_axis='time', sr=sr, ax=axes[0, 1])
        axes[0, 1].set_title('Spectrogram')
        fig.colorbar(img, ax=axes[0, 1], format='%+2.0f dB')
        
        # 3. MFCCs
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
        img = librosa.display.specshow(mfccs, x_axis='time', sr=sr, ax=axes[1, 0])
        axes[1, 0].set_title('MFCCs')
        fig.colorbar(img, ax=axes[1, 0])
        
        # 4. Mel-spectrogram
        mel_spec = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=128)
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
        img = librosa.display.specshow(mel_spec_db, y_axis='mel', x_axis='time', 
                                      sr=sr, ax=axes[1, 1])
        axes[1, 1].set_title('Mel-spectrogram')
        fig.colorbar(img, ax=axes[1, 1], format='%+2.0f dB')
        
        # 5. Chromagram
        chroma = librosa.feature.chroma_stft(y=audio, sr=sr)
        img = librosa.display.specshow(chroma, y_axis='chroma', x_axis='time', 
                                      sr=sr, ax=axes[2, 0])
        axes[2, 0].set_title('Chromagram')
        fig.colorbar(img, ax=axes[2, 0])
        
        # 6. Spectral contrast
        contrast = librosa.feature.spectral_contrast(y=audio, sr=sr)
        img = librosa.display.specshow(contrast, x_axis='time', sr=sr, ax=axes[2, 1])
        axes[2, 1].set_title('Spectral Contrast')
        fig.colorbar(img, ax=axes[2, 1])
        
        plt.tight_layout()
        plt.show()
    
    @staticmethod
    def plot_feature_importance(model, feature_names, top_n=20):
        """Plot feature importance for tree-based models"""
        if hasattr(model, 'feature_importances_'):
            importances = model.feature_importances_
            indices = np.argsort(importances)[-top_n:]
            
            plt.figure(figsize=(10, 6))
            plt.title(f'Top {top_n} Feature Importances')
            plt.barh(range(top_n), importances[indices])
            plt.yticks(range(top_n), [feature_names[i] for i in indices])
            plt.xlabel('Relative Importance')
            plt.tight_layout()
            plt.show()
        else:
            print("Model doesn't have feature_importances_ attribute")


In [30]:
# ============================================================================
# 6. MAIN EXECUTION AND EXAMPLE USAGE
# ============================================================================

def main():
    """
    Main execution function with example usage
    """
    print("="*70)
    print("ABNORMAL SOUND DETECTION SYSTEM")
    print("Detects: Gunshots, Car Crashes, Screams, Explosions, etc.")
    print("="*70)
    
    # Initialize configuration
    class SoundDetectionConfig:
        def __init__(self):
            # Audio parameters
            self.sample_rate = 22050
            self.n_mfcc = 40
            self.n_fft = 2048
            self.hop_length = 512

            # Feature parameters
            self.n_mels = 128
            self.duration = 3.0

            # Model parameters
            self.random_state = 42

    
    # Initialize detector
    detector = AbnormalSoundDetector(config)
    
    # Example 1: Prepare data from directory structure
    # Directory should have subfolders named after CLASSES
    data_dir = "path/to/your/audio/dataset"
    
    if os.path.exists(data_dir):
        print(f"\nLoading data from {data_dir}...")
        X_train, X_val, X_test, y_train, y_val, y_test = detector.prepare_data(
            data_directory=data_dir
        )
    else:
        print("\nUsing simulated data for demonstration...")
        # Create simulated data for demonstration
        # In practice, replace this with your actual data loading
        
        # Simulate some audio file paths and labels
        num_samples = 1000
        audio_paths = [f"simulated_audio_{i}.wav" for i in range(num_samples)]
        
        # Simulate labels (adjust distribution as needed)
        labels = np.random.choice([0, 1, 2, 3, 4, 5, 6, 7, 8], 
                                size=num_samples,
                                p=[0.3, 0.1, 0.2, 0.05, 0.05, 0.05, 0.05, 0.1, 0.1])
        
        # For demonstration, we'll use simulated features
        n_features = 500  # Approximate number of features
        X_simulated = np.random.randn(num_samples, n_features)
        y_simulated = labels
        
        # Split the data
        X_train, X_test, y_train, y_test = train_test_split(
            X_simulated, y_simulated, test_size=0.2, random_state=42
        )
        X_train, X_val, y_train, y_val = train_test_split(
            X_train, y_train, test_size=0.1, random_state=42
        )
    
    # Train ensemble model
    detector.train_ensemble_model(X_train, y_train, X_val, y_val)
    
    # Evaluate on test set
    predictions, probabilities = detector.evaluate_ensemble(X_test, y_test)
    
    # Example 2: Predict single audio file
    print("\n" + "="*70)
    print("SINGLE AUDIO PREDICTION EXAMPLE")
    print("="*70)
    
    test_audio = "path/to/test/audio.wav"
    if os.path.exists(test_audio):
        result = detector.predict_single_audio(test_audio)
        print(f"\nPrediction for {test_audio}:")
        print(f"  Class: {result['predicted_class']}")
        print(f"  Confidence: {result['confidence']:.2%}")
        print(f"  Top 3 predictions:")
        for class_name, confidence in result['top_predictions']:
            print(f"    - {class_name}: {confidence:.2%}")
    else:
        print(f"\nTest file {test_audio} not found.")
        print("Using simulated prediction for demonstration...")
        
        # Simulate a prediction result
        simulated_result = {
            'predicted_class': 'gunshot',
            'confidence': 0.87,
            'top_predictions': [
                ('gunshot', 0.87),
                ('car_crash', 0.08),
                ('explosion', 0.03)
            ]
        }
        
        print(f"\nSimulated prediction:")
        print(f"  Class: {simulated_result['predicted_class']}")
        print(f"  Confidence: {simulated_result['confidence']:.2%}")
        print(f"  Top 3 predictions:")
        for class_name, confidence in simulated_result['top_predictions']:
            print(f"    - {class_name}: {confidence:.2%}")
    
    # Example 3: Feature visualization
    print("\n" + "="*70)
    print("FEATURE VISUALIZATION")
    print("="*70)
    
    visualizer = SoundVisualizer()
    
    # Plot feature importance for Random Forest
    if 'random_forest' in detector.models:
        # Create dummy feature names for demonstration
        feature_names = [f'feature_{i}' for i in range(X_train.shape[1])]

        visualizer.plot_feature_importance(
            detector.models['random_forest'],
            feature_names,
            top_n=15
        )

    
    # Example 4: Real-time detection setup
    print("\n" + "="*70)
    print("REAL-TIME DETECTION SETUP")
    print("="*70)
    
    print("\nReal-time detection can be implemented using:")
    print("1. Microphone input with pyaudio")
    print("2. Network audio stream")
    print("3. Pre-recorded audio in chunks")
    print("\nUse detector.real_time_detection() for implementation.")
    
    # Save models for later use
    print("\n" + "="*70)
    print("SAVING MODELS")
    print("="*70)
    
    import joblib
    import pickle
    
    # Save the detector object
    with open('abnormal_sound_detector.pkl', 'wb') as f:
        pickle.dump(detector, f)
    
    # Save individual models
    for name, model in detector.models.items():
        if name != 'deep_learning':  # Keras models need special handling
            joblib.dump(model, f'{name}_model.joblib')
    
    print("Models saved successfully!")
    
    return detector

def load_and_use_saved_model():
    """Load a saved model and use it for prediction"""
    import pickle
    
    print("\nLoading saved model...")
    
    try:
        with open('abnormal_sound_detector.pkl', 'rb') as f:
            detector = pickle.load(f)
        
        print("Model loaded successfully!")
        
        # Example usage
        test_file = "path/to/your/test_sound.wav"
        
        if os.path.exists(test_file):
            result = detector.predict_single_audio(test_file)
            print(f"\nPrediction: {result['predicted_class']}")
            print(f"Confidence: {result['confidence']:.2%}")
            
            if result['confidence'] > 0.7:
                if result['predicted_class'] in ['gunshot', 'car_crash', 'explosion', 'scream']:
                    print(f"⚠️  ALERT: Abnormal sound detected! ({result['predicted_class']})")
                    # You could trigger an alarm, send notification, etc.
        else:
            print(f"Test file {test_file} not found.")
            
    except FileNotFoundError:
        print("Saved model not found. Please train a model first.")


In [31]:
# ============================================================================
# EXECUTION
# ============================================================================

if __name__ == "__main__":
    # Option 1: Train a new model
    detector = main()
    
    # Option 2: Load and use saved model
    # load_and_use_saved_model()
    
    # Option 3: Adapt your existing code
    # X, y = prepare_dataset_from_existing_code()

ABNORMAL SOUND DETECTION SYSTEM
Detects: Gunshots, Car Crashes, Screams, Explosions, etc.


AttributeError: 'SoundDetectionConfig' object has no attribute 'sample_rate'