<a href="https://colab.research.google.com/github/mithila20/CSE445/blob/main/SNR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Colab runs on Ubuntu, so we install the Linux `tree` package.

In [None]:
!apt-get install tree

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
tree is already the newest version (2.0.2-1).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.


### Download Dataset from Kaggle

##### Kaggle account Username and API key

In [None]:
import os

# SECURITY: Kaggle credentials must never be committed to a notebook. The key
# that used to be hardcoded here is exposed in the repository history and
# should be revoked from the Kaggle account settings.
# Credentials already present in the environment are reused; otherwise they are
# asked for interactively so that they never end up in the saved notebook.
from getpass import getpass

if not os.environ.get('KAGGLE_USERNAME'):
    os.environ['KAGGLE_USERNAME'] = input("Kaggle username: ").strip()  # username from the json file
if not os.environ.get('KAGGLE_KEY'):
    os.environ['KAGGLE_KEY'] = getpass("Kaggle API key: ").strip()  # key from the json file

#### Command to Download the dataset

In [None]:
!kaggle datasets download muhmagdy/valentini-noisy

Dataset URL: https://www.kaggle.com/datasets/muhmagdy/valentini-noisy
License(s): CC-BY-SA-4.0
valentini-noisy.zip: Skipping, found more recently modified local copy (use --force to force download)


#### Unzip the Dataset

In [None]:
import zipfile
from tqdm import tqdm

# Archive fetched by the kaggle CLI and the folder it is expanded into
zip_path = '/content/valentini-noisy.zip'
unzip_path = '/content/valentini-noisy/'

# Extract one member at a time so the progress bar advances per file
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    members = zip_ref.infolist()
    with tqdm(total=len(members), desc="Unzipping") as progress:
        for member in members:
            zip_ref.extract(member, unzip_path)
            progress.update(1)

print("Unzipping completed.")


Unzipping: 100%|██████████| 70942/70942 [03:59<00:00, 296.29it/s]

Unzipping completed.





#### Dataset Folder Tree

In [None]:
!ls /content/valentini-noisy/

clean_testset_wav	  clean_trainset_56spk_wav  noisy_trainset_28spk_wav
clean_trainset_28spk_wav  noisy_testset_wav	    noisy_trainset_56spk_wav


In [None]:
!tree -h --du -o tree_output.txt /content/valentini-noisy

In [None]:
!ls -l /content/valentini-noisy/clean_testset_wav > /content/clean_testset_wav.txt

#### Sample Sound Data to listen

##### Noisy Sound Data

In [None]:
import IPython.display as ipd
ipd.Audio('/content/valentini-noisy/noisy_trainset_28spk_wav/p226_001.wav')

##### Clean Sound Data

In [None]:
ipd.Audio('/content/valentini-noisy/clean_trainset_28spk_wav/p226_001.wav')

#### Number of data in each folder

In [None]:
import os

def count_files_in_folders(root_dir):
  """Count the files directly contained in every sub-folder of ``root_dir``.

  The tree is walked recursively with ``os.walk``; the root folder itself is
  left out of the result.

  Args:
    root_dir: Path of the directory whose sub-folders are inspected.

  Returns:
    dict mapping each sub-folder's base name to the number of files that sit
    directly inside it. Sub-folders at different depths that share a base name
    share a key, and the one visited last wins.
  """
  root_path = os.fspath(root_dir)
  file_counts = {}
  for dirpath, _dirnames, filenames in os.walk(root_path):
    # Skip only the root itself. Comparing the full path (not the base name)
    # keeps a nested folder that happens to be named like the root.
    if dirpath == root_path:
      continue
    file_counts[os.path.basename(dirpath)] = len(filenames)
  return file_counts


root_directory = "/content/valentini-noisy"
counts = count_files_in_folders(root_directory)

for folder, count in counts.items():
  print(f"Folder: {folder}, Number of files: {count}")

Folder: clean_trainset_56spk_wav, Number of files: 23075
Folder: clean_testset_wav, Number of files: 824
Folder: noisy_trainset_28spk_wav, Number of files: 11572
Folder: noisy_testset_wav, Number of files: 824
Folder: clean_trainset_28spk_wav, Number of files: 11572
Folder: noisy_trainset_56spk_wav, Number of files: 23075


## Signal-to-Noise Ratio (SNR) and Connectivity Quality

Here's a breakdown of SNR ranges and their corresponding connection quality:

| SNR Range (dB) | Connectivity Quality | Description |
|---|---|---|
| 5 to 10 | **Unusable** | Below minimum level; noise indistinguishable from signal. |
| 10 to 15 | **Unreliable** | Accepted minimum for unreliable connection. |
| 15 to 25 | **Poor** | Minimally acceptable for poor connectivity. |
| 25 to 40 | **Good** | Acceptable level for good connectivity. |
| 41+ | **Excellent** | Ideal level for excellent connectivity. |

In [None]:
import librosa
import numpy as np
import os

def load_audio(file_path, sr=22050):
    """Read an audio file with librosa and return only its samples.

    Args:
        file_path: Path of the audio file to read.
        sr: Sampling rate handed to ``librosa.load`` (22050 by default).

    Returns:
        The first element of the ``librosa.load`` result (the waveform); the
        sampling rate that librosa reports back is discarded.
    """
    samples, _ = librosa.load(file_path, sr=sr)
    return samples

def rms(signal):
    """Return the root-mean-square value of ``signal``.

    Args:
        signal: Array of samples (must support ``** 2``, e.g. a NumPy array).

    Returns:
        Square root of the mean of the squared samples.
    """
    mean_square = np.mean(signal ** 2)
    return np.sqrt(mean_square)

def calculate_snr(clean_audio, noisy_audio):
    """Signal-to-noise ratio, in dB, of a noisy signal against its clean reference.

    The noise is taken to be ``noisy_audio - clean_audio`` after both signals
    are cut to the length of the shorter one, and the ratio is
    ``10 * log10(signal_power / noise_power)`` with power = mean of squares.

    Args:
        clean_audio: 1-D sequence of clean samples.
        noisy_audio: 1-D sequence of noisy samples of the same recording.

    Returns:
        The SNR in decibels. Degenerate inputs are handled without emitting
        NumPy runtime warnings:
        ``inf`` when the two signals are identical (zero noise power),
        ``-inf`` when the clean signal is silent but noise is present,
        ``nan`` when both powers are zero or either input is empty.
    """
    # float64 keeps the power sums accurate for float32 input
    clean = np.asarray(clean_audio, dtype=np.float64)
    noisy = np.asarray(noisy_audio, dtype=np.float64)

    # Ensure both signals are the same length
    min_length = min(len(clean), len(noisy))
    if min_length == 0:
        return float('nan')
    clean = clean[:min_length]
    noisy = noisy[:min_length]

    # Compute the noise by subtracting the clean audio from the noisy audio
    noise = noisy - clean

    # Power is the mean square; no need to take a square root and square it again
    signal_power = np.mean(clean ** 2)
    noise_power = np.mean(noise ** 2)

    # Degenerate cases are answered explicitly instead of dividing by zero
    if noise_power == 0:
        return float('inf') if signal_power > 0 else float('nan')
    if signal_power == 0:
        return float('-inf')

    # Calculate SNR in decibels
    return 10 * np.log10(signal_power / noise_power)

def calculate_snr_for_first_n_datasets(clean_dir, noisy_dir, num_files=200, sr=22050):
    """Print the SNR of the first ``num_files`` clean/noisy file pairs.

    A pair is a file name that exists in BOTH directories. Pairing by name
    (instead of zipping two independently sorted listings) guarantees that a
    file missing from one directory cannot shift every following pair and
    make the SNR compare unrelated recordings. Names found in only one of the
    directories are ignored.

    Args:
        clean_dir: Directory holding the clean recordings.
        noisy_dir: Directory holding the noisy recordings with matching names.
        num_files: Maximum number of pairs to process, in sorted name order.
        sr: Sampling rate forwarded to ``load_audio``.

    Returns:
        None. One line per pair is printed.
    """
    noisy_names = set(os.listdir(noisy_dir))
    paired_names = sorted(
        name for name in os.listdir(clean_dir) if name in noisy_names
    )[:num_files]

    for file_name in paired_names:
        # Same file name on both sides by construction
        clean_path = os.path.join(clean_dir, file_name)
        noisy_path = os.path.join(noisy_dir, file_name)

        # Load the clean and noisy audio
        clean_audio = load_audio(clean_path, sr)
        noisy_audio = load_audio(noisy_path, sr)

        # Calculate SNR for the current pair
        snr = calculate_snr(clean_audio, noisy_audio)

        # Print the result (both names are kept to preserve the output format)
        print(f"SNR for {file_name} and {file_name}: {snr:.2f} dB")

# Paths to the clean and noisy audio directories
clean_audio_dir = '/content/valentini-noisy/clean_trainset_56spk_wav'
noisy_audio_dir = '/content/valentini-noisy/noisy_trainset_56spk_wav'

# Calculate SNR for the first 200 pairs of clean and noisy files in the datasets
calculate_snr_for_first_n_datasets(clean_audio_dir, noisy_audio_dir, num_files=200)


SNR for p234_001.wav and p234_001.wav: 13.47 dB
SNR for p234_002.wav and p234_002.wav: 9.09 dB
SNR for p234_003.wav and p234_003.wav: 4.09 dB
SNR for p234_004.wav and p234_004.wav: -1.67 dB
SNR for p234_005.wav and p234_005.wav: 14.53 dB
SNR for p234_006.wav and p234_006.wav: 9.08 dB
SNR for p234_007.wav and p234_007.wav: 3.60 dB
SNR for p234_008.wav and p234_008.wav: -0.51 dB
SNR for p234_009.wav and p234_009.wav: 13.75 dB
SNR for p234_010.wav and p234_010.wav: 7.59 dB
SNR for p234_012.wav and p234_012.wav: 4.26 dB
SNR for p234_013.wav and p234_013.wav: -1.35 dB
SNR for p234_014.wav and p234_014.wav: 14.01 dB
SNR for p234_015.wav and p234_015.wav: 9.18 dB
SNR for p234_016.wav and p234_016.wav: 4.86 dB
SNR for p234_017.wav and p234_017.wav: -0.78 dB
SNR for p234_018.wav and p234_018.wav: 14.37 dB
SNR for p234_019.wav and p234_019.wav: 9.39 dB
SNR for p234_020.wav and p234_020.wav: 3.95 dB
SNR for p234_021.wav and p234_021.wav: -0.43 dB
SNR for p234_022.wav and p234_022.wav: 14.19 dB
SN

### Making the dataset smaller: taking 100 clean/noisy pairs (200 files) from the 23075 pairs

In [None]:
import os
import shutil

# Source folders (56-speaker training set) and destination folders (small subset)
noisy_source = '/content/valentini-noisy/noisy_trainset_56spk_wav'
clean_source = '/content/valentini-noisy/clean_trainset_56spk_wav'
small_dataset_noisy = '/content/small_dataset/noisy'
small_dataset_clean = '/content/small_dataset/clean'

# Make sure both destination folders exist before copying
for destination in (small_dataset_noisy, small_dataset_clean):
    os.makedirs(destination, exist_ok=True)

# Directory listings; the clean listing is also kept as a set for fast membership tests
noisy_files = os.listdir(noisy_source)
clean_files = os.listdir(clean_source)
clean_files_set = set(clean_files)

# Walk the noisy listing and keep every name that also exists in the clean
# folder, stopping as soon as 100 pairs have been collected
selected_noisy_files = []
for candidate in noisy_files:
    if candidate not in clean_files_set:
        continue
    selected_noisy_files.append(candidate)
    if len(selected_noisy_files) == 100:
        break

# A pair shares one file name, so the clean selection mirrors the noisy one
selected_clean_files = list(selected_noisy_files)

if len(selected_noisy_files) != 100:
    print("Not enough matching pairs found in the directories.")
else:
    # Copy the noisy half first, then the clean half
    for file_name in selected_noisy_files:
        shutil.copy(os.path.join(noisy_source, file_name), small_dataset_noisy)

    for file_name in selected_clean_files:
        shutil.copy(os.path.join(clean_source, file_name), small_dataset_clean)

    print("100 pairwise sample dataset created successfully.")

100 pairwise sample dataset created successfully.


##### Size of the small dataset

In [None]:
!du -sh /content/small_dataset

53M	/content/small_dataset


In [None]:
!tree /content/small_dataset > small_dataset.txt

In [None]:
!ls small_dataset

clean  noisy


# Classification of Clean and Noisy Sound

## LSTM

### Trial 1 of LSTM Model

##### Libraries

In [None]:
import os
import librosa
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
import matplotlib.pyplot as plt

##### Function to Extract Features from Sound Files

In [None]:
def extract_features(file_path):
    """Summarise one audio file as a vector of time-averaged MFCCs.

    Args:
        file_path: Path of the audio file; it is loaded at its native
            sampling rate (``sr=None``).

    Returns:
        1-D array with one averaged value per MFCC coefficient
        (``n_mfcc=13`` coefficients are requested).
    """
    waveform, native_sr = librosa.load(file_path, sr=None)
    coefficients = librosa.feature.mfcc(y=waveform, sr=native_sr, n_mfcc=13)

    # Averaging the transposed matrix over axis 0 collapses the time axis
    return np.mean(coefficients.T, axis=0)

##### Prepare the Dataset

In [None]:
def load_data(clean_dir, noisy_dir):
    """Build the feature matrix and label vector for the clean/noisy classifier.

    Every file of ``clean_dir`` is processed first (label 0), followed by
    every file of ``noisy_dir`` (label 1); features come from
    ``extract_features``.

    Args:
        clean_dir: Directory containing the clean recordings.
        noisy_dir: Directory containing the noisy recordings.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays with one row / one entry
        per file.
    """
    features = []
    labels = []

    # 0 = clean sound, 1 = noisy sound; clean files are always handled first
    for label, directory in ((0, clean_dir), (1, noisy_dir)):
        for file_name in os.listdir(directory):
            features.append(extract_features(os.path.join(directory, file_name)))
            labels.append(label)

    return np.array(features), np.array(labels)

# Load the data
clean_dir = '/content/small_dataset/clean'
noisy_dir = '/content/small_dataset/noisy'
X, y = load_data(clean_dir, noisy_dir)


##### Train-Test Split

In [None]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

##### Define the LSTM Model

In [None]:
def build_lstm_model(input_shape):
    """Assemble the (uncompiled) Trial 1 LSTM binary classifier.

    Args:
        input_shape: ``(time_steps, features)`` shape of one input sample.

    Returns:
        A Keras ``Sequential`` model ending in a single sigmoid unit
        (clean = 0, noisy = 1).
    """
    return Sequential([
        # Recurrent encoder; only the last output is passed on
        LSTM(64, input_shape=input_shape, return_sequences=False),
        # Dropout to limit overfitting
        Dropout(0.5),
        # Fully connected block
        Dense(64, activation='relu'),
        BatchNormalization(),
        # Binary output
        Dense(1, activation='sigmoid'),
    ])

# Reshaping the input data to be compatible with LSTM (samples, time_steps, features)
X_train_reshaped = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test_reshaped = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Build the model
model = build_lstm_model((X_train_reshaped.shape[1], 1))


##### Compile the Model

In [None]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

##### Train the Model

In [None]:
# Train the model
history = model.fit(X_train_reshaped, y_train, epochs=10, batch_size=32, validation_data=(X_test_reshaped, y_test))

##### Plot Training History

In [None]:
# Plot training and validation accuracy
plt.plot(history.history['accuracy'], label='train accuracy')
plt.plot(history.history['val_accuracy'], label='test accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.title('Training and Validation Accuracy')
plt.show()

# Plot training and validation loss
plt.plot(history.history['loss'], label='train loss')
plt.plot(history.history['val_loss'], label='test loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.title('Training and Validation Loss')
plt.show()

According to the plot, this model is not good: it has an overfitting problem.

##### Save the Model

In [None]:
model.save('/content/sound_classification_lstm_model.h5')

##### Load the Saved Model

In [None]:
loaded_model = tf.keras.models.load_model('/content/sound_classification_lstm_model.h5')

##### Test the Model with a New Audio File for trial 1.

In [None]:
def classify_audio(file_path, model):
    """Label one audio file as "Noisy" or "Clean" with a trained model.

    Args:
        file_path: Path of the audio file to classify.
        model: Object exposing ``predict``; it receives a batch of shape
            ``(1, n_features, 1)``.

    Returns:
        "Noisy" when the predicted score is at least 0.5, "Clean" otherwise.
    """
    features = extract_features(file_path)

    # One sample, one value per step: (1, n_features, 1)
    batch = features.reshape((1, features.shape[0], 1))
    score = model.predict(batch)

    # 0: clean, 1: noisy
    return "Noisy" if score >= 0.5 else "Clean"

# Test with an audio file (replace this path with the path to your test audio file)
# The path used to start with a doubled slash ('//content/...'); a single
# leading slash is the intended absolute path.
test_audio_path = '/content/small_dataset/noisy/p234_284.wav'
predicted_label = classify_audio(test_audio_path, loaded_model)
print(f"Predicted Label for the test audio file: {predicted_label}")


The model is biased because of the overfitting problem: it always classifies any sample as Noisy.

### Trial 2 of LSTM model

##### Some more operations of the model

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# Compute class weights to handle imbalance
class_weights = compute_class_weight('balanced', classes=np.unique(y), y=y)
class_weight_dict = dict(enumerate(class_weights))

# Train the model with class weights
history = model.fit(X_train_reshaped, y_train, epochs=10, batch_size=32,
                    validation_data=(X_test_reshaped, y_test), class_weight=class_weight_dict)


In [None]:
def extract_features(file_path):
    """Summarise one audio file with averaged MFCC, chroma and spectral-contrast features.

    Args:
        file_path: Path of the audio file; it is loaded at its native
            sampling rate (``sr=None``).

    Returns:
        1-D array: the time-averaged MFCCs (13 requested), followed by the
        averaged chroma features, followed by the averaged spectral contrast.
    """
    waveform, native_sr = librosa.load(file_path, sr=None)

    # Frame-level descriptors, computed in the order they are concatenated
    frame_features = (
        librosa.feature.mfcc(y=waveform, sr=native_sr, n_mfcc=13),
        librosa.feature.chroma_stft(y=waveform, sr=native_sr),
        librosa.feature.spectral_contrast(y=waveform, sr=native_sr),
    )

    # Average each descriptor over time, then join everything into one vector
    return np.concatenate([np.mean(matrix.T, axis=0) for matrix in frame_features])

In [None]:
def build_lstm_model(input_shape):
    """Assemble the (uncompiled) Trial 2 classifier: two stacked LSTM layers.

    Args:
        input_shape: ``(time_steps, features)`` shape of one input sample.

    Returns:
        A Keras ``Sequential`` model ending in a single sigmoid unit.
    """
    return Sequential([
        # First recurrent layer returns the full sequence for the next LSTM
        LSTM(128, input_shape=input_shape, return_sequences=True),
        Dropout(0.5),
        # Second recurrent layer keeps only its last output
        LSTM(64, return_sequences=False),
        Dropout(0.5),
        # Fully connected block
        Dense(64, activation='relu'),
        BatchNormalization(),
        # Binary output
        Dense(1, activation='sigmoid'),
    ])

In [None]:
# Add EarlyStopping to monitor the validation loss
from tensorflow.keras.callbacks import EarlyStopping

# Trial 2 redefines extract_features (MFCC + chroma + spectral contrast) and
# build_lstm_model (stacked LSTMs) above, but nothing was rebuilt from them:
# X still held the Trial 1 MFCC means and `model` was still the Trial 1
# network. Re-extract, re-split and rebuild so that this trial really trains
# the new architecture on the new features, and so that the saved model
# accepts the feature vector that the Trial 2 classify_audio builds.
X, y = load_data(clean_dir, noisy_dir)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Reshape to (samples, time_steps, features) as expected by the LSTM
X_train_reshaped = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test_reshaped = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

model = build_lstm_model((X_train_reshaped.shape[1], 1))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model with early stopping
history = model.fit(X_train_reshaped, y_train, epochs=50, batch_size=32,
                    validation_data=(X_test_reshaped, y_test),
                    class_weight=class_weight_dict, callbacks=[early_stopping])


In [None]:
from sklearn.metrics import classification_report

# Evaluate the model on the test data
y_pred = (model.predict(X_test_reshaped) > 0.5).astype(int)

# Classification report
print(classification_report(y_test, y_pred))


In [None]:
# Train the updated model with class weights and early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model
history = model.fit(X_train_reshaped, y_train, epochs=50, batch_size=32,
                    validation_data=(X_test_reshaped, y_test),
                    class_weight=class_weight_dict, callbacks=[early_stopping])

# Save the updated model
model.save('/content/updated_sound_classification_lstm_model.h5')


In [None]:
# Load the updated model
updated_model = tf.keras.models.load_model('/content/updated_sound_classification_lstm_model.h5')

##### Test the trial 2 model.

In [None]:
def classify_audio(file_path, model):
    """Classify one audio file as "Noisy" or "Clean".

    The feature vector comes from whichever ``extract_features`` is currently
    defined, so ``model`` must have been trained on vectors of that length.

    Args:
        file_path: Path of the audio file to classify.
        model: Object exposing ``predict``; it receives a batch of shape
            ``(1, n_features, 1)``.

    Returns:
        "Noisy" when the predicted score is at least 0.5, "Clean" otherwise.
    """
    feature_vector = extract_features(file_path)
    n_features = feature_vector.shape[0]

    # Single-sample batch in the (samples, time_steps, features) layout
    model_input = feature_vector.reshape((1, n_features, 1))
    probability = model.predict(model_input)

    # 0: clean, 1: noisy
    if probability < 0.5:
        return "Clean"
    return "Noisy"

# Test with a new audio file (replace this path with the path to your test audio file)
test_audio_path = '/content/small_dataset/noisy/p234_284.wav'  # Update the path to your test file
predicted_label = classify_audio(test_audio_path, updated_model)
print(f"Predicted Label for the test audio file: {predicted_label}")


In [None]:
import matplotlib.pyplot as plt

# Plot the training and validation loss
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

# Plot the training and validation accuracy
plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

plt.tight_layout()
plt.show()

The model still has an overfitting problem.

### Check the Dataset Quality.

In [None]:
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

# Directories for clean and noisy datasets
clean_dir = '/content/small_dataset/clean'
noisy_dir = '/content/small_dataset/noisy'

def analyze_audio_files(audio_dir, label, num_samples=3):
    """
    Plot the waveform and spectrogram of a few audio files from a directory.

    Args:
        audio_dir: Directory whose files are inspected.
        label: Text used in the printed banner and (capitalized) in the plot
            titles, e.g. 'clean' or 'noisy'.
        num_samples: Number of files to plot. They are the first entries of
            ``os.listdir(audio_dir)``, which is not sorted here.

    Returns:
        None. One two-panel figure per file is displayed.
    """
    print(f"\nAnalyzing {label} audio samples...\n")
    audio_files = os.listdir(audio_dir)[:num_samples]

    for i, file_name in enumerate(audio_files):
        file_path = os.path.join(audio_dir, file_name)

        # Load audio file at its native sampling rate (sr=None)
        y, sr = librosa.load(file_path, sr=None)

        # Plot waveform (left panel)
        plt.figure(figsize=(12, 4))
        plt.subplot(1, 2, 1)
        librosa.display.waveshow(y, sr=sr)
        plt.title(f"{label.capitalize()} Audio Waveform - {file_name}")

        # Plot spectrogram (right panel): STFT magnitude in dB relative to its maximum
        plt.subplot(1, 2, 2)
        D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
        librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='log')
        plt.colorbar(format="%+2.0f dB")
        plt.title(f"{label.capitalize()} Spectrogram - {file_name}")

        plt.show()

# Analyze Clean Audio Samples
analyze_audio_files(clean_dir, 'clean')

# Analyze Noisy Audio Samples
analyze_audio_files(noisy_dir, 'noisy')


**Clean Environment:**

| Metric | Range (Good) |
|---|---|
| Zero-Crossing Rate (ZCR) | 0 < ZCR < 0.1 |
| Root Mean Square (RMS) | 0.01 < RMS < 0.1 |
| Mel-Frequency Cepstral Coefficients (MFCC) | 0.5 < MFCC < 1.5 |


**Noisy Environment:**

| Metric | Range (Bad) |
|---|---|
| Zero-Crossing Rate (ZCR) | 0.4 < ZCR < 1.0 |
| Root Mean Square (RMS) | 0.1 < RMS < 1.0 |
| Mel-Frequency Cepstral Coefficients (MFCC) | 1.5 < MFCC < 5 |


In [None]:
import librosa
import numpy as np
import os

def extract_features(file_path):
    """Compute summary descriptors of one audio file for the dataset quality check.

    Note that this version returns a dict, unlike the earlier
    ``extract_features`` definitions in this notebook, which return a vector.

    Args:
        file_path: Path of the audio file; it is loaded at its native
            sampling rate (``sr=None``).

    Returns:
        dict with the keys 'zcr', 'spectral_centroid' and 'rms' (each the mean
        of the corresponding librosa feature) and 'mfcc' (the MFCC matrix
        averaged over axis 1).
    """
    signal, rate = librosa.load(file_path, sr=None)

    # Computed in the same order as the keys of the returned dict
    zero_crossings = librosa.feature.zero_crossing_rate(y=signal)  # Zero-Crossing Rate
    centroid = librosa.feature.spectral_centroid(y=signal, sr=rate)  # Spectral Centroid
    energy = librosa.feature.rms(y=signal)  # RMS Energy
    cepstrum = librosa.feature.mfcc(y=signal, sr=rate)  # MFCCs

    return {
        'zcr': np.mean(zero_crossings),
        'spectral_centroid': np.mean(centroid),
        'rms': np.mean(energy),
        'mfcc': np.mean(cepstrum, axis=1),
    }

def analyze_dataset(audio_dir):
    """Run ``extract_features`` on every file of ``audio_dir``.

    Args:
        audio_dir: Directory whose files are analysed, in ``os.listdir`` order.

    Returns:
        List with one ``extract_features`` result per file.
    """
    return [
        extract_features(os.path.join(audio_dir, file_name))
        for file_name in os.listdir(audio_dir)
    ]

clean_features = analyze_dataset('/content/small_dataset/clean')
noisy_features = analyze_dataset('/content/small_dataset/noisy')

def calculate_statistics(features_list, feature_name):
    """Mean and standard deviation of one feature across a dataset.

    Args:
        features_list: List of per-file feature dicts.
        feature_name: Key of the feature to summarise.

    Returns:
        Tuple ``(mean, std)`` computed with NumPy over all collected values.
        For vector-valued features such as 'mfcc' both statistics are taken
        over every element of every vector, giving a single number each.
    """
    collected = []
    for entry in features_list:
        collected.append(entry[feature_name])

    average = np.mean(collected)
    spread = np.std(collected)
    return average, spread

print("Clean Dataset - ZCR:", calculate_statistics(clean_features, 'zcr'))
print("Noisy Dataset - ZCR:", calculate_statistics(noisy_features, 'zcr'))

print("Clean Dataset - Spectral Centroid:", calculate_statistics(clean_features, 'spectral_centroid'))
print("Noisy Dataset - Spectral Centroid:", calculate_statistics(noisy_features, 'spectral_centroid'))

print("Clean Dataset - RMS:", calculate_statistics(clean_features, 'rms'))
print("Noisy Dataset - RMS:", calculate_statistics(noisy_features, 'rms'))

print("Clean Dataset - MFCC:", calculate_statistics(clean_features, 'mfcc'))
print("Noisy Dataset - MFCC:", calculate_statistics(noisy_features, 'mfcc'))



In [None]:
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import numpy as np



# Extract feature vectors (e.g., ZCR, Spectral Centroid, RMS) and use PCA for visualization
clean_vectors = np.array([[f['zcr'], f['spectral_centroid'], f['rms']] for f in clean_features])
noisy_vectors = np.array([[f['zcr'], f['spectral_centroid'], f['rms']] for f in noisy_features])

# Combine clean and noisy data
all_vectors = np.vstack((clean_vectors, noisy_vectors))

# Reduce dimensions to 2D using PCA
pca = PCA(n_components=2)
reduced_data = pca.fit_transform(all_vectors)

# Assign labels to clean and noisy datasets for color coding
labels = np.array([0] * len(clean_features) + [1] * len(noisy_features))

# Apply K-means clustering.
# random_state pins the centroid initialisation so the centroids drawn below
# are the same on every run; n_init is set explicitly because its default
# differs between scikit-learn releases.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
clusters = kmeans.fit_predict(reduced_data)

# Plotting
plt.figure(figsize=(8, 6))

# Scatter plot with color coding by original labels (clean = 0, noisy = 1)
scatter = plt.scatter(reduced_data[:, 0], reduced_data[:, 1], c=labels, cmap='coolwarm', alpha=0.7, edgecolors='k')

# Plot cluster centroids
centroids = kmeans.cluster_centers_
plt.scatter(centroids[:, 0], centroids[:, 1], c='black', marker='x', s=100, label='Centroids')

# Add colorbar to show label mapping
plt.colorbar(scatter, label='Dataset (0: Clean, 1: Noisy)')

# Title and axis labels
plt.title("Clustering of Audio Features (Clean vs Noisy)", fontsize=14)
plt.xlabel("Principal Component 1", fontsize=12)
plt.ylabel("Principal Component 2", fontsize=12)

# Add legend
plt.legend()

# Display plot
plt.tight_layout()
plt.show()



In [None]:
from scipy.stats import ttest_ind

clean_zcr = [f['zcr'] for f in clean_features]
noisy_zcr = [f['zcr'] for f in noisy_features]

t_stat, p_val = ttest_ind(clean_zcr, noisy_zcr)
print(f"T-test for Zero-Crossing Rate: t_stat={t_stat}, p_val={p_val}")


❤ The dataset Quality is good.

### Trial 3: making the model more accurate and avoiding the overfitting problem

In [None]:
import os
import numpy as np
import librosa

# Directories for clean and noisy sounds
clean_dir = '/content/small_dataset/clean'
noisy_dir = '/content/small_dataset/noisy'

def load_audio_files(directory, label):
    """Extract time-averaged MFCCs for every file of a directory.

    Args:
        directory: Folder containing the audio files.
        label: Class label attached to every file of this folder.

    Returns:
        Tuple ``(data, labels)`` of two lists of equal length: one averaged
        MFCC vector (``n_mfcc=20``) per file, and ``label`` repeated once per
        file.
    """
    data = []
    for file_name in os.listdir(directory):
        waveform, native_sr = librosa.load(os.path.join(directory, file_name), sr=None)

        # MFCC matrix averaged over time gives one value per coefficient
        coefficients = librosa.feature.mfcc(y=waveform, sr=native_sr, n_mfcc=20)
        data.append(np.mean(coefficients.T, axis=0))

    # One label per processed file
    labels = [label for _ in data]
    return data, labels

# Load clean and noisy data
clean_data, clean_labels = load_audio_files(clean_dir, label=0)
noisy_data, noisy_labels = load_audio_files(noisy_dir, label=1)

# Combine and shuffle the data
X = np.array(clean_data + noisy_data)
y = np.array(clean_labels + noisy_labels)

# Shuffle the dataset
from sklearn.utils import shuffle # Complete the import statement
X, y = shuffle(X, y, random_state=42)

In [None]:
from sklearn.model_selection import train_test_split

# Split the dataset
train_data, val_data, train_labels, val_labels = train_test_split(X, y, test_size=0.2, random_state=42)

# Reshape for LSTM (samples, time_steps, features)
train_data = np.expand_dims(train_data, axis=1)
val_data = np.expand_dims(val_data, axis=1)


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.regularizers import l2

def create_lstm_model(input_shape):
    """Assemble the (uncompiled) Trial 3 classifier with L2-regularised layers.

    Args:
        input_shape: ``(time_steps, feature_dim)`` shape of one input sample.

    Returns:
        A Keras ``Sequential`` model: two LSTM(64) layers, each followed by
        dropout and batch normalisation, a Dense(32) layer with dropout, and
        a single sigmoid output unit.
    """
    return Sequential([
        # First LSTM block: returns the full sequence for the next LSTM
        LSTM(64, return_sequences=True, input_shape=input_shape, kernel_regularizer=l2(0.001)),
        Dropout(0.3),
        BatchNormalization(),
        # Second LSTM block: keeps only the last output
        LSTM(64, return_sequences=False, kernel_regularizer=l2(0.001)),
        Dropout(0.3),
        BatchNormalization(),
        # Dense head
        Dense(32, activation='relu', kernel_regularizer=l2(0.001)),
        Dropout(0.3),
        # Output layer for binary classification
        Dense(1, activation='sigmoid'),
    ])

# Define input shape
feature_dim = train_data.shape[-1]  # Extract feature dimension dynamically
input_shape = (train_data.shape[1], feature_dim)  # (time_steps, feature_dim)

# Initialize and compile the model
model = create_lstm_model(input_shape)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Early stopping to avoid overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model with early stopping
history = model.fit(
    train_data, train_labels,
    validation_data=(val_data, val_labels),
    epochs=50,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1
)


In [None]:
# Evaluate on validation data
val_loss, val_accuracy = model.evaluate(val_data, val_labels)
print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_accuracy}")

In [None]:
import matplotlib.pyplot as plt

# Plot training & validation accuracy values
plt.figure(figsize=(12, 4))  # Adjust figure size if needed

plt.subplot(1, 2, 1)  # Create a subplot for accuracy
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')

# Plot training & validation loss values
plt.subplot(1, 2, 2)  # Create a subplot for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
# The cell used to stop at a bare `plt`, which only echoed the module and left
# the loss panel without an x label or a legend.
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')

plt.tight_layout()
plt.show()

In [None]:
# Save the final model
model.save('/content/Final_model.h5')

In [None]:
import tensorflow as tf

# Load the saved model
loaded_model = tf.keras.models.load_model('/content/Final_model.h5')

def classify_audio(file_path, model):
    """Classify one audio file as "Noisy" or "Clean" with the Trial 3 model.

    Args:
        file_path: Path of the audio file; it is loaded at its native
            sampling rate (``sr=None``).
        model: Object exposing ``predict``; it receives a batch of shape
            ``(1, 1, n_mfcc)`` built from the time-averaged MFCCs
            (``n_mfcc=20``).

    Returns:
        "Noisy" when the predicted score is at least 0.5, "Clean" otherwise.
    """
    import librosa
    import numpy as np

    # Time-averaged MFCC vector of the file
    waveform, native_sr = librosa.load(file_path, sr=None)
    coefficients = librosa.feature.mfcc(y=waveform, sr=native_sr, n_mfcc=20)
    feature_vector = np.mean(coefficients.T, axis=0)

    # Add the batch axis, then the time-step axis: (n,) -> (1, n) -> (1, 1, n)
    batch = np.expand_dims(np.expand_dims(feature_vector, axis=0), axis=1)

    score = model.predict(batch)
    return "Noisy" if score >= 0.5 else "Clean"

# Test with the specified audio file
test_audio_path = '/content/small_dataset/noisy/p234_284.wav'
predicted_label = classify_audio(test_audio_path, loaded_model)
print(f"Predicted Label for {test_audio_path}: {predicted_label}")

In [None]:
pip install speechbrain

#### Hugging Face model

In [None]:
from speechbrain.inference.separation import SepformerSeparation as separator
import torchaudio

model = separator.from_hparams(source="speechbrain/sepformer-wham-enhancement", savedir='pretrained_models/sepformer-wham-enhancement')

# for custom file, change path
est_sources = model.separate_file(path='/content/small_dataset/noisy/p234_284.wav')

torchaudio.save("enhanced_wham.wav", est_sources[:, :, 0].detach().cpu(), 8000)


In [None]:
ipd.Audio('/content/small_dataset/noisy/p234_284.wav')

In [None]:
ipd.Audio('enhanced_wham.wav')

#### RNN Model for Noise Reduction

In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import SimpleRNN, Dense, Input, Reshape
from tensorflow.keras.models import Model
import librosa
import soundfile as sf

In [None]:
# Dataset paths
clean_dir = '/content/small_dataset/clean'
noisy_dir = '/content/small_dataset/noisy'

In [None]:
def load_and_preprocess_audio(directory, sample_rate=16000, max_length=16000):
    """Load every audio file of a directory as a fixed-length waveform.

    Files are read in sorted name order. ``os.listdir`` returns names in
    arbitrary order, and this function is called once for the clean folder and
    once for the noisy folder; sorting is what makes row ``i`` of both results
    refer to the same file name, so the denoiser is trained on matching
    (noisy, clean) pairs.

    Args:
        directory: Folder containing the audio files.
        sample_rate: Sampling rate passed to ``librosa.load``.
        max_length: Number of samples kept per file; longer files are
            truncated, shorter ones are zero-padded at the end.

    Returns:
        NumPy array with one row of ``max_length`` samples per file.
    """
    audio_files = []
    for file_name in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, file_name)
        audio, _ = librosa.load(file_path, sr=sample_rate)
        # Trim or pad audio to max_length
        if len(audio) > max_length:
            audio = audio[:max_length]
        else:
            audio = np.pad(audio, (0, max_length - len(audio)))
        audio_files.append(audio)
    return np.array(audio_files)

In [None]:
 #Load clean and noisy audio data
clean_audio = load_and_preprocess_audio(clean_dir)
noisy_audio = load_and_preprocess_audio(noisy_dir)

In [None]:
# Reshape for model input (RNN expects 3D input)
noisy_audio = noisy_audio[..., np.newaxis]  # Shape: (samples, timesteps, 1)
clean_audio = clean_audio[..., np.newaxis]  # Shape: (samples, timesteps, 1)

In [None]:
# Define the RNN model
input_shape = (noisy_audio.shape[1], 1)
inputs = Input(shape=input_shape)
x = SimpleRNN(64, return_sequences=True)(inputs)
x = SimpleRNN(64)(x)
outputs = Dense(clean_audio.shape[1])(x)
outputs = Reshape((clean_audio.shape[1], 1))(outputs)

model = Model(inputs, outputs)
model.compile(optimizer='adam', loss='mse')

In [None]:
# Train the model
model.fit(noisy_audio, clean_audio, epochs=20, batch_size=8, validation_split=0.1)

In [None]:

# Plot the training and validation loss of the RNN denoiser.
# The fit() call above does not keep its return value, so the global `history`
# would still be the one left over from the LSTM classifier. Keras also stores
# the History of the most recent fit() on the model itself, so it is read from
# there. The denoiser is compiled with loss='mse' and no metrics, so there is
# no accuracy curve to draw.
history = model.history

plt.figure(figsize=(10, 5))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
def denoise_audio(model, noisy_audio_file, output_file, sample_rate=16000, max_length=16000):
    """Denoise one audio file with a trained model and write the result to disk.

    Args:
        model: Object exposing ``predict``; it receives a batch of shape
            ``(1, max_length, 1)``.
        noisy_audio_file: Path of the noisy recording to process.
        output_file: Path the denoised waveform is written to.
        sample_rate: Sampling rate used both for loading and for writing.
        max_length: Number of samples fed to the model; the recording is
            zero-padded or truncated to this length. It must match the length
            the model was trained with (the default mirrors
            ``load_and_preprocess_audio``).

    Returns:
        None. The flattened model output is saved to ``output_file``.
    """
    audio, _ = librosa.load(noisy_audio_file, sr=sample_rate)

    # Same fixed-length policy as the training data: pad short clips, cut long ones
    if len(audio) < max_length:
        audio = np.pad(audio, (0, max_length - len(audio)))
    else:
        audio = audio[:max_length]
    audio = audio[np.newaxis, :, np.newaxis]  # Reshape for the model: (1, time_steps, 1)

    # Predict denoised audio
    denoised_audio = model.predict(audio)
    denoised_audio = denoised_audio.reshape(-1)  # Flatten to 1D array

    # Save denoised audio
    sf.write(output_file, denoised_audio, sample_rate)

In [None]:
# Run the trained denoiser on a single noisy clip and write the result next to the notebook.
noisy_audio_path = '/content/small_dataset/noisy/p234_284.wav'  # Replace with actual path
output_audio_path = 'output_denoised_sample.wav'  # Path for output
denoise_audio(
    model=model,
    noisy_audio_file=noisy_audio_path,
    output_file=output_audio_path,
)

In [None]:
# prompt: accuracy and other metrics in a table
#
# NOTE(review): `y_test` and `y_pred` are not produced by the denoising cells directly
# above, so they must come from an earlier cell; confirm they belong to the model being
# reported. These are classification metrics, whereas the model compiled above is
# trained with loss='mse'.

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Render a two-column text table: centred metric name, centred score with 4 decimals.
_rule = "-" * 30
print(_rule)
print("|{:^14}|{:^14}|".format("Metric", "Score"))
print(_rule)
for _metric_name, _metric_value in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1-score", f1),
):
    print("|{:^14}|{:^14.4f}|".format(_metric_name, _metric_value))
print(_rule)

In [None]:
# Inline player for the original noisy clip, for listening comparison with the denoised output.
# (`ipd` is presumably IPython.display — confirm against the notebook's import cell.)
ipd.Audio('/content/small_dataset/noisy/p234_284.wav')

In [None]:
# Inline player for the file written by the denoise_audio call above.
ipd.Audio('output_denoised_sample.wav')

#### RNN for Classification

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Dropout, LSTM, GRU
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [None]:
# Dataset paths: paired clean / noisy recordings live in sibling folders under one root.
_DATASET_ROOT = '/content/small_dataset'
clean_dir = f'{_DATASET_ROOT}/clean'
noisy_dir = f'{_DATASET_ROOT}/noisy'

# Function to load and extract MFCC features
def load_data(data_dir, label):
    """Build one time-averaged MFCC vector per ``.wav`` file in ``data_dir``.

    Args:
        data_dir: Folder to scan; entries not ending in ``.wav`` are ignored.
        label: Value recorded for every file found in this folder.

    Returns:
        ``(features, labels)``: two parallel lists. Each feature is a length-13
        vector (13 MFCC coefficients averaged over time); each label is ``label``.
        Entries are ordered by file name.
    """
    features, labels = [], []
    # os.listdir order is arbitrary. Sorting fixes the sample order so the seeded
    # train_test_split downstream yields the same split on every run and machine.
    for file_name in sorted(os.listdir(data_dir)):
        if not file_name.endswith('.wav'):
            continue
        file_path = os.path.join(data_dir, file_name)
        # sr=None keeps each file's native sampling rate.
        audio, sr = librosa.load(file_path, sr=None)
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
        # Collapse the time axis so clips of any duration map to a fixed-size vector.
        features.append(np.mean(mfccs.T, axis=0))
        labels.append(label)
    return features, labels

# Extract features per folder: class 0 = clean recordings, class 1 = noisy recordings.
clean_features, clean_labels = load_data(clean_dir, 0)
noisy_features, noisy_labels = load_data(noisy_dir, 1)

# Stack both classes into one feature matrix (clean rows first, then noisy rows).
X = np.array([*clean_features, *noisy_features])

# Encode the matching label vector as consecutive integer class ids.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(np.array([*clean_labels, *noisy_labels]))

In [None]:
# Hold out 20% of the samples; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Insert a timestep axis of length 1 for the recurrent layers: (samples, 1, features).
X_train = X_train[:, np.newaxis]
X_test = X_test[:, np.newaxis]

In [None]:
# Binary clean-vs-noisy classifier, assembled layer by layer.
model = Sequential()
# Recurrent front end; return_sequences=True keeps the time axis for the LSTM that follows.
model.add(SimpleRNN(64, input_shape=(X_train.shape[1], X_train.shape[2]), activation='relu', return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Binary classification

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# Train for 20 epochs and keep the History object for the plotting cell below.
# NOTE(review): the held-out split (X_test, y_test) is also used as validation data here,
# so the "Test Accuracy" reported next is measured on data monitored during training.
history = model.fit(X_train, y_train, epochs=20, batch_size=16, validation_data=(X_test, y_test))

In [None]:
# Score the trained classifier on the held-out split. evaluate() yields the loss followed by
# the single metric registered at compile time (metrics=['accuracy']).
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

In [None]:
# Persist the trained classifier to the working directory (the .h5 suffix selects Keras' HDF5 format).
model.save("sound_classification_rnn.h5")

In [None]:
import matplotlib.pyplot as plt

# Side-by-side training curves: accuracy on the left, loss on the right.
plt.figure(figsize=(14, 5))

for _slot, (_key, _name) in enumerate((('accuracy', 'Accuracy'), ('loss', 'Loss')), start=1):
    plt.subplot(1, 2, _slot)
    # Training curve and its validation counterpart (Keras stores the latter under 'val_<metric>').
    plt.plot(history.history[_key], label=f'Train {_name}')
    plt.plot(history.history[f'val_{_key}'], label=f'Validation {_name}')
    plt.title(f'Model {_name}')
    plt.xlabel('Epoch')
    plt.ylabel(_name)
    plt.legend()
    plt.grid(True)

plt.tight_layout()
plt.show()

In [None]:


from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Predict on the held-out split with the classifier trained above. The sigmoid output is
# one probability per sample, so threshold at 0.5 and flatten to a 1-D vector of 0/1 labels.
# Computing this here (instead of assuming `y_pred` already exists) keeps the cell correct
# on a fresh kernel and guarantees the scores describe this model.
y_pred = (model.predict(X_test) > 0.5).astype(int).ravel()

# Calculate metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Create a table using a simple string formatting
print("-" * 30)
print("|{:^14}|{:^14}|".format("Metric", "Score"))
print("-" * 30)
print("|{:^14}|{:^14.4f}|".format("Accuracy", accuracy))
print("|{:^14}|{:^14.4f}|".format("Precision", precision))
print("|{:^14}|{:^14.4f}|".format("Recall", recall))
print("|{:^14}|{:^14.4f}|".format("F1-score", f1))
print("-" * 30)

#### LSTM for Denoising

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, TimeDistributed
import matplotlib.pyplot as plt

In [None]:
import librosa
import numpy as np
import os

# Dataset paths (same folders as the earlier sections, repeated so this section runs on its own)
_SMALL_DATASET = '/content/small_dataset'
clean_dir = _SMALL_DATASET + '/clean'
noisy_dir = _SMALL_DATASET + '/noisy'

# Function to load audio and extract MFCC features
def load_audio_features(file_path, max_length=None):
    """Return the MFCC sequence of one audio file as a ``(time, 13)`` array.

    The file is read at its native sampling rate. When ``max_length`` is truthy the
    time axis is forced to exactly that many frames: shorter sequences are
    zero-padded at the end, longer ones are cut. A falsy ``max_length`` (``None``
    or ``0``) leaves the natural length untouched.
    """
    audio, sr = librosa.load(file_path, sr=None)
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)

    # No target length requested: hand back the sequence as-is.
    if not max_length:
        return mfcc.T

    n_frames = mfcc.shape[1]
    if n_frames < max_length:
        # Pad only the time axis (axis 1); the coefficient axis is left alone.
        mfcc = np.pad(mfcc, ((0, 0), (0, max_length - n_frames)), mode='constant')
    else:
        mfcc = mfcc[:, :max_length]

    return mfcc.T  # (time, features)

# Load pairs of noisy and clean data
def load_data_pairs(clean_dir, noisy_dir):
    """Load matching clean/noisy MFCC sequences, zero-padded to a common length.

    A file is used only if it ends in ``.wav`` and exists under the same name in
    both folders. Files are visited in sorted name order so the sample order is
    deterministic.

    Each file is decoded once: the sequences are kept in memory and padded after
    the longest one is known, instead of extracting every MFCC a second time.
    Padding is done here rather than through ``load_audio_features(..., max_length)``,
    so this also works with a ``load_audio_features`` that accepts only a path.

    Returns:
        ``(clean, noisy)``: two arrays of shape ``(n_pairs, max_frames, n_mfcc)``
        whose rows correspond to the same recordings.
    """
    clean_seqs, noisy_seqs = [], []
    for file_name in sorted(os.listdir(clean_dir)):
        noisy_path = os.path.join(noisy_dir, file_name)
        if not (file_name.endswith('.wav') and os.path.exists(noisy_path)):
            continue
        clean_seqs.append(load_audio_features(os.path.join(clean_dir, file_name)))
        noisy_seqs.append(load_audio_features(noisy_path))

    # Longest sequence across both sets; default=0 covers the no-matching-files case.
    max_length = max((seq.shape[0] for seq in clean_seqs + noisy_seqs), default=0)

    def _pad_time(seq):
        # Zero-pad the end of the time axis (axis 0) only.
        return np.pad(seq, ((0, max_length - seq.shape[0]), (0, 0)), mode='constant')

    return (
        np.array([_pad_time(seq) for seq in clean_seqs]),
        np.array([_pad_time(seq) for seq in noisy_seqs]),
    )

clean_data, noisy_data = load_data_pairs(clean_dir, noisy_dir)

# Record the sequence dimensions for the LSTM input; the arrays are already laid out as
# (samples, timesteps, features), so no reshape is needed.
timesteps, features = clean_data.shape[1:3]


In [None]:
# Dataset paths
_PAIR_ROOT = '/content/small_dataset'
clean_dir, noisy_dir = ('/'.join((_PAIR_ROOT, _sub)) for _sub in ('clean', 'noisy'))

# Function to load audio and extract MFCC features
def load_audio_features(file_path, max_length=None):
    """Return the MFCC sequence of one audio file as a ``(time, 13)`` array.

    This cell redefines the function from the previous cell, so it keeps that
    definition's optional ``max_length`` parameter: code written against the
    earlier signature must not start raising ``TypeError`` once this cell has run.

    Args:
        file_path: Audio file to read (at its native sampling rate).
        max_length: If truthy, zero-pad or truncate the time axis to exactly this
            many frames; if ``None`` (default) keep the natural length.
    """
    audio, sr = librosa.load(file_path, sr=None)
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
    if max_length:
        shortfall = max_length - mfcc.shape[1]
        if shortfall > 0:
            mfcc = np.pad(mfcc, ((0, 0), (0, shortfall)), mode='constant')
        else:
            mfcc = mfcc[:, :max_length]
    return mfcc.T  # Transpose for (time, features) format

# Load pairs of noisy and clean data
def load_data_pairs(clean_dir, noisy_dir):
    """Load matching clean/noisy MFCC sequences and zero-pad them to one length.

    Only ``.wav`` files present under the same name in both folders are used.
    Files are processed in sorted name order: ``os.listdir`` order is arbitrary,
    and a stable order makes the returned arrays (and any seeded split of them)
    reproducible.

    Returns:
        ``(clean, noisy)``: arrays of shape ``(n_pairs, max_frames, n_mfcc)`` with
        rows aligned pairwise.
    """
    clean_data, noisy_data = [], []

    for file_name in sorted(os.listdir(clean_dir)):
        clean_path = os.path.join(clean_dir, file_name)
        noisy_path = os.path.join(noisy_dir, file_name)
        if file_name.endswith('.wav') and os.path.exists(noisy_path):
            clean_data.append(load_audio_features(clean_path))
            noisy_data.append(load_audio_features(noisy_path))

    # Longest sequence over both sets (0 when no pair was found).
    max_length = max((len(seq) for seq in clean_data + noisy_data), default=0)

    def pad_to_max(sequences):
        # Zero-pad the end of each sequence's time axis up to max_length.
        return np.array([
            np.pad(seq, ((0, max_length - seq.shape[0]), (0, 0)), 'constant')
            for seq in sequences
        ])

    return pad_to_max(clean_data), pad_to_max(noisy_data)

# Load the padded clean/noisy MFCC arrays with the loader defined earlier in this cell.
# This rebinds the `clean_data` / `noisy_data` produced by the previous cell.
clean_data, noisy_data = load_data_pairs(clean_dir, noisy_dir)


In [None]:
# NOTE(review): this splits `X` / `y` (the time-averaged MFCC vectors and 0/1 labels built in
# the RNN classification section), not the `noisy_data` / `clean_data` pairs loaded just
# above under "LSTM for Denoising" — confirm which dataset this section is meant to use.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Add a length-1 timestep axis for the recurrent layers: (samples, timesteps, features)
X_train, X_test = (np.expand_dims(_part, axis=1) for _part in (X_train, X_test))
In [None]:
# NOTE(review): although this cell sits under the "LSTM for Denoising" heading, it rebuilds
# the same binary classifier (sigmoid output, binary cross-entropy) as the RNN
# classification section — confirm whether a denoising model was intended here.
model = Sequential()
model.add(SimpleRNN(64, input_shape=(X_train.shape[1], X_train.shape[2]), activation='relu', return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Binary classification

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# Train for 20 epochs, keeping the History object for the plots below.
# NOTE(review): the held-out split is also passed as validation data, so it is not an
# untouched test set for the evaluation that follows.
history = model.fit(X_train, y_train, epochs=20, batch_size=16, validation_data=(X_test, y_test))

In [None]:
# Score on the held-out split; evaluate() yields the loss followed by the 'accuracy'
# metric registered in compile().
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

In [None]:
# NOTE(review): this is the same file name used by the save cell in the RNN classification
# section, so running this cell overwrites that earlier saved model — confirm this is intended.
model.save("sound_classification_rnn.h5")

In [None]:
import matplotlib.pyplot as plt

# Training curves from the `history` object returned by model.fit():
# accuracy in the left panel, loss in the right panel.
plt.figure(figsize=(12, 4))

for _panel, _metric in enumerate(('accuracy', 'loss'), start=1):
    plt.subplot(1, 2, _panel)
    plt.plot(history.history[_metric])
    plt.plot(history.history['val_' + _metric])
    plt.title('Model ' + _metric)
    plt.ylabel(_metric.capitalize())
    plt.xlabel('Epoch')
    # Curves are unlabeled, so the legend entries follow plotting order.
    plt.legend(['Train', 'Validation'], loc='upper left')

plt.tight_layout()
plt.show()