In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display

In [None]:
import os
import random

# Directory that is expected to contain the ICBHI .wav recordings.
icbhi_path = 'ICBHI_final_database'
# os.listdir returns entries in arbitrary order; sorting makes the seeded
# sample below pick the same files on every machine and every run.
audio_files = sorted(f for f in os.listdir(icbhi_path) if f.endswith('.wav'))

random.seed(42)
# Cap the sample size: random.sample raises ValueError when asked for more
# items than the population holds.
selected_icbhi_files = random.sample(audio_files, min(5, len(audio_files)))

def preprocess_audio(file_path):
    """Load an audio file at its native sampling rate and normalize it.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        tuple: ``(audio, sr)`` where ``audio`` is the output of
        ``librosa.util.normalize`` (default settings) and ``sr`` is the
        sampling rate reported by ``librosa.load``.
    """
    # sr=None keeps the file's own sampling rate instead of resampling.
    samples, sample_rate = librosa.load(file_path, sr=None)
    return librosa.util.normalize(samples), sample_rate

# Load and normalize every selected recording; each entry is an (audio, sr) pair.
icbhi_file_paths = [os.path.join(icbhi_path, name) for name in selected_icbhi_files]
icbhi_data = list(map(preprocess_audio, icbhi_file_paths))

print('Selected ICBHI files:')
for file_name in selected_icbhi_files:
    print(file_name)

In [None]:
!git clone https://github.com/iiscleap/Coswara-Data.git
coswara_path = 'Coswara-Data'

def get_wav_files(directory):
    """Recursively collect the paths of all ``.wav`` files under ``directory``.

    Args:
        directory: Root directory to search.

    Returns:
        list[str]: Path of every file whose name ends in ``.wav``, each built
        by joining the folder it was found in with the file name. The list is
        sorted so its order is deterministic. It is empty when nothing
        matches or when ``directory`` does not exist (``os.walk`` ignores
        listing errors by default).
    """
    wav_files = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(directory)
        for name in names
        if name.endswith('.wav')
    ]
    # os.walk order depends on the filesystem; sorting keeps any later seeded
    # sampling reproducible across machines.
    wav_files.sort()
    return wav_files

# Sort so the draw below depends only on the random module's state, not on
# the order in which the filesystem happened to list the files.
coswara_files = sorted(get_wav_files(coswara_path))

# Cap the sample size: random.sample raises ValueError when asked for more
# items than the population holds (e.g. when no .wav files were found).
selected_coswara_files = random.sample(coswara_files, min(5, len(coswara_files)))

def filter_and_preprocess(file_path):
    """Load a recording and normalize it, rejecting clips of one second or less.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        ``(audio, sr)`` with the normalized samples and the native sampling
        rate when the clip holds more than ``sr`` samples; otherwise ``None``.
    """
    samples, sample_rate = librosa.load(file_path, sr=None)
    # At most `sample_rate` samples means the clip lasts one second or less.
    if len(samples) <= sample_rate:
        return None
    return librosa.util.normalize(samples), sample_rate

# Decode each selected file exactly once and keep only the clips that pass the
# duration filter (filter_and_preprocess returns None for rejected clips).
coswara_data = []
for path in selected_coswara_files:
    processed = filter_and_preprocess(path)
    if processed is not None:
        coswara_data.append(processed)

print('\nSelected Coswara files:')
for f in selected_coswara_files:
    print(f)

In [None]:
def augment_audio(audio, sr, rng=None):
    """Create pitch-shifted, time-stretched and noisy variants of one clip.

    Args:
        audio: Array of audio samples (assumed 1-D; the noise is drawn with
            ``len(audio)`` elements).
        sr: Sampling rate of ``audio``; forwarded to the pitch shift.
        rng: Optional random source exposing ``standard_normal`` (for example
            ``numpy.random.default_rng(seed)``). When omitted, NumPy's global
            random state is used, so ``np.random.seed`` still controls it.

    Returns:
        list: 11 arrays in a fixed order - four pitch shifts (n_steps -2, -1,
        1, 2), four time stretches (rates 0.8, 0.9, 1.1, 1.2) and three copies
        with additive Gaussian noise (scales 0.005, 0.01, 0.02).
    """
    augmented = []

    for steps in (-2, -1, 1, 2):
        augmented.append(librosa.effects.pitch_shift(audio, sr=sr, n_steps=steps))

    for rate in (0.8, 0.9, 1.1, 1.2):
        augmented.append(librosa.effects.time_stretch(audio, rate=rate))

    noise_source = np.random if rng is None else rng
    audio_dtype = np.asarray(audio).dtype
    for level in (0.005, 0.01, 0.02):
        noise = noise_source.standard_normal(len(audio))
        # standard_normal yields float64; cast so the noisy copy keeps the
        # input's floating dtype instead of being silently promoted.
        if np.issubdtype(audio_dtype, np.floating):
            noise = noise.astype(audio_dtype)
        augmented.append(audio + level * noise)
    return augmented

# Pair every augmented variant with the sampling rate of the clip it came from.
augmented_icbhi = [
    (variant, sr)
    for audio, sr in icbhi_data
    for variant in augment_audio(audio, sr)
]

augmented_coswara = [
    (variant, sr)
    for audio, sr in coswara_data
    for variant in augment_audio(audio, sr)
]

# Report how many clips each dataset holds before and after augmentation.
print(f'Original ICBHI samples: {len(icbhi_data)}')
print(f'Augmented ICBHI samples: {len(augmented_icbhi)}')
print(f'Original Coswara samples: {len(coswara_data)}')
print(f'Augmented Coswara samples: {len(augmented_coswara)}')

In [None]:
from tqdm import tqdm

def extract_mel_spectrogram(audio, sr, n_mels=128, fmax=8000):
    """Compute a dB-scaled mel spectrogram of an audio clip.

    Args:
        audio: Array of audio samples.
        sr: Sampling rate of ``audio`` in Hz.
        n_mels: Number of mel bands to compute.
        fmax: Requested upper frequency bound in Hz. It is capped at the
            Nyquist frequency (``sr / 2``) for low-rate recordings.

    Returns:
        The mel power spectrogram converted to decibels relative to its own
        maximum (``ref=np.max``).
    """
    # Mel filters above sr / 2 cover no FFT bins and would be empty, so never
    # ask for a band edge beyond the Nyquist frequency.
    effective_fmax = min(fmax, sr / 2)
    mel_spec = librosa.feature.melspectrogram(
        y=audio, sr=sr, n_mels=n_mels, fmax=effective_fmax
    )
    return librosa.power_to_db(mel_spec, ref=np.max)

# Originals and their augmented variants go through the same extraction.
print('Extracting features from ICBHI dataset...')
icbhi_specs = []
for audio, sr in tqdm(icbhi_data + augmented_icbhi):
    icbhi_specs.append(extract_mel_spectrogram(audio, sr))

print('\nExtracting features from Coswara dataset...')
coswara_specs = []
for audio, sr in tqdm(coswara_data + augmented_coswara):
    coswara_specs.append(extract_mel_spectrogram(audio, sr))

# ICBHI spectrograms first, Coswara spectrograms after them.
mel_spectrograms = [*icbhi_specs, *coswara_specs]
print(f'\nTotal number of processed samples: {len(mel_spectrograms)}')

In [None]:
# Convolutional front end over (mel bands, time frames, 1) inputs with a
# variable number of time frames. The conv layers are named explicitly so a
# lookup such as model.get_layer('conv2d_2') keeps working when this cell is
# re-run in the same kernel (automatic names keep counting upwards).
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, None, 1), name='conv2d'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', name='conv2d_1'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu', name='conv2d_2'),
])

# Because the time axis is undefined, flattening the whole feature map would
# leave the feature size unknown and the LSTM could not create its weights.
# Instead, make time the sequence axis and fold (frequency, channels) into one
# feature vector per time step.
conv_output_shape = model.output_shape  # (batch, freq, time, channels)
freq_bins, channels = conv_output_shape[1], conv_output_shape[3]
model.add(layers.Permute((2, 1, 3)))                   # -> (time, freq, channels)
model.add(layers.Reshape((-1, freq_bins * channels)))  # -> (time, freq * channels)
model.add(layers.LSTM(64, return_sequences=True))
model.add(layers.LSTM(64))
model.add(layers.Dense(5, activation='softmax'))  # one probability per class

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Synthetic placeholder data: random tensors shaped like
# (samples, mel bands, time frames, 1) with random class labels.
train_rng = np.random.default_rng(42)  # seeded so the run is repeatable

# categorical_crossentropy expects each label row to be a one-hot (or
# probability) vector, so draw integer class ids and one-hot encode them.
X_train = train_rng.random((100, 128, 500, 1), dtype=np.float32)
y_train = np.eye(5, dtype=np.float32)[train_rng.integers(0, 5, size=100)]
X_val = train_rng.random((20, 128, 500, 1), dtype=np.float32)
y_val = np.eye(5, dtype=np.float32)[train_rng.integers(0, 5, size=20)]

history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=16)

In [None]:
# Synthetic placeholder test set: random inputs with random class labels.
test_rng = np.random.default_rng(7)  # seeded so the run is repeatable

# One-hot rows, as required by the categorical_crossentropy loss.
X_test = test_rng.random((20, 128, 500, 1), dtype=np.float32)
y_test = np.eye(5, dtype=np.float32)[test_rng.integers(0, 5, size=20)]

test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")

In [None]:
def plot_training_history(history):
    """Plot training vs. validation accuracy and loss in two side-by-side panels.

    Args:
        history: Object whose ``history`` attribute maps 'accuracy',
            'val_accuracy', 'loss' and 'val_loss' to per-epoch sequences
            (such as the value returned by ``model.fit``).
    """
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    # (training key, validation key, panel title, y-axis label) per panel.
    panels = (
        ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
        ('loss', 'val_loss', 'Model Loss', 'Loss'),
    )
    for axis, (train_key, val_key, title, ylabel) in zip(axes, panels):
        axis.plot(history.history[train_key])
        axis.plot(history.history[val_key])
        axis.set_title(title)
        axis.set_ylabel(ylabel)
        axis.set_xlabel('Epoch')
        axis.legend(['Train', 'Validation'])

    plt.tight_layout()
    plt.show()

In [None]:
def plot_mel_spectrogram(mel_spec, title='Mel Spectrogram', sr=22050, fmax=None):
    """Display a mel spectrogram with a dB colorbar.

    Args:
        mel_spec: 2-D spectrogram to draw (assumed to be in dB, matching the
            colorbar format).
        title: Figure title.
        sr: Sampling rate of the audio the spectrogram was computed from. It
            is used to scale the axes; the default matches ``specshow``'s own
            default.
        fmax: Upper frequency bound used when the spectrogram was computed,
            for labelling the mel axis; ``None`` lets librosa choose.
    """
    plt.figure(figsize=(12, 4))
    # Pass sr/fmax through so the axes reflect the real recording rather than
    # librosa's assumed 22050 Hz.
    librosa.display.specshow(
        mel_spec, y_axis='mel', x_axis='time', cmap='viridis', sr=sr, fmax=fmax
    )
    plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.tight_layout()
    plt.show()

In [None]:
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap for one input with respect to one class.

    Args:
        img_array: Batch holding the input to explain; only the first item's
            activations are used for the heatmap.
        model: Keras model that contains the layer ``last_conv_layer_name``.
        last_conv_layer_name: Name of the convolutional layer whose
            activations are weighted.
        pred_index: Index of the class to explain. Defaults to the class with
            the highest predicted score for the first item.

    Returns:
        numpy.ndarray: 2-D heatmap over the layer's spatial grid, clipped at
        zero and scaled by its maximum. It is all zeros when no activation
        contributes positively.
    """
    # Map the model's inputs to both the chosen activations and the predictions.
    grad_model = tf.keras.models.Model(
        model.inputs,
        [model.get_layer(last_conv_layer_name).output, model.output]
    )

    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    # Gradient of the class score w.r.t. the feature map, averaged per channel.
    grads = tape.gradient(class_channel, last_conv_layer_output)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weight each channel by its pooled gradient and sum over channels.
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    # Drop only the trailing singleton axis so size-1 spatial axes survive.
    heatmap = tf.squeeze(heatmap, axis=-1)

    # Keep positive evidence only; divide_no_nan returns zeros instead of NaN
    # when the maximum is zero.
    heatmap = tf.maximum(heatmap, 0)
    heatmap = tf.math.divide_no_nan(heatmap, tf.math.reduce_max(heatmap))
    return heatmap.numpy()

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

def plot_confusion_matrix(y_true, y_pred, classes):
    """Draw a confusion-matrix heatmap and print a classification report.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        classes: Class names used as tick labels and as ``target_names`` in
            the printed report.
    """
    matrix = confusion_matrix(y_true, y_pred)

    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        matrix,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=classes,
        yticklabels=classes,
        ax=ax,
    )
    ax.set_title('Confusion Matrix')
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')
    fig.tight_layout()
    plt.show()

    report = classification_report(y_true, y_pred, target_names=classes)
    print('\nClassification Report:')
    print(report)

In [None]:
plot_training_history(history)

if len(mel_spectrograms) > 0:
    plot_mel_spectrogram(mel_spectrograms[0], 'Sample Lung Sound Mel Spectrogram')

# Explain the prediction for the first test sample (kept as a batch of one).
sample_input = X_test[0:1]

# Look up the last convolutional layer instead of hard-coding its name: Keras'
# automatic layer names (conv2d_2, conv2d_5, ...) change whenever the model
# is rebuilt in the same kernel.
conv_layer_names = [layer.name for layer in model.layers if isinstance(layer, layers.Conv2D)]
heatmap = make_gradcam_heatmap(sample_input, model, conv_layer_names[-1])

plt.figure(figsize=(10, 4))
plt.imshow(heatmap, cmap='viridis')
plt.title('Grad-CAM Visualization')
plt.colorbar()
plt.tight_layout()
plt.show()