In [256]:
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import random
import pandas as pd
import soundfile as sf
import noisereduce as nr
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier, KernelDensity
from sklearn.metrics import *
from scipy.stats import ttest_rel

In [217]:
class ProbabilisticNeuralNetwork:
    def __init__(self):
        self.classes_ = None
        self.models_ = None
    
    def fit(self, X_train, y_train):
        self.classes_ = np.unique(y_train)
        self.models_ = {}
        for c in self.classes_:
            class_indices = (y_train == c)
            class_data = X_train[class_indices]
            self.models_[c] = KernelDensity().fit(class_data)
    
    def predict_proba(self, X_test):
        probas = np.zeros((len(X_test), len(self.classes_)))
        for i, c in enumerate(self.classes_):
            probas[:, i] = np.exp(self.models_[c].score_samples(X_test))
        return probas / np.sum(probas, axis=1, keepdims=True)
    
    def predict(self, X_test):
        probas = self.predict_proba(X_test)
        return np.argmax(probas, axis=1)

In [218]:
def generate_spectrogram(audio_file, output_path, flag):
    y, sr = librosa.load(audio_file)
    
    min_audio_length = 10 * sr
    start_point = 0
    y_segment = y[start_point:start_point + min_audio_length]
    
    spectrogram = librosa.feature.melspectrogram(y=y_segment, sr=sr)
    spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)
    
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='mel')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel Spectrogram')
    plt.tight_layout()
    plt.savefig(output_path)
    plt.close()

In [219]:
def shorten_audio(audio_file):
    y, sr = librosa.load(audio_file)
    min_audio_length = 12 * sr
    if len(y) < min_audio_length:
        return
    start_point = random.randint(0, len(y) - min_audio_length)
    y_segment = y[start_point:start_point + min_audio_length]
    sf.write(audio_file, y_segment, sr, format='wav')


In [220]:
def remove_noise_and_save(input_folder, output_folder):
    for filename in os.listdir(input_folder):
        if filename.endswith('.mp3'):
            input_file = os.path.join(input_folder, filename)
            output_file = os.path.join(output_folder, filename)
            audio_data, sample_rate = sf.read(input_file)
            reduced_noise = nr.reduce_noise(y=audio_data, sr=sample_rate, stationary=True) # Perform noise reduction
            sf.write(output_file, reduced_noise, sample_rate)

In [251]:
def draw_confusion_matrix(classifier, X_test, y_test, labels):
    disp = plot_confusion_matrix(classifier, X_test, y_test, display_labels=labels, cmap=plt.cm.Blues)
    disp.ax_.set_title("Confusion Matrix")
    plt.show()

In [221]:
asd_folder = 'asd_speech'
normal_folder = 'normal_speech'

In [222]:
output_asd_folder = 'asd_spectrograms'
output_normal_folder = 'normal_spectrograms'
os.makedirs(output_asd_folder, exist_ok=True)
os.makedirs(output_normal_folder, exist_ok=True)

In [223]:
for filename in os.listdir(asd_folder):
    if filename.endswith('.mp3'):
        audio_file = os.path.join(asd_folder, filename)
        shorten_audio(audio_file)

In [224]:
remove_noise_and_save(asd_folder, asd_folder)
remove_noise_and_save(normal_folder, normal_folder)

In [276]:
for filename in os.listdir(asd_folder):
    if filename.endswith('.mp3'):
        audio_file = os.path.join(asd_folder, filename)
        output_path = os.path.join(output_asd_folder, os.path.splitext(filename)[0] + '.png')
        generate_spectrogram(audio_file, output_path, 1)
x
for filename in os.listdir(normal_folder):
    if filename.endswith('.mp3'):
        audio_file = os.path.join(normal_folder, filename)
        output_path = os.path.join(output_normal_folder, os.path.splitext(filename)[0] + '.png')
        generate_spectrogram(audio_file, output_path, 0)

In [225]:
def extract_features(audio_file):
    y, sr = librosa.load(audio_file)

    f0, voiced_flag, voiced_probs = librosa.pyin(y, fmin=75, fmax=600)
    soe = np.mean(librosa.feature.rms(y=y))

    # First five formant frequencies 
    formants = librosa.effects.harmonic(y)
    formants_freq = librosa.effects.harmonic(y)
    formants_freq = formants_freq[:5] if len(formants_freq) >= 5 else np.pad(formants_freq, (0, 5-len(formants_freq)))
    # Dominant frequencies
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    fd1 = np.mean(np.max(mfccs[1:], axis=1))
    fd2 = np.mean(np.max(mfccs[2:], axis=1))
    # Zero Crossing Rate (ZCR)
    zcr = np.mean(librosa.feature.zero_crossing_rate(y))
    # Energy (E)
    energy = np.mean(librosa.feature.rms(y=y))
    # Mel-Frequency Cepstral Coefficients (MFCC)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    # Linear Predictive Cepstral Coefficients (LPCC)
    lpcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    
    # Concatenate features into a single array
    features = np.concatenate([[soe], [fd1, fd2, zcr, energy], mfcc.mean(axis=1)])
    return features

In [226]:
asd_output_file = 'asd_features.csv'
normal_output_file = 'normal_features.csv'

# Initialize dataframes to store features
asd_features_df = pd.DataFrame()
normal_features_df = pd.DataFrame()

In [227]:
temp_asd_df = []

for filename in os.listdir(asd_folder):
    if filename.endswith('.mp3'):
        audio_file = os.path.join(asd_folder, filename)
        features = extract_features(audio_file)
        temp_asd_df.append(pd.DataFrame([features]))
        
asd_features_df = pd.concat(temp_asd_df, ignore_index=True)

In [228]:
temp_normal_df = []

for filename in os.listdir(normal_folder):
    if filename.endswith('.mp3'):
        audio_file = os.path.join(normal_folder, filename)
        features = extract_features(audio_file)
        temp_normal_df.append(pd.DataFrame([features]))

normal_features_df = pd.concat(temp_normal_df, ignore_index=True)

In [229]:
asd_features_df.to_csv(os.path.join(output_asd_folder, 'asd_features.csv'), index=False)
normal_features_df.to_csv(os.path.join(output_normal_folder, 'normal_features.csv'), index=False)

In [230]:
combined_df = pd.concat([asd_features_df.assign(label='asd'), normal_features_df.assign(label='normal')])

In [273]:
X = combined_df.drop('label', axis=1)
y = combined_df['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

svm_classifier = SVC(kernel='poly', degree=3)
svm_classifier.fit(X_train, y_train)
y_pred = svm_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

accuracy

0.9530516431924883

In [248]:
pnn_classifier = ProbabilisticNeuralNetwork()
pnn_classifier.fit(X_train, y_train)

# Convert categorical labels to numeric labels for y_test
label_mapping = {'asd': 0, 'normal': 1}
y_test_numeric = np.array([label_mapping[label] for label in y_test])

y_pred_pnn = pnn_classifier.predict(X_test)

# Convert numeric labels to categorical labels for y_pred_pnn
label_mapping_inverse = {0: 'asd', 1: 'normal'}
y_pred_pnn_categorical = np.array([label_mapping_inverse[label] for label in y_pred_pnn])

accuracy_pnn = accuracy_score(y_test_numeric, y_pred_pnn)

accuracy_pnn

0.9577464788732394

In [249]:
mlp_classifier = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000)
mlp_classifier.fit(X_train, y_train)

y_pred_mlp = mlp_classifier.predict(X_test)
accuracy_mlp = accuracy_score(y_test, y_pred_mlp)

accuracy_mlp

0.971830985915493

In [238]:
knn_classifier = KNeighborsClassifier(n_neighbors=5)
knn_classifier.fit(X_train, y_train)
y_pred_knn = knn_classifier.predict(X_test)
accuracy_knn = accuracy_score(y_test, y_pred_knn)

accuracy_knn

0.9765258215962441

In [266]:
def evaluate_model(classifier, X_test, y_test, y_pred):
    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    
    # Calculate precision
    precision = precision_score(y_test, y_pred, average='weighted')
    
    # Calculate recall
    recall = recall_score(y_test, y_pred, average='weighted')
    
    # Calculate F1 score
    f1 = f1_score(y_test, y_pred, average='weighted')
    label_mapping = {'asd': 1, 'normal': 0}
    
    # Convert string labels to numeric labels
    y_test_li = [label_mapping[val] if isinstance(val, str) else val for val in y_test]
    y_pred_li = [label_mapping[val] if isinstance(val, str) else val for val in y_pred]
            
            
    p_value = ttest_rel(y_test_li, y_pred_li).pvalue
    
    return accuracy, precision, recall, f1, p_value


In [275]:
# Evaluate SVM model
accuracy_svm, precision_svm, recall_svm, f1_svm, p_value_svm = evaluate_model(svm_classifier, X_test, y_test, y_pred)
print("SVM Metrics:")
print("Accuracy:", accuracy_svm)
print("Precision:", precision_svm)
print("Recall:", recall_svm)
print("F1 Score:", f1_svm)
print("p-value:", p_value_svm)


SVM Metrics:
Accuracy: 0.9530516431924883
Precision: 0.9083074345919019
Recall: 0.9530516431924883
F1 Score: 0.930141747923438
p-value: 0.001427169295636645


  _warn_prf(average, modifier, msg_start, len(result))


In [268]:
# Evaluate MLP model
accuracy_mlp, precision_mlp, recall_mlp, f1_mlp, p_value_mlp = evaluate_model(mlp_classifier, X_test, y_test, y_pred_mlp)
print("\nMLP Metrics:")
print("Accuracy:", accuracy_mlp)
print("Precision:", precision_mlp)
print("Recall:", recall_mlp)
print("F1 Score:", f1_mlp)
print("p-value:", p_value_mlp)



MLP Metrics:
Accuracy: 0.971830985915493
Precision: 0.9691547027738088
Recall: 0.971830985915493
F1 Score: 0.9684472689797321
p-value: 0.10260257000434293


In [269]:
# Evaluate KNN model
accuracy_knn, precision_knn, recall_knn, f1_knn, p_value_knn = evaluate_model(knn_classifier, X_test, y_test, y_pred_knn)
print("\nKNN Metrics:")
print("Accuracy:", accuracy_knn)
print("Precision:", precision_knn)
print("Recall:", recall_knn)
print("F1 Score:", f1_knn)
print("p-value:", p_value_knn)


KNN Metrics:
Accuracy: 0.9765258215962441
Precision: 0.9770901047309497
Recall: 0.9765258215962441
F1 Score: 0.9727562455022103
p-value: 0.02499797097519792


In [270]:
# Evaluate PNN model
accuracy_pnn, precision_pnn, recall_pnn, f1_pnn, p_value_pnn = evaluate_model(pnn_classifier, X_test, y_test_numeric, y_pred_pnn)
print("\nPNN Metrics:")
print("Accuracy:", accuracy_pnn)
print("Precision:", precision_pnn)
print("Recall:", recall_pnn)
print("F1 Score:", f1_pnn)
print("p-value:", p_value_pnn)


PNN Metrics:
Accuracy: 0.9577464788732394
Precision: 0.949146803900325
Recall: 0.9577464788732394
F1 Score: 0.9509612419039785
p-value: 0.09566452788671304
