ACQUIRING THE AUDIO DATASET FROM GOOGLE DRIVE:

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so the paths used in later cells resolve.
drive.mount('/content/drive')

Mounted at /content/drive


EXTRACTION OF AUDIO FILES FROM THE ZIP ARCHIVE:

In [None]:
import zipfile
import os

In [None]:
# Location of the zipped dataset on Drive and the folder it is extracted into.
zip_file_path = '/content/drive/MyDrive/Audio_files/Crema.zip'
extract_folder_path = '/content/drive/MyDrive/Audio_files/Crema'

In [None]:
# Create the extraction folder; exist_ok=True means an existing folder is not an error.
os.makedirs(extract_folder_path, exist_ok=True)

In [None]:
pip install tqdm



In [None]:
from tqdm import tqdm

In [None]:
# Extract the archive member by member so tqdm can report progress.
# Members that already exist on disk with their full uncompressed size are
# skipped, so re-running this cell does not repeat the whole (slow) extraction.
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    files = zip_ref.namelist()

    # Use tqdm for a progress bar
    for file in tqdm(files, desc="Extracting Files", unit="file"):
        member = zip_ref.getinfo(file)
        target_path = os.path.join(extract_folder_path, file)

        # Comparing sizes (not just existence) ensures a file left truncated by
        # an interrupted earlier run is extracted again.
        already_extracted = (
            not member.is_dir()
            and os.path.isfile(target_path)
            and os.path.getsize(target_path) == member.file_size
        )
        if already_extracted:
            continue

        zip_ref.extract(member, extract_folder_path)

Extracting Files: 100%|██████████| 7442/7442 [31:33<00:00,  3.93file/s]


In [None]:
 # List the names now present in the extraction folder.
 # NOTE(review): this cell starts with a stray leading space; it evidently ran in the
 # notebook, but would be an IndentationError in a plain .py script - consider removing it.
 extracted_files = os.listdir(extract_folder_path)

In [None]:
# Sanity check: show the first ten extracted names.
print(extracted_files[:10])

['1008_DFA_HAP_XX.wav', '1008_DFA_NEU_XX.wav', '1008_DFA_FEA_XX.wav', '1008_DFA_SAD_XX.wav', '1008_IEO_ANG_LO.wav', '1008_IEO_ANG_HI.wav', '1008_IEO_ANG_MD.wav', '1008_IEO_DIS_HI.wav', '1008_IEO_DIS_MD.wav', '1008_IEO_DIS_LO.wav']


In [None]:
import tensorflow as tf
import os

In [None]:
# Folder holding the extracted WAV files (same literal path as extract_folder_path).
audio_folder_path = '/content/drive/MyDrive/Audio_files/Crema'

In [None]:
# Names of every entry in the audio folder.
audio_files = os.listdir(audio_folder_path)

In [None]:
def load_audio(filename):
    """Read a WAV file and decode it with TensorFlow.

    Args:
        filename: Path of the WAV file to read.

    Returns:
        The ``(audio, sample_rate)`` pair produced by ``tf.audio.decode_wav``.
    """
    wav_bytes = tf.io.read_file(filename)
    waveform, rate = tf.audio.decode_wav(wav_bytes)
    return waveform, rate

In [None]:
# Decode the first listed file as a quick check that loading works.
audio_file_path = os.path.join(audio_folder_path, audio_files[0])
audio, sample_rate = load_audio(audio_file_path)

In [None]:
# Report the decoded tensor's shape and the sample rate of the file.
print(f"Audio shape: {audio.shape}")
print(f"Sample rate: {sample_rate}")

Audio shape: (51251, 1)
Sample rate: 16000


EXTRACTION OF FEATURES USING MEL-SPECTROGRAM WITH TENSORFLOW:

In [None]:
import os
import librosa
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models
from tqdm import tqdm

In [None]:
# Folder the feature-extraction loop reads audio files from.
audio_folder_path = '/content/drive/MyDrive/Audio_files/Crema'

In [None]:
def extract_mel_features(audio, sample_rate=16000, n_mels=64):
    """Compute a log-scaled mel spectrogram of ``audio`` with TensorFlow.

    The signal is framed with a 1024-sample window and a 512-sample step,
    the STFT magnitudes are projected onto ``n_mels`` mel bands, and the
    natural logarithm is taken after adding 1e-6 so zero-energy bins stay
    finite.

    Args:
        audio: Waveform samples, passed directly to ``tf.signal.stft``.
        sample_rate: Sampling rate used to build the mel weight matrix.
        n_mels: Number of mel bands; the default of 64 is kept small for
            faster computation.

    Returns:
        The log-mel spectrogram converted to a NumPy array via ``.numpy()``.
    """
    magnitudes = tf.abs(tf.signal.stft(audio, frame_length=1024, frame_step=512))
    mel_weights = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=n_mels,
        num_spectrogram_bins=magnitudes.shape[-1],
        sample_rate=sample_rate,
    )
    log_mel = tf.math.log(tf.matmul(magnitudes, mel_weights) + 1e-6)
    return log_mel.numpy()

In [None]:
# Accumulators filled by the extraction loop: one flattened feature vector and
# one integer label per successfully processed file.
features = []
labels = []

In [None]:
# Names of the entries to run feature extraction on.
audio_files = os.listdir(audio_folder_path)

In [None]:
# File names look like '1008_DFA_HAP_XX.wav': the third '_'-separated field is
# the emotion code (ANG, DIS, FEA, HAP, NEU, SAD appear in the listing).
# No file name contains the word "positive", so a substring test on it labels
# every clip 0 and leaves the classifier with a single class.
# Here HAP is treated as the positive class; adjust this set if a different
# positive/negative grouping is intended.
POSITIVE_EMOTION_CODES = {"HAP"}

# Empty the accumulators in place so re-running this cell does not append
# every clip a second time.
features.clear()
labels.clear()

for file in tqdm(audio_files):
    try:
        # Parse the label first: a name that does not follow the expected
        # pattern is reported and skipped before any audio is decoded.
        emotion_code = os.path.splitext(file)[0].split("_")[2]
        label = 1 if emotion_code in POSITIVE_EMOTION_CODES else 0

        audio_path = os.path.join(audio_folder_path, file)
        audio, sr = librosa.load(audio_path, sr=16000)

        mel_features = extract_mel_features(audio)
        mel_features_flat = mel_features.flatten()
    except Exception as e:
        print(f"Error processing file {file}: {e}")
        continue

    # Append feature and label together, only after everything succeeded, so
    # the two lists always stay the same length and aligned.
    features.append(mel_features_flat)
    labels.append(label)

100%|██████████| 7442/7442 [06:58<00:00, 17.79it/s]


SPLITTING THE DATASET INTO TRAINING AND TEST SETS:

In [None]:
import tensorflow as tf
# Length every flattened feature vector is padded or truncated to
# (128 * 64 values, matching the (128, 64) input shape the CNN is built with).
FIXED_SIZE = 128 * 64

In [None]:
def pad_or_truncate(features, target_size=FIXED_SIZE):
    """Force a 1-D feature vector to exactly ``target_size`` entries.

    Args:
        features: 1-D sequence of feature values.
        target_size: Required output length.

    Returns:
        The first ``target_size`` entries when the input is longer than
        ``target_size``; otherwise the input with zeros appended at the end
        until it reaches ``target_size``.
    """
    shortfall = target_size - len(features)
    if shortfall < 0:
        return features[:target_size]
    return np.pad(features, (0, shortfall), mode='constant')

In [None]:
# Bring every feature vector to the same length so they can be stacked into one array.
features_padded = [pad_or_truncate(feature.flatten(), FIXED_SIZE) for feature in features]

In [None]:
# Stack the equal-length vectors into a 2-D feature matrix and the labels into a 1-D array.
X = np.array(features_padded)
y = np.array(labels)

In [None]:
# Split first, then fit the scaler on the training rows only. Fitting it on the
# full matrix before splitting lets the mean/variance of the held-out rows leak
# into preprocessing and makes the test score optimistic.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
# The test rows are transformed with the statistics learned from the training rows.
X_test = scaler.transform(X_test)

IMPLEMENTATION OF THE CONVOLUTIONAL NEURAL NETWORK (CNN) MODEL:

In [None]:
# Each flattened feature vector is viewed as a single-channel 128 x 64 image.
FIXED_SHAPE = (128, 64)
n_rows, n_cols = FIXED_SHAPE
cnn_input_shape = (n_rows, n_cols, 1)

# Small CNN: one convolution + pooling stage, then a dense head that ends in a
# single sigmoid unit for the binary label.
model = models.Sequential([
    layers.InputLayer(shape=cnn_input_shape),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Restore the (rows, cols, channel) layout expected by Conv2D.
X_train = X_train.reshape((-1,) + cnn_input_shape)
X_test = X_test.reshape((-1,) + cnn_input_shape)

# The held-out split is also used as the validation data during training.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=5,
    batch_size=64,
)

Epoch 1/5
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 429ms/step - accuracy: 0.9890 - loss: 0.0335 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
Epoch 2/5
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 447ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
Epoch 3/5
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 536ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
Epoch 4/5
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 464ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
Epoch 5/5
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 451ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00


<keras.src.callbacks.history.History at 0x7eed5edb1a20>

In [None]:
# Loss and accuracy of the trained model on the held-out split.
loss, accuracy = model.evaluate(X_test, y_test)

[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 93ms/step - accuracy: 1.0000 - loss: 0.0000e+00


In [None]:
# Print both metrics rounded to four decimals.
print(f'Test Loss: {loss:.4f}')
print(f'Test Accuracy: {accuracy:.4f}')


Test Loss: 0.0000
Test Accuracy: 1.0000


In [None]:
# Sigmoid outputs of the model for each held-out sample.
y_pred = model.predict(X_test)

[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 76ms/step


In [None]:
# Turn the sigmoid outputs into hard 0/1 predictions with a 0.5 threshold.
y_pred_binary = (y_pred > 0.5).astype(int)

In [None]:
from sklearn.metrics import classification_report
# Per-class precision / recall / F1 on the held-out split.
print(classification_report(y_test, y_pred_binary))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1489

    accuracy                           1.00      1489
   macro avg       1.00      1.00      1.00      1489
weighted avg       1.00      1.00      1.00      1489



In [None]:
# Save the trained model to the current working directory in the .keras format.
model.save('Audio_recognition.keras')


In [None]:
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import RMSprop

In [None]:
# Reload the saved model without its stored compile state, then compile it again.
model = load_model('Audio_recognition.keras', compile=False)
# The network ends in a single sigmoid unit, so the matching loss is
# binary_crossentropy; categorical_crossentropy expects one output per class
# and does not fit a single-unit output.
model.compile(optimizer=RMSprop(), loss='binary_crossentropy', metrics=['accuracy'])
model.summary()