In [None]:
import os
import random

import librosa
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
from tensorflow.keras.layers import (
    BatchNormalization,
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    GlobalAveragePooling2D,
    Input,
    MaxPooling2D,
)
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import to_categorical

DATASET_PATH = "/kaggle/input/deep911/validation/combi"
LABEL_FILE_PATH = "/kaggle/input/deep911/validation/label_valid.txt"
NUM_CLASSES = 2
SAMPLE_RATE = 16000
DURATION = 5
N_MELS = 128
MAX_TIME_STEPS = 109


In [None]:
def augment_audio(audio):
    augmented_audio = audio.copy()

    shift_range = int(SAMPLE_RATE * 0.5)
    shift = random.randint(-shift_range, shift_range)
    augmented_audio = np.roll(augmented_audio, shift)

  
    if random.random() > 0.5:
        n_steps = random.randint(-2, 2)
        augmented_audio = librosa.effects.pitch_shift(augmented_audio, sr=SAMPLE_RATE, n_steps=n_steps)

   
    noise = np.random.randn(len(audio)) * 0.003
    augmented_audio += noise

   
    if random.random() > 0.5:
        stretch_factor = random.uniform(0.8, 1.2)
        augmented_audio = librosa.effects.time_stretch(augmented_audio, rate=stretch_factor)

    return augmented_audio


In [None]:

labels = {}
with open(LABEL_FILE_PATH, 'r') as label_file:
    lines = label_file.readlines()
for line in lines:
    parts = line.strip().split()
    if len(parts) < 3:
        continue
    file_name = parts[1]
    label = 1 if parts[-1] == "bonafide" else 0
    labels[file_name] = label


In [None]:
X = []
y = []
for file_name, label in labels.items():
    file_path = os.path.join(DATASET_PATH, file_name + ".flac")
    audio, _ = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)
    
   
    for a in [audio, augment_audio(audio), augment_audio(audio)]:
        mel_spectrogram = librosa.feature.melspectrogram(y=a, sr=SAMPLE_RATE, n_mels=N_MELS)
        mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
        if mel_spectrogram.shape[1] < MAX_TIME_STEPS:
            mel_spectrogram = np.pad(mel_spectrogram, ((0, 0), (0, MAX_TIME_STEPS - mel_spectrogram.shape[1])), mode='constant')
        else:
            mel_spectrogram = mel_spectrogram[:, :MAX_TIME_STEPS]
        X.append(mel_spectrogram)
        y.append(label)

X = np.array(X)
y = np.array(y)

y_encoded = to_categorical(y, NUM_CLASSES)
split_index = int(0.8 * len(X))
X_train, X_val = X[:split_index], X[split_index:]
y_train, y_val = y_encoded[:split_index], y_encoded[split_index:]


In [None]:
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

In [None]:
#CNN model
input_shape = (N_MELS, MAX_TIME_STEPS, 1)
model_input = Input(shape=input_shape)

x = Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same', kernel_regularizer=l2(0.001))(model_input)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.4)(x)

x = Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same', kernel_regularizer=l2(0.001))(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.4)(x)

x = Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same', kernel_regularizer=l2(0.001))(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.5)(x)

x = Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same', kernel_regularizer=l2(0.001))(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.5)(x)

x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
model_output = Dense(NUM_CLASSES, activation='softmax')(x)
model = Model(inputs=model_input, outputs=model_output)

In [None]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.00005)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

def scheduler(epoch, lr):
    return lr * tf.math.exp(-0.1).numpy() if epoch >= 10 else lr

callback = LearningRateScheduler(scheduler)

early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

history = model.fit(X_train[..., np.newaxis], y_train, 
                    batch_size=16, 
                    epochs=100, 
                    validation_data=(X_val[..., np.newaxis], y_val), 
                    callbacks=[callback, early_stopping])

model.save("/kaggle/working/kk-main-model/kk_main_final.h5")

In [None]:

model.save("/kaggle/working/kk-main-model/kk_main_ksk.h5")


In [None]:
print(f"Length of y_true: {len(y_true)}")
print(f"Length of y_pred_classes: {len(y_pred_classes)}")


In [None]:
# Testing
TEST_DATASET_PATH = "/kaggle/input/deep911/validation/combi"
test_files = os.listdir(TEST_DATASET_PATH)
y_true = np.array([labels[os.path.splitext(file)[0]] for file in test_files if os.path.splitext(file)[0] in labels])

X_test = []
for file_name in test_files:
    file_path = os.path.join(TEST_DATASET_PATH, file_name)
    try:
        audio, _ = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)
        mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=SAMPLE_RATE, n_mels=N_MELS)
        mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
        mel_spectrogram = np.pad(mel_spectrogram, ((0, 0), (0, max(0, MAX_TIME_STEPS - mel_spectrogram.shape[1]))), mode='constant')
        X_test.append(mel_spectrogram[:, :MAX_TIME_STEPS] / 80.0)
    except Exception as e:
        print(f"Error loading {file_name}: {e}")
        continue

X_test = np.array(X_test)[..., np.newaxis]

if len(X_test) > 0:
    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)


In [None]:
# Print the layer-by-layer architecture of the model.
model.summary()


In [None]:

cm = confusion_matrix(y_true, y_pred_classes)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["spoof", "bonafide"])
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix")
plt.show()
