In [1]:
import os
from PIL import Image
import numpy as np
from tensorflow.keras import backend 
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

2025-05-10 18:28:39.336147: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Loading data

In [2]:
dataset= '/home/madan005/dev/deeplearning/MFCC'

train_dataset      = os.path.join(dataset, 'train')
validation_dataset = os.path.join(dataset, 'dev')
test_dataset       = os.path.join(dataset, 'eval')


def load_images_from_folder(folder, label, target_size=(200, 200)):
    
    images = []
    labels = []
    
    for filename in os.listdir(folder):
        filepath = os.path.join(folder, filename)
        image = Image.open(filepath)
        image = image.convert('RGB')
        image = image.resize(target_size, Image.LANCZOS)
        images.append(np.array(image))
        labels.append(label)
    return images, labels

# Initialize lists for train, validation, and test sets
X_train, y_train = [], []
X_val,   y_val   = [], []
X_test,  y_test  = [], []

# training data
for class_name in ['genuine', 'spoof']:
    class_folder = os.path.join(train_dataset, class_name)
    label = 1 if class_name == 'genuine' else 0
    imgs, labels = load_images_from_folder(class_folder, label)
    X_train.extend(imgs)
    y_train.extend(labels)
    
# validation data
for class_name in ['genuine', 'spoof']:
    class_folder = os.path.join(validation_dataset, class_name)
    label = 1 if class_name == 'genuine' else 0
    imgs, labels = load_images_from_folder(class_folder, label)
    X_val.extend(imgs)
    y_val.extend(labels)

# test data
for class_name in ['genuine', 'spoof']:
    class_folder = os.path.join(test_dataset, class_name)
    label = 1 if class_name == 'genuine' else 0
    imgs, labels = load_images_from_folder(class_folder, label)
    X_test.extend(imgs)
    y_test.extend(labels)



# numpy arrays
X_train = np.array(X_train, dtype='float32') / 255.0
y_train = np.array(y_train)
X_val   = np.array(X_val,   dtype='float32') / 255.0
y_val   = np.array(y_val)
X_test  = np.array(X_test,  dtype='float32') / 255.0
y_test  = np.array(y_test)

# shapes
print("Training set:", X_train.shape)
print("Validation set:", X_val.shape)
print("Test set:", X_test.shape)



Training set: (3014, 200, 200, 3)
Validation set: (1710, 200, 200, 3)
Test set: (13306, 200, 200, 3)


class distributions

In [3]:
# class counts 
for split_name, labels in [
    ('Training', y_train),
    ('Validation', y_val),
    ('Test', y_test)
]:
    unique, counts = np.unique(labels, return_counts=True)
    print(f"\n{split_name}:")
    for cls, cnt in zip(unique, counts):
        name = 'genuine' if cls == 1 else 'spoof'
        print(f"  {name}: {cnt}")



Training:
  spoof: 1507
  genuine: 1507

Validation:
  spoof: 950
  genuine: 760

Test:
  spoof: 12008
  genuine: 1298


In [4]:
BATCH_SIZE = 32
AUTOTUNE = tf.data.AUTOTUNE

train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)) \
    .shuffle(buffer_size=1000) \
    .batch(BATCH_SIZE) \
    .prefetch(AUTOTUNE)

val_ds = tf.data.Dataset.from_tensor_slices((X_val, y_val)) \
    .batch(BATCH_SIZE) \
    .prefetch(AUTOTUNE)

test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test)) \
    .batch(BATCH_SIZE) \
    .prefetch(AUTOTUNE)

2025-05-10 18:29:28.183663: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 6386880000 exceeds 10% of free system memory.


In [5]:
backend.clear_session()



from tensorflow.keras import regularizers


img_height, img_width = 200, 200
input_shape = (img_height, img_width, 3)

# model
model = Sequential([
  Conv2D(128, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.001), input_shape=input_shape), 
  MaxPooling2D(pool_size=(2, 2)),
  BatchNormalization(),
  
  Conv2D(64, (3, 3), activation='relu',  kernel_regularizer=regularizers.l2(0.001)), 
  MaxPooling2D(pool_size=(2, 2)),
  BatchNormalization(),
  
  Conv2D(32, (3, 3), activation='relu',  kernel_regularizer=regularizers.l2(0.001)), 
  MaxPooling2D(pool_size=(2, 2)),
  BatchNormalization(),
  
  Flatten(),
  Dense(256, activation="relu"),
  Dropout(0.2),
  Dense(1, activation="sigmoid")
])

model.compile(optimizer=Adam(learning_rate=1e-4),
              loss='binary_crossentropy', 
              metrics=['accuracy', tf.keras.metrics.AUC(name='auc')])


model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [6]:
# Updated training call using the tf.data datasets
history = model.fit(
    train_ds,
    steps_per_epoch=8,
    epochs=30,
    validation_data=val_ds
)


Epoch 1/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2s/step - accuracy: 0.8815 - auc: 0.0000e+00 - loss: 0.3757 - val_accuracy: 0.4450 - val_auc: 0.5045 - val_loss: 0.8291
Epoch 2/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2s/step - accuracy: 1.0000 - auc: 0.0000e+00 - loss: 0.1328 - val_accuracy: 0.4444 - val_auc: 0.4850 - val_loss: 0.8472
Epoch 3/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 3s/step - accuracy: 0.9004 - auc: 0.4990 - loss: 2.6162 - val_accuracy: 0.4444 - val_auc: 0.5289 - val_loss: 0.8608
Epoch 4/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2s/step - accuracy: 0.6254 - auc: 0.6130 - loss: 0.9274 - val_accuracy: 0.4444 - val_auc: 0.5370 - val_loss: 0.8622
Epoch 5/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2s/step - accuracy: 0.6520 - auc: 0.6908 - loss: 0.7795 - val_accuracy: 0.4444 - val_auc: 0.5453 - val_loss: 0.8765
Epoch 6/30
[1m8/8[0m [32m━━━━━━━━━━━━━━

2025-05-10 18:32:34.618236: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
  self.gen.throw(typ, value, traceback)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1s/step - accuracy: 0.9131 - auc: 0.8880 - loss: 0.4174 - val_accuracy: 0.4444 - val_auc: 0.5558 - val_loss: 1.0648
Epoch 13/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 1s/step - accuracy: 0.6467 - auc: 0.0000e+00 - loss: 0.7721 - val_accuracy: 0.4444 - val_auc: 0.5541 - val_loss: 1.1352
Epoch 14/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2s/step - accuracy: 1.0000 - auc: 0.0000e+00 - loss: 0.1533 - val_accuracy: 0.4444 - val_auc: 0.5541 - val_loss: 1.2001
Epoch 15/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 1s/step - accuracy: 0.9245 - auc: 0.7194 - loss: 0.5191 - val_accuracy: 0.4444 - val_auc: 0.5574 - val_loss: 1.1905
Epoch 16/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 1s/step - accuracy: 0.7250 - auc: 0.7039 - loss: 1.0238 - val_accuracy: 0.4444 - val_auc: 0.5636 - val_loss: 1.1024
Epoch 17/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━

2025-05-10 18:34:47.902965: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1s/step - accuracy: 0.8855 - auc: 0.9449 - loss: 0.3981 - val_accuracy: 0.4444 - val_auc: 0.6145 - val_loss: 1.2083
Epoch 25/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2s/step - accuracy: 0.8605 - auc: 0.0000e+00 - loss: 0.4023 - val_accuracy: 0.4444 - val_auc: 0.6121 - val_loss: 1.2060
Epoch 26/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 1s/step - accuracy: 1.0000 - auc: 0.0000e+00 - loss: 0.1222 - val_accuracy: 0.4444 - val_auc: 0.6100 - val_loss: 1.2217
Epoch 27/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2s/step - accuracy: 0.9227 - auc: 0.6488 - loss: 0.5849 - val_accuracy: 0.4444 - val_auc: 0.6107 - val_loss: 1.1715
Epoch 28/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2s/step - accuracy: 0.7937 - auc: 0.8972 - loss: 0.5505 - val_accuracy: 0.4444 - val_auc: 0.5922 - val_loss: 1.1526
Epoch 29/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━

In [7]:

y_pred_train_raw = model.predict(X_train)
y_pred_val_raw   = model.predict(X_val)
y_pred_test_raw  = model.predict(X_test)

#discrete labels
def to_labels(probs):
    # multi-class?
    if probs.ndim > 1 and probs.shape[1] > 1:
        return probs.argmax(axis=1)
    # binary-probabilities => threshold at 0.5
    return (probs > 0.5).astype(int).ravel()

y_pred_train = to_labels(y_pred_train_raw)
y_pred_val   = to_labels(y_pred_val_raw)
y_pred_test  = to_labels(y_pred_test_raw)

#   class names
target_names = ['spoof', 'genuine']

#Loop through each split
for split_name, y_true, y_pred in [
    ('Train',      y_train, y_pred_train),
    ('Validation', y_val,   y_pred_val),
    ('Test',       y_test,  y_pred_test)
]:
    print(f"\n=== Classification Report — {split_name} ===")
    print(classification_report(y_true, y_pred, target_names=target_names))
    
   


[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 76ms/step
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 76ms/step
[1m  1/416[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m46s[0m 111ms/step

2025-05-10 18:36:13.261544: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 6386880000 exceeds 10% of free system memory.


[1m416/416[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 76ms/step

=== Classification Report — Train ===
              precision    recall  f1-score   support

       spoof       0.00      0.00      0.00      1507
     genuine       0.50      1.00      0.67      1507

    accuracy                           0.50      3014
   macro avg       0.25      0.50      0.33      3014
weighted avg       0.25      0.50      0.33      3014


=== Classification Report — Validation ===
              precision    recall  f1-score   support

       spoof       0.00      0.00      0.00       950
     genuine       0.44      1.00      0.62       760

    accuracy                           0.44      1710
   macro avg       0.22      0.50      0.31      1710
weighted avg       0.20      0.44      0.27      1710


=== Classification Report — Test ===
              precision    recall  f1-score   support

       spoof       0.00      0.00      0.00     12008
     genuine       0.10      1.00      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
