In [1]:
import os
import cv2
import csv
import numpy as np
import pandas as pd
from skimage.feature import local_binary_pattern
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.fft import fft2
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

In [2]:
# Mount Google Drive at /content/drive; the data, label and model paths used
# in the cells below all live under '/content/drive/My Drive'.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Data Preprocessing

In [None]:
import os
import cv2
import pandas as pd

def extract_frames(video_path, folder_name, output_dir):
    """Save the middle frame of a video as a JPEG and return the saved path.

    Args:
        video_path: Path of the video file to open with OpenCV.
        folder_name: Prefix for the output file name (the caller passes the
            name of the folder that contains the video).
        output_dir: Directory that receives the JPEG. It must already exist.

    Returns:
        The path ``<output_dir>/<folder_name>_<video name>_frame.jpg`` when the
        frame was read and written, otherwise ``None``.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Seek to the middle of the clip before grabbing a single frame.
        cap.set(cv2.CAP_PROP_POS_FRAMES, length // 2)
        ret, frame = cap.read()
    finally:
        # Release the capture even if OpenCV raises while seeking/reading.
        cap.release()

    if not ret:
        # Previously this path fell through to `return frame_path` with the
        # name unbound, raising UnboundLocalError.
        return None

    video_name = os.path.splitext(os.path.basename(video_path))[0]
    frame_path = os.path.join(output_dir, f"{folder_name}_{video_name}_frame.jpg")
    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(frame_path, frame):
        return None
    return frame_path

def preprocess_dataset(data_dir, output_dir, label_file,
                       live_videos=('1.avi', '2.avi', 'HR_1.avi', 'HR_4.avi')):
    """Extract one frame per ``.avi`` video and write a label CSV.

    ``data_dir`` is expected to contain one sub-folder per group of videos.
    Every ``.avi`` file inside a sub-folder is passed to ``extract_frames``;
    the label is 1 (live) when the file name is listed in ``live_videos`` and
    0 (spoofed) otherwise.

    Args:
        data_dir: Root directory whose sub-folders hold the videos.
        output_dir: Directory that receives the extracted frames (created if
            it does not exist).
        label_file: Path of the CSV to write, with columns ``frame_path`` and
            ``label``.
        live_videos: File names that are labelled as live.
    """
    os.makedirs(output_dir, exist_ok=True)

    records = []
    # Sort the listings so the CSV row order is reproducible across runs.
    for folder in sorted(os.listdir(data_dir)):
        folder_path = os.path.join(data_dir, folder)
        if not os.path.isdir(folder_path):
            continue
        for video in sorted(os.listdir(folder_path)):
            if not video.endswith('.avi'):
                continue
            video_path = os.path.join(folder_path, video)
            label = 1 if video in live_videos else 0  # 1 = live, 0 = spoofed
            frame_path = extract_frames(video_path, folder, output_dir)
            if frame_path is None:
                # Do not record a label for a frame that was never written.
                print(f"Warning: Could not extract a frame from {video_path}")
                continue
            records.append({'frame_path': frame_path, 'label': label})

    # Save labels to a CSV file
    label_df = pd.DataFrame(records, columns=['frame_path', 'label'])
    label_df.to_csv(label_file, index=False)


In [None]:
# Input videos, extracted-frame folder and label CSV, all on the mounted Drive.
data_dir = '/content/drive/My Drive/train_release'
output_dir = '/content/drive/My Drive/frames'
label_file = '/content/drive/My Drive/labels.csv'

# Writes one JPEG per .avi video into output_dir and the (frame_path, label)
# table into label_file.
preprocess_dataset(data_dir, output_dir, label_file)
print("Preprocessing complete. Frames and labels are saved.")

Preprocessing complete. Frames and labels are saved.


# Feature extraction model

In [118]:
import cv2
import numpy as np
import pandas as pd
from skimage.feature import local_binary_pattern
from numpy.fft import fft2
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import csv
import tensorflow as tf
from sklearn import svm
import joblib

def extract_hsv_histograms(img):
    """Return the flattened saturation channel of a BGR image.

    Note that, despite the name, no histogram is computed: the image is
    converted to HSV and the S channel is returned as a 1-D array with one
    value per pixel.
    """
    hsv_image = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # Channel order after the conversion is (H, S, V); keep only S.
    saturation = hsv_image[:, :, 1]
    return saturation.flatten()

def extract_lbp_features(image):
    """Return a normalised 10-bin histogram of uniform LBP codes.

    The BGR image is converted to grayscale, local binary patterns are
    computed with P=8, R=1 and method='uniform', and the codes are counted in
    the integer bins [0, 1), ..., [9, 10]. The counts are divided by their sum
    (plus 1e-6 to avoid dividing by zero).
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    codes = local_binary_pattern(grayscale, P=8, R=1, method='uniform')
    counts, _edges = np.histogram(codes.ravel(), bins=np.arange(0, 11), range=(0, 10))
    counts = counts.astype("float")
    total = counts.sum() + 1e-6
    return counts / total

def extract_frequency_features(image):
    """Return the flattened magnitude of the 2-D FFT of the grayscale image.

    The output has one value per pixel of the input image.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    spectrum_magnitude = np.abs(fft2(grayscale))
    return spectrum_magnitude.flatten()

def load_and_extract_features_in_batches(label_file, batch_size=32, target_size=(224, 224)):
    """Read the label CSV and build one feature vector per readable frame.

    Rows are visited in CSV order, ``batch_size`` rows at a time. Each frame is
    loaded with OpenCV, resized to ``target_size`` and described by the
    concatenation of ``extract_hsv_histograms`` and ``extract_lbp_features``.
    Frames that cannot be read, or whose processing raises, are reported with
    ``print`` and skipped.

    Args:
        label_file: CSV with ``frame_path`` and ``label`` columns.
        batch_size: Number of CSV rows sliced per outer iteration.
        target_size: Size passed to ``cv2.resize``.

    Returns:
        ``(features, labels, filenames)``: two NumPy arrays and the list of
        frame paths that produced a feature vector.
    """
    table = pd.read_csv(label_file)
    feature_rows, label_values, kept_paths = [], [], []

    sample_count = len(table)
    batch_count = (sample_count + batch_size - 1) // batch_size

    for batch_index in range(batch_count):
        lower = batch_index * batch_size
        chunk = table.iloc[lower:lower + batch_size]
        for _, record in chunk.iterrows():
            frame_path = record['frame_path']
            label = record['label']
            try:
                frame = cv2.imread(frame_path)
                if frame is None:
                    print(f"Warning: Could not read frame {frame_path}")
                    continue
                frame = cv2.resize(frame, target_size)

                # Frequency features are intentionally not part of the vector.
                colour_part = extract_hsv_histograms(frame)
                texture_part = extract_lbp_features(frame)
                feature_rows.append(np.concatenate((colour_part, texture_part)))
                label_values.append(label)
                kept_paths.append(frame_path)
            except Exception as e:
                print(f"Error processing frame {frame_path}: {e}")

    return np.array(feature_rows), np.array(label_values), kept_paths

# Build the feature matrix, labels and matching frame paths from the label CSV
label_file = '/content/drive/My Drive/labels.csv'
X_features, y_features, filenames = load_and_extract_features_in_batches(label_file)

# Check if features were extracted successfully
if len(X_features) == 0 or len(y_features) == 0:
    raise ValueError("No features extracted. Check the data and preprocessing steps.")

# Hold out 20% of the samples for testing; the file names are split alongside
# the features so each prediction can be traced back to its frame.
X_train_feat, X_test_feat, y_train_feat, y_test_feat, train_filenames, test_filenames = train_test_split(
    X_features, y_features, filenames, test_size=0.2, random_state=42
)

# Train an SVM classifier (sklearn.svm.SVC with its default hyperparameters)
clf = svm.SVC()
clf.fit(X_train_feat, y_train_feat)

# Save the trained model
joblib.dump(clf, '/content/drive/My Drive/svm.joblib')

# Predict on the held-out split and report accuracy
y_pred_feat = clf.predict(X_test_feat)
print("Feature extraction method accuracy:", accuracy_score(y_test_feat, y_pred_feat))

Feature extraction method accuracy: 0.9083333333333333


In [117]:
# Load the saved model
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load model files from a trusted source.
# NOTE(review): this cell's execution count (117) is lower than the training
# cell's (118), so it was last run before the model file was re-saved — re-run
# it after training to pick up the latest model.
import joblib
clf_model = joblib.load('/content/drive/My Drive/svm.joblib')

# Deep model

In [None]:
import cv2
import numpy as np
import pandas as pd
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Flatten
from tensorflow.keras.applications import MobileNetV2, VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# Build the binary live/spoof classifier on a frozen MobileNetV2 backbone
def build_mobilefacenet_model(input_shape):
    """Create and compile a MobileNetV2-based binary classifier.

    The backbone is MobileNetV2 with ImageNet weights and without its top
    layers; all backbone layers are frozen. On top of it sit global average
    pooling, a 128-unit ReLU layer and a single sigmoid output. The model is
    compiled with Adam, binary cross-entropy and the accuracy metric.

    Args:
        input_shape: Input shape passed to MobileNetV2, e.g. (224, 224, 3).

    Returns:
        The compiled Keras model.
    """
    backbone = MobileNetV2(input_shape=input_shape, include_top=False, weights='imagenet')
    # Only the new head is trained; the pretrained layers stay fixed.
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(128, activation='relu')(pooled)
    score = Dense(1, activation='sigmoid')(hidden)

    classifier = Model(inputs=backbone.input, outputs=score)
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

# Data generator
def data_generator_for_dl(data, batch_size=32, target_size=(224, 224)):
    """Yield ``(images, labels)`` batches forever for Keras training.

    Images are loaded with OpenCV, resized to ``target_size`` and scaled to
    [-1, 1] (``pixel / 127.5 - 1``). This is the scaling that MobileNetV2's
    ``preprocess_input`` applies and that ``preprocess_frame`` uses at
    inference time; the previous ``pixel / 255`` scaling fed the model a
    different input range during training than during prediction. Models
    trained with the old scaling need to be retrained.

    NOTE(review): images stay in OpenCV's BGR channel order, exactly as at
    inference time; confirm whether an RGB conversion is wanted on both sides.

    Args:
        data: DataFrame with ``frame_path`` and ``label`` columns.
        batch_size: Maximum number of rows per batch.
        target_size: Size passed to ``cv2.resize``.

    Yields:
        Tuples ``(images, labels)`` of NumPy arrays. Unreadable images are
        skipped, and a batch in which no image could be read is not yielded.

    Raises:
        ValueError: If ``data`` is empty, or if a full pass over ``data`` does
            not produce a single readable image (either case would otherwise
            loop forever or hand Keras an empty batch).
    """
    if len(data) == 0:
        raise ValueError("data_generator_for_dl received an empty dataset.")

    while True:
        produced_batch = False
        for start in range(0, len(data), batch_size):
            batch_data = data.iloc[start:start + batch_size]
            batch_images = []
            batch_labels = []
            for frame_path, label in zip(batch_data['frame_path'], batch_data['label']):
                image = cv2.imread(frame_path)
                if image is None:
                    continue
                image = cv2.resize(image, target_size)
                batch_images.append(image.astype(np.float32) / 127.5 - 1.0)
                batch_labels.append(label)
            if not batch_images:
                continue
            produced_batch = True
            yield np.array(batch_images), np.array(batch_labels)
        if not produced_batch:
            raise ValueError("None of the frames listed in the dataset could be read.")

# Load the label CSV. The train/test split below is commented out, so every
# row is used for training and no validation data is held out.
label_file = '/content/drive/My Drive/labels.csv'
#train_data, test_data = train_test_split(pd.read_csv(label_file), test_size=0.2, random_state=42)
train_data = pd.read_csv(label_file)

# Generator for training (the validation generator is disabled)
batch_size = 32
train_gen = data_generator_for_dl(train_data, batch_size=batch_size)
#val_gen = data_generator_for_dl(test_data, batch_size=batch_size)

# Build model
input_shape = (224, 224, 3)
mobilefacenet_model = build_mobilefacenet_model(input_shape)

# Train the model
epochs = 7
# NOTE(review): floor division gives 0 steps when there are fewer rows than
# batch_size, and a trailing partial batch is not counted in an epoch.
steps_per_epoch = len(train_data) // batch_size
#validation_steps = len(test_data) // batch_size

mobilefacenet_model.fit(train_gen, steps_per_epoch=steps_per_epoch, epochs=epochs)#, validation_data=val_gen, validation_steps=validation_steps)

# Save the trained model to Drive in HDF5 format
mobilefacenet_model.save('/content/drive/My Drive/mobilefacenet_model2.h5')

# Evaluate the model
# def evaluate_model(model, data, batch_size=32, target_size=(224, 224)):
#     gen = data_generator_for_dl(data, batch_size, target_size)
#     steps = len(data) // batch_size
#     return model.evaluate(gen, steps=steps)

# mobilefacenet_acc = evaluate_model(mobilefacenet_model, test_data)

# print("MobileFaceNet Test Accuracy:", mobilefacenet_acc)

Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


  saving_api.save_model(


In [5]:
# Load a model from file
# NOTE(review): this loads 'mobilefacenet_model.h5', whereas the training cell
# in this notebook saves to 'mobilefacenet_model2.h5' — confirm which file is
# the intended one.
mobilefacenet_model = tf.keras.models.load_model('/content/drive/My Drive/mobilefacenet_model.h5')

# Test the deep model

In [6]:
def read_dataset(file_path):
    """Read a text file that lists one video path per line.

    Surrounding whitespace is stripped from every line, and blank lines
    (including a trailing empty line) are skipped so they do not turn into
    empty video paths.

    Args:
        file_path: Path of the text file to read.

    Returns:
        List of non-empty path strings, in file order.
    """
    with open(file_path, 'r') as file:
        stripped_lines = (line.strip() for line in file)
        return [path for path in stripped_lines if path]

In [7]:
# Function to extract the middle frame of a video
def extract_frame(video_path):
    """Return the middle frame of the video, or None if it cannot be read.

    The capture is positioned at ``frame_count // 2`` before a single frame is
    read, and it is released before returning.
    """
    capture = cv2.VideoCapture(video_path)
    frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    capture.set(cv2.CAP_PROP_POS_FRAMES, frame_count // 2)
    success, image = capture.read()
    capture.release()
    return image if success else None

def preprocess_frame(frame):
    """Resize a frame to 224x224 and apply ``preprocess_input`` to it.

    ``preprocess_input`` is imported in a later cell, so that cell must have
    been run before this function is called.
    """
    resized = cv2.resize(frame, (224, 224))
    return preprocess_input(resized)

In [8]:
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input

def calculate_liveness_score(frame):
    """Score one frame with the global ``mobilefacenet_model``.

    The frame is preprocessed, wrapped into a batch of one and passed to the
    model; the largest value of the prediction array is returned as the
    liveness score.
    """
    single_batch = np.expand_dims(preprocess_frame(frame), axis=0)
    predictions = mobilefacenet_model.predict(single_batch)
    # The maximum of the prediction array is taken as the liveness score.
    return np.max(predictions)

In [9]:
pip install face_recognition

Collecting face_recognition
  Downloading face_recognition-1.3.0-py2.py3-none-any.whl (15 kB)
Collecting face-recognition-models>=0.3.0 (from face_recognition)
  Downloading face_recognition_models-0.3.0.tar.gz (100.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.1/100.1 MB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: face-recognition-models
  Building wheel for face-recognition-models (setup.py) ... [?25l[?25hdone
  Created wheel for face-recognition-models: filename=face_recognition_models-0.3.0-py2.py3-none-any.whl size=100566170 sha256=3f5eb215bed4b89ef8d3b8361cce28bee154c51f699262f9e3f4b70ec408f85b
  Stored in directory: /root/.cache/pip/wheels/7a/eb/cf/e9eced74122b679557f597bb7c8e4c739cfcac526db1fd523d
Successfully built face-recognition-models
Installing collected packages: face-recognition-models, face_recognition
Successfully installed face-recognition-m

In [11]:
import face_recognition


def recognize_face(frame, file_name, output_dir='/content/recog', margins=(250, 40, 40, 40)):
    """Crop the first detected face from a BGR frame, save it and return it.

    Args:
        frame: BGR image array (OpenCV convention).
        file_name: Base name, without extension, of the JPEG to write.
        output_dir: Directory that receives ``<file_name>.jpg``; it is created
            when missing so the write does not fail silently.
        margins: Pixels added around the detected box, given as
            ``(top, right, bottom, left)``. The enlarged box is clamped to the
            frame borders.

    Returns:
        The cropped region of ``frame`` for the first detected face, or
        ``None`` when no face is found.
    """
    # Convert the frame to RGB (OpenCV uses BGR by default)
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Only locations are needed here; computing face encodings as well was
    # wasted work because the result was never used.
    face_locations = face_recognition.face_locations(rgb_frame)
    if not face_locations:
        return None

    # Use the first detection only, as the original early return did.
    top, right, bottom, left = face_locations[0]
    margin_top, margin_right, margin_bottom, margin_left = margins
    frame_height, frame_width = frame.shape[:2]

    top = max(0, top - margin_top)
    right = min(frame_width, right + margin_right)
    bottom = min(frame_height, bottom + margin_bottom)
    left = max(0, left - margin_left)

    # Crop the face from the frame
    face_image = frame[top:bottom, left:right]

    os.makedirs(output_dir, exist_ok=True)
    cv2.imwrite(os.path.join(output_dir, f'{file_name}.jpg'), face_image)
    return face_image

In [12]:
import cv2
import numpy as np

def extract_frequency_features2(image, target_size=(224, 224)):
    """Return the log-magnitude Fourier spectrum of an image as a 3-channel array.

    Args:
        image: BGR image array.
        target_size: Size passed to ``cv2.resize`` for the spectrum.

    Returns:
        Array holding the resized, centred spectrum ``20 * log(1 + |F|)``
        repeated in three identical channels along the last axis.
    """
    # Convert to grayscale
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply 2D Fourier Transform; the last axis of the result holds the
    # (real, imaginary) parts.
    dft = cv2.dft(np.float32(gray_image), flags=cv2.DFT_COMPLEX_OUTPUT)

    # Shift the zero-frequency component to the centre. Only the two spatial
    # axes are shifted; shifting every axis would also swap the real and
    # imaginary planes.
    dft_shift = np.fft.fftshift(dft, axes=(0, 1))

    # Calculate the magnitude spectrum. log1p keeps zero-magnitude bins at 0
    # instead of -inf, which would otherwise spread through the resize.
    magnitude = cv2.magnitude(dft_shift[:, :, 0], dft_shift[:, :, 1])
    magnitude_spectrum = 20 * np.log1p(magnitude)

    # Resize the magnitude spectrum
    resized_magnitude_spectrum = cv2.resize(magnitude_spectrum, target_size)

    # Create a 3-channel image from the magnitude spectrum
    return np.stack((resized_magnitude_spectrum,) * 3, axis=-1)

In [13]:
import os
video_paths = read_dataset('/content/drive/My Drive/dataset.txt')

# Score every video three ways with the deep model: on the full middle frame,
# on the cropped face and on the frequency-spectrum image. The rows are
# collected in a list and turned into a DataFrame once, instead of growing the
# DataFrame with pd.concat on every iteration (quadratic copying).
records = []
for video_path in video_paths:
    filename = video_path.split('/')[-1].split('.')[0]
    frame = extract_frame(video_path)
    if frame is None:
        print(f"Warning: Could not read a frame from {video_path}")
        continue

    liveness_score = calculate_liveness_score(frame)

    # recognize_face returns None when no face is detected; score 0 then.
    face = recognize_face(frame, filename)
    liveness_score_crop = calculate_liveness_score(face) if face is not None else 0

    # extract_frequency_features2 always returns an array, so no None check.
    frequency_features = extract_frequency_features2(frame)
    liveness_score_frequency = calculate_liveness_score(frequency_features)

    records.append({
        'filename': filename,
        'liveness_score': liveness_score,
        'liveness_score_crop': liveness_score_crop,
        'liveness_score_frequency': liveness_score_frequency,
    })

result = pd.DataFrame(
    records,
    columns=['filename', 'liveness_score', 'liveness_score_crop', 'liveness_score_frequency'],
)



In [14]:
# Persist the deep-model scores to Drive (one row per video, no index column).
result.to_csv('/content/drive/My Drive/prediction_deep.csv', index=False)

# Test the feature-based model

In [112]:
import cv2
import numpy as np
import pandas as pd
import os
from skimage.feature import local_binary_pattern
from numpy.fft import fft2
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import tensorflow as tf
import joblib

# Function to preprocess a frame for the feature-based classifier
def preprocess_frame_rf(frame):
    """Turn a frame into a single-row feature matrix for the classifier.

    The frame is resized to 224x224 and described by the concatenation of
    ``extract_hsv_histograms`` and ``extract_lbp_features`` (frequency
    features are not included). The result has shape (1, n_features).
    """
    resized = cv2.resize(frame, (224, 224))
    feature_parts = (
        extract_hsv_histograms(resized),
        extract_lbp_features(resized),
    )
    return np.concatenate(feature_parts).reshape(1, -1)

# Function to calculate the liveness score with the feature-based classifier
def calculate_liveness_score2(frame):
    """Return the prediction of the global ``clf_model`` for one frame.

    The frame is converted to a single-row feature matrix with
    ``preprocess_frame_rf`` and the first (only) predicted value is returned.
    """
    feature_row = preprocess_frame_rf(frame)
    return clf_model.predict(feature_row)[0]

# Read video paths
video_paths = read_dataset('/content/drive/My Drive/dataset.txt')

# Score every video three ways with the feature-based classifier: on the full
# middle frame, on the cropped face and on the frequency-spectrum image. The
# rows are collected in a list and turned into a DataFrame once, instead of
# growing the DataFrame with pd.concat on every iteration (quadratic copying).
records = []
for video_path in video_paths:
    filename = video_path.split('/')[-1].split('.')[0]
    frame = extract_frame(video_path)
    if frame is None:
        print(f"Warning: Could not read a frame from {video_path}")
        continue

    face = recognize_face(frame, filename)
    frequency_features = extract_frequency_features2(frame)

    liveness_score = calculate_liveness_score2(frame)
    # recognize_face returns None when no face is detected; score 0 then.
    liveness_score_crop = calculate_liveness_score2(face) if face is not None else 0
    # extract_frequency_features2 always returns an array, so no None check.
    liveness_score_frequency = calculate_liveness_score2(frequency_features)

    records.append({
        'filename': filename,
        'liveness_score': liveness_score,
        'liveness_score_crop': liveness_score_crop,
        'liveness_score_frequency': liveness_score_frequency,
    })

result = pd.DataFrame(
    records,
    columns=['filename', 'liveness_score', 'liveness_score_crop', 'liveness_score_frequency'],
)

In [113]:
# Show the per-video predictions of the feature-based classifier.
print(result)

       filename liveness_score liveness_score_crop liveness_score_frequency
0   anti-spoof1              1                   1                        1
1   anti-spoof2              0                   1                        1
2   anti-spoof3              0                   0                        1
3   anti-spoof4              0                   0                        1
4   anti-spoof5              0                   0                        1
5   anti-spoof6              1                   1                        1
6   anti-spoof7              1                   1                        1
7   anti-spoof8              1                   0                        1
8   anti-spoof9              1                   1                        1
9        spoof1              0                   1                        1
10       spoof2              0                   0                        1
11       spoof3              0                   0                        1
12       spo

In [115]:
# Persist the feature-based predictions to Drive (one row per video, no index column).
result.to_csv('/content/drive/My Drive/prediction_feature.csv', index=False)