In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.feature import hog, local_binary_pattern, graycomatrix, graycoprops
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import LinearSVC
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import tensorflow as tf
from tensorflow.keras.applications import VGG19
from tensorflow.keras.applications.vgg19 import preprocess_input
import seaborn as sns
import zipfile
import random

In [2]:
# Step 1: Data Preparation
def unzip_files():
    for zip_file in ['male1.zip', 'female.zip']:
        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
            zip_ref.extractall()

def augment_image(image):
    # Flip horizontally
    flipped = cv2.flip(image, 1)
    # Rotate 90 degrees
    rotated = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
    # Random brightness and contrast
    alpha = 0.8 + random.uniform(0, 0.4)  # Contrast control (0.8-1.2)
    beta = random.randint(-20, 20)  # Brightness control
    adjusted = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
    return [image, flipped, rotated, adjusted]


In [None]:
def preprocess_image(image_path):
    image = cv2.imread(image_path)
    image = cv2.resize(image, (224, 224))  # Resize for VGG19
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Histogram equalization
    gray = cv2.equalizeHist(gray)
    # Contrast normalization
    gray = cv2.normalize(gray, None, 0, 255, cv2.NORM_MINMAX)
    # Convert back to color for VGG19
    image = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
    image = preprocess_input(image)  # For VGG19
    return image, gray

def load_data():
    images, grays, labels = [], [], []
    # Count original images in each folder
    male_count = len([f for f in os.listdir('male1') if f.endswith(('.jpg', '.png'))])
    female_count = len([f for f in os.listdir('female') if f.endswith(('.jpg', '.png'))])
    max_samples = min(male_count, female_count) * 4  # Max samples after augmentation (4 per image)

    for folder, label in [('male1', 0), ('female', 1)]:
        sample_count = 0
        for filename in os.listdir(folder):
            if filename.endswith('.jpg') or filename.endswith('.png'):
                if sample_count >= max_samples // 4:  # Limit to max original images
                    break
                img_path = os.path.join(folder, filename)
                image, gray = preprocess_image(img_path)
                # Augment images
                augmented_images = augment_image(image)
                augmented_grays = augment_image(gray)
                images.extend(augmented_images)
                grays.extend(augmented_grays)
                labels.extend([label] * len(augmented_images))
                sample_count += 1
    return np.array(images), np.array(grays), np.array(labels)

# Step 2: Feature Engineering
def extract_hog_features(gray_images):
    hog_features = []
    for gray in gray_images:
        features, hog_image = hog(gray, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2), visualize=True)
        hog_features.append(features)
    return np.array(hog_features), hog_image

def extract_lbp_features(gray_images):
    lbp_features = []
    lbp_image = None
    for i, gray in enumerate(gray_images):
        lbp = local_binary_pattern(gray, P=8, R=1, method='uniform')
        hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, 11), density=True)
        lbp_features.append(hist)
        if i == 0:  # Save the first LBP for visualization
            lbp_image = lbp
    return np.array(lbp_features), lbp_image

def extract_glcm_features(gray_images):
    glcm_features = []
    glcm_sample = None
    for i, gray in enumerate(gray_images):
        glcm = graycomatrix(gray, distances=[1], angles=[0], levels=256, symmetric=True, normed=True)
        contrast = graycoprops(glcm, 'contrast')[0, 0]
        dissimilarity = graycoprops(glcm, 'dissimilarity')[0, 0]
        homogeneity = graycoprops(glcm, 'homogeneity')[0, 0]
        glcm_features.append([contrast, dissimilarity, homogeneity])
        if i == 0:  # Save the first GLCM features for visualization
            glcm_sample = [contrast, dissimilarity, homogeneity]
    return np.array(glcm_features), glcm_sample

def visualize_features(hog_image, lbp_image, glcm_sample):
    # Visualize HOG
    plt.figure(figsize=(6, 4))
    plt.imshow(hog_image, cmap='gray')
    plt.title('HOG Visualization')
    plt.axis('off')
    plt.savefig('hog_visualization.png')
    plt.close()

    # Visualize LBP
    plt.figure(figsize=(6, 4))
    plt.imshow(lbp_image, cmap='gray')
    plt.title('LBP Visualization')
    plt.axis('off')
    plt.savefig('lbp_visualization.png')
    plt.close()

    # Visualize GLCM features as a bar chart
    plt.figure(figsize=(6, 4))
    glcm_labels = ['Contrast', 'Dissimilarity', 'Homogeneity']
    plt.bar(glcm_labels, glcm_sample, color='skyblue')
    plt.title('GLCM Features Visualization')
    plt.ylabel('Value')
    plt.savefig('glcm_visualization.png')
    plt.close()

def extract_vgg19_features(images):
    # Load VGG19 with fully connected layers (include_top=True)
    model = VGG19(weights='imagenet', include_top=True, input_shape=(224, 224, 3))
    # Create models for fc2 and predictions layers
    fc2_model = tf.keras.Model(inputs=model.input, outputs=model.get_layer('fc2').output)
    pred_model = tf.keras.Model(inputs=model.input, outputs=model.get_layer('predictions').output)
    fc2_features = fc2_model.predict(images, batch_size=32)# Extract features
    pred_features = pred_model.predict(images, batch_size=32)
    vgg_features = np.concatenate((fc2_features, pred_features), axis=1) # Concatenate features
    return vgg_features

# Step 3: Feature Fusion and Dimensionality Reduction
def feature_fusion(hog, lbp, glcm, vgg19):
    # Serial-based fusion
    low_level = np.hstack((hog, lbp, glcm))
    fused = np.hstack((low_level, vgg19))
    # Standardize features
    scaler = StandardScaler()
    fused = scaler.fit_transform(fused)
    # PCA
    pca = PCA(n_components=0.95)  # Retain 95% variance
    fused_reduced = pca.fit_transform(fused)
    # Visualize PCA explained variance
    plt.figure(figsize=(8, 4))
    plt.bar(range(len(pca.explained_variance_ratio_)), pca.explained_variance_ratio_, color='skyblue')
    plt.title('PCA Explained Variance Ratio')
    plt.xlabel('Principal Component')
    plt.ylabel('Explained Variance Ratio')
    plt.savefig('pca_variance.png')
    plt.close()
    return fused_reduced

# Step 4 & 5: Classification and Evaluation
def classify_and_evaluate(features, labels):
    # Use class_weight='balanced' to handle class imbalance
    svm = LinearSVC(max_iter=10000, class_weight='balanced')
    kf = KFold(n_splits=10, shuffle=True, random_state=42)

    accuracies, precisions, recalls, f1s = [], [], [], []
    all_y_true, all_y_pred = [], []

    for train_idx, test_idx in kf.split(features):
        X_train, X_test = features[train_idx], features[test_idx]
        y_train, y_test = labels[train_idx], labels[test_idx]

        svm.fit(X_train, y_train)
        y_pred = svm.predict(X_test)

        accuracies.append(accuracy_score(y_test, y_pred))
        precisions.append(precision_score(y_test, y_pred, average='binary'))
        recalls.append(recall_score(y_test, y_pred, average='binary'))
        f1s.append(f1_score(y_test, y_pred, average='binary'))

        all_y_true.extend(y_test)
        all_y_pred.extend(y_pred)

    # Print metrics
    print(f"Accuracy: {np.mean(accuracies):.4f} ± {np.std(accuracies):.4f}")
    print(f"Precision: {np.mean(precisions):.4f} ± {np.std(precisions):.4f}")
    print(f"Recall: {np.mean(recalls):.4f} ± {np.std(recalls):.4f}")
    print(f"F1-Score: {np.mean(f1s):.4f} ± {np.std(f1s):.4f}")

    # Confusion Matrix
    cm = confusion_matrix(all_y_true, all_y_pred)
    plt.figure(figsize=(6, 4))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Male', 'Female'], yticklabels=['Male', 'Female'])
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.savefig('confusion_matrix.png')
    plt.close()

# Main execution
if __name__ == "__main__":
    # Unzip files
    unzip_files()

    # Load and preprocess data
    images, grays, labels = load_data()

    # Extract features and get visualizations for the first image
    hog_features, hog_image = extract_hog_features(grays)
    lbp_features, lbp_image = extract_lbp_features(grays)
    glcm_features, glcm_sample = extract_glcm_features(grays)
    vgg19_features = extract_vgg19_features(images)

    # Visualize HOG, LBP, and GLCM for the first image
    visualize_features(hog_image, lbp_image, glcm_sample)

    # Feature fusion and dimensionality reduction
    fused_features = feature_fusion(hog_features, lbp_features, glcm_features, vgg19_features)

    # Classify and evaluate
    classify_and_evaluate(fused_features, labels)

[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1119s[0m 16s/step
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2183s[0m 30s/step
Accuracy: 0.8476 ± 0.0188
Precision: 0.8536 ± 0.0236
Recall: 0.8373 ± 0.0327
F1-Score: 0.8450 ± 0.0226
