In [8]:
from keras.models import load_model, Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Activation
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import joblib
import numpy as np
import lightgbm as lgb
from sklearn.metrics import accuracy_score

In [9]:
train_dir = 'C:/Users/DELL/Downloads/New_folder/SEC_Five/dataset/train' 
test_dir  = 'C:/Users/DELL/Downloads/New_folder/SEC_Five/dataset/test'

In [10]:
# Set the input shape
input_shape = (128, 128, 3)
batch_size = 32

In [11]:
# Use ImageDataGenerator for data augmentation
data_generator = ImageDataGenerator(rescale=1./255)

In [12]:
# Load the training images
train_generator = data_generator.flow_from_directory(
    train_dir,
    target_size=(input_shape[0], input_shape[1]),
    batch_size=batch_size,
    class_mode='binary',  # Set to 'binary' for binary classification
    shuffle=False
)

Found 12017 images belonging to 2 classes.


In [13]:
# Load the test images
test_generator = data_generator.flow_from_directory(
    test_dir,
    target_size=(input_shape[0], input_shape[1]),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False
)

Found 216 images belonging to 2 classes.


In [14]:
# Define the CNN model up to the feature extraction layer
model = Sequential([
    Conv2D(32, (3, 3), input_shape=input_shape),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(64),
    Activation('relu'),
    Dropout(0.5)
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [15]:
# Extract features from training images 
train_features = model.predict(train_generator)
train_features = train_features.reshape(train_features.shape[0], -1)  # Flatten features
train_labels = train_generator.classes

[1m  1/376[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:07[0m 179ms/step

  self._warn_if_super_not_called()


[1m376/376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 44ms/step


In [16]:
# Extract features from test images 
test_features = model.predict(test_generator)
test_features = test_features.reshape(test_features.shape[0], -1)
test_labels = test_generator.classes

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step


In [17]:
# Use LightGBM with binary classification objective
lgb_classifier = lgb.LGBMClassifier(
    boosting_type='gbdt',
    objective='binary',       # Set for binary classification
    num_leaves=31,
    max_depth=-1,
    learning_rate=0.05,       # Lower learning rate for potential improvement
    n_estimators=200,         # Increase number of trees for better accuracy
    subsample=0.8,            # Use subsampling to prevent overfitting
    colsample_bytree=0.8,
    random_state=42
)
lgb_classifier.fit(train_features, train_labels)

[LightGBM] [Info] Number of positive: 6006, number of negative: 6011
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001742 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11200
[LightGBM] [Info] Number of data points in the train set: 12017, number of used features: 52
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499792 -> initscore=-0.000832
[LightGBM] [Info] Start training from score -0.000832


In [18]:
# Make predictions on the test data
test_predictions = lgb_classifier.predict(test_features)
# Calculate accuracy
test_accuracy = accuracy_score(test_labels, test_predictions)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

Test Accuracy: 75.93%


In [19]:
# Save the Keras CNN model as an .h5 file
model.save('cnn_feature_extractor_gender.h5')
# Save the trained LightGBM model
joblib.dump(lgb_classifier, 'lgb_classifier_gender.pkl')
# Load the saved models
cnn_model = load_model('cnn_feature_extractor_gender.h5')
lgb_classifier = joblib.load('lgb_classifier_gender.pkl')



In [20]:
def classify_image(img_path):
    # Load and preprocess the new image
    img = image.load_img(img_path, target_size=(128, 128))  # Resize to match the CNN input
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    img_array /= 255.0  # Normalize to match the training preprocessing

    # Extract features using the CNN model
    features = cnn_model.predict(img_array)
    features = features.reshape(1, -1)  # Flatten to 1D for lightgbm

    # Classify the features with the lightgbm model
    prediction = lgb_classifier.predict(features)
    
    # Interpret the prediction as male or female
    class_labels = {0: 'Female', 1: 'Male'}  # Update based on binary classification
    result = class_labels[prediction[0]]
    return result

# Test the function with a new image
img_path = r'C:/Users/DELL/Downloads/New_folder/SEC_Five/dataset/train/female/113037.jpg'
result = classify_image(img_path)
print(f"The image is classified as: {result}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
The image is classified as: Female
