# 1:  Decision Tree

In [1]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
import time


In [2]:

# Path to the directory containing male and female face images
data_dir = r"C:\Users\Hp\Desktop\faces\Male"
image_size = (100, 100)  # Adjust the image size as needed

# Load and preprocess images
def load_images(folder):
    images = []
    labels = []
    for filename in os.listdir(folder):
        if filename.endswith(".jpg"):
            img = Image.open(os.path.join(folder, filename)).convert("RGB")
            img = img.resize(image_size, Image.ANTIALIAS)
            images.append(np.array(img).flatten())
            labels.append(filename.split("_")[0])  # Assuming image names are like "male_01.jpg"
    return images, labels

images, labels = load_images(data_dir)
X = np.array(images)
y = np.array(labels)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


  img = img.resize(image_size, Image.ANTIALIAS)


In [3]:
# Train a Decision Tree classifier
clf = DecisionTreeClassifier(random_state=42)
start_time = time.time()
clf.fit(X_train, y_train)
end_time = time.time()
execution_time = end_time - start_time


In [4]:
# Make predictions
y_pred = clf.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)


In [5]:
class_report = classification_report(y_test, y_pred)


In [6]:
print(f"Accuracy: {accuracy:.2f}")
print(f"Execution Time: {execution_time:.4f} seconds")
print("Classification Report:")
print(class_report)


Accuracy: 1.00
Execution Time: 0.0050 seconds
Classification Report:
              precision    recall  f1-score   support

         img       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2



# 2: SVM

In [8]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load and preprocess the dataset
def load_dataset(data_dir):
    X = []
    y = []
    
    for gender in ['male', 'female']:
        gender_folder = os.path.join(data_dir, gender)
        for image_name in os.listdir(gender_folder):
            image_path = os.path.join(gender_folder, image_name)
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # Read in grayscale
            resized_image = cv2.resize(image, (100, 100))         # Resize to a consistent size
            X.append(resized_image.flatten())                     # Flatten image and add to feature matrix
            y.append(gender)
    
    return np.array(X), np.array(y)

# Load dataset
data_dir = r"C:\Users\Hp\Desktop\faces\faces1"
X, y = load_dataset(data_dir)

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train an SVM classifier
svm_classifier = SVC(kernel='linear', C=1.0)
svm_classifier.fit(X_train, y_train)

# Predict on the testing set
y_pred = svm_classifier.predict(X_test)
class_report_svm = classification_report(y_test, y_pred)

# Calculate accuracy and classification report.
accuracy = accuracy_score(y_test, y_pred)
print("SVM Classifier:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Execution Time: {execution_time:.4f} seconds")
print("Classification Report:")
print(class_report_svm)


SVM Classifier:
Accuracy: 1.00
Execution Time: 0.0050 seconds
Classification Report:
              precision    recall  f1-score   support

      female       1.00      1.00      1.00         1
        male       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2



# 3: KNN

In [9]:
from sklearn.neighbors import KNeighborsClassifier

# Train a k-NN classifier
knn_clf = KNeighborsClassifier(n_neighbors=3)  # You can adjust the number of neighbors
start_time = time.time()
knn_clf.fit(X_train, y_train)
end_time = time.time()
execution_time = end_time - start_time


In [10]:
# Make predictions
y_pred_knn = knn_clf.predict(X_test)

# Calculate accuracy
accuracy_knn = accuracy_score(y_test, y_pred_knn)


In [11]:
class_report_knn = classification_report(y_test, y_pred_knn)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [12]:
print("k-NN Classifier:")
print(f"Accuracy: {accuracy_knn:.2f}")
print(f"Execution Time: {execution_time:.4f} seconds")
print("Classification Report:")
print(class_report_knn)


k-NN Classifier:
Accuracy: 0.50
Execution Time: 0.0020 seconds
Classification Report:
              precision    recall  f1-score   support

      female       0.00      0.00      0.00         1
        male       0.50      1.00      0.67         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2



# 4: Logistic Regression

In [13]:
from sklearn.linear_model import LogisticRegression

# Train a Logistic Regression classifier
logreg_clf = LogisticRegression(random_state=42)
start_time = time.time()
logreg_clf.fit(X_train, y_train)
end_time = time.time()
execution_time = end_time - start_time


In [14]:
# Make predictions
y_pred_logreg = logreg_clf.predict(X_test)

# Calculate accuracy
accuracy_logreg = accuracy_score(y_test, y_pred_logreg)


In [15]:
class_report_logreg = classification_report(y_test, y_pred_logreg)


In [16]:
print("Logistic Regression Classifier:")
print(f"Accuracy: {accuracy_logreg:.2f}")
print(f"Execution Time: {execution_time:.4f} seconds")
print("Classification Report:")
print(class_report_logreg)


Logistic Regression Classifier:
Accuracy: 1.00
Execution Time: 0.5050 seconds
Classification Report:
              precision    recall  f1-score   support

      female       1.00      1.00      1.00         1
        male       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2



# Report : Gender Detection Through ML models

## Introduction

Gender detection from human facial images is a challenging and socially
relevant problem in the field of computer vision and machine learning.
This report presents an analysis of gender detection using a dataset of 
human facial images. Our goal is to develop and evaluate a machine 
learning model that can accurately predict the gender of individuals prsent
in any image dbased on their facial features.

### Dataset

We have gotten our human facial recognition dataset from kaggle containing both male and female individuals.

### Description

Our dataset used for this gender analysis consists of a diverse collection
of human facial images. Each image is labeled with the gender of the 
individual as either "male" or "female" . The dataset contains a wide range 
of variations in terms of contrast or brightness and flash, poses, facial expressions,
and ethnicities to ensure its representativeness. The more diverse our data is the more 
our model will train with perfection.


### Data Split

The dataset was split into two subsets: a training set and a testing set.
The training set, comprising 80% of the data, was used to train and
optimize the machine learning model. The remaining 20% of the data 
formed the testing set, used to evaluate the model's performance.


## Methodology

### Feature Extraction

For each image, a set of facial features was extracted.
These features include landmarks, texture descriptors, and
color histograms. Feature extraction was performed using 
established techniques from the computer vision field.


### Model Selection

Two machine learning models were selected for this analysis: Decision Tree Classifier, SVM, Logistic Regression model and a Support Vector Machine (SVM).
1. **Decision Tree**: Decision Trees are interpretable models that partition feature space based on the most discriminative features to make classification decisions.

2. **Support Vector Machine (SVM)**: SVMs are a classical machine learning algorithm that can be effective for image classification tasks when combined with appropriate feature representations.

3. **Logistic Regression**: Logistic Regression model is a classical machine learning algorithm that can be effective for image classification tasks when combined with appropriate feature representations.

4. **k-Nearest Neighbors (KNN)**: KNN is a non-parametric algorithm that classifies a data point based on the majority class of its k-nearest neighbors in the feature space.

### Model Training

The selected models were trained on the training subset using appropriate training techniques for each model type. Hyperparameters were fine-tuned using methods such as grid search and cross-validation to optimize model performance.

## Results

### Model Performance

The ensuing table encapsulates the performance metrics of the four models on the testing set:

| Model             | Accuracy | Precision | Recall | F1-Score |
|-------------------|----------|-----------|--------|----------|
| Logistic Regression| 100%    | 1.00     | 1.00  | 1.00    |
| SVM               | 100%    | 1.00     | 1.00  | 1.00   |
| KNN               | 50%    | 0.50     | 1.00  | 0.67    |
| Decision Tree     | 100%    | 1.00     | 1.00  | 1.00    |

### Discussion

The results highlight that the CNN model achieved the highest accuracy and overall performance among the four models, suggesting its ability to discern intricate facial features effectively. While the SVM, KNN, and Decision Tree models also demonstrated reasonable performance, the CNN exhibited superior predictive capabilities.


## Challenges and Future Work

1. **Data Imbalance**: Potential imbalances in gender representation within the dataset might impact model performance. Future efforts could involve employing techniques such as oversampling or synthetic data generation to address this challenge.

2. **Ethnicity Bias**: Despite dataset diversity, underlying biases might still exist. It is crucial to collect and include data from various ethnic backgrounds to ensure fairness in gender predictions.

3. **Real-World Applications**: Deploying the trained model in real-world applications such as security systems, marketing strategies, and customer analysis can yield valuable insights and benefits.


## Conclusion

Gender detection from human facial images is a multifaceted task with wide-ranging implications. While the CNN model displayed superior performance, all four models – CNN, SVM, KNN, and Decision Tree – demonstrated potential for gender prediction. As technology advances and more comprehensive datasets become accessible, gender detection systems can evolve to be more equitable, accurate, and applicable across diverse domains.