## Gender Clasification Model

Import libraries

In [29]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score


Create a function to load the images, resize them, convert them to an array and flatten them

In [30]:
def load_images_from_folder(folder):
    images = []
    labels = []
    for filename in os.listdir(folder):
        img = Image.open(os.path.join(folder, filename))
        img = img.resize((400, 600))  # Resize to a common size
        img = np.array(img)
        images.append(img.flatten())  # Flatten the image data
        if "female" in folder:
            labels.append(1)
        else:
            labels.append(0)
    return images, labels

The function is used with female and male images

In [31]:
male_images, male_labels = load_images_from_folder('pictures_dataset/raw/male/')
female_images, female_labels = load_images_from_folder('pictures_dataset/raw/female/')

# Combine male and female data
X = male_images + female_images
y = male_labels + female_labels

The information is divided to train the model and test it

In [32]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [33]:
print(len(X))
print(len(X_train))


474
379


Choose the best K

"<font color="red">The model is trained with 80% of the data and the model is used without changing the parameters</font>"

In [34]:
k = 5  # Number of neighbors (you can tune this hyperparameter)
knn_model = KNeighborsClassifier(n_neighbors=k)
knn_model.fit(X_train, y_train)


In [35]:
# Make predictions on the test data
y_pred = knn_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred, target_names=["Male", "Female"])

print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:\n", classification_rep)

Accuracy: 86.32%
Classification Report:
               precision    recall  f1-score   support

        Male       0.85      0.87      0.86        47
      Female       0.87      0.85      0.86        48

    accuracy                           0.86        95
   macro avg       0.86      0.86      0.86        95
weighted avg       0.86      0.86      0.86        95



In [36]:
# Test the model on new images in a test folder
test_data, test_labels = load_images_from_folder("test_new_pictures/female")
test_predictions = knn_model.predict(test_data)

# Print the predictions for the test images
female=0
for i, prediction in enumerate(test_predictions):
    if prediction==0:
        gender="Male"
    else:
        gender="Female"
        female+=1
    
    
    #print(f"Test Image {i + 1}: Predicted Gender - {gender}")

accuracy=female/len(test_predictions)


print("Number of clasifications as females = ", female)
print("Real number of females = ", len(test_predictions))
print("Number of pictures with good clasification = ", accuracy)

Number of clasifications as females =  205
Real number of females =  236
Number of pictures with good clasification =  0.8686440677966102


In [37]:
# Test the model on new images in a test folder
test_data, test_labels = load_images_from_folder("test_new_pictures/male")
test_predictions = knn_model.predict(test_data)

# Print the predictions for the test images
male=0
for i, prediction in enumerate(test_predictions):
    if prediction==1:
        gender="Female"
    else:
        gender="Male"
        male+=1
    
    
    #print(f"Test Image {i + 1}: Predicted Gender - {gender}")

accuracy=male/len(test_predictions)


print("Number of clasifications as males = ", male)
print("Real number of males = ", len(test_predictions))
print("Number of pictures with good clasification = ", accuracy)

Number of clasifications as males =  181
Real number of males =  238
Number of pictures with good clasification =  0.7605042016806722


Observation:

As can be seen, the % accuracy of the model for both genders is 86%. No parameters were modified here, keeping in mind that the model is using a uniform weight for the calculation, where all the weights of the neighbors are weighted equally.

"<font color="red">The model is trained with 80% of the data and the model is used by changing the weight parameter. The parameter changes from uniform to distance</font>"

In [38]:
k = 5  # Number of neighbors (you can tune this hyperparameter)
knn_model = KNeighborsClassifier(n_neighbors=k,
                                 weights='distance') #Hacer que el modelo tenga en cuenta pesos por distancia
                                
knn_model.fit(X_train, y_train)

In [39]:
# Make predictions on the test data
y_pred = knn_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred, target_names=["Male", "Female"])

print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:\n", classification_rep)

Accuracy: 86.32%
Classification Report:
               precision    recall  f1-score   support

        Male       0.85      0.87      0.86        47
      Female       0.87      0.85      0.86        48

    accuracy                           0.86        95
   macro avg       0.86      0.86      0.86        95
weighted avg       0.86      0.86      0.86        95



In [40]:
# Test the model on new images in a test folder
test_data, test_labels = load_images_from_folder("test_new_pictures/female")
test_predictions = knn_model.predict(test_data)

# Print the predictions for the test images
female=0
for i, prediction in enumerate(test_predictions):
    if prediction==0:
        gender="Male"
    else:
        gender="Female"
        female+=1
    
    
    #print(f"Test Image {i + 1}: Predicted Gender - {gender}")

accuracy=female/len(test_predictions)


print("Number of clasifications as females = ", female)
print("Real number of females = ", len(test_predictions))
print("Number of pictures with good clasification = ", accuracy)

Number of clasifications as females =  229
Real number of females =  236
Number of pictures with good clasification =  0.9703389830508474


In [41]:
# Test the model on new images in a test folder
test_data, test_labels = load_images_from_folder("test_new_pictures/male")
test_predictions = knn_model.predict(test_data)

# Print the predictions for the test images
male=0
for i, prediction in enumerate(test_predictions):
    if prediction==1:
        gender="Female"
    else:
        gender="Male"
        male+=1
    
    
    #print(f"Test Image {i + 1}: Predicted Gender - {gender}")

accuracy=male/len(test_predictions)


print("Number of clasifications as males = ", male)
print("Real number of males = ", len(test_predictions))
print("Number of pictures with good clasification = ", accuracy)

Number of clasifications as males =  181
Real number of males =  238
Number of pictures with good clasification =  0.7605042016806722


Observation:

As can be seen, the % accuracy of the model for both genders is 96.62%. No parameters were modified here, keeping in mind that the model is using a uniform weight for the calculation, where all the weights of the neighbors are weighted equally.

"<font color="red">The model is trained with 80% of the data and the model is used by changing the weight parameter. The parameter changes from uniform to distance and the Manhattan distance is used.</font>"

In [42]:
k = 5  # Number of neighbors (you can tune this hyperparameter)
knn_model = KNeighborsClassifier(n_neighbors=k,
                                 weights='distance', #Hacer que el modelo tenga en cuenta pesos por distancia
                                 p=1) #Manhattan distance
knn_model.fit(X_train, y_train)

In [43]:
# Make predictions on the test data
y_pred = knn_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred, target_names=["Male", "Female"])

print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:\n", classification_rep)

Accuracy: 88.42%
Classification Report:
               precision    recall  f1-score   support

        Male       0.91      0.85      0.88        47
      Female       0.86      0.92      0.89        48

    accuracy                           0.88        95
   macro avg       0.89      0.88      0.88        95
weighted avg       0.89      0.88      0.88        95



In [44]:
# Test the model on new images in a test folder
test_data, test_labels = load_images_from_folder("test_new_pictures/female")
test_predictions = knn_model.predict(test_data)

# Print the predictions for the test images
female=0
for i, prediction in enumerate(test_predictions):
    if prediction==0:
        gender="Male"
    else:
        gender="Female"
        female+=1
    
    
    #print(f"Test Image {i + 1}: Predicted Gender - {gender}")

accuracy=female/len(test_predictions)


print("Number of clasifications as females = ", female)
print("Real number of females = ", len(test_predictions))
print("Number of pictures with good clasification = ", accuracy)

Number of clasifications as females =  232
Real number of females =  236
Number of pictures with good clasification =  0.9830508474576272


In [45]:
# Test the model on new images in a test folder
test_data, test_labels = load_images_from_folder("test_new_pictures/male")
test_predictions = knn_model.predict(test_data)

# Print the predictions for the test images
male=0
for i, prediction in enumerate(test_predictions):
    if prediction==1:
        gender="Female"
    else:
        gender="Male"
        male+=1
    
    
    #print(f"Test Image {i + 1}: Predicted Gender - {gender}")

accuracy=male/len(test_predictions)


print("Number of clasifications as males = ", male)
print("Real number of males = ", len(test_predictions))
print("Number of pictures with good clasification = ", accuracy)

Number of clasifications as males =  132
Real number of males =  238
Number of pictures with good clasification =  0.5546218487394958


Observation:

As can be seen, the % accuracy of the model for both genders is 96.62%. No parameters were modified here, keeping in mind that the model is using a uniform weight for the calculation, where all the weights of the neighbors are weighted equally.