# Gender Clasification Model

## Importing libraries

In [21]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score

In [22]:
# Step 2: Load and Preprocess the Images
def load_images_from_folder(folder):
    images = []
    labels = []
    for filename in os.listdir(folder):
        img = Image.open(os.path.join(folder, filename))
        img = img.resize((400, 600))  # Resize to a common size
        img = np.array(img)
        images.append(img.flatten())  # Flatten the image data
        if "female" in folder:
            labels.append(1)
        else:
            labels.append(0)
    return images, labels

In [23]:
male_images, male_labels = load_images_from_folder('pictures_dataset/raw/male/')
female_images, female_labels = load_images_from_folder('pictures_dataset/raw/female/')

# Combine male and female data
X = male_images + female_images
y = male_labels + female_labels

In [24]:

# Step 3: Split the Dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [25]:
print(len(X))
print(len(X_train))

474
379


In [26]:

# Step 4: Train the KNN Model
k = 3  # Number of neighbors (you can tune this hyperparameter)
knn_model = KNeighborsClassifier(n_neighbors=k,
                                 weights='distance', #Hacer que el modelo tenga en cuenta pesos por distancia
                                 p=1) #Ditancia euclidiana
knn_model.fit(X_train, y_train)

In [27]:

# Make predictions on the test data
y_pred = knn_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred, target_names=["Male", "Female"])

print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:\n", classification_rep)

Accuracy: 87.37%
Classification Report:
               precision    recall  f1-score   support

        Male       0.87      0.87      0.87        47
      Female       0.88      0.88      0.88        48

    accuracy                           0.87        95
   macro avg       0.87      0.87      0.87        95
weighted avg       0.87      0.87      0.87        95



In [28]:
# Test the model on new images in a test folder
test_data, test_labels = load_images_from_folder("test_new_pictures/female")
test_predictions = knn_model.predict(test_data)

# Print the predictions for the test images
female=0
for i, prediction in enumerate(test_predictions):
    if prediction==0:
        gender="Male"
    else:
        gender="Female"
        female+=1
    
    
    #print(f"Test Image {i + 1}: Predicted Gender - {gender}")

accuracy=female/len(test_predictions)


print("Number of clasifications as females = ", female)
print("Real number of females = ", len(test_predictions))
print("Number of pictures with good clasification = ", accuracy)

Number of clasifications as females =  230
Real number of females =  236
Number of pictures with good clasification =  0.9745762711864406


In [29]:
# Test the model on new images in a test folder
test_data, test_labels = load_images_from_folder("test_new_pictures/male")
test_predictions = knn_model.predict(test_data)

# Print the predictions for the test images
male=0
for i, prediction in enumerate(test_predictions):
    if prediction==1:
        gender="Female"
    else:
        gender="Male"
        male+=1
    
    
    #print(f"Test Image {i + 1}: Predicted Gender - {gender}")

accuracy=female/len(test_predictions)


print("Number of clasifications as males = ", female)
print("Real number of males = ", len(test_predictions))
print("Number of pictures with good clasification = ", accuracy)

Number of clasifications as males =  230
Real number of males =  238
Number of pictures with good clasification =  0.9663865546218487
