# Problema 1
Identificar el genero a partir de un retrato de persona.

In [1]:
import threading
from PIL import Image
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression

In [2]:
female_data = "Female Faces"
male_data = "Male Faces"

target_width = 128
target_height = 128

data_dict = {}

In [3]:
def convert_image_to_tuple(image_path):
    # Load the image
    image = Image.open(image_path).convert("RGB")
    resized_image = image.resize(
        (target_width, target_height), Image.Resampling.LANCZOS
    )
    array = np.array(resized_image).flatten()
    return tuple(array.tolist())


def load_data(data, label):
    for i in os.listdir(data):
        if (
            i.endswith(".jpg") or i.endswith(".png") or i.endswith(".jpeg")
        ):  # Add more extensions if needed
            image_path = os.path.join(data, i)
            array_tuple = convert_image_to_tuple(image_path)
            # Append the resized image to the list
            data_dict[array_tuple] = label

In [4]:
thread1 = threading.Thread(target=load_data, args=(female_data, 0))
thread2 = threading.Thread(target=load_data, args=(male_data, 1))

# Start the threads
thread1.start()
thread2.start()

# Wait for both threads to finish
thread1.join()
thread2.join()



In [5]:
keys = list(data_dict.keys())
values = list(data_dict.values())

In [6]:
X = keys  # The image data (features)
y = values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [7]:
model = LogisticRegression()
model.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [8]:
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print(report)

Accuracy: 0.6459627329192547
              precision    recall  f1-score   support

           0       0.61      0.63      0.62       146
           1       0.68      0.66      0.67       176

    accuracy                           0.65       322
   macro avg       0.64      0.64      0.64       322
weighted avg       0.65      0.65      0.65       322



In [9]:
data_test_female = "Test_images/Female/"
data_test_male = "Test_images/Male/"
test_female_array = []
test_male_array = []
for i in os.listdir(data_test_female):
    if (
        i.endswith(".jpg") or i.endswith(".png") or i.endswith(".jpeg")
    ):  # Add more extensions if needed
        image_path = os.path.join(data_test_female, i)
        array_tuple = convert_image_to_tuple(image_path)
        test_female_array.append(array_tuple)
for i in os.listdir(data_test_male):
    if (
        i.endswith(".jpg") or i.endswith(".png") or i.endswith(".jpeg")
    ):  # Add more extensions if needed
        image_path = os.path.join(data_test_male, i)
        array_tuple = convert_image_to_tuple(image_path)
        test_male_array.append(array_tuple)

In [10]:
predicted_labels_female = model.predict(test_female_array)
predicted_labels_male = model.predict(test_male_array)

In [11]:
total_female = len(predicted_labels_female)
print(sum(predicted_labels_female) / total_female)

0.07482014388489208


In [12]:
total_male = len(predicted_labels_male)
print(sum(predicted_labels_male) / total_male)

0.8941504178272981


# Usando otras medidas de distancia

In [13]:
def euclidean(a, b):
    a = np.array(a)
    b = np.array(b)
    return np.sqrt(np.exp((a - b)))


def manhattan(a, b):
    a = np.array(a)
    b = np.array(b)
    return np.abs((a - b))


def chebyshov(a, b):
    a = np.array(a)
    b = np.array(b)
    return np.max(np.abs((a - b)))

In [14]:
predicted_labels_female_euclidean = []
predicted_labels_female_manhattan = []
predicted_labels_female_chebyshov = []

for i in test_female_array:
    d1_array = []
    d2_array = []
    d3_array = []
    try:
        for j in X:
            d1 = euclidean(i, j)
            d2 = manhattan(i, j)
            d3 = chebyshov(i, j)
            d1_array.append(d1)
            d2_array.append(d2)
            d3_array.append(d3)
        min_index_d1 = d1_array.index(min(d1_array))
        min_index_d2 = d2_array.index(min(d2_array))
        min_index_d3 = d3_array.index(min(d3_array))
        predicted_labels_female_euclidean.append(y[min_index_d1])
        predicted_labels_female_manhattan.append(y[min_index_d2])
        predicted_labels_female_chebyshov.append(y[min_index_d3])
    except ValueError as error:
        print(error)
        print(d1_array)
        print(d2_array)
        print(d3_array)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [None]:
total_female = len(predicted_labels_female_euclidean)
print(f"efficiency in euclidean is: {sum(predicted_labels_female_euclidean) / total_female}" )

total_female = len(predicted_labels_female_manhattan)
print(f"efficiency in manhattan is: {sum(predicted_labels_female_manhattan) / total_female}")

total_female = len(predicted_labels_female_chebyshov)
print(f"efficiency in manhattan is: {sum(predicted_labels_female_chebyshov) / total_female}")

In [None]:
predicted_labels_male_euclidean = []
predicted_labels_male_manhattan = []
predicted_labels_male_chebyshov = []

for i in test_male_array:
    d1_array = []
    d2_array = []
    d3_array = []
    for j in X:
        d1 = euclidean(i, j)
        d2 = manhattan(i, j)
        d3 = chebyshov(i, j)
        d1_array.append(d1)
        d2_array.append(d2)
        d3_array.append(d3)
    min_index_d1 = d1_array.index(min(d1_array))
    min_index_d2 = d2_array.index(min(d2_array))
    min_index_d3 = d3_array.index(min(d3_array))
    predicted_labels_male_euclidean.append(y[min_index_d1])
    predicted_labels_male_manhattan.append(y[min_index_d2])
    predicted_labels_male_chebyshov.append(y[min_index_d3])

## Referencias

Dataset: https://www.kaggle.com/datasets/ashwingupta3012/male-and-female-faces-dataset