In [13]:
import numpy as np
import pandas as pd
import os
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import SGDClassifier
import cv2

# To ignore warnings
import warnings
warnings.filterwarnings("ignore")

In [14]:
# Generador para cargar imágenes y etiquetas en batches
def image_batch_generator(df, image_dir, target_size=(180, 180), batch_size=32):
    num_samples = len(df)
    while True:  # Generador infinito
        for offset in range(0, num_samples, batch_size):
            batch_df = df.iloc[offset:offset + batch_size]
            images = []
            labels = []

            for _, row in batch_df.iterrows():
                image_path = os.path.join(image_dir, row['label'], row['filename'])
                image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
                if image is None:
                    print(f"Error cargando la imagen: {image_path}")
                    continue
                # Preprocesar la imagen: redimensionar, normalizar, y aplanar
                image = cv2.resize(image, target_size)
                image = image.flatten()  # Convertir a vector unidimensional
                image = image.astype('float32') / 255.0
                images.append(image)
                labels.append(row['label'])

            yield np.array(images), np.array(labels)


In [15]:
# Cargar datos desde los CSV
train_df = pd.read_csv("data/Training_set.csv")
test_df = pd.read_csv("data/Testing_set.csv")

In [16]:
# Convertir las etiquetas a números
encoder = LabelEncoder()
y_train_encoded = encoder.fit_transform(train_df['label'])
y_test_encoded = encoder.transform(test_df['label'])

In [17]:
# Configurar el modelo de regresión logística con entrenamiento incremental
logistic_model = SGDClassifier(loss="log_loss", max_iter=1, warm_start=True)

In [18]:
# Entrenar manualmente con batches
batch_size = 32
num_epochs = 10

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    batch_generator = image_batch_generator(train_df, "data/train", batch_size=batch_size)

    for X_batch, y_batch in batch_generator:
        y_batch_encoded = encoder.transform(y_batch)  # Codificar etiquetas del batch
        logistic_model.partial_fit(X_batch, y_batch_encoded, classes=np.unique(y_train_encoded))
        if len(X_batch) < batch_size:
            break  # Termina la época cuando ya no hay más batches
            

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [19]:
# Evaluar en el conjunto de prueba
test_generator = image_batch_generator(test_df, "data/test", batch_size=batch_size)
X_test, y_test = next(test_generator)  # Cargar todos los datos de prueba
y_test_encoded = encoder.transform(y_test)

y_pred_test = logistic_model.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test_encoded, y_pred_test))
print("\nClassification Report:")
print(classification_report(y_test_encoded, y_pred_test))

Test Accuracy: 0.15625

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.00      0.00      0.00         2
           2       0.00      0.00      0.00         7
           3       0.00      0.00      0.00         2
           4       0.00      0.00      0.00         1
           5       0.00      0.00      0.00         2
           6       0.22      1.00      0.36         4
           7       0.00      0.00      0.00         2
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         3
          10       0.00      0.00      0.00         1
          11       0.00      0.00      0.00         1
          12       0.00      0.00      0.00         2
          13       1.00      1.00      1.00         1
          14       0.00      0.00      0.00         2

    accuracy                           0.16        32
   macro avg       0.08      0.13