# In [None]:
import pandas as pd
import numpy as np
import os
import cv2
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Read the pre-split CSV files (train / validation / test) in one pass.
train_df, valid_df, test_df = map(
    pd.read_csv,
    (
        'divided_data/train-small.csv',
        'divided_data/valid-small.csv',
        'divided_data/test.csv',
    ),
)

# Prediction targets: every column after the first two
# ('Image' and 'PatientId') is treated as a disease label.
disease_labels = train_df.columns[2:]

# Create a generator for loading images
def load_image(img_path, target_size=(224, 224)):
    """Load one image as a normalized, single-channel array.

    Args:
        img_path: Path of the image file to read.
        target_size: Output size handed to ``cv2.resize``; OpenCV interprets
            it as ``(width, height)``.

    Returns:
        A float array of shape ``(height, width, 1)`` whose pixel values have
        been divided by 255.

    Raises:
        FileNotFoundError: If ``img_path`` does not point to an existing file.
        ValueError: If the file exists but OpenCV cannot decode it.
    """
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface later as an opaque error inside cv2.resize.
    if not os.path.isfile(img_path):
        raise FileNotFoundError(f"Image file not found: {img_path}")

    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError(f"Could not decode image file: {img_path}")

    img = cv2.resize(img, target_size)
    img = img / 255.0
    # Add a trailing channel axis so the array matches a (H, W, 1) model input.
    return np.expand_dims(img, axis=-1)

def _locate_image(base_dir, folders, filename):
    """Return the first existing ``base_dir/<folder>/filename`` path, or None."""
    for folder in folders:
        candidate = os.path.join(base_dir, folder, filename)
        if os.path.exists(candidate):
            return candidate
    return None


def data_generator(df, batch_size, base_dir, target_size=(224, 224), shuffle=True):
    """Endlessly yield ``(images, labels)`` batches built from ``df``.

    Each row's ``'Image'`` value is looked up in every entry directly under
    ``base_dir``; the first match is read as grayscale, resized to
    ``target_size``, divided by 255 and given a trailing channel axis. The
    label vector of a row is every column from the third one onward, cast to
    float32.

    Args:
        df: DataFrame with an ``'Image'`` column; columns from position 2 on
            are used as labels.
        batch_size: Maximum number of rows per batch (must be positive).
        base_dir: Directory whose entries are searched for the image files.
        target_size: Size passed to ``cv2.resize``.
        shuffle: When true, the rows are reshuffled at the start of each pass.

    Yields:
        Tuple ``(images, labels)`` of NumPy arrays. Rows whose image cannot be
        found or decoded are reported and left out, so a batch may hold fewer
        than ``batch_size`` samples; batches with no samples are skipped.

    Raises:
        ValueError: If ``batch_size`` is not positive or ``df`` has no rows.
        FileNotFoundError: If a complete pass over ``df`` loads no image at
            all (otherwise the generator would spin forever without yielding).

    Note:
        Being a generator, the checks run on the first ``next()`` call, not
        when the generator object is created.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    if len(df) == 0:
        raise ValueError("df contains no rows to generate batches from")

    # List the sub-folders once instead of once per row; sorted so the lookup
    # order does not depend on the filesystem.
    folders = sorted(os.listdir(base_dir))

    while True:
        if shuffle:
            df = df.sample(frac=1).reset_index(drop=True)

        yielded_any = False
        for start in range(0, len(df), batch_size):
            batch_df = df.iloc[start:start + batch_size]

            images = []
            labels = []

            for _, row in batch_df.iterrows():
                img_path = _locate_image(base_dir, folders, row['Image'])
                # cv2.imread returns None for unreadable files, so a corrupt
                # image is handled the same way as a missing one.
                img = None
                if img_path is not None:
                    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

                if img is None:
                    print(f"Image {row['Image']} not found in any folder")
                    continue

                img = cv2.resize(img, target_size)
                img = img / 255.0
                images.append(np.expand_dims(img, axis=-1))

                # Explicit positional slice: skip the first two columns.
                labels.append(row.iloc[2:].values.astype('float32'))

            if not images:
                # Never hand the consumer an empty batch.
                continue

            yielded_any = True
            yield np.asarray(images, dtype='float32'), np.asarray(labels)

        if not yielded_any:
            raise FileNotFoundError(
                f"No image listed in the DataFrame could be loaded from {base_dir!r}"
            )

# Parameters
batch_size = 32
base_dir = 'path_to_images_dir'
# Round up: the generator also emits the trailing partial batch, so one epoch
# should cover ceil(n / batch_size) steps. Floor division would drop that
# batch and give 0 steps whenever a split has fewer rows than batch_size.
train_steps = int(np.ceil(len(train_df) / batch_size))
val_steps = int(np.ceil(len(valid_df) / batch_size))

# Build the model (simple example): three conv/pool stages followed by a
# dense classifier head.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
# Dropout for regularization.
model.add(Dropout(0.5))
# One sigmoid output per disease label.
model.add(Dense(len(disease_labels), activation='sigmoid'))

# Compile the model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train the model using the data generators
history = model.fit(
    data_generator(train_df, batch_size, base_dir),
    steps_per_epoch=train_steps,
    # Do not shuffle validation data: every epoch is then scored on the same
    # samples in the same order, so the validation metrics stay comparable.
    validation_data=data_generator(valid_df, batch_size, base_dir, shuffle=False),
    validation_steps=val_steps,
    epochs=10
)

# Save the trained model
model.save('chest_xray_model.h5')