# AGAIN 2

In [30]:
import os
import numpy as np
from tensorflow.keras import layers, models
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf


# Step 1: Split dataset into 60% train and 40% test
def split_data_60_40(image_dir):
    all_images = []
    all_labels = []

    for folder_name in os.listdir(image_dir):
        folder_path = os.path.join(image_dir, folder_name)
        if os.path.isdir(folder_path):
            for img_name in os.listdir(folder_path):
                if img_name.lower().endswith(('.jpeg', '.jpg', '.png')):
                    all_images.append(os.path.join(folder_path, img_name))
                    all_labels.append(folder_name)

    print(f"Total images found: {len(all_images)}")
    print(f"Classes found: {set(all_labels)}")

    if not all_images:
        raise ValueError(f"No images found in the directory: {image_dir}. Check the directory structure.")

    # Split data into 60% train and 40% test
    train_images, test_images, train_labels, test_labels = train_test_split(
        all_images, all_labels, test_size=0.4, stratify=all_labels, random_state=42
    )
    return train_images, test_images, train_labels, test_labels


# Step 2: Preprocess images
def preprocess_images(image_paths, labels, class_indices, target_size=(224, 224)):
    X, y = [], []
    for img_path, label in zip(image_paths, labels):
        try:
            img = load_img(img_path, target_size=target_size)
            img_array = img_to_array(img) / 255.0
            X.append(img_array)
            y.append(class_indices[label])
        except Exception as e:
            print(f"Error processing image {img_path}: {e}")

    X = np.array(X)
    y = to_categorical(y, num_classes=len(class_indices))
    return X, y


# Step 3: Create a fine-tuned MobileNetV2 model with weighted loss
def create_finetuned_model(class_weights):
    base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    base_model.trainable = True  # Allow fine-tuning

    for layer in base_model.layers[:50]:  # Freeze fewer layers
        layer.trainable = False

    x = layers.GlobalAveragePooling2D()(base_model.output)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    output = layers.Dense(5, activation='softmax')(x)

    model = models.Model(inputs=base_model.input, outputs=output)

    # Custom weighted loss
    def weighted_categorical_crossentropy(y_true, y_pred):
        weights = tf.reduce_sum(class_weights * y_true, axis=-1)
        loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
        return loss * weights

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss=weighted_categorical_crossentropy, metrics=['accuracy'])
    return model


# Step 4: Federated Learning with Data Generators
def federated_learning_with_generators(clients_data, class_indices, class_weights, epochs=10, batch_size=16, target_size=(224, 224)):
    """
    Federated Learning implementation using data generators and weighted loss.
    """
    global_model = create_finetuned_model(class_weights)
    global_weights = global_model.get_weights()

    for round in range(epochs):
        print(f"Round {round + 1} of federated learning...")

        client_weights = []
        for client_id, client_images in enumerate(clients_data):
            print(f"Processing client {client_id + 1} with {len(client_images)} images...")

            # Create a generator for the client's data
            def client_generator(client_images, batch_size=batch_size):
                while True:
                    for i in range(0, len(client_images), batch_size):
                        batch_images = client_images[i:i + batch_size]
                        X, y = preprocess_images(
                            batch_images,
                            [os.path.basename(os.path.dirname(img)) for img in batch_images],
                            class_indices,
                            target_size=target_size
                        )
                        yield X, y

            # Train client model using generator
            client_model = create_finetuned_model(class_weights)
            client_model.set_weights(global_weights)

            steps_per_epoch = len(client_images) // batch_size
            generator = client_generator(client_images)
            client_model.fit(generator, steps_per_epoch=steps_per_epoch, epochs=1, verbose=1)

            client_weights.append(client_model.get_weights())

        # Aggregate client weights
        if client_weights:
            global_weights = [np.mean([client_w[layer] for client_w in client_weights], axis=0)
                              for layer in range(len(global_weights))]
        else:
            print("No valid client weights collected. Global weights remain unchanged.")

    global_model.set_weights(global_weights)
    return global_model


In [31]:
# Step 5: Efficient Evaluation of the Global Model
def evaluate_global_model(global_model, test_images, test_labels, class_indices, batch_size=16, target_size=(224, 224)):
    """
    Evaluate the global model efficiently by processing predictions in batches.
    """
    y_true = []
    y_pred = []

    for i in range(0, len(test_images), batch_size):
        # Batch the test images and labels
        batch_images = test_images[i:i + batch_size]
        batch_labels = test_labels[i:i + batch_size]
        X, y = preprocess_images(batch_images, batch_labels, class_indices, target_size=target_size)
        y_true.extend(np.argmax(y, axis=1))  # True labels
        y_pred.extend(np.argmax(global_model.predict(X, verbose=0), axis=1))  # Predictions

    # Compute metrics
    target_names = list(class_indices.keys())
    report = classification_report(y_true, y_pred, target_names=target_names)
    confusion = confusion_matrix(y_true, y_pred)

    # Output results
    accuracy = np.mean(np.array(y_true) == np.array(y_pred))
    print(f"Test Accuracy: {accuracy:.4f}")
    print("\nClassification Report:\n", report)
    print("\nConfusion Matrix:\n", confusion)

In [33]:
# Main Execution
if __name__ == "__main__":
    # Dataset path
    image_dir = "/kaggle/input/diabetic-retinopathy-2015-data-colored-resized/colored_images/colored_images"

    # Split data into 60% train and 40% test
    train_images, test_images, train_labels, test_labels = split_data_60_40(image_dir)
    print(f"Training images: {len(train_images)}, Testing images: {len(test_images)}")

    # Set up data generator to get class indices
    datagen = ImageDataGenerator(
        rescale=1.0 / 255,
        rotation_range=30,
        width_shift_range=0.2,
        height_shift_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        validation_split=0.2
    )
    train_generator = datagen.flow_from_directory(
        directory=image_dir,
        target_size=(224, 224),
        batch_size=16,
        class_mode='categorical',
        subset='training'
    )
    class_indices = train_generator.class_indices

    # Calculate class weights
    class_weights = compute_class_weight(
        'balanced', classes=np.unique(train_labels), y=train_labels
    )
    class_weights_array = np.array([class_weights[i] for i in range(len(class_weights))], dtype=np.float32)

    # Divide training data into federated clients
    num_clients = 4
    client_data_size = len(train_images) // num_clients
    clients_data = [train_images[i * client_data_size:(i + 1) * client_data_size] for i in range(num_clients)]

    # Federated learning with generators and weighted loss (10 Rounds)
    global_model = federated_learning_with_generators(clients_data, class_indices, class_weights_array, epochs=3, batch_size=16)

    # Efficient evaluation of the global model
    evaluate_global_model(global_model, test_images, test_labels, class_indices, batch_size=16)

Total images found: 35126
Classes found: {'No_DR', 'Proliferate_DR', 'Moderate', 'Severe', 'Mild'}
Training images: 21075, Testing images: 14051
Found 28103 images belonging to 5 classes.
Round 1 of federated learning...
Processing client 1 with 5268 images...
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 42ms/step - accuracy: 0.2836 - loss: 1.6188
Processing client 2 with 5268 images...
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 44ms/step - accuracy: 0.3126 - loss: 1.5277
Processing client 3 with 5268 images...
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 41ms/step - accuracy: 0.3421 - loss: 1.4976
Processing client 4 with 5268 images...
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 43ms/step - accuracy: 0.3241 - loss: 1.6785
Round 2 of federated learning...
Processing client 1 with 5268 images...
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 44ms/step - accuracy: 0.4164 - loss