In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.applications import InceptionResNetV2, InceptionV3
from tensorflow.keras.callbacks import EarlyStopping
import os
import logging

# Set up logging
# Root logger at INFO level; each record is rendered as
# "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Set random seed for reproducibility
# The same seed value is given to NumPy's global RNG and to TensorFlow.
np.random.seed(42)
tf.random.set_seed(42)

# Constants
# IMG_SIZE is used both as the `target_size` when images are loaded and as the
# first two entries of the backbone `input_shape`.
IMG_SIZE = (256, 256)  # Reduced input size for models
# BATCH_SIZE and EPOCHS are forwarded to `model.fit`; training also uses an
# early-stopping callback, so EPOCHS acts as an upper bound.
BATCH_SIZE = 16  # Reduced batch size
EPOCHS = 15
# The main program builds one InceptionResNetV2 and one InceptionV3 model per
# iteration, so the ensemble holds 2 * N_MODELS models.
N_MODELS = 2  # Number of models per architecture

# Load Data
def load_data(csv_path, img_folder):
    """Load the labelled images listed in a CSV file.

    The CSV must provide an ``id_code`` column (image file name without the
    ``.png`` extension) and a ``diagnosis`` column (the label). Every image is
    resized to ``IMG_SIZE`` while it is loaded.

    Args:
        csv_path: Path to the CSV file describing the images.
        img_folder: Directory that contains the ``<id_code>.png`` files.

    Returns:
        A tuple ``(images, labels)``. ``images`` has shape
        ``(n, IMG_SIZE[0], IMG_SIZE[1], 3)`` and ``labels`` has shape ``(n,)``;
        both follow the row order of the CSV.
    """
    logging.info("Loading data from CSV and image folder...")
    df = pd.read_csv(csv_path)

    # Fill one preallocated array instead of collecting per-image arrays in a
    # list and converting at the end, which briefly keeps two full copies of
    # the dataset in memory. `load_img` defaults to RGB, hence 3 channels.
    # NOTE(review): assumes the default channels-last image data format.
    images = np.empty(
        (len(df), IMG_SIZE[0], IMG_SIZE[1], 3),
        dtype=tf.keras.backend.floatx(),
    )

    # Only the id column is needed to locate the files, so iterate it directly
    # rather than materialising a Series per row with `iterrows()`.
    for position, id_code in enumerate(df['id_code']):
        img_path = os.path.join(img_folder, id_code + '.png')
        img = tf.keras.preprocessing.image.load_img(img_path, target_size=IMG_SIZE)
        images[position] = tf.keras.preprocessing.image.img_to_array(img)

    labels = df['diagnosis'].to_numpy()

    logging.info(f"Loaded {len(images)} images and their labels.")
    return images, labels

# Load Test Data
def load_test_data(csv_path, img_folder):
    """Load the unlabelled images listed in a CSV file.

    The CSV must provide an ``id_code`` column (image file name without the
    ``.png`` extension). Every image is resized to ``IMG_SIZE`` while it is
    loaded.

    Args:
        csv_path: Path to the CSV file describing the images.
        img_folder: Directory that contains the ``<id_code>.png`` files.

    Returns:
        An array of shape ``(n, IMG_SIZE[0], IMG_SIZE[1], 3)`` in the row
        order of the CSV.
    """
    logging.info("Loading test data from CSV and image folder...")
    df = pd.read_csv(csv_path)

    # Fill one preallocated array instead of collecting per-image arrays in a
    # list and converting at the end, which briefly keeps two full copies of
    # the dataset in memory. `load_img` defaults to RGB, hence 3 channels.
    # NOTE(review): assumes the default channels-last image data format.
    images = np.empty(
        (len(df), IMG_SIZE[0], IMG_SIZE[1], 3),
        dtype=tf.keras.backend.floatx(),
    )

    # Only the id column is needed to locate the files, so iterate it directly
    # rather than materialising a Series per row with `iterrows()`.
    for position, id_code in enumerate(df['id_code']):
        img_path = os.path.join(img_folder, id_code + '.png')
        img = tf.keras.preprocessing.image.load_img(img_path, target_size=IMG_SIZE)
        images[position] = tf.keras.preprocessing.image.img_to_array(img)

    logging.info(f"Loaded {len(images)} test images.")
    return images

# Data Augmentation
def create_data_generator():
    """Build the ``ImageDataGenerator`` that holds the augmentation settings.

    Returns:
        An ``ImageDataGenerator`` configured with random rotation, shifts,
        shear, zoom, horizontal flips and brightness changes.
    """
    logging.info("Creating data generator with augmentations...")

    # All augmentation settings in one place; passed through unchanged.
    augmentation_options = {
        'rotation_range': 180,
        'width_shift_range': 0.2,
        'height_shift_range': 0.2,
        'shear_range': 0.2,
        'zoom_range': 0.2,
        'horizontal_flip': True,
        'brightness_range': [0.8, 1.2],
        'fill_mode': 'nearest',
    }
    return ImageDataGenerator(**augmentation_options)

# Build Model
def build_model(base_model):
    """Attach a 5-way softmax classification head to a backbone.

    Args:
        base_model: Keras model whose ``input`` and ``output`` tensors are
            reused; its output is globally average-pooled before the head.

    Returns:
        A ``Model`` mapping ``base_model.input`` to the 5-class softmax output.
    """
    logging.info("Building model...")

    backbone_input = base_model.input
    classifier = Dense(5, activation='softmax')
    pooled_features = GlobalAveragePooling2D()(base_model.output)
    class_probabilities = classifier(pooled_features)

    return Model(inputs=backbone_input, outputs=class_probabilities)

# Generalized Mean Pooling
class GeM(tf.keras.layers.Layer):
    """Generalized-mean pooling over axes 1 and 2 of the input.

    Computes ``mean(max(x, epsilon) ** p, axis=(1, 2)) ** (1 / p)``.
    NOTE(review): reducing over axes (1, 2) presumably targets the spatial
    axes of a channels-last ``(batch, height, width, channels)`` tensor.

    Args:
        p: Pooling exponent. Stored as a non-trainable float variable.
        epsilon: Lower clamp applied to the input before exponentiation.
        **kwargs: Standard Keras ``Layer`` arguments such as ``name``.
    """

    def __init__(self, p=3, epsilon=1e-6, **kwargs):
        super().__init__(**kwargs)
        # Keep the exponent as a float: an integer default would create an
        # int32 variable that cannot be combined with float activations.
        self._p_value = float(p)
        self.p = tf.Variable(
            initial_value=self._p_value, trainable=False, dtype=tf.float32
        )
        self.epsilon = epsilon

    def call(self, x):
        # Match the exponent and the clamp to the input dtype so the layer
        # also works when activations are not float32.
        exponent = tf.cast(self.p, x.dtype)
        clamped = tf.maximum(x, tf.cast(self.epsilon, x.dtype))
        return tf.reduce_mean(clamped ** exponent, axis=(1, 2)) ** (1.0 / exponent)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({'p': self._p_value, 'epsilon': self.epsilon})
        return config

# Training Function
def train_model(model, X_train, y_train, X_val, y_val):
    """Compile and fit a model, monitoring validation accuracy.

    The model is compiled with the Adam optimizer and sparse categorical
    cross-entropy, then fitted for at most ``EPOCHS`` epochs in batches of
    ``BATCH_SIZE``. An early-stopping callback watches ``val_accuracy`` with a
    patience of 5 and restores the best weights.

    Args:
        model: Keras model to train; it is compiled and fitted in place.
        X_train: Training inputs.
        y_train: Training labels.
        X_val: Validation inputs.
        y_val: Validation labels.

    Returns:
        The same ``model`` object after training.
    """
    logging.info("Compiling model...")
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    early_stopping = EarlyStopping(
        monitor='val_accuracy',
        patience=5,
        restore_best_weights=True,
    )
    logging.info("Starting model training...")

    fit_options = {
        'validation_data': (X_val, y_val),
        'batch_size': BATCH_SIZE,
        'epochs': EPOCHS,
        'callbacks': [early_stopping],
        'verbose': 1,
    }
    model.fit(X_train, y_train, **fit_options)

    logging.info("Training completed.")
    return model

# Main Program
if __name__ == "__main__":
    # End-to-end pipeline: load and normalise the training images, train an
    # ensemble of InceptionResNetV2 / InceptionV3 classifiers, score the
    # averaged predictions on a held-out validation split, then predict the
    # test set and write `submission.csv`.
    logging.info("Program started.")

    # Load Data
    train_csv_path = '/kaggle/input/aptos2019-blindness-detection/train.csv'
    train_images_folder = '/kaggle/input/aptos2019-blindness-detection/train_images'

    X, y = load_data(train_csv_path, train_images_folder)
    # Divide in place so no second full-size copy of the image array is
    # allocated.
    # NOTE(review): the Keras Inception `preprocess_input` helpers scale pixels
    # to [-1, 1]; confirm that [0, 1] scaling is intended for these weights.
    X /= 255.0  # Normalize images

    # Split data into training and validation sets (stratified by label)
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

    # Create data generator
    # NOTE(review): the generator is created but never passed to training, so
    # none of its augmentations are currently applied.
    datagen = create_data_generator()

    # Model Definitions
    input_shape = (IMG_SIZE[0], IMG_SIZE[1], 3)
    models = []
    for i in range(N_MODELS):
        logging.info(f"Loading model {i + 1}: Inception ResNet V2 and Inception V3...")
        base_model1 = InceptionResNetV2(weights='/kaggle/input/inception-resnet/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5', include_top=False, input_shape=input_shape)
        base_model2 = InceptionV3(weights='/kaggle/input/inceptionv3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5', include_top=False, input_shape=input_shape)

        # Freeze layers: mark the whole pretrained backbone as non-trainable.
        # Setting `trainable` only on `layers[-1]` would leave every other
        # backbone layer trainable. This must happen before the model is
        # compiled, which `train_model` does later.
        base_model1.trainable = False
        base_model2.trainable = False

        models.append(build_model(base_model1))
        models.append(build_model(base_model2))

    # Train Models
    for i, model in enumerate(models):
        logging.info(f"Training model {i + 1}...")
        train_model(model, X_train, y_train, X_val, y_val)

    # Ensemble Predictions
    logging.info("Making predictions on validation set...")
    predictions = [model.predict(X_val) for model in models]

    # Average the per-model class probabilities, then pick the most likely class
    avg_preds = np.mean(predictions, axis=0)
    final_preds = np.argmax(avg_preds, axis=1)

    # Calculate Kappa Score
    # `weights='quadratic'` is required for the quadratic weighted kappa named
    # in the log message; the default computes the unweighted statistic.
    kappa = cohen_kappa_score(y_val, final_preds, weights='quadratic')
    logging.info(f'Validation Quadratic Weighted Kappa: {kappa:.4f}')

    # Load Test Data
    test_csv_path = '/kaggle/input/aptos2019-blindness-detection/test.csv'
    test_images_folder = '/kaggle/input/aptos2019-blindness-detection/test_images'

    X_test = load_test_data(test_csv_path, test_images_folder)
    # Same in-place scaling as the training images.
    X_test /= 255.0  # Normalize test images

    # Make Predictions on Test Data
    logging.info("Making predictions on test data...")
    test_predictions = [model.predict(X_test) for model in models]

    # Average Test Predictions
    avg_test_preds = np.mean(test_predictions, axis=0)
    final_test_preds = np.argmax(avg_test_preds, axis=1)

    # Create Submission DataFrame
    # The ids are re-read from the test CSV; `load_test_data` returns the
    # images in CSV row order, so ids and predictions line up.
    submission_df = pd.DataFrame({
        'id_code': pd.read_csv(test_csv_path)['id_code'],
        'diagnosis': final_test_preds
    })

    # Save to CSV
    submission_df.to_csv('submission.csv', index=False)
    logging.info("Submission CSV file has been created and saved.")