# In [None]:
import numpy as np
import pandas as pd
import cv2
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam

# Location of the EfficientNetB0 weights file on disk (the "notop" in the
# filename suggests weights without the classification head — TODO confirm).
weight_path = '/kaggle/input/efficientnetb0/efficientnetb0_notop.h5'
# Shared feature extractor: EfficientNetB0 without the top layer, initialised
# from weight_path. pooling='avg' requests a pooled output per image, which
# the extraction helpers in this file flatten into a 1-D feature vector.
base_model = EfficientNetB0(weights=weight_path, include_top=False, pooling='avg')

# CutMix Augmentation
def cutmix(image, image2, alpha=1.0):
    """Paste a random rectangular patch of ``image2`` onto a copy of ``image``.

    Args:
        image: Base image as an ``(H, W, C)`` array. It is never modified.
        image2: Donor image as an ``(H2, W2, C)`` array. It is resized to
            ``(H, W)`` with ``cv2.resize`` when its height or width differs.
        alpha: Parameter of the symmetric ``Beta(alpha, alpha)`` distribution
            from which the target mixing ratio is sampled.

    Returns:
        A ``(mixed_image, lam)`` tuple. ``mixed_image`` is always a new array
        and ``lam`` is the fraction of its pixels that still come from
        ``image`` (``1.0`` when no patch was pasted).
    """
    h, w, _ = image.shape
    h2, w2, _ = image2.shape

    if (h2 != h) or (w2 != w):
        image2 = cv2.resize(image2, (w, h))

    # The sampled ratio only sizes the patch; the ratio actually returned is
    # recomputed from the clipped box below.
    sampled_lam = np.random.beta(alpha, alpha)
    cut_rat = np.sqrt(1.0 - sampled_lam)
    cut_w = int(w * cut_rat)
    cut_h = int(h * cut_rat)

    # Patch centre, drawn uniformly over the image.
    cx = np.random.randint(w)
    cy = np.random.randint(h)

    # Clip the box to the image bounds; this may shrink it (even to nothing).
    bbx1 = int(np.clip(cx - cut_w // 2, 0, w))
    bby1 = int(np.clip(cy - cut_h // 2, 0, h))
    bbx2 = int(np.clip(cx + cut_w // 2, 0, w))
    bby2 = int(np.clip(cy + cut_h // 2, 0, h))

    if bbx1 >= bbx2 or bby1 >= bby2:
        # Empty patch: nothing was mixed in, so the true ratio is exactly 1
        # (returning the sampled value here would misreport the mix).
        return image.copy(), 1.0

    new_image = image.copy()
    new_image[bby1:bby2, bbx1:bbx2, :] = image2[bby1:bby2, bbx1:bbx2, :]
    lam = 1.0 - ((bbx2 - bbx1) * (bby2 - bby1)) / (h * w)

    return new_image, lam

# Extract features using EfficientNetB0
def extract_features(img_path, model, img_size=(224, 224)):
    """Return the flattened output of ``model`` for a single image file.

    The file at ``img_path`` is loaded at ``img_size``, converted to an array,
    wrapped into a batch of one, passed through ``preprocess_input`` and then
    through ``model.predict``.

    Args:
        img_path: Path of the image file to load.
        model: Object exposing ``predict``; called on the one-image batch.
        img_size: ``target_size`` handed to ``image.load_img``.

    Returns:
        The prediction for the image, flattened to one dimension.
    """
    pil_img = image.load_img(img_path, target_size=img_size)
    # Add a leading batch axis: predict() receives a batch containing one image.
    batch = image.img_to_array(pil_img)[np.newaxis, ...]
    prediction = model.predict(preprocess_input(batch))
    return prediction.flatten()

# Load and extract features with optional CutMix
def _read_rgb_image(image_path):
    """Read an image with OpenCV and return it in RGB channel order."""
    bgr = cv2.imread(image_path)
    if bgr is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    # OpenCV decodes to BGR; the non-CutMix path (load_img) feeds RGB to the
    # model, so convert to keep both paths consistent.
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


def load_and_extract_features(csv_path, images_folder, model, apply_cutmix=False):
    """Extract one feature vector per row of a labelled CSV.

    The CSV must provide ``id_code`` and ``diagnosis`` columns; each image is
    read from ``{images_folder}/{id_code}.png``.

    Args:
        csv_path: Path of the CSV listing the images and their labels.
        images_folder: Directory containing the ``.png`` files.
        model: Feature extractor exposing ``predict``.
        apply_cutmix: When true, every image is mixed with a randomly chosen
            image from the same CSV via ``cutmix`` before feature extraction.

    Returns:
        ``(features, labels)`` as NumPy arrays, in CSV row order.

    Raises:
        FileNotFoundError: In CutMix mode, if an image cannot be read.
    """
    data = pd.read_csv(csv_path)
    features = []
    labels = []

    for image_name, label in zip(data['id_code'], data['diagnosis']):
        image_path = f"{images_folder}/{image_name}.png"

        if apply_cutmix:
            img = _read_rgb_image(image_path)
            partner_name = data.loc[np.random.choice(data.index), 'id_code']
            img2 = _read_rgb_image(f"{images_folder}/{partner_name}.png")

            # NOTE: the mixing ratio is discarded and the row keeps the label
            # of the base image; labels are not blended with the partner's.
            cutmix_img, _ = cutmix(img, img2)
            cutmix_img = cv2.resize(cutmix_img, (224, 224))
            # float32 matches what img_to_array produces on the other path.
            img_array = np.expand_dims(cutmix_img.astype(np.float32), axis=0)
            img_array = preprocess_input(img_array)
            feature = model.predict(img_array).flatten()
        else:
            feature = extract_features(image_path, model)

        features.append(feature)
        labels.append(label)

    return np.array(features), np.array(labels)

# Build fully connected neural network for classification
def build_classifier(input_shape, num_classes=5):
    """Build the dense classification head trained on extracted features.

    Architecture: Dense(512, relu) -> BatchNorm -> Dropout(0.5) ->
    Dense(256, relu) -> BatchNorm -> Dropout(0.5) -> Dense(num_classes, softmax).

    Args:
        input_shape: Length of one feature vector (an int; it is wrapped into
            a 1-tuple for the first layer).
        num_classes: Number of output classes. Defaults to 5, the value that
            was previously hard-coded.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    return Sequential([
        Dense(512, activation='relu', input_shape=(input_shape,)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ])

# Compile and train the model
def compile_and_train_model(model, X_train, y_train, X_val, y_val, epochs=10, batch_size=32):
    """Compile ``model`` for integer-label classification and fit it.

    The model is compiled with Adam (learning rate 0.001), sparse categorical
    cross-entropy loss and an accuracy metric, then fitted on the training
    split with the validation split passed as ``validation_data``.

    Args:
        model: Model to compile and train in place.
        X_train, y_train: Training features and labels.
        X_val, y_val: Validation features and labels.
        epochs: Number of epochs passed to ``fit``.
        batch_size: Batch size passed to ``fit``.

    Returns:
        Whatever ``model.fit`` returns (the training history).
    """
    optimizer = Adam(learning_rate=0.001)
    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        verbose=2,
    )

# Load test data and extract features using EfficientNetB0
def load_test_data_and_extract_features(csv_path, images_folder, model):
    """Extract one feature vector per row of an unlabelled CSV.

    The CSV must provide an ``id_code`` column; each image is read from
    ``{images_folder}/{id_code}.png`` and passed to ``extract_features``.

    Args:
        csv_path: Path of the CSV listing the images.
        images_folder: Directory containing the ``.png`` files.
        model: Feature extractor forwarded to ``extract_features``.

    Returns:
        ``(features, image_names)``: a NumPy array of feature vectors and the
        list of ``id_code`` values, both in CSV row order.
    """
    frame = pd.read_csv(csv_path)
    image_names = [record['id_code'] for _, record in frame.iterrows()]
    feature_rows = [
        extract_features(f"{images_folder}/{name}.png", model)
        for name in image_names
    ]
    return np.array(feature_rows), image_names

# Predict test labels and save to CSV
def predict_and_generate_csv(model, test_features, image_names, output_csv_path):
    """Predict a class per test sample and write the results as a CSV.

    The predicted class is the argmax over axis 1 of ``model.predict``. The
    CSV has two columns, ``id_code`` and ``diagnosis``, and no index column.

    Args:
        model: Trained classifier exposing ``predict``.
        test_features: Feature matrix passed to ``model.predict``.
        image_names: Identifiers written to the ``id_code`` column, aligned
            with the rows of ``test_features``.
        output_csv_path: Destination of the CSV file. If it cannot be written
            (``OSError``, e.g. a read-only directory), the file is written to
            ``submission.csv`` in the current directory instead.
    """
    predictions = np.argmax(model.predict(test_features), axis=1)
    output_df = pd.DataFrame({
        'id_code': image_names,
        'diagnosis': predictions
    })

    saved_path = output_csv_path
    try:
        output_df.to_csv(output_csv_path, index=False)
    except OSError as err:
        # Do not lose the predictions after a long run just because the
        # requested location is not writable; use the previous default name.
        saved_path = 'submission.csv'
        print(f"Could not write {output_csv_path} ({err}); falling back to {saved_path}")
        output_df.to_csv(saved_path, index=False)

    # Report the path that was actually written.
    print(f"Predictions saved to {saved_path}")

# Main Program
if __name__ == "__main__":
    # Paths
    train_csv_path = '/kaggle/input/aptos2019-blindness-detection/train.csv'
    train_images_folder = '/kaggle/input/aptos2019-blindness-detection/train_images'
    test_csv_path = '/kaggle/input/aptos2019-blindness-detection/test.csv'
    test_images_folder = '/kaggle/input/aptos2019-blindness-detection/test_images'
    # /kaggle/input is the read-only dataset mount; results belong in the
    # writable working directory.
    output_csv_path = '/kaggle/working/submission.csv'

    # 1. Load and extract features from the training data with CutMix augmentation
    print("Loading and extracting features from training data with CutMix...")
    X, y = load_and_extract_features(train_csv_path, train_images_folder, base_model, apply_cutmix=True)

    # 2. Encode labels to numeric values
    # NOTE(review): predictions written in step 5 are encoded class indices and
    # are not mapped back through label_encoder — presumably fine because the
    # diagnosis column already holds consecutive integers; confirm.
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)

    # 3. Perform 5-Fold Cross-Validation
    # NOTE(review): X holds CutMix features only, so every validation fold is
    # scored on augmented images as well.
    n_folds = 5
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)

    fold_accuracies = []

    for fold, (train_idx, val_idx) in enumerate(skf.split(X, y_encoded)):
        print(f"Training fold {fold + 1}/{n_folds}...")

        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y_encoded[train_idx], y_encoded[val_idx]

        # Build and train a fresh FCNN classifier for this fold
        classifier_model = build_classifier(input_shape=X_train.shape[1])
        compile_and_train_model(classifier_model, X_train, y_train, X_val, y_val, epochs=10)

        # Validate the model on this fold
        y_val_pred = np.argmax(classifier_model.predict(X_val), axis=1)
        accuracy = accuracy_score(y_val, y_val_pred)
        fold_accuracies.append(accuracy)

        print(f"Fold {fold + 1} Accuracy: {accuracy * 100:.2f}%")

    # Print overall average accuracy across folds
    avg_accuracy = np.mean(fold_accuracies)
    print(f"Average Cross-Validation Accuracy: {avg_accuracy * 100:.2f}%")

    # 4. Load and extract features from the test data
    print("Loading and extracting features from test data...")
    test_features, image_names = load_test_data_and_extract_features(test_csv_path, test_images_folder, base_model)

    # 5. Predict test labels and generate CSV output
    # classifier_model is whatever the loop left behind, i.e. the model trained
    # on the last fold's training split only.
    print("Generating predictions and saving to CSV...")
    predict_and_generate_csv(classifier_model, test_features, image_names, output_csv_path)