In [None]:
import numpy as np
import pandas as pd
import cv2
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model

In [None]:
 
# Weights file for the headless ("notop") ResNet50, read from an attached dataset
# instead of being downloaded.
weights_path = '/kaggle/input/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'

# ResNet50 used purely as a feature extractor: the classification head is left
# off (include_top=False) and global average pooling (pooling='avg') collapses
# the last convolutional output into a single vector per image.
base_model = ResNet50(
    include_top=False,
    pooling='avg',
    weights=weights_path,
)


def extract_features(img_path, model, img_size=(224, 224)):
    """Return a 1-D feature vector for a single image file.

    The image is loaded and resized to ``img_size``, converted to an array,
    given a leading batch axis, run through ResNet50's ``preprocess_input``
    and finally through ``model.predict``; the prediction is flattened.

    Args:
        img_path: Path of the image file to load.
        model: Keras model used as the feature extractor. Its ``predict``
            must accept a batch array and a ``verbose`` keyword.
        img_size: ``(height, width)`` the image is resized to when loaded.

    Returns:
        numpy.ndarray: The flattened model output for this image.
    """
    img = image.load_img(img_path, target_size=img_size)
    batch = np.expand_dims(image.img_to_array(img), axis=0)  # batch of one image
    batch = preprocess_input(batch)  # ResNet50-specific input preprocessing
    # verbose=0: this function is called once per image, and with the default
    # verbosity every call prints its own progress bar, burying the notebook
    # output under thousands of lines.
    features = model.predict(batch, verbose=0)
    return features.flatten()

In [None]:
def load_and_extract_features(csv_path, images_folder, model):
    """Build the feature matrix and label vector for a labelled image set.

    Reads ``csv_path`` and, for every row, extracts features from
    ``{images_folder}/{id_code}.png`` with ``extract_features``; the row's
    ``diagnosis`` value is collected as the matching label.

    Args:
        csv_path: CSV file with ``id_code`` and ``diagnosis`` columns.
        images_folder: Directory holding one ``<id_code>.png`` per row.
        model: Feature-extraction model forwarded to ``extract_features``.

    Returns:
        tuple: ``(features, labels)`` as two NumPy arrays in CSV row order.
    """
    table = pd.read_csv(csv_path)
    id_codes = table['id_code'].tolist()
    diagnoses = table['diagnosis'].tolist()

    # One feature vector per image, in the same order as the CSV rows.
    feature_rows = [
        extract_features(f"{images_folder}/{code}.png", model)
        for code in id_codes
    ]

    return np.array(feature_rows), np.array(diagnoses)

In [None]:
def load_test_data_and_extract_features(csv_path, images_folder, model):
    """Extract features for every image listed in an unlabelled CSV.

    Args:
        csv_path: CSV file with an ``id_code`` column.
        images_folder: Directory holding one ``<id_code>.png`` per row.
        model: Feature-extraction model forwarded to ``extract_features``.

    Returns:
        tuple: ``(features, image_names)`` where ``features`` is a NumPy array
        with one entry per image and ``image_names`` is the list of
        ``id_code`` values in the same (CSV row) order.
    """
    image_names = list(pd.read_csv(csv_path)['id_code'])

    extracted = []
    for name in image_names:
        png_path = f"{images_folder}/{name}.png"
        extracted.append(extract_features(png_path, model))

    return np.array(extracted), image_names

In [None]:
def predict_and_generate_csv(model, test_features, image_names, output_csv_path):
    """Predict a label for each test sample and write the results as a CSV.

    The CSV has two columns, ``id_code`` and ``diagnosis``, and no index
    column. It is written to ``output_csv_path``; if that location cannot be
    written (``OSError``), the file is written to ``submission.csv`` in the
    current working directory instead. The path actually used is printed.

    Args:
        model: Fitted estimator exposing ``predict(test_features)``.
        test_features: Feature matrix, one row per test image.
        image_names: ``id_code`` values aligned with ``test_features``.
        output_csv_path: Destination path for the predictions CSV.
    """
    predictions = model.predict(test_features)

    # One row per test image: its id and the predicted label.
    output_df = pd.DataFrame({
        'id_code': image_names,
        'diagnosis': predictions
    })

    # Honour the requested destination (it used to be ignored in favour of a
    # hard-coded file name while the message still reported the argument).
    saved_path = output_csv_path
    try:
        output_df.to_csv(saved_path, index=False)
    except OSError:
        # Destination not writable (e.g. a read-only dataset mount or a
        # missing directory): keep the previous behaviour of writing to the
        # working directory so the predictions are not lost.
        saved_path = 'submission.csv'
        output_df.to_csv(saved_path, index=False)

    print(f"Predictions saved to {saved_path}")

In [None]:
# Main Program
if __name__ == "__main__":
    # Input locations (competition dataset).
    train_csv_path = '/kaggle/input/aptos2019-blindness-detection/train.csv'  # CSV with image names and labels
    train_images_folder = '/kaggle/input/aptos2019-blindness-detection/train_images'  # Folder with training images
    test_csv_path = '/kaggle/input/aptos2019-blindness-detection/test.csv'  # CSV with test image names
    test_images_folder = '/kaggle/input/aptos2019-blindness-detection/test_images'  # Folder with test images
    # Predictions go to the working directory: the /kaggle/input tree is a
    # read-only dataset mount on Kaggle, so it is not a valid output location.
    output_csv_path = 'submission.csv'  # Output CSV file for predictions

    # 1. Load and extract features from the training data
    print("Loading and extracting features from training data...")
    X, y = load_and_extract_features(train_csv_path, train_images_folder, base_model)

    # 2. Split data into training and validation sets.
    # The raw labels are used as targets. RandomForestClassifier accepts them
    # directly and predicts in the same label space, so the predictions written
    # to the CSV are real diagnosis values and never encoder indices that would
    # still need an inverse transform.
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    ### Random Forest Classifier ###
    print("Training Random Forest model...")
    random_forest_model = RandomForestClassifier(n_estimators=100, random_state=42)
    random_forest_model.fit(X_train, y_train)

    # Validate the model on the held-out 20%
    y_val_pred = random_forest_model.predict(X_val)
    accuracy_rf = accuracy_score(y_val, y_val_pred)
    print(f"Random Forest Validation Accuracy: {accuracy_rf * 100:.2f}%")

    # 3. Load and extract features from the test data
    print("Loading and extracting features from test data...")
    test_features, image_names = load_test_data_and_extract_features(test_csv_path, test_images_folder, base_model)

    # 4. Predict test labels and generate CSV output
    print("Generating predictions and saving to CSV...")
    predict_and_generate_csv(random_forest_model, test_features, image_names, output_csv_path)