In [None]:
import numpy as np
import pandas as pd
import cv2
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

In [None]:
# Function to load training data
def load_data(csv_path, images_folder, image_size=(128, 128)):
    """Load the labelled training images as flat feature vectors.

    Parameters
    ----------
    csv_path : str
        CSV file with an ``id_code`` column (image file stem) and a
        ``diagnosis`` column (label).
    images_folder : str
        Directory holding ``<id_code>.png`` for every row of the CSV.
    image_size : tuple of int, optional
        ``(width, height)`` each image is resized to before flattening.
        Defaults to ``(128, 128)``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(images, labels)``: one flattened image per CSV row and the
        matching ``diagnosis`` values, both in CSV order.

    Raises
    ------
    FileNotFoundError
        If an image listed in the CSV is missing or cannot be decoded.
    """
    data = pd.read_csv(csv_path)
    images = []
    labels = []

    # Walk the two columns directly instead of building a Series per row
    for image_name, label in zip(data['id_code'], data['diagnosis']):
        image_path = f"{images_folder}/{image_name}.png"
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread reports failure by returning None, not by raising;
            # fail here with the offending path instead of inside cv2.resize
            raise FileNotFoundError(f"Could not read image: {image_path}")

        image = cv2.resize(image, image_size)
        images.append(image.flatten())  # 1D feature vector
        labels.append(label)

    return np.array(images), np.array(labels)

In [None]:
# Function to load test data
def load_test_data(csv_path, images_folder, image_size=(128, 128)):
    """Load the unlabelled test images as flat feature vectors.

    Parameters
    ----------
    csv_path : str
        CSV file with an ``id_code`` column (image file stem).
    images_folder : str
        Directory holding ``<id_code>.png`` for every row of the CSV.
    image_size : tuple of int, optional
        ``(width, height)`` each image is resized to before flattening.
        Defaults to ``(128, 128)``.

    Returns
    -------
    tuple
        ``(images, image_names)``: a numpy array with one flattened image per
        CSV row, and the list of ``id_code`` values in the same order.

    Raises
    ------
    FileNotFoundError
        If an image listed in the CSV is missing or cannot be decoded.
    """
    data = pd.read_csv(csv_path)
    images = []
    image_names = []

    for image_name in data['id_code']:
        image_path = f"{images_folder}/{image_name}.png"
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread reports failure by returning None, not by raising;
            # fail here with the offending path instead of inside cv2.resize
            raise FileNotFoundError(f"Could not read image: {image_path}")

        image = cv2.resize(image, image_size)
        images.append(image.flatten())  # 1D feature vector
        image_names.append(image_name)

    return np.array(images), image_names

In [None]:
# Function to generate predictions and save them to a CSV file
def predict_and_generate_csv(model, test_images, image_names, output_csv_path):
    """Predict a diagnosis for every test image and write the results to CSV.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(test_images)``.
    test_images : array-like
        Feature matrix passed unchanged to ``model.predict``.
    image_names : sequence
        Image identifiers, in the same order as the rows of ``test_images``.
    output_csv_path : str
        Destination path of the CSV file.
    """
    predictions = model.predict(test_images)

    # Create DataFrame to store image names and predictions
    output_df = pd.DataFrame({
        'id_code': image_names,
        'diagnosis': predictions
    })

    # Write to the path the caller asked for; index=False keeps the file to
    # exactly the two named columns
    output_df.to_csv(output_csv_path, index=False)

In [None]:
# Main Program
if __name__ == "__main__":
    # Paths
    train_csv_path = '/kaggle/input/aptos2019-blindness-detection/train.csv'  # CSV file with training image names and labels
    train_images_folder = '/kaggle/input/aptos2019-blindness-detection/train_images'  # Folder with training images
    test_csv_path = '/kaggle/input/aptos2019-blindness-detection/test.csv'  # CSV file with test image names
    test_images_folder = '/kaggle/input/aptos2019-blindness-detection/test_images'  # Folder with test images
    # /kaggle/input is a read-only mount, so the predictions are written to
    # the notebook's writable working directory instead.
    output_csv_path = '/kaggle/working/submission.csv'  # CSV file to save predictions

    # 1. Load training data
    print("Loading training data...")
    X, y = load_data(train_csv_path, train_images_folder)

    # 2. Split the data into training and validation sets.
    # The models are fitted on the raw `diagnosis` labels: scikit-learn
    # classifiers accept arbitrary class labels and return them from
    # predict(), so the submission holds real diagnosis values. Fitting on
    # LabelEncoder output would require an inverse_transform that
    # predict_and_generate_csv never performs.
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # 3. Train and validate the Decision Tree classifier
    print("Training Decision Tree model...")
    decision_tree_model = DecisionTreeClassifier(random_state=42)
    decision_tree_model.fit(X_train, y_train)

    y_val_pred_dt = decision_tree_model.predict(X_val)
    accuracy_dt = accuracy_score(y_val, y_val_pred_dt)
    print(f"Decision Tree Validation Accuracy: {accuracy_dt * 100:.2f}%")

    # 4. Train and validate the Random Forest classifier
    print("Training Random Forest model...")
    random_forest_model = RandomForestClassifier(n_estimators=100, random_state=42)
    random_forest_model.fit(X_train, y_train)

    y_val_pred_rf = random_forest_model.predict(X_val)
    accuracy_rf = accuracy_score(y_val, y_val_pred_rf)
    print(f"Random Forest Validation Accuracy: {accuracy_rf * 100:.2f}%")

    # 5. Load test data
    print("Loading test data...")
    test_images, image_names = load_test_data(test_csv_path, test_images_folder)

    # 6. Choose the better model based on validation accuracy
    # (a tie falls back to the Decision Tree)
    if accuracy_rf > accuracy_dt:
        print("Using Random Forest for final predictions...")
        best_model = random_forest_model
    else:
        print("Using Decision Tree for final predictions...")
        best_model = decision_tree_model

    # 7. Predict test labels and generate CSV output
    print("Generating predictions and saving to CSV...")
    predict_and_generate_csv(best_model, test_images, image_names, output_csv_path)