In [None]:
import os
import pandas as pd
import numpy as np
import cv2
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
# 1. Load the dataset (train CSV with image paths and labels)
def load_data(csv_path, images_folder, image_size=(128, 128)):
    """Load labelled training images as flattened pixel vectors.

    Reads ``csv_path`` with pandas and, for every row, loads the image named
    by the ``id_code`` column from ``images_folder`` (appending ``.png`` when
    the name does not already end with it), resizes it to ``image_size`` and
    flattens it to a 1-D array. Rows whose image cannot be read are skipped
    with a printed warning, so the result may contain fewer entries than the
    CSV has rows.

    Args:
        csv_path: Path to a CSV file with ``id_code`` and ``diagnosis`` columns.
        images_folder: Directory that contains the image files.
        image_size: ``(width, height)`` handed to ``cv2.resize``. Defaults to
            ``(128, 128)``.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays holding one flattened
        image per readable row and the matching ``diagnosis`` values.
    """
    # Read ids as text: otherwise pandas turns an all-numeric id column into
    # integers, which drops leading zeros and has no .endswith().
    data = pd.read_csv(csv_path, dtype={'id_code': str})
    images = []
    labels = []

    # Walk the two needed columns directly instead of building a Series per
    # row with iterrows().
    for id_code, label in zip(data['id_code'], data['diagnosis']):
        # str() also covers missing ids (NaN), which then simply fail to load.
        image_name = str(id_code)

        # Append .png if it is not already in the file name
        if not image_name.endswith('.png'):
            image_name += '.png'

        image_path = os.path.join(images_folder, image_name)

        # cv2.imread signals failure by returning None rather than raising.
        image = cv2.imread(image_path)
        if image is None:
            print(f"Warning: Image {image_path} not found or cannot be opened.")
            continue

        # Resize to a common size, then flatten to a 1-D feature vector.
        images.append(cv2.resize(image, image_size).flatten())
        labels.append(label)

    return np.array(images), np.array(labels)

In [None]:
# 2. Load test data (without labels, just images)
def load_test_data(csv_path, test_images_folder, image_size=(128, 128)):
    """Load unlabelled test images as flattened pixel vectors.

    Reads ``csv_path`` with pandas and, for every row, loads the image named
    by the ``id_code`` column from ``test_images_folder`` (appending ``.png``
    when the name does not already end with it), resizes it to ``image_size``
    and flattens it to a 1-D array. Rows whose image cannot be read are
    skipped with a printed warning and are absent from both return values.

    Args:
        csv_path: Path to a CSV file with an ``id_code`` column.
        test_images_folder: Directory that contains the image files.
        image_size: ``(width, height)`` handed to ``cv2.resize``. Defaults to
            ``(128, 128)``.

    Returns:
        A tuple ``(test_images, image_names)``: a NumPy array with one
        flattened image per readable row, and a list with the corresponding
        ``id_code`` values exactly as they appear in the CSV.
    """
    # Read ids as text: otherwise pandas turns an all-numeric id column into
    # integers, which drops leading zeros and has no .endswith().
    test_data = pd.read_csv(csv_path, dtype={'id_code': str})
    test_images = []
    image_names = []

    for id_code in test_data['id_code']:
        # str() also covers missing ids (NaN), which then simply fail to load.
        image_name = str(id_code)

        if not image_name.endswith('.png'):
            image_name += '.png'

        image_path = os.path.join(test_images_folder, image_name)

        # cv2.imread signals failure by returning None rather than raising.
        image = cv2.imread(image_path)
        if image is None:
            print(f"Warning: Image {image_path} not found or cannot be opened.")
            continue

        # Resize to a common size, then flatten to a 1-D feature vector.
        test_images.append(cv2.resize(image, image_size).flatten())
        # Keep the id as written in the CSV (no extension added).
        image_names.append(id_code)

    return np.array(test_images), image_names

In [None]:
# 3. Train the SVM model
def train_svm_model(X_train, y_train, random_state=42):
    """Fit a standardise-then-linear-SVM pipeline on the training data.

    Args:
        X_train: Training feature matrix, one sample per row.
        y_train: Class label for every row of ``X_train``.
        random_state: Seed forwarded to ``SVC``. Because the classifier is
            built with ``probability=True``, a fixed seed makes the
            probability estimates repeatable between runs. Pass ``None`` to
            leave the classifier unseeded.

    Returns:
        The fitted scikit-learn pipeline (``StandardScaler`` followed by
        ``SVC``).
    """
    # probability=True keeps predict_proba available on the fitted model.
    classifier = SVC(kernel='linear', probability=True, random_state=random_state)

    # Scale the features before they reach the SVM.
    svm_pipeline = make_pipeline(StandardScaler(), classifier)

    # Fit the model
    svm_pipeline.fit(X_train, y_train)

    return svm_pipeline

In [None]:
# 4. Predict and generate output CSV
def predict_and_generate_csv(model, test_images, image_names, output_csv):
    """Predict a diagnosis for every test image and save the result as CSV.

    Args:
        model: Fitted estimator exposing ``predict``.
        test_images: Feature matrix with one row per entry of ``image_names``.
        image_names: Identifiers written to the ``id_code`` column.
        output_csv: Destination path of the CSV file. When it is empty or
            cannot be written (for example because its directory is missing
            or read-only), the file is written to ``submission.csv`` in the
            current working directory instead.
    """
    fallback_csv = 'submission.csv'
    predictions = model.predict(test_images)

    # Create a DataFrame to store predictions
    output_df = pd.DataFrame({
        'id_code': image_names,
        'diagnosis': predictions
    })

    # Save the DataFrame to the requested file; an unusable destination must
    # not throw away predictions that were already computed.
    saved_path = output_csv or fallback_csv
    try:
        output_df.to_csv(saved_path, index=False)
    except OSError as exc:
        print(f"Warning: could not write {saved_path} ({exc}); using {fallback_csv} instead.")
        saved_path = fallback_csv
        output_df.to_csv(saved_path, index=False)

    print(f"Output saved to {saved_path}")

In [None]:
# Main program
if __name__ == "__main__":
    # Define paths
    train_csv_path = '/kaggle/input/aptos2019-blindness-detection/train.csv'  # CSV file with image names and labels
    train_images_folder = '/kaggle/input/aptos2019-blindness-detection/train_images'  # Folder containing training images
    test_csv_path = '/kaggle/input/aptos2019-blindness-detection/test.csv'  # CSV file with test image names
    test_images_folder = '/kaggle/input/aptos2019-blindness-detection/test_images'  # Folder containing test images
    # Write predictions to the working directory: the dataset folder under
    # /kaggle/input is an input mount and is not meant to be written to.
    output_csv_path = 'submission.csv'  # CSV file to save predictions

    # 1. Load training data
    print("Loading training data...")
    X, y = load_data(train_csv_path, train_images_folder)

    # load_data skips unreadable images, so a wrong folder yields an empty
    # set; fail here with a clear message instead of deep inside scikit-learn.
    if len(X) == 0:
        raise ValueError(f"No training images could be loaded from {train_images_folder}.")

    # 2. Split data into training and validation sets.
    # The raw diagnosis labels are used directly: the classifier accepts
    # arbitrary class labels and predict() returns them unchanged, so the
    # submission contains real diagnosis values rather than encoder indices.
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # 3. Train the SVM model
    print("Training SVM model...")
    svm_model = train_svm_model(X_train, y_train)

    # 4. Validate the model (optional)
    y_val_pred = svm_model.predict(X_val)
    accuracy = accuracy_score(y_val, y_val_pred)
    print(f"Validation Accuracy: {accuracy * 100:.2f}%")

    # 5. Load test data
    print("Loading test data...")
    test_images, image_names = load_test_data(test_csv_path, test_images_folder)

    if len(image_names) == 0:
        raise ValueError(f"No test images could be loaded from {test_images_folder}.")

    # 6. Predict and write the submission file
    print("Generating predictions and saving to CSV...")
    predict_and_generate_csv(svm_model, test_images, image_names, output_csv_path)