# Diabetic Retinopathy
Early detection of  diabetic retinopathy, the leading cause of blindness among working aged adults.

## Importing Modules

In [1]:
import pandas as pd
import numpy as np
import cv2

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

import os

## Load Data

In [2]:
train_df = pd.read_csv('dataset/train.csv')
test_df = pd.read_csv('dataset/test.csv')
train_image_folder = 'dataset/train_images'
test_image_folder = 'dataset/test_images'

### Check data load

In [3]:
print("Train Data:")
print(train_df.head())
print("\nTest Data:")
print(test_df.head())

Train Data:
        id_code  diagnosis
0  000c1434d8d7          2
1  001639a390f0          4
2  0024cdab0c1e          1
3  002c21358ce6          0
4  005b95c28852          0

Test Data:
        id_code
0  0005cfc8afb6
1  003f0afdcd15
2  006efc72b638
3  00836aaacf06
4  009245722fa4


### Find the smallest resolution and largest resolution image for scaling

In [4]:
smallest_width = float('inf')
smallest_height = float('inf')
largest_width = 0
largest_height = 0

# Iterate through the image files in the training folder
for filename in os.listdir(train_image_folder):
    if filename.endswith('.png'):
        img = cv2.imread(os.path.join(train_image_folder, filename))
        height, width, _ = img.shape
        smallest_width = min(smallest_width, width)
        smallest_height = min(smallest_height, height)
        largest_width = max(largest_width, width)
        largest_height = max(largest_height, height)

        # Print the results
        print("min:", smallest_width, "x", smallest_height, "max:", largest_width, "x", largest_height, end='\r')

print()

smallest_width = float('inf')
smallest_height = float('inf')
largest_width = 0
largest_height = 0

# Iterate through the image files in the testing folder
for filename in os.listdir(test_image_folder):
    if filename.endswith('.png'):
        img = cv2.imread(os.path.join(test_image_folder, filename))
        height, width, _ = img.shape
        smallest_width = min(smallest_width, width)
        smallest_height = min(smallest_height, height)
        largest_width = max(largest_width, width)
        largest_height = max(largest_height, height)
    
        print("min:", smallest_width, "x", smallest_height, "max:", largest_width, "x", largest_height, end='\r')

min: 474 x 358 max: 4288 x 284888 1050 x 1050 max: 3388 x 2588
min: 640 x 480 max: 2896 x 1958

## Pre-processing

In [5]:
# Define the common image dimensions and batch size
img_width, img_height = 512, 512
batch_size = 32

# Define an image data generator for data augmentation (if needed)
train_datagen = ImageDataGenerator(
    rescale=1. / 255,  # Normalize pixel values
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True
)

# Load and preprocess the training images
train_image_data = []
for image_id in train_df['id_code']:
    img = cv2.imread('dataset/train_images/' + image_id + '.png')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (img_width, img_height))
    img = img / 255.0
    train_image_data.append(img)

train_images = np.array(train_image_data)
train_labels = to_categorical(train_df['diagnosis'], num_classes=5)

# Display the shape of train_images and train_labels to confirm
print("Shape of train_images:", train_images.shape)
print("Shape of train_labels:", train_labels.shape)

Shape of train_images: (3662, 512, 512, 3)
Shape of train_labels: (3662, 5)


In [6]:
# Load and preprocess the testing images
test_image_data = []
for image_id in test_df['id_code']:
    img = cv2.imread('dataset/test_images/' + image_id + '.png')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (img_width, img_height))
    img = img / 255.0
    test_image_data.append(img)

test_images = np.array(test_image_data)

# Display the shape of test_images to confirm
print("Shape of test_images:", test_images.shape)


error: OpenCV(4.8.1) D:\a\opencv-python\opencv-python\opencv\modules\core\src\alloc.cpp:73: error: (-4:Insufficient memory) Failed to allocate 12582528 bytes in function 'cv::OutOfMemoryError'
