In [None]:
pip install kaggle
pip install tensorflow

In [6]:
import warnings
warnings.filterwarnings("ignore")

In [18]:
# Download Datasets

import os
from kaggle.api.kaggle_api_extended import KaggleApi

raw_data_directory = 'data/raw/'
dataset_slug = 'benjaminwarner/resized-2015-2019-blindness-detection-images'

# Sub-folders of the unzipped archive that later cells read from. If they are
# already on disk the (large) download is skipped, so re-running the notebook
# neither re-fetches the archive nor needs Kaggle credentials.
required_subdirs = ('labels', 'resizedtrain19')
dataset_present = all(
    os.path.isdir(os.path.join(raw_data_directory, subdir))
    for subdir in required_subdirs
)

if dataset_present:
    print(f"Dataset already present in {raw_data_directory}; skipping download.")
else:
    os.makedirs(raw_data_directory, exist_ok=True)

    api = KaggleApi()
    api.authenticate()

    api.dataset_download_files(dataset_slug, path=raw_data_directory, unzip=True)

Dataset URL: https://www.kaggle.com/datasets/benjaminwarner/resized-2015-2019-blindness-detection-images


In [119]:
# Show what the download step left in the raw data directory, one entry per line.
raw_entries = os.listdir(raw_data_directory)
for entry_name in raw_entries:
    print(entry_name)

labels
resizedtest15
resizedtrain15
resizedtrain19
.ipynb_checkpoints
resizedtest19


In [118]:
# Remove spaces from the names of files in the raw data directory.
# `replace_with` was never defined, so the loop raised NameError as soon as it
# met a file whose name contained a space.
# NOTE(review): "_" is an assumed replacement; use "" to strip spaces instead.
replace_with = "_"

for filename in os.listdir(raw_data_directory):
    if " " not in filename:
        continue

    old_file = os.path.join(raw_data_directory, filename)

    # Only regular files are renamed; sub-directories are left untouched.
    if not os.path.isfile(old_file):
        continue

    new_file = os.path.join(raw_data_directory, filename.replace(" ", replace_with))

    # os.rename can silently replace an existing file on POSIX; never clobber data.
    if os.path.exists(new_file):
        print(f"Skipped: {old_file} -> {new_file} (target already exists)")
        continue

    os.rename(old_file, new_file)
    print(f"Renamed: {old_file} -> {new_file}")

In [7]:
import os
import pandas as pd
import json

dataset_path = "data/raw/labels/trainLabels19.csv"
json_data = 'data/data.json'  # path of the JSON cache; must remain a path (reused by later cells)

# Build the JSON cache from the labels CSV the first time only.
# The JSON text is written straight to the cache file rather than being assigned
# to `json_data`: rebinding that name to the JSON *content* made the `open()`
# below (and in later cells) receive a whole JSON document as a filename.
if not os.path.exists(json_data):
    df = pd.read_csv(dataset_path)
    df.to_json(json_data, orient='records', indent=4)

# Load the cached records as a list of {'id_code': ..., 'diagnosis': ...} dicts.
with open(json_data, 'r', encoding='utf-8') as f:
    data_as_dict = json.load(f)

print(type(data_as_dict))
data_as_dict[:10]

<class 'list'>


[{'id_code': '000c1434d8d7', 'diagnosis': 2},
 {'id_code': '001639a390f0', 'diagnosis': 4},
 {'id_code': '0024cdab0c1e', 'diagnosis': 1},
 {'id_code': '002c21358ce6', 'diagnosis': 0},
 {'id_code': '005b95c28852', 'diagnosis': 0},
 {'id_code': '0083ee8054ee', 'diagnosis': 4},
 {'id_code': '0097f532ac9f', 'diagnosis': 0},
 {'id_code': '00a8624548a9', 'diagnosis': 2},
 {'id_code': '00b74780d31d', 'diagnosis': 2},
 {'id_code': '00cb6555d108', 'diagnosis': 1}]

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

import shutil

# Training configuration shared by the data generators and the model
IMG_SIZE: int = 224    # side length of the square input images (VGG16 expects 224x224)
BATCH_SIZE: int = 32   # images per batch produced by the generators
EPOCHS: int = 10       # number of passes over the training data in model.fit

# Prepare Data - Copy Images to Class-Specific Directories
def prepare_data(data, src_folder, dst_folder):
    """Copy every labelled image into a per-class sub-folder of ``dst_folder``.

    The resulting layout is ``dst_folder/<diagnosis>/<id_code>.jpg``.

    Args:
        data: Iterable of records, each a mapping with an ``'id_code'`` key
            (image file name without the ``.jpg`` extension) and a
            ``'diagnosis'`` key (class label; converted with ``str()`` to form
            the sub-folder name).
        src_folder: Directory containing the source ``<id_code>.jpg`` files.
        dst_folder: Root directory of the class-structured copy; created if
            it does not exist.

    Raises:
        KeyError: If a record lacks ``'id_code'`` or ``'diagnosis'``.
        FileNotFoundError: If a referenced image is missing from ``src_folder``.
    """
    # exist_ok=True replaces the check-then-create pattern, so an already
    # existing folder is never an error and re-running the cell is safe.
    os.makedirs(dst_folder, exist_ok=True)

    # Remember which class folders were handled so each is created only once
    # instead of hitting the filesystem for every record.
    created_class_folders = set()

    for record in data:
        diagnosis = str(record['diagnosis'])
        img_name = f"{record['id_code']}.jpg"

        class_folder = os.path.join(dst_folder, diagnosis)
        if class_folder not in created_class_folders:
            os.makedirs(class_folder, exist_ok=True)
            created_class_folders.add(class_folder)

        shutil.copy(os.path.join(src_folder, img_name), os.path.join(class_folder, img_name))

In [8]:
# Read the label records back from the JSON file whose path is held in
# `json_data`, then copy the 2019 training images into one folder per class.
with open(json_data, mode='r', encoding='utf-8') as labels_file:
    data_as_dict = json.load(labels_file)

# Record layout, e.g.: [{'id_code': '000c1434d8d7', 'diagnosis': 2}, {'id_code': '001639a390f0', 'diagnosis': 4}]
prepare_data(
    data_as_dict,
    src_folder='data/raw/resizedtrain19',
    dst_folder='data/train',
)

In [90]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Generator configuration: multiply pixel values by 1/255 and hold back 20% of
# the images in each class folder as the validation subset.
train_datagen = ImageDataGenerator(validation_split=0.2, rescale=1./255)

# Training subset, read from the class-structured folder; sparse class mode
# yields integer labels.
train_generator = train_datagen.flow_from_directory(
    directory='data/train',
    subset='training',
    class_mode='sparse',
    batch_size=BATCH_SIZE,
    target_size=(IMG_SIZE, IMG_SIZE),
)

Found 2932 images belonging to 5 classes.


In [91]:
# Validation subset: same folder and settings as the training generator, but
# drawing the images reserved by `validation_split`.
val_generator = train_datagen.flow_from_directory(
    directory='data/train',
    subset='validation',
    class_mode='sparse',
    batch_size=BATCH_SIZE,
    target_size=(IMG_SIZE, IMG_SIZE),
)

Found 732 images belonging to 5 classes.


In [92]:
# Pre-trained VGG16 convolutional base: ImageNet weights, without the original
# classifier head (include_top=False), sized for IMG_SIZE x IMG_SIZE RGB input.
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)

In [93]:
# Keep the VGG16 weights fixed so the base acts purely as a feature extractor.
base_model.trainable = False

# Trainable classification head stacked on top of the frozen base.
classifier_head = [
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(5, activation='softmax'),  # one output per diagnosis class (0-4)
]

# Full model: frozen base followed by the head.
model = Sequential([base_model, *classifier_head])

In [94]:
# Configure training: Adam optimizer, sparse categorical cross-entropy to match
# the integer labels from the generators, and accuracy as the reported metric.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the training generator and validate after every epoch; the returned
# History object is kept in `history`.
history = model.fit(train_generator, epochs=EPOCHS, validation_data=val_generator)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [95]:
# Score the trained model on the validation generator; with the single
# 'accuracy' metric configured, evaluate() yields the loss and the accuracy.
evaluation_metrics = model.evaluate(val_generator)
val_loss, val_accuracy = evaluation_metrics
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")

Validation Accuracy: 72.54%


In [97]:
# Save the new model
# Model Version should be updated on every save
model_path = 'models/v1.0_vgg16_model.keras'

# Nothing else in the notebook creates the `models/` folder; make sure it exists
# so the save cannot fail on a fresh checkout because the directory is missing.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

model.save(model_path)

In [None]:
# Load the Model

from tensorflow.keras.models import load_model

# Load the saved model.
# The path must match the file written by the save cell
# ('models/v1.0_vgg16_model.keras'); the previous 'final_vgg16_model.keras'
# is never produced by this notebook. Update both together when the version changes.
model = load_model('models/v1.0_vgg16_model.keras')

In [None]:
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Read one image and prepare it the same way the generators prepared the
# training data: 224x224 resize, then pixel values divided by 255.
img_path = 'data/predictions/081c7ec32f27.jpg'
img = load_img(img_path, target_size=(224, 224))

# Scale the pixels and prepend a batch axis in one step, giving a batch of one image.
img_array = (img_to_array(img) / 255.0)[np.newaxis, ...]

# Run the model on that one-image batch.
prediction = model.predict(img_array)

# The predicted diagnosis is the index of the largest class probability.
predicted_class = prediction.argmax(axis=1)[0]
print(f'Predicted diagnosis class: {predicted_class}')