In [1]:
from torch import nn
import torch
import os
import pandas as pd
import tqdm


In [2]:
### CLEANING UP THE DATASET BY REMOVING RETINAL FUNDUS IMAGES FEATURING DISEASES
import os
import pandas as pd
import tqdm

# Collect the file names of every fundus image whose diagnostic keywords do
# not contain "normal fundus", then delete those files from ODIR/images.
damaged_images = []

print("Scanning through the labels file...")
labels = pd.read_excel("ODIR/labels.xlsx")

# (diagnostic-keywords column, image-filename column) for each eye.
eye_columns = (
    ("Left-Diagnostic Keywords", "Left-Fundus"),
    ("Right-Diagnostic Keywords", "Right-Fundus"),
)

# iterrows() walks the rows positionally, so this no longer relies on the
# frame having a default 0..n-1 index the way labels[col][i] did.
for _, row in tqdm.tqdm(labels.iterrows(), total=len(labels)):
    for keywords_col, fundus_col in eye_columns:
        if "normal fundus" not in row[keywords_col]:
            damaged_images.append(row[fundus_col])

# Two images (left + right eye) per labelled row, minus the ones flagged above.
print(f"\nLength of the corrected dataset: {len(labels) * 2 - len(damaged_images)}")

print(f"\nDeleting the corrupted images...")
# Set lookup keeps the deletion pass linear in the number of files.
damaged_lookup = set(damaged_images)
image_files = os.listdir("ODIR/images")
for file in tqdm.tqdm(image_files):
    if file in damaged_lookup:
        os.remove(f"ODIR/images/{file}")


Scanning through the labels file...


100%|██████████| 3500/3500 [00:00<00:00, 165373.77it/s]



Length of the corrected dataset: 3098

Deleting the corrupted images...


100%|██████████| 3099/3099 [00:00<00:00, 39233.77it/s]


In [2]:
### HIDDEN FILES
import shutil

# Remove every dot-prefixed (hidden) entry found directly inside the
# processed-image directory, reporting each removal.
directory = "ODIR/newimages"
files = os.listdir(directory)
for entry in files:
    if not entry.startswith('.'):
        continue
    path = os.path.join(directory, entry)
    if not os.path.isdir(path):
        os.remove(path)  # plain hidden file
        print(f"File {entry} removed from directory.")
        continue
    shutil.rmtree(path)  # hidden directory: delete it together with its contents
    print(f"Directory {entry} removed from directory.")

In [4]:
## RENAMING FILES TO FEATURE LABEL
# Prefix every image in ODIR/images with the patient's sex taken from the
# labels table, e.g. "4114_right.jpg" -> "Female_4114_right.jpg".
image_dir = "ODIR/images/"

for file in tqdm.tqdm(os.listdir(image_dir)):
    # Skip files that already carry a sex prefix so the cell can be re-run.
    # (The original test `"name" or "female" not in file` was always true.)
    if file.lower().startswith(("male_", "female_")):
        continue

    matched_rows = labels[(labels["Left-Fundus"] == file) | (labels["Right-Fundus"] == file)]
    if matched_rows.empty:
        continue

    # One rename per file: after the first rename the source path is gone,
    # so only the first matching row is used.
    sex = matched_rows.iloc[0]["Patient Sex"]

    # Rename inside the image directory; bare file names would be resolved
    # against the current working directory instead.
    os.rename(os.path.join(image_dir, file),
              os.path.join(image_dir, f'{sex}_{file}'))

100%|██████████| 3099/3099 [00:01<00:00, 2511.83it/s]


In [7]:
### CROPPING FILES TO CENTER AROUND THE RETINA
# Warning: when running this cell, the current working directory must be the Neuro140FP dir.
import cv2
import numpy as np
from tqdm import tqdm

def preprocess_image_color(image_path, output_size=(300, 300)):
    """Crop an image to its largest bright region and resize it.

    The image is read with ``cv2.imread``, converted to HSV and thresholded so
    that every pixel with value >= 50 (any hue, any saturation) is kept. The
    bounding rectangle of the largest external contour of that mask is cropped
    out and resized to ``output_size``. When the mask yields no contour the
    whole image is resized instead.

    Args:
        image_path: Path of the image file to read.
        output_size: Target size passed to ``cv2.resize``.

    Returns:
        The resized image array (cropped when a contour was found).

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not read ``image_path``.
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        raise FileNotFoundError(f"The specified image was not found or could not be read: {image_path}")

    # HSV window: full hue and saturation range, value from 50 upwards.
    hsv_low = np.array([0, 0, 50])
    hsv_high = np.array([180, 255, 255])
    foreground = cv2.inRange(cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV), hsv_low, hsv_high)

    outlines, _ = cv2.findContours(foreground, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Default to the full frame; narrow to the biggest contour's box if any.
    region = bgr
    if outlines:
        left, top, width, height = cv2.boundingRect(max(outlines, key=cv2.contourArea))
        region = bgr[top:top + height, left:left + width]

    return cv2.resize(region, output_size, interpolation=cv2.INTER_AREA)

# Crop/resize every image from ODIR/images into ODIR/newimages, unless the
# output directory already holds the same number of entries as the input.

# Create the output directory first: os.listdir() below raises if it is
# missing, so creating it inside the loop could never take effect.
os.makedirs('ODIR/newimages/', exist_ok=True)

if len(os.listdir("ODIR/newimages")) != len(os.listdir("ODIR/images")):
    files = os.listdir("ODIR/images")
    for file in tqdm(files):
        processed_image = preprocess_image_color(f"ODIR/images/{file}")
        # cv2.imwrite reports failure through its return value rather than
        # by raising, so check it instead of silently losing an image.
        if not cv2.imwrite(f'ODIR/newimages/{file}', processed_image):
            raise OSError(f"Could not write processed image: ODIR/newimages/{file}")


In [8]:
## CREATING TRAIN AND VAL DIRECTORIES
# Derive the train/val directory paths under the dataset root and make sure
# both exist (already-existing directories are left alone).
source_dir = 'ODIR/'
train_dir, val_dir = (os.path.join(source_dir, split) for split in ('train', 'val'))
for split_dir in (train_dir, val_dir):
    os.makedirs(split_dir, exist_ok=True)

In [9]:
## MOVING INTO TRAIN AND VAL DIRECTORIES
from sklearn.model_selection import train_test_split

img_dir = 'ODIR/newimages'

# os.listdir() returns entries in arbitrary order; sorting makes the
# random_state=42 split below reproducible across machines and re-runs.
files = sorted(f for f in os.listdir(img_dir) if f.endswith('.jpg'))


def move_files(files, dest, src=None):
    """Move the named files from ``src`` into ``dest``.

    Args:
        files: Iterable of file names (not paths) to move.
        dest: Destination directory.
        src: Source directory; defaults to the module-level ``img_dir``.
    """
    src = img_dir if src is None else src
    for f in files:
        shutil.move(os.path.join(src, f), os.path.join(dest, f))


# NOTE(review): the split is done per image file, so the left and right eye
# of one patient can end up in different splits — confirm this is acceptable.
if files:
    train_files, val_files = train_test_split(files, test_size=0.2, random_state=42)
    move_files(train_files, train_dir)
    move_files(val_files, val_dir)
else:
    # Happens when the cell is re-run after the images were already moved;
    # train_test_split would raise on an empty list.
    train_files, val_files = [], []
    print(f"No .jpg files found in {img_dir}; nothing to split or move.")

In [6]:
from odir_dataset import ODIRDataset

In [7]:
### TRANSFORMS
import torch
from torchvision.transforms import v2

# Normalisation statistics shared by the training and validation pipelines.
# NOTE(review): the std values match the usual ImageNet statistics while the
# mean is a flat 0.2 — confirm this mean is intentional for this dataset.
NORM_MEAN = [0.2, 0.2, 0.2]
NORM_STD = [0.229, 0.224, 0.225]

# NOTE(review): torchvision documents v2.ToTensor as deprecated in favour of
# v2.ToImage + v2.ToDtype(torch.float32, scale=True); kept here so the cell
# still runs on the torchvision version this notebook was written against.
train_transforms = v2.Compose([
    v2.Resize(size=(300, 300)),
    v2.ToTensor(),
    v2.Normalize(mean=NORM_MEAN, std=NORM_STD)
])

# Validation images must be preprocessed exactly like training images;
# previously the Normalize step was missing here, so the model would have
# been evaluated on inputs with a different scale than it was trained on.
val_transforms = v2.Compose([
    v2.Resize(size=(300, 300)),
    v2.ToTensor(),
    v2.Normalize(mean=NORM_MEAN, std=NORM_STD)
])

In [14]:
# Build one dataset per split, each paired with its own transform pipeline.
train_data = ODIRDataset(transform=train_transforms, targ_dir="ODIR/train")
val_data = ODIRDataset(transform=val_transforms, targ_dir="ODIR/val")

Target directory: /Users/kamilkon/Desktop/Neuro140FP/ODIR/train


AttributeError: 'ODIRDataset' object has no attribute 'targ_dir'

In [12]:
# Smoke test: ask the training dataset to load the sample at index 1.
# NOTE(review): load_image is defined in odir_dataset (not shown here) —
# presumably it opens the image file for that index; confirm.
train_data.load_image(1)

NameError: name 'train_data' is not defined

In [15]:
import matplotlib.pyplot as plt
from typing import List
import random
def display_random_images(dataset: torch.utils.data.Dataset,
                          classes: List[str] = None,
                          n: int = 10,
                          display_shape: bool = True,
                          seed: int = None):
    """Plot ``n`` randomly chosen samples of ``dataset`` side by side.

    Each dataset item is indexed as ``item[0]`` (image) and ``item[1]``
    (label). The image is permuted with ``(1, 2, 0)`` before being handed to
    ``plt.imshow``.

    Args:
        dataset: Indexable dataset supporting ``len()``.
        classes: Optional list of class names indexed by label; when given,
            each panel title shows the class name.
        n: Number of samples to show. Values above 10 are reduced to 10 (and
            the shape display is switched off); values above ``len(dataset)``
            are reduced to ``len(dataset)``.
        display_shape: Whether to add the permuted image shape to the title.
        seed: Optional seed for Python's ``random`` module; ``0`` is honoured.
    """
    # Keep the row of plots readable.
    if n > 10:
        n = 10
        display_shape = False
        print("For display purposes, n shouldn't be larger than 10, setting it to 10 and removing shape display")

    # random.sample raises if asked for more items than the population holds.
    n = min(n, len(dataset))

    # `is not None` so that seed=0 is a valid seed rather than "no seed".
    if seed is not None:
        random.seed(seed)

    random_samples_idx = random.sample(range(len(dataset)), k=n)

    plt.figure(figsize=(16, 8))

    for i, targ_sample in enumerate(random_samples_idx):
        # Fetch the sample once; indexing the dataset twice would load and
        # transform the image twice.
        sample = dataset[targ_sample]
        targ_image, targ_label = sample[0], sample[1]

        # Reorder the axes for plotting.
        targ_image_adjust = targ_image.permute(1, 2, 0)

        plt.subplot(1, n, i + 1)
        plt.imshow(targ_image_adjust)
        plt.axis(False)

        # Build the title from whatever is available; previously the title
        # was only defined when `classes` was given, so calling without
        # classes raised UnboundLocalError.
        title_parts = []
        if classes:
            title_parts.append(f"Class: {classes[targ_label]}")
        if display_shape:
            title_parts.append(f"shape: {targ_image_adjust.shape}")
        if title_parts:
            plt.title("\n".join(title_parts))

In [16]:
# Repeat of the earlier smoke test: load the sample at index 1 from the
# training dataset.
# NOTE(review): load_image lives in odir_dataset (not shown here); the
# recorded error suggests it resolves the file name against the current
# working directory rather than the dataset directory — confirm.
train_data.load_image(1)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/kamilkon/Desktop/Neuro140FP/Female_4114_right.jpg'

In [46]:
# Preview five random training samples, titled with their class names;
# the fixed seed keeps the selection repeatable.
display_random_images(dataset=train_data, classes=train_data.classes, n=5, seed=42)

ValueError: Directory found where an image expected: ODIR

<Figure size 1600x800 with 0 Axes>