## Import libraries and define useful things

In [1]:
import numpy as np
import pandas as pd
from PIL import Image as PIL_Image

from collections import OrderedDict

from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, average_precision_score, f1_score
from sklearn.preprocessing import OneHotEncoder

import matplotlib
from matplotlib import pyplot as plt

# from tqdm import tqdm
from tqdm.notebook import tqdm

from scipy.special import softmax
# from scipy.special import expit
from scipy.spatial import distance

import relplot as rp

import sys
import os

# sys.path.insert(1, '../RETFound_MAE/')
# retfound_dir = os.path.dirname('../RETFound_MAE/')
# sys.path.insert(1, retfound_dir) 

import torch
import torch.nn as nn
import models_vit 
from util.pos_embed import interpolate_pos_embed
from timm.models.layers import trunc_normal_
import util.lr_decay as lrd

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from torchvision import transforms as T
# from torchvision.transforms import v2 as T

import timm
from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD

print(f'Max float : {sys.float_info.max}')
print(torch.__version__)
print(f'Cuda available : {torch.cuda.is_available()}')
print(f'Number of GPUs : {torch.cuda.device_count()}')
print(f'CUDA Version : {torch.version.cuda}')
print(f'timm Version : {timm.__version__}')

has_gpu = torch.cuda.is_available()
has_mps = torch.backends.mps.is_built() #getattr(torch, 'has_mps', False)
device = 'mps' if torch.backends.mps.is_built() else 'gpu' if torch.cuda.is_available() else 'cpu'

chkpt_dir = '../Projects/RETFound_MAE/RETFound_mae_natureCFP.pth'

input_size = 224

def prepare_model(chkpt_dir, arch='vit_large_patch16'):
    # build model
    model = models_vit.__dict__[arch](
        img_size=input_size,
        num_classes=5,
        drop_path_rate=0,
        global_pool=True,
    )
    # load model
    checkpoint = torch.load(chkpt_dir, weights_only=False, map_location=device)
    msg = model.load_state_dict(checkpoint['model'], strict=False)
    return model

Max float : 1.7976931348623157e+308
2.7.0
Cuda available : False
Number of GPUs : 0
CUDA Version : None
timm Version : 1.0.15




In [2]:
# IDRiD 
img_dir_tr = '/Users/msa/Datasets/IDRiD/DiseaseGrading/OriginalImages/TrainingSet/crop_224/'
# full_path_list_tr = sorted(glob.glob(img_dir_tr + '*' + '.jpg', recursive=False))
# print(f'Number of files in {img_dir_tr}\t{len(full_path_list_tr)}', flush=True)

csv_file_tr = '/Users/msa/Datasets/IDRiD/DiseaseGrading/Groundtruths/TrainingLabels.csv'
df_metadata_tr = pd.read_csv(csv_file_tr, low_memory=False)
df_metadata_tr = df_metadata_tr[['Image name', 'Retinopathy grade', 'Risk of macular edema ']]
file_paths = []
split = []
for idx, row in df_metadata_tr.iterrows():
    file_paths.append(img_dir_tr + str(row['Image name']) + '.png') # '.jpg')
    split.append('train')
df_metadata_tr['file_path'] = file_paths
df_metadata_tr['split'] = split
print(f'Metadata shape : {df_metadata_tr.shape}')
print(df_metadata_tr.columns)

img_dir_te = '/Users/msa/Datasets/IDRiD/DiseaseGrading/OriginalImages/TestingSet/crop_224/'
# full_path_list_te = sorted(glob.glob(img_dir_te + '*' + '.jpg', recursive=False))
# print(f'Number of files in {img_dir_te}\t{len(full_path_list_te)}', flush=True)

csv_file_te = '/Users/msa/Datasets/IDRiD/DiseaseGrading/Groundtruths/TestingLabels.csv'
df_metadata_te = pd.read_csv(csv_file_te, low_memory=False)
file_paths = []
split = []
for idx, row in df_metadata_te.iterrows():
    file_paths.append(img_dir_te + str(row['Image name']) + '.png') # '.jpg')
    split.append('test')
df_metadata_te['file_path'] = file_paths
df_metadata_te['split'] = split
print(f'Metadata shape : {df_metadata_te.shape}')
print(df_metadata_te.columns)

df_metadata = pd.concat([df_metadata_tr, df_metadata_te], axis=0)
print(f'Metadata shape : {df_metadata.shape}')
print(df_metadata.columns)

del df_metadata_tr, df_metadata_te, file_paths, split

Metadata shape : (413, 5)
Index(['Image name', 'Retinopathy grade', 'Risk of macular edema ',
       'file_path', 'split'],
      dtype='object')
Metadata shape : (103, 5)
Index(['Image name', 'Retinopathy grade', 'Risk of macular edema ',
       'file_path', 'split'],
      dtype='object')
Metadata shape : (516, 5)
Index(['Image name', 'Retinopathy grade', 'Risk of macular edema ',
       'file_path', 'split'],
      dtype='object')


## Preprare RETFound and extract feature embeddings

In [3]:
from bazinga import IDRiD_ImageDataset

chkpt_dir = './RETFound_mae_natureCFP.pth'
vision_encoder = prepare_model(chkpt_dir, 'RETFound_mae')

# device = torch.device('cuda')
vision_encoder.to(device)
print('Vision encoder model loaded.')

transforms = T.Compose([
    T.ToTensor(), 
    T.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD), 
])

# transforms = T.Compose([
#     # T.ToTensor(), #v1
#     T.ToImage(),  # v2
#     T.ToDtype(torch.uint8, scale=True), # v2
#     T.ToDtype(torch.float32, scale=True), # v2
#     T.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD),
# ])


# class IDRiD_ImageDataset(Dataset):
#     def __init__(self, metadata, target_column='Retinopathy grade', 
#                  transforms=None, target_transforms=None
#                 ):
#         self.metadata = metadata 
#         self.target_column = target_column        
#         self.transforms = transforms
#         self.target_transforms = target_transforms

#     def __len__(self):
#         return self.metadata.shape[0]

#     def __getitem__(self, idx):

#         filepath = self.metadata.iloc[idx]['file_path']
#         with PIL_Image.open(filepath) as img:
#             if len(img.size) < 3: # if single channel, convert to RGB
#                 img = img.convert(mode='RGB')
            
#             if self.transforms:
#                 img = self.transforms(img)
        
#         return img, int(self.metadata.iloc[idx][self.target_column])
    
#     # def get_labels(self):
#     #     # return as series for ImbalancedDatasetSampler to read into a Pandas dataframe
#     #     return self.metadata[self.target_column]


num_workers = 8
batch_size = 32

# Note that shuffle is mutually exclusive with Sampler
# shuffle_dict = {'train': False, 'test': False} #, 'test': False}

idrid_dataset = IDRiD_ImageDataset(df_metadata, transforms=transforms, target_transforms=None)

dataloader = DataLoader(idrid_dataset, batch_size=batch_size,
                        shuffle=False, sampler=None, # samplers[split], 
                        num_workers=num_workers, pin_memory=True)

Vision encoder model loaded.


In [4]:
def extract_features(vision_encoder, dataloader):
    
    out_data = OrderedDict()
    out_data['features'] = []
    out_data['labels'] = []
    
    vision_encoder.eval()

    with torch.no_grad():
        for inputs, labels in tqdm(iter(dataloader)):
                    
            inputs = inputs.to(device)
            
            # a dictionary of features from various read-out layers
            # {readout_layer_name : features}
            # with torch.autocast(device_type='cuda', dtype=torch.float16): #torch.cuda.amp.autocast():
                # with torch.inference_mode(mode=True):
            # outputs = model(inputs)
            outputs = vision_encoder.forward_features(inputs)
            outputs = torch.squeeze(outputs)
            # for readout_layername, features in outputs.items():
            outputs = np.squeeze(outputs.cpu().detach().numpy())
            out_data['features'].append(outputs)
            out_data['labels'].append(labels)
            # break # only 1 readout layer name!!
    
    
    # list to numpy array
    out_data['features'] = np.concatenate(out_data['features'], axis=0) 
    out_data['labels'] = np.concatenate(out_data['labels'], axis=0) 
        
    print(f'Features : {out_data["features"].shape}') 
    print(f'Labels : {out_data["labels"].shape}, Unique labels : {np.unique(out_data["labels"], return_counts=True)}') 
    
    return out_data

out_data = extract_features(vision_encoder, dataloader)

X, y = out_data['features'], np.asarray(out_data['labels'], dtype=np.int32)
    
with open(f'IDRiD_Features_MultiClass.npy', 'wb') as handle:
    # pickle.dump(out_data, handle, protocol=4)
    np.save(handle, out_data['features'])
    np.save(handle, out_data['labels'])

del out_data



  0%|          | 0/17 [00:00<?, ?it/s]

Features : (516, 1024)
Labels : (516,), Unique labels : (array([0, 1, 2, 3, 4]), array([168,  25, 168,  93,  62]))


In [5]:
print(f'{np.unique(y, return_counts=True)[1]/np.sum(np.unique(y, return_counts=True)[1])}')

[0.3255814  0.04844961 0.3255814  0.18023256 0.12015504]
