In [71]:
import os
import pandas as pd
import numpy as np
import torch
from pathlib import Path
import torchvision
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import random

from PIL import Image
from imgaug import augmenters as iaa
import cv2

In [72]:
# Uncomment and run once if imgaug is not installed in the kernel's environment:
# %pip install imgaug

In [73]:
# Device
# Preferred GPU index. If CUDA is available but this index does not exist
# (machine with fewer GPUs), fall back to the first GPU instead of creating a
# device handle that would only fail later when tensors are moved to it.
GPU_INDEX = 3
if torch.cuda.is_available():
    _gpu = GPU_INDEX if GPU_INDEX < torch.cuda.device_count() else 0
    DEVICE = torch.device(f"cuda:{_gpu}")
else:
    DEVICE = torch.device("cpu")
DEVICE

device(type='cpu')

In [74]:
# Directory containing the image files; MedicalImg joins it with each file name.
path2images = "./train_validation_images"
# CSV with one row per image (columns used later: 'images' and 'labels').
path2csv = "./data.csv"

## Split train and validation

In [75]:
# Load the label table: 'images' holds the file name, 'labels' the class id.
labels = pd.read_csv(path2csv)
labels.head()

Unnamed: 0,images,labels
0,1e5ac457-5800-43de-91cf-813a5a18e148.jpg,0
1,5e554e89-b4d6-4926-afe8-d2819693617c.jpg,0
2,8595f95a-5216-44c7-bdf4-dc325f230ecd.jpg,0
3,101b7a09-3116-4f28-afc4-e4705999a038.jpg,0
4,44c452dd-f004-4e6f-8a9b-dc779e4d48d2.jpg,0


In [76]:
# Share of each class in the full label table (class-imbalance check).
# One loop replaces ten copy-pasted print statements; the printed text is
# unchanged.
NUM_CLASSES = 10
n_total = len(labels)
for cls in range(NUM_CLASSES):
    n_cls = len(labels.loc[labels['labels'] == cls])
    print(f"ratio of {cls}: {n_cls / n_total}")

ratio of 0: 0.011026392961876833
ratio of 1: 0.21043988269794722
ratio of 2: 0.11835777126099707
ratio of 3: 0.07777126099706745
ratio of 4: 0.10932551319648094
ratio of 5: 0.12058651026392962
ratio of 6: 0.11718475073313783
ratio of 7: 0.04586510263929619
ratio of 8: 0.08961876832844574
ratio of 9: 0.09982404692082111


In [87]:
SEED = 123
# Seed every RNG imported by this notebook, not only NumPy: the DataLoader
# shuffles with the torch RNG, so seeding NumPy alone does not make a run
# repeatable. The NumPy seed is unchanged, so the split below is identical.
# NOTE(review): imgaug keeps its own random state; seed it separately if the
# augmentations themselves must be reproducible - confirm.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Shuffle the row positions once, then cut them 80 % / 20 %.
shuffle_idx = np.arange(len(labels))
np.random.shuffle(shuffle_idx)
train_size = int(len(labels) * 0.8)

# reset_index() (without drop=True) gives each split a fresh 0..n-1 index and
# keeps the row's position in `labels` in a new 'index' column.
train = labels.iloc[shuffle_idx[:train_size], :].reset_index()
validation = labels.iloc[shuffle_idx[train_size:], :].reset_index()

# The two splits must partition the table exactly.
assert len(train) + len(validation) == len(labels)



In [88]:
# Inspect the training split ('index' is the row's position in `labels`).
train

Unnamed: 0,index,images,labels
0,4575,6e2032e7-af76-48fc-9867-505fc3c490ad.jpg,5
1,186,11e7855f-436f-4e79-b77d-8a9b31f25ba5.jpg,1
2,6681,1e6f7816-0268-4689-b537-cc6cdcbf07e9.jpg,7
3,8498,08ca0d61-ff46-41ff-9cc2-3406f85096f0.jpg,9
4,4428,62b0ed2f-bff7-4649-a287-8016f332cc81.jpg,4
...,...,...,...
6815,345,6ee2bfe6-3fed-4aee-8761-9b2843185863.jpg,1
6816,880,e5fdff53-683e-40ff-b0bc-2eb89527424a.jpg,1
6817,5786,6ae19490-83af-4d97-8a08-ec6577849ece.jpg,6
6818,3902,a6bdd3e6-242f-4a42-ba75-f944186ddaed.jpg,4


In [89]:
# Inspect the validation split ('index' is the row's position in `labels`).
validation

Unnamed: 0,index,images,labels
0,4134,9b4de5f4-3b99-486b-89a7-878f088ab2bd.jpg,4
1,1053,e5753982-ee6e-4fee-9b34-5d631fa2d7d4.jpg,1
2,3284,95c114e7-17ee-49dd-aa35-b6f5815dc12c.jpg,3
3,5603,ac623c2f-c484-4f63-8e71-31dce8f7ea1d.jpg,6
4,4657,99cc2d5a-ad92-4719-8d57-feb66f8932c9.jpg,5
...,...,...,...
1700,7382,a052728b-3065-4ff6-9508-9efb4d1e0dab.jpg,8
1701,7763,512f85ae-e6dc-41c6-abbb-860fda404dd9.jpg,9
1702,5218,b568c5f4-0fa9-4c22-ab55-ee100ac5cbab.jpg,5
1703,1346,a9c2ad77-94fe-485b-a89f-4b7fecace3db.jpg,1


# Dataset

In [90]:
# Normalize images with ImageNet statistics: one mean / standard deviation
# value per channel, passed to torchvision.transforms.Normalize below.
train_mean = [0.485, 0.456, 0.406]
train_std = [0.229, 0.224, 0.225]

In [91]:
# augmentation  
class ImgAugTransform:
    """Callable that runs an image through a random imgaug pipeline.

    The pipeline is built once in ``__init__`` and stored in ``self.aug``.
    Calling the instance converts the input to a NumPy array and returns the
    augmented array.
    """

    def __init__(self):
        # NOTE: the resize step (iaa.Scale((224, 224))) is currently disabled.
        # Gaussian blur, applied to a quarter of the images.
        blur = iaa.Sometimes(0.25, iaa.GaussianBlur(sigma=(0, 3.0)))
        # Horizontal flip for half of the images.
        flip = iaa.Fliplr(0.5)
        # Rotation sampled from [-20, 20], using the 'symmetric' fill mode.
        rotate = iaa.Affine(rotate=(-20, 20), mode='symmetric')
        # For a quarter of the images: exactly one of the two dropout variants.
        dropout_choices = [
            iaa.Dropout(p=(0, 0.1)),
            iaa.CoarseDropout(0.1, size_percent=0.5),
        ]
        dropout = iaa.Sometimes(0.25, iaa.OneOf(dropout_choices))
        # Hue / saturation shift, sampled per channel.
        colour = iaa.AddToHueAndSaturation(value=(-10, 10), per_channel=True)

        self.aug = iaa.Sequential([blur, flip, rotate, dropout, colour])

    def __call__(self, img):
        """Return the augmented version of ``img`` as a NumPy array."""
        pixels = np.array(img)
        augmented = self.aug.augment_image(pixels)
        return augmented
    
# Preprocessing pipeline: random imgaug augmentation, conversion to a tensor,
# then normalization with train_mean / train_std.
# (The misspelled name is kept because later cells reference it.)
custom_transfomrs = torchvision.transforms.Compose(
    [
        ImgAugTransform(),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(train_mean, train_std),
    ]
)

In [92]:
class MedicalImg(torch.utils.data.Dataset):
    """Map-style dataset pairing image files with their labels.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table with an ``images`` column (file names relative to
        ``path2images``) and a ``labels`` column.
    path2images : str
        Directory that contains the image files.
    transform : callable, optional
        Applied to the loaded RGB PIL image before it is returned. ``None``
        (and the legacy value ``False``) means "no transform".
    """

    def __init__(self, dataframe, path2images, transform=None):
        self.img_file = path2images
        self.img_id = dataframe['images']
        self.y = dataframe['labels']
        # ``False`` used to be the default but is not callable; treat it as
        # "no transform" so MedicalImg(df, path) works without a transform.
        self.transform = None if transform is False else transform

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at position ``index``.

        Raises
        ------
        IndexError
            If ``index`` is outside ``range(len(self))``.
        """
        # Positional lookup: does not depend on the DataFrame's index labels
        # and raises IndexError (not KeyError) when out of range, which is what
        # Python's sequence iteration protocol expects.
        img_path = os.path.join(self.img_file, self.img_id.iloc[index])

        # Close the file handle right after decoding, and force three channels
        # because the downstream normalization uses three per-channel values.
        with Image.open(img_path) as handle:
            img = handle.convert("RGB")

        label = self.y.iloc[index]

        if self.transform is not None:
            img = self.transform(img)

        return img, label

    def __len__(self):
        """Number of samples, i.e. rows in the label column."""
        return len(self.y)

## Settings

In [93]:
# Number of samples per batch, shared by the train and validation DataLoaders.
BATCH_SIZE = 3

In [94]:
# Training samples go through the random augmentation pipeline.
train_data = MedicalImg(train, path2images, transform=custom_transfomrs)

# Validation must be deterministic: keep the tensor conversion and the
# normalization but leave out the random imgaug step, otherwise validation
# metrics change from one pass to the next and are measured on distorted images.
validation_transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=train_mean, std=train_std),
])
validation_data = MedicalImg(validation, path2images, transform=validation_transforms)

In [95]:
# Sanity check: number of samples in the train and validation datasets.
len(train_data), len(validation_data)

(6820, 1705)

In [103]:
# Batch iterators: the training loader reshuffles every epoch, the validation
# loader keeps the dataset order.
train_loader = DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
validation_loader = DataLoader(
    dataset=validation_data,
    batch_size=BATCH_SIZE,
)