# Load packages

In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms

  from .autonotebook import tqdm as notebook_tqdm


### CNN model in PyTorch

There are several ways to write a CNN model in PyTorch. In this lab, you will use PyTorch's _Sequential_ class (`torch.nn.Sequential`), which is similar to the `Sequential` API of TensorFlow/Keras. The syntax is introduced further on.



In [83]:
import os 
from PIL import Image 
from torch.utils.data import Dataset
import numpy as np

class SkinDataset(Dataset):
    """Image-folder dataset that optionally pairs every image with a label.

    The directory is listed once at construction time and the file names are
    kept in sorted order. Labels are matched to files *by position*: ``y[i]``
    must be the label of ``sorted(os.listdir(image_dir))[i]``.

    Args:
        image_dir: Directory whose entries are all treated as image files.
        y: Sequence with one label per file, or ``None`` for unlabeled data.
        transform: Optional callable invoked as ``transform(image=array)`` that
            returns a mapping with an ``"image"`` entry (the calling convention
            used by the albumentations pipelines in this notebook).

    Raises:
        ValueError: If ``y`` is given and its length differs from the number
            of files found in ``image_dir``.
    """

    def __init__(self, image_dir, y, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        self.y = y
        # Sorted so that the index -> file mapping is deterministic; the order
        # returned by os.listdir is arbitrary.
        self.images = sorted(os.listdir(image_dir))
        self.idx = []

        # Keep an array view of the labels so a single label can be picked per
        # sample regardless of whether ``y`` is a list, array or Series.
        self._labels = None if y is None else np.atleast_1d(np.asarray(y))
        if self._labels is not None and len(self._labels) != len(self.images):
            raise ValueError(
                f"Got {len(self._labels)} labels for {len(self.images)} files in {image_dir!r}"
            )

    def __len__(self):
        """Return the number of files found in ``image_dir``."""
        return len(self.images)

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            ``(image, label)`` when labels were provided, otherwise ``image``.
            ``image`` is the RGB pixel array, or whatever ``transform`` returns
            under its ``"image"`` key when a transform is set.
        """
        img_path = os.path.join(self.image_dir, self.images[index])
        # The context manager closes the underlying file once the pixels are copied.
        with Image.open(img_path) as img:
            image = np.array(img.convert("RGB"))

        if self.transform is not None:
            image = self.transform(image=image)["image"]

        # Return a value whether or not a transform is configured.
        if self._labels is None:
            return image
        return image, self._labels[index]

In [79]:
import torch 
import torchvision
from torch.utils.data import DataLoader 
import numpy as np 


def get_loaders(
    train_dir,
    y_train,
    val_dir,
    y_val,
    test_dir,
    y_test,
    batch_size,
    train_transform,
    val_transform,
    test_transform,
    num_workers=4,
    pin_memory=True,
):
    """Build the train / validation / test ``DataLoader`` objects.

    Each split is wrapped in a ``SkinDataset`` created from its image
    directory, its labels and its transform.

    Args:
        train_dir, val_dir, test_dir: Image directory of each split.
        y_train, y_val, y_test: Labels of each split, passed to ``SkinDataset`` as ``y``.
        batch_size: Batch size shared by the three loaders.
        train_transform, val_transform, test_transform: Transform of each split.
        num_workers: Worker processes per loader.
        pin_memory: Forwarded to ``DataLoader``.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``. Only the training
        loader shuffles; the two evaluation loaders iterate in dataset order so
        that their outputs are reproducible.
    """

    def _make_loader(image_dir, labels, transform, shuffle):
        # One place for the DataLoader settings shared by the three splits.
        dataset = SkinDataset(image_dir=image_dir, transform=transform, y=labels)
        return DataLoader(
            dataset,
            batch_size=batch_size,
            num_workers=num_workers,
            pin_memory=pin_memory,
            shuffle=shuffle,
        )

    train_loader = _make_loader(train_dir, y_train, train_transform, shuffle=True)
    val_loader = _make_loader(val_dir, y_val, val_transform, shuffle=False)
    test_loader = _make_loader(test_dir, y_test, test_transform, shuffle=False)

    return train_loader, val_loader, test_loader

def get_test_loaders(
    test_img_dir,
    batch_size,
    test_transform,
    num_workers=4,
    pin_memory=True,
):
    """Build a ``DataLoader`` over an unlabeled image directory.

    Args:
        test_img_dir: Directory with the images to predict on.
        batch_size: Batch size of the loader.
        test_transform: Transform applied to every image.
        num_workers: Worker processes of the loader.
        pin_memory: Forwarded to ``DataLoader``.

    Returns:
        A ``DataLoader`` that yields image batches only (the dataset is built
        with ``y=None``). It does not shuffle, so the k-th prediction
        corresponds to ``loader.dataset.images[k]``.
    """
    test_ds = SkinDataset(image_dir=test_img_dir, y=None, transform=test_transform)
    # No shuffling: without labels, iteration order is the only link between a
    # prediction and the file it was computed from.
    test_loader = DataLoader(
        test_ds,
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=pin_memory,
        shuffle=False,
    )
    return test_loader

In [33]:
import glob
import random
import shutil


# Destination folders of the three splits. makedirs also creates the missing
# parent ('data/Train_CNN') and is a no-op when the folder already exists.
Train_directory = 'data/Train_CNN/Train/'
os.makedirs(Train_directory, exist_ok=True)
Val_directory = 'data/Train_CNN/Val/'
os.makedirs(Val_directory, exist_ok=True)
Test_directory = 'data/Train_CNN/Test/'
os.makedirs(Test_directory, exist_ok=True)

# Create training, validation and test splits (70% / 15% / remainder).
Raw_directory = 'data/saved_crops_train'
train_files = sorted(glob.glob(Raw_directory + '/*.jpg'))

# A fixed seed makes the split reproducible: re-running this cell copies the
# same files into the same folders instead of mixing a new random split into
# them. A local generator is used so the global NumPy RNG state is untouched.
# NOTE: the folders are never emptied here; files left over from a run with a
# different split have to be removed by hand.
SPLIT_SEED = 42
indices = list(np.random.default_rng(SPLIT_SEED).permutation(len(train_files)))

train_split = int(len(train_files) * 0.7)
val_split = int(len(train_files) * 0.15)
# The test split takes whatever is left, so no image is dropped by rounding.
test_split = len(train_files) - train_split - val_split

train_imgs = [train_files[i] for i in indices[:train_split]]
val_imgs = [train_files[i] for i in indices[train_split:train_split + val_split]]
test_imgs = [train_files[i] for i in indices[train_split + val_split:]]

# Copy images to their split folder. The stored name is the part of the
# original file name that follows the last 'd_'.
for split_files, split_dir in (
    (train_imgs, Train_directory),
    (test_imgs, Test_directory),
    (val_imgs, Val_directory),
):
    for src_path in split_files:
        stored_name = os.path.basename(src_path).split('d_')[-1]
        shutil.copy(src_path, os.path.join(split_dir, stored_name))


In [None]:
# Target processing
import pandas as pd


def _isic_id(path):
    """Return 'ISIC_' + the text between the last '_' and the first following '.' of ``path``."""
    return 'ISIC_' + path.split('_')[-1].split('.')[0]


def _stored_name(path):
    """Return the file name under which the split cell copies ``path`` (text after the last 'd_')."""
    return os.path.basename(path).split('d_')[-1]


# IDs (and therefore labels) are ordered by stored file name, i.e. the
# sorted-name order of each split folder as long as the folder holds exactly
# that split's files. The original shuffled order of train_imgs / val_imgs /
# test_imgs cannot be recovered from a directory listing, so labels kept in
# that order could not be matched to files by position.
train_ids = [_isic_id(p) for p in sorted(train_imgs, key=_stored_name)]
test_ids = [_isic_id(p) for p in sorted(test_imgs, key=_stored_name)]
val_ids = [_isic_id(p) for p in sorted(val_imgs, key=_stored_name)]

train_dataset = pd.read_csv('metadataTrain.csv')
y = train_dataset[['ID', 'CLASS']]

# One ID -> CLASS lookup table instead of scanning the frame for every image.
# drop_duplicates keeps the first row of a repeated ID, like `.values[0]` did.
_class_by_id = y.drop_duplicates(subset='ID').set_index('ID')['CLASS'].to_dict()


def get_y(train_ids, i):
    """Return the CLASS value recorded in the metadata for ``train_ids[i]``.

    Raises:
        KeyError: If the ID has no row in 'metadataTrain.csv'.
    """
    image_id = train_ids[i]
    try:
        return _class_by_id[image_id]
    except KeyError:
        raise KeyError(f"ID {image_id!r} not found in metadataTrain.csv") from None


y_train = [get_y(train_ids, i) for i in range(len(train_ids))]
y_test = [get_y(test_ids, i) for i in range(len(test_ids))]
y_val = [get_y(val_ids, i) for i in range(len(val_ids))]

In [84]:
from zmq import device
import torch 
import albumentations as A 
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm


# Image folders of the three labeled splits and of the unlabeled submission set.
Train_directory = 'data/Train_CNN/Train'
Val_directory = 'data/Train_CNN/Val'
Test_directory = 'data/Train_CNN/Test'
Submission_directory = 'data/saved_crops_test'

# Hyper-parameters shared by the cells below.
IMAGE_HEIGHT = 224
IMAGE_WIDTH = 224
BATCH_SIZE = 32
NUM_WORKERS = 2
LEARNING_RATE = 1e-4


def _resize_step():
    """Return a fresh resize step to IMAGE_HEIGHT x IMAGE_WIDTH."""
    return A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH)


def _to_tensor_steps():
    """Return fresh final steps shared by every pipeline.

    With mean 0, std 1 and max_pixel_value 255, ``A.Normalize`` only divides
    the pixel values by 255; ``ToTensorV2`` then converts the array to a tensor.
    """
    return [
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
        ),
        ToTensorV2(),
    ]


# Training pipeline: resize, random geometric / contrast augmentation, rescale.
# NOTE(review): A.RandomContrast is reportedly deprecated in newer
# albumentations releases in favour of A.RandomBrightnessContrast - confirm
# against the installed version.
train_transform = A.Compose(
    [
        _resize_step(),
        A.Rotate(limit=35, p=1.0),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.RandomContrast(limit=0.6),
        *_to_tensor_steps(),
    ],
)

# Evaluation pipelines: deterministic resize + rescale only. Validation and
# test get separate Compose objects built from the same recipe.
val_transform = A.Compose([_resize_step(), *_to_tensor_steps()])
test_transform = A.Compose([_resize_step(), *_to_tensor_steps()])

train_loader, val_loader, test_loader = get_loaders(
    train_dir=Train_directory,
    y_train=y_train,
    val_dir=Val_directory,
    y_val=y_val,
    test_dir=Test_directory,
    y_test=y_test,
    batch_size=BATCH_SIZE,
    train_transform=train_transform,
    val_transform=val_transform,
    test_transform=test_transform,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

# Uses NUM_WORKERS like the loaders above instead of a separate hard-coded value.
submission_loader = get_test_loaders(
    test_img_dir=Submission_directory,
    batch_size=BATCH_SIZE,
    test_transform=test_transform,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)
print('train_loader: ', len(train_loader.dataset), 'val_loader: ', len(val_loader.dataset),'test_loader: ', len(test_loader.dataset),'submission_loader: ', len(submission_loader.dataset) )

train_loader:  11916 val_loader:  2553 test_loader:  2554 submission_loader:  5677




In [39]:
import tensorflow as tf 
# Builds and compiles a Keras image classifier: an ImageNet-pretrained
# InceptionV3 backbone followed by global average pooling, dropout and an
# 8-way softmax layer.
IMG_SHAPE = (224,224,3)  # size of thumbnails used by the internal classification model
# NOTE(review): this mapping lists 5 classes while the prediction layer below
# has 8 units, and neither dictionary is used in the visible part of the
# notebook - looks like a leftover from another project; confirm.
CLASS_TO_INDEX = {
            "Negative": 0,
            "Primordial": 1,
            "Primary": 2,
            "Secondary": 3,
            "Tertiary": 4,
        }  # how to convert the provided follicle classes to numbers
# Reverse lookup: class index -> class name.
INDEX_TO_CLASS = {value: key for key, value in CLASS_TO_INDEX.items()}
# create classifier model based on InceptionV3 (the MobileNetV2 alternative is kept commented out below)
base_model = tf.keras.applications.InceptionV3(
    input_shape=IMG_SHAPE, include_top=False, weights="imagenet"
)
#base_model = tf.keras.applications.MobileNetV2(
#    input_shape=self.IMG_SHAPE, include_top=False, weights="imagenet"
#)
# The backbone weights are left trainable, i.e. they are fine-tuned together with the new head.
base_model.trainable = True
inputs = tf.keras.Input(shape=IMG_SHAPE)
# Preprocessing function that belongs to the chosen backbone.
preprocess_input = tf.keras.applications.inception_v3.preprocess_input
#preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
# 8 output units with softmax (see the note on CLASS_TO_INDEX above about the class count).
prediction_layer = tf.keras.layers.Dense(8, activation="softmax")

# Wire the graph: preprocess -> backbone -> pooling -> dropout -> softmax head.
x = preprocess_input(inputs)
#x = inputs
# training=False calls the backbone in inference mode even though its weights
# are trainable (per Keras semantics this affects layers such as BatchNormalization).
x = base_model(x, training=False)
x = global_average_layer(x)
x = tf.keras.layers.Dropout(0.2)(x)
outputs = prediction_layer(x)

model = tf.keras.Model(inputs, outputs)

# Sparse categorical loss/metric: targets are expected as integer class indices.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["sparse_categorical_accuracy"],
)
# Second name bound to the same model object.
_model = model

2022-05-08 21:06:37.995235: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/slimane/Documents/personal_docs/p_env/perso_env/lib/python3.8/site-packages/cv2/../../lib64:
2022-05-08 21:06:37.995261: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2022-05-08 21:06:40.877850: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-08 21:06:40.878359: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/slimane/Documents/per

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
