## Checking NVIDIA Driver

In [32]:
!nvidia-smi

NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.



## Importing Libraries

In [0]:
!pip install image-classifiers > /dev/null

In [0]:
import os
import gc
import cv2
import warnings
import numpy as np 
import pandas as pd 
import seaborn as sns
import multiprocessing
import tensorflow as tf
import matplotlib.pyplot as plt

from PIL import Image
from joblib import Parallel, delayed
from tqdm import tqdm_notebook as tqdm
from sklearn.metrics import roc_auc_score
from tensorflow.keras import backend as K
from tensorflow.keras.utils import Sequence
from tensorflow.keras.models import Sequential
from classification_models.tfkeras import Classifiers
from IPython.core.interactiveshell import InteractiveShell
from tensorflow.compat.v1.keras.layers import CuDNNLSTM, CuDNNGRU
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import StratifiedKFold,KFold, GroupKFold
from tensorflow.keras.applications import InceptionResNetV2,InceptionV3, ResNet50
from tensorflow.keras.layers import Flatten, Dense, Dropout, Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau,ModelCheckpoint

In [0]:
warnings.filterwarnings('ignore')
pd.set_option('display.max_rows',None)
pd.set_option('display.max_columns',None)
InteractiveShell.ast_node_interactivity = 'all'

In [37]:
PATH = 'drive/My Drive/'
os.chdir(os.path.join(PATH,'Melanoma Classification'))
os.listdir()

['.ipynb_checkpoints', 'kaggle.json', 'melanoma-120x120-jpeg.zip', '120x120']

## Connecting to Kaggle CLI

In [8]:
os.listdir()

['kaggle.json',
 'train.csv',
 'test.csv',
 'sample_submission.csv',
 '.ipynb_checkpoints',
 'Model-Checkpoint',
 'test_with_pred_on_fold_2.csv',
 'submission_on_fold_2_resnet50_best_auc_0_83347.csv']

In [0]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle
!chmod 600 ~/.kaggle/kaggle.json

## Download the Dataset (If required)

In [9]:
!kaggle datasets download -d msharuk589/melanoma-120x120-jpeg

Downloading melanoma-120x120-jpeg.zip to /content/drive/My Drive/Melanoma Classification
 99% 183M/185M [00:01<00:00, 119MB/s]
100% 185M/185M [00:01<00:00, 118MB/s]


In [0]:
!mv melanoma-120x120-jpeg.zip ./../Ion-Switching
os.chdir('./../Ion-Switching')

In [0]:
!unzip melanoma-120x120-jpeg.zip > /dev/null

In [0]:
x = "1" #@param [0,1]


## Defining constants

In [0]:
NUM_CORES = multiprocessing.cpu_count()
SPLITS = 5
DIM = (224,224)
JPEG_TRAIN_256 = './../Ion-Switching/train/'
JPEG_TEST_256 = './../Ion-Switching/test/'

## Helper Functions

In [0]:
def process_and_save(folder,file_):
  path = f'300x300/{folder}/{file_}.jpg'
  img = cv2.imread(path)
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  img = cv2.resize(img,(256,256))
  img = color_constancy(img)
  img = scale('norm',img)
  img = np.stack([img[:,:,2],img[:,:,1],img[:,:,0]],axis=2)
  plt.imsave(f'256x256/{folder}/{file_}.jpg', img) 

In [0]:
def roc(y_true, y_pred):
  return roc_auc_score(y_true, y_pred)

In [0]:
def color_constancy(img, power=6, gamma=None):
    img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    img_dtype = img.dtype

    if gamma is not None:
        img = img.astype('uint8')
        look_up_table = np.ones((256,1), dtype='uint8') * 0
        for i in range(256):
            look_up_table[i][0] = 255*pow(i/255, 1/gamma)
        img = cv2.LUT(img, look_up_table)

    img = img.astype('float32')
    img_power = np.power(img, power)
    rgb_vec = np.power(np.mean(img_power, (0,1)), 1/power)
    rgb_norm = np.sqrt(np.sum(np.power(rgb_vec, 2.0)))
    rgb_vec = rgb_vec/rgb_norm
    rgb_vec = 1/(rgb_vec*np.sqrt(3))
    img = np.multiply(img, rgb_vec)

    img = cv2.cvtColor(np.array(img), cv2.COLOR_BGR2RGB)
    return img.astype(img_dtype)

In [0]:
def scale(type_,data):
    assert type_ in ['norm','std']
    if type_ == 'std':
        return (data - np.mean(data))/np.std(data)
    elif type_ == 'norm':
        return (data - np.min(data))/(np.max(data) - np.min(data))

In [0]:
def decode_jpeg(filename,label=None):
  if label is None:
    path = '120x120/Test/'
  else:
    path = '120x120/Train/'
  file_path = path + filename
  bits = tf.io.read_file(file_path)
  img = tf.image.decode_image(bits,channels=3)
  img = tf.stack((img[:,:,2],img[:,:,1],img[:,:,0]),axis=2)
  img = tf.cast(img,tf.float32)/255.0
  if label is None:
    return img
  else:
    return img, label

## Helper Classes

In [0]:
class DataGenerator(Sequence):
    def __init__(self,df,image_path,batch_size,dim,n_channels,to_fit):
      self.df = df
      self.image_path = image_path
      self.batch = batch_size
      self.dim = dim
      self.n_channels = n_channels
      self.to_fit = to_fit
      
    def __len__(self):
      return int(np.floor(self.df.shape[0])/self.batch)
    
    def __getitem__(self, index):
      list_IDs = self.df['image_name'].values[index*self.batch : (index+1)*self.batch]
      X = self._generate_X(list_IDs)
      if self.to_fit:
          target_y = self._generate_y(list_IDs)
          return np.array(X), np.array(target_y)
      return np.array(X)
    
    def _generate_X(self,list_IDs):
      X = Parallel(n_jobs=self.batch)(delayed(self._load_image)(i) for i in list_IDs)
      return X
    
    def _generate_y(self,list_IDs):
      target_y = []
      for i, ids in enumerate(list_IDs):
          target_y.append(self.df[self.df['image_name'] == ids]['target'].values[0])
      return target_y
    
    def _load_image(self,file_):
      kernel = np.ones((5,5),np.uint8)
      img = cv2.resize(cv2.imread(os.path.join(self.image_path,file_)+'.jpg'),DIM)
      img = cv2.dilate(img, kernel, iterations=1)
      img = cv2.erode(img, kernel, iterations=1)
      return img
        

## Reading Data

In [0]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
sub = pd.read_csv('sample_submission.csv')

## Configuring GPU or TPU

In [31]:
GPU = "False" #@param ["True", "False"] 
TPU = "True" #@param ["True","False"]
if(GPU == 'True'):
  print('Setting GPU')
  K.clear_session()
  config = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,inter_op_parallelism_threads=1)
  graph = tf.compat.v1.get_default_graph()
  sess = tf.compat.v1.Session(graph=graph,config=config)
  tf.compat.v1.keras.backend.set_session(sess)
elif(TPU == 'True' and GPU == 'False'):
  print('Setting TPU')
  try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
  except ValueError:
      tpu = None

  if tpu:
      tf.config.experimental_connect_to_cluster(tpu)
      tf.tpu.experimental.initialize_tpu_system(tpu)
      strategy = tf.distribute.experimental.TPUStrategy(tpu)
  else:
      strategy = tf.distribute.get_strategy()

  print("REPLICAS: ", strategy.num_replicas_in_sync)

Setting TPU
REPLICAS:  1


## Deep Learning Model

In [0]:
model = Sequential()
model.add(ResNet50(weights=None,include_top=False,pooling='avg',input_shape=(*DIM,3)))
model.add(Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer=tf.keras.optimizers.Adam(lr=0.00025),metrics=['accuracy',tf.keras.metrics.AUC()])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 2048)              23587712  
_________________________________________________________________
dense (Dense)                (None, 1)                 2049      
Total params: 23,589,761
Trainable params: 23,536,641
Non-trainable params: 53,120
_________________________________________________________________


### Define callbacks

In [0]:
model_checkpoint = ModelCheckpoint(filepath='./Model-Checkpoint/ResNet50_weights/weights.{epoch:02d}-{val_auc:.2f}.hdf5',
                                   monitor='val_auc',verbose=1,save_best_only=True,
                                   save_weights_only=True,mode='max')
lr_schedule = ReduceLROnPlateau(monitor='val_loss',factor=0.5,patience=3,min_lr=0.000008)

## Training 

In [0]:
def train_on_folds(fold):
  print(f'On fold {fold}')
  train_data = train[train['stratified_folds'] != i]
  valid_data = train[train['stratified_folds'] == i]

  train_generator = DataGenerator(train_data,JPEG_TRAIN_256,32,DIM,3,True)
  valid_generator = DataGenerator(valid_data,JPEG_TRAIN_256,32,DIM,3,False)

  history = model.fit(train_generator,epochs=10,verbose=1,callbacks=[model_checkpoint,lr_schedule],validation_data=valid_generator)
  return model.predict(test_generator)

In [0]:
train_data = train[train['stratified_folds'] != 2]
valid_data = train[train['stratified_folds'] == 2]

train_generator = DataGenerator(train_data,JPEG_TRAIN_256,16,DIM,3,True)
valid_generator = DataGenerator(valid_data,JPEG_TRAIN_256,16,DIM,3,True)

In [0]:
history = model.fit(train_generator,epochs=10,verbose=1,callbacks=[model_checkpoint,lr_schedule],validation_data=valid_generator)

Epoch 1/10
Epoch 00001: val_auc improved from -inf to 0.48821, saving model to ./Model-Checkpoint/ResNet50_weights/weights.01-0.49.hdf5
Epoch 2/10
Epoch 00002: val_auc did not improve from 0.48821
Epoch 3/10
Epoch 00003: val_auc improved from 0.48821 to 0.57623, saving model to ./Model-Checkpoint/ResNet50_weights/weights.03-0.58.hdf5
Epoch 4/10
Epoch 00004: val_auc did not improve from 0.57623
Epoch 5/10
Epoch 00005: val_auc improved from 0.57623 to 0.83447, saving model to ./Model-Checkpoint/ResNet50_weights/weights.05-0.83.hdf5
Epoch 6/10
Epoch 00006: val_auc did not improve from 0.83447
Epoch 7/10
Epoch 00007: val_auc did not improve from 0.83447
Epoch 8/10
Epoch 00008: val_auc did not improve from 0.83447
Epoch 9/10
Epoch 00009: val_auc did not improve from 0.83447
Epoch 10/10
Epoch 00010: val_auc did not improve from 0.83447


In [0]:
model.load_weights('Model-Checkpoint/ResNet50_weights/weights.05-0.83.hdf5')

In [0]:
test_generator = DataGenerator(test,'300x300/test/',12,DIM,3,False)

In [0]:
predictions = model.predict(test_generator)

In [0]:
pred = list(predictions)
pred.extend([0.5,0.68])
pred = np.array(pred)
pred.shape,test.shape

((10982,), (10982, 5))

In [0]:
#test['target'] = pred
test_vals = dict(test[['image_name','target']].values)
sub['target'] = sub['image_name'].map(test_vals)

In [0]:
test_vals

{'image_name': 0        ISIC_5602249
 1        ISIC_5694968
 2        ISIC_5876203
 3        ISIC_6939698
 4        ISIC_3829545
 5        ISIC_3671995
 6        ISIC_6473784
 7        ISIC_9455280
 8        ISIC_5807899
 9        ISIC_8950567
 10       ISIC_3744551
 11       ISIC_8876322
 12       ISIC_0710840
 13       ISIC_2591810
 14       ISIC_4696807
 15       ISIC_3197088
 16       ISIC_5977193
 17       ISIC_3208994
 18       ISIC_8532460
 19       ISIC_9318565
 20       ISIC_9519092
 21       ISIC_1972390
 22       ISIC_5287696
 23       ISIC_3059224
 24       ISIC_1596422
 25       ISIC_1661794
 26       ISIC_1670585
 27       ISIC_1743829
 28       ISIC_2701399
 29       ISIC_3016106
 30       ISIC_3734151
 31       ISIC_5969279
 32       ISIC_8242793
 33       ISIC_9358483
 34       ISIC_5453550
 35       ISIC_2295564
 36       ISIC_2476859
 37       ISIC_6218648
 38       ISIC_1654007
 39       ISIC_9035628
 40       ISIC_7116372
 41       ISIC_7842034
 42       ISIC_79796

In [0]:
sub.to_csv('submission_on_fold_2_resnet50_best_auc_0_83347.csv',index=False)

## Submtting the predictions

In [0]:
!kaggle competitions submit -c siim-isic-melanoma-classification -f submission_on_fold_2_resnet50_best_auc_0_83347.csv -m "0.83347 Resnet50 7 folds"

100% 360k/360k [00:06<00:00, 61.1kB/s]
Successfully submitted to SIIM-ISIC Melanoma Classification

## Garbage

In [0]:
%%capture
"""train_datagen = ImageDataGenerator(rotation_range=360, 
                        width_shift_range=0.2, 
                        height_shift_range=0.2, 
                        zoom_range=0.2,
                        horizontal_flip=True,
                        vertical_flip=True)
test_datagen = ImageDataGenerator()

predictions = []
test_generator = test_datagen.flow_from_dataframe(dataframe=test,
                                                  directory=JPEG_TEST_256,
                                                  x_col='image_name',
                                                  y_col = 'target',
                                                  target_size=(256,256),
                                                  batch_size=32,
                                                  class_mode='binary')

predictions = []
train_data = train[train['group_folds'] != 0]
valid_data = train[train['group_folds'] == 0]

train_generator = train_datagen.flow_from_dataframe(dataframe=train_data,
                                                directory=JPEG_TRAIN_256,
                                                x_col='image_name',
                                                y_col='target',
                                                target_size=(256,256),
                                                batch_size=32,
                                                subset="training",
                                                class_mode='binary')
valid_generator = train_datagen.flow_from_dataframe(dataframe=valid_data,
                                                directory=JPEG_TRAIN_256,
                                                x_col='image_name',
                                                y_col='target',
                                                target_size=(256,256),
                                                batch_size=32,
                                                subset="training",
                                                class_mode='binary')
model.fit(train_generator,validation_data=valid_generator,epochs=5,steps_per_epoch=train_data.shape[0]//32)
predictions += model.predict(test_generator)


for i in range(SPLITS):
  train_data = train[train['stratified_folds'] == i]
  valid_data = train[train['stratified_folds'] != i]

  train_generator = train_datagen.flow_from_dataframe(dataframe=train_data,
                                                  directory=JPEG_TRAIN_256,
                                                  x_col='image_name',
                                                  y_col='target',
                                                  target_size=(256,256),
                                                  batch_size=32,
                                                  subset="training",
                                                  class_mode='binary')
  valid_generator = train_datagen.flow_from_dataframe(dataframe=valid_data,
                                                  directory=JPEG_TRAIN_256,
                                                  x_col='image_name',
                                                  y_col='target',
                                                  target_size=(256,256),
                                                  batch_size=32,
                                                  subset="training",
                                                  class_mode='binary')
  model.fit(train_generator,validation_data=valid_generator,epochs=5,steps_per_epoch=train_data.shape[0]//32)
  predictions += model.predict(test_generator)"""