## Loading data

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Load the training metadata table; later cells read its `image_name` and
# `target` columns.
train_df = pd.read_csv("train.csv")

In [3]:
def append_ext(filename):
    """Return ``filename`` with a ``.png`` extension.

    The function is idempotent: a name that already ends in ``.png`` is
    returned unchanged, so re-running the cell that applies it to a column
    cannot produce ``name.png.png``.

    Parameters
    ----------
    filename : str
        Image name, with or without the ``.png`` suffix.

    Returns
    -------
    str
        ``filename`` ending in exactly one ``.png``.
    """
    if filename.endswith(".png"):
        return filename
    return filename + ".png"

In [4]:
# Add the .png extension to every image_name value so the names match the
# files on disk; this overwrites the column in place.
train_df["image_name"] = train_df["image_name"].apply(append_ext)

In [5]:
# Undersample the benign class (target == 0): draw 1200 rows, with a fixed
# seed so the same rows are picked on every run.
df_benign = (
    train_df
    .loc[train_df['target'] == 0]
    .sample(n=1200, random_state=111)
)

In [6]:
# Keep every malignant row (target == 1); only the benign class is undersampled.
df_malignant = train_df.loc[train_df['target'].eq(1)]

In [7]:
# Stack the sampled benign rows on top of all malignant rows. Rows keep their
# original index and are not shuffled here.
train_balanced = pd.concat([df_benign, df_malignant])

In [9]:
from  sklearn.model_selection import train_test_split

In [10]:
# Hold out 20% of the balanced set for validation. Stratifying on the label
# keeps the benign/malignant ratio (roughly 2:1 after undersampling) the same
# in both splits, so the validation AUC is not skewed by a lucky or unlucky
# draw of the minority class. The fixed seed keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    train_balanced['image_name'],
    train_balanced['target'],
    test_size = 0.2,
    stratify = train_balanced['target'],
    random_state = 111,
)

In [11]:
# Re-pair the training file names with their labels in one DataFrame, the
# input that flow_from_dataframe reads (x_col / y_col).
train = pd.DataFrame({'image_name': X_train, 'target': y_train})

In [12]:
# Same pairing for the validation split.
val = pd.DataFrame({'image_name': X_val, 'target': y_val})

In [14]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [15]:
# The ImageNet weights of InceptionV3 were trained on pixels scaled to
# [-1, 1]; preprocess_input applies that scaling. Without it the pretrained
# filters receive raw 0-255 values.
from tensorflow.keras.applications.inception_v3 import preprocess_input

# Single place to change the image folder (used by both generators).
IMAGE_DIR = r'D:\uni\masters\ML\project\my_code\archive\train'

# Random augmentation is applied to training images only; validation images
# are just preprocessed.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input,
                                   rotation_range=20,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   horizontal_flip=True)
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# class_mode='raw' passes the 0/1 values of the `target` column through
# unchanged as labels.
train_generator = train_datagen.flow_from_dataframe(
    dataframe = train,
    directory = IMAGE_DIR,
    x_col = 'image_name',
    y_col = 'target',
    batch_size = 32,
    target_size=(224, 224),
    shuffle = True,
    class_mode = 'raw',
    seed = 111
)

# Validation batches are served in a fixed order: shuffling has no benefit
# for evaluation and would break the alignment between predictions and
# `val` rows if the generator is later used with model.predict.
val_generator = val_datagen.flow_from_dataframe(
    dataframe = val,
    directory = IMAGE_DIR,
    x_col = 'image_name',
    y_col = 'target',
    batch_size = 32,
    target_size=(224, 224),
    shuffle = False,
    class_mode = 'raw'
)

Found 1427 validated image filenames.
Found 357 validated image filenames.


## Checking GPU

In [16]:
# Print every device TensorFlow can see (CPU and GPU) to confirm that a GPU
# is available before training.
# NOTE(review): device_lib is imported from the tensorflow.python package,
# which looks like a non-public module path -- consider
# tf.config.list_physical_devices instead; confirm against the TF version in use.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 14651608643082029534
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 3061265204
locality {
  bus_id: 1
  links {
  }
}
incarnation: 2185059894611850125
physical_device_desc: "device: 0, name: GeForce GTX 1650, pci bus id: 0000:01:00.0, compute capability: 7.5"
]


In [17]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dense
import keras

Using TensorFlow backend.


In [18]:
# Ask TensorFlow for the GPU device name; the recorded run shows '/device:GPU:0'.
tf.test.gpu_device_name()

'/device:GPU:0'

## Loading InceptionV3

In [19]:
import PIL

In [20]:
# All of our models take 224x224 RGB input, which is also the target_size the
# data generators resize every image to. InceptionV3's default input size is
# 299x299, so the shape is set explicitly here.
# (Fixed typo: the width was 244, which did not match the 224x224 batches.)
input_tensor = Input(shape=(224, 224, 3))

In [21]:
# Load InceptionV3 with ImageNet weights, without its classification top
# (include_top=False), wired to the input tensor defined in the previous cell.
inceptionv3_model = tf.keras.applications.InceptionV3(weights='imagenet',
                                                      input_tensor=input_tensor,
                                                      include_top=False)

Adding new layers on top of InceptionV3:

In [22]:
# Start the new head from the base network's output tensor.
x = inceptionv3_model.output

In [23]:
# Average each feature map over its spatial dimensions, giving one feature
# vector per image.
x = GlobalAveragePooling2D()(x)

In [24]:
# Fully connected layer with 1024 ReLU units on top of the pooled features.
x = Dense(units=1024, activation='relu')(x)

In [25]:
# Single sigmoid unit: one value in (0, 1) per image for the binary target.
predictions = Dense(units=1, activation='sigmoid')(x)

In [26]:
# Assemble the full model: base network input -> new sigmoid output.
model = Model(inputs=inceptionv3_model.input, outputs=predictions)

We first train only the newly added layers, so we freeze all InceptionV3 layers:

In [27]:
# Freeze every layer of the pretrained base so that only the newly added
# Dense layers are updated in the first training phase. This must run before
# model.compile for the setting to take effect in that phase.
for layer in inceptionv3_model.layers:
    layer.trainable = False

In [28]:
# Compile for the first phase: RMSprop with its default settings, binary
# cross-entropy for the single sigmoid output, AUC as the reported metric.
model.compile(optimizer='rmsprop', 
             loss='binary_crossentropy',
             metrics=['AUC'])

In [35]:
# List the physical GPUs TensorFlow detects.
tf.config.experimental.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [36]:
# Check whether this TensorFlow build has CUDA support.
tf.test.is_built_with_cuda()

True

In [30]:
# Phase 1: train only the new head while the InceptionV3 base is frozen.
# Steps are the row count floor-divided by the batch size (32), so the last
# partial batch of each generator is not consumed within an epoch; the
# recorded log shows 44 training and 11 validation steps.
history = model.fit(
    train_generator,
    validation_data = val_generator,
    steps_per_epoch = train.shape[0]//32,
    epochs = 50,
    validation_steps = val.shape[0]//32
)

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 44 steps, validate for 11 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


Now we fine-tune the top two Inception blocks: we freeze the first 249 layers and unfreeze the rest:

In [31]:
# Fine-tuning setup: layers with index below 249 stay frozen, every layer from
# index 249 onward (including the new head) becomes trainable.
for index, layer in enumerate(model.layers):
    layer.trainable = index >= 249

In [32]:
from tensorflow.keras.optimizers import Adam

In [33]:
# Recompile so the changed `trainable` flags take effect. Fine-tuning uses
# Adam with a small learning rate so the pretrained weights are only nudged.
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model.compile(optimizer=Adam(learning_rate=0.0001),
             loss='binary_crossentropy',
             metrics=['AUC'])

In [34]:
# Phase 2: fine-tune the unfrozen top layers together with the new head, with
# the same generators and step counts as phase 1.
# NOTE(review): on completion this rebinds `history`, replacing the object
# returned by the first fit. The recorded run was stopped by hand
# (KeyboardInterrupt) after epoch 27 had started, so the assignment did not
# happen in that run.
history = model.fit(
    train_generator,
    validation_data = val_generator,
    steps_per_epoch = train.shape[0]//32,
    epochs = 50,
    validation_steps = val.shape[0]//32
)

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 44 steps, validate for 11 steps
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50

KeyboardInterrupt: 