In [0]:
# Uncomment on a fresh environment to install the TensorFlow build used by this notebook.
# !pip install tensorflow-gpu==2.0.0-alpha0

In [4]:
import tensorflow as tf
# Display the installed TensorFlow version.
tf.__version__

'2.0.0-alpha0'

In [0]:
# import libraries
import os
import zipfile
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

from tensorflow.keras import models, layers
from tensorflow import keras

In [0]:
# Name of the zip archive that is downloaded and then extracted by this notebook.
SOURCE_ZIPFILE = 'zoom_1_256_256_partition_test_only.zip'
# Name of the .h5 weights file that is downloaded and passed to model.load_weights.
BEST_MODEL_WEIGHT = 'vgg_zoom1_256_256_09-0.9554-0.1437.h5'

In [0]:
# Full download URL of the archive: bucket folder followed by the archive name.
zipped_file_url = ('https://storage.googleapis.com/applied-dl-sj/camelyon/input_data/new/'
                   + SOURCE_ZIPFILE)

In [0]:
!curl -O $zipped_file_url

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 6550M  100 6550M    0     0   133M      0  0:00:49  0:00:49 --:--:--  132M


In [0]:
%%time

# unzip data folders to root dir
zf = zipfile.ZipFile(SOURCE_ZIPFILE)
zf.extractall()


CPU times: user 47.5 s, sys: 20.2 s, total: 1min 7s
Wall time: 1min 53s


In [0]:
build_model_code_url = 'https://storage.googleapis.com/applied-dl-sj/camelyon/source_code/model_build.py'

!curl -O $build_model_code_url

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   521  100   521    0     0   2846      0 --:--:-- --:--:-- --:--:--  2862


In [0]:
model_weight_url = 'https://storage.googleapis.com/applied-dl-sj/camelyon/output_data/best_weights/{}'.format(BEST_MODEL_WEIGHT)

!curl -O $model_weight_url

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 72.1M  100 72.1M    0     0  42.7M      0  0:00:01  0:00:01 --:--:-- 42.7M


### Preprocess Data

In [0]:
# Seed NumPy's global random number generator so NumPy-based randomness is repeatable.
np.random.seed(828)

In [0]:
# Root data folder; its `meta` and `test/slide` subfolders are read by this notebook.
# Same name as SOURCE_ZIPFILE without ".zip" - presumably the archive's top-level folder.
IMG_DIR = 'zoom_1_256_256_partition_test_only'

In [10]:
ls zoom_1_256_256_partition_test_only/meta/

partition_meta_info.json


In [0]:
# Path of the partition metadata file: <IMG_DIR>/meta/partition_meta_info.json
meta_file_name = os.path.join(IMG_DIR, 'meta', 'partition_meta_info.json')

# Load the metadata into a dataframe; dtype=False disables pandas' dtype inference
meta_df = pd.read_json(meta_file_name, dtype=False)

In [12]:
meta_df.dtypes

file_name          object
img_id             object
is_non_gray         int64
is_roi              int64
label               int64
non_gray_ratio    float64
type               object
dtype: object

In [13]:
meta_df.shape

(1266294, 7)

In [14]:
# Keep only rows of type 'test' whose is_non_gray and is_roi flags are both 1
filter_mask = (
    meta_df['type'].eq('test')
    & meta_df['is_non_gray'].eq(1)
    & meta_df['is_roi'].eq(1)
)

# Take an independent copy so later column assignments do not touch a slice
meta_df = meta_df.loc[filter_mask].copy()
meta_df.head()

Unnamed: 0,file_name,img_id,is_non_gray,is_roi,label,non_gray_ratio,type
100086,tumor_slide_002_split_186_53.png,2,1,1,0,0.948151,test
100094,tumor_slide_002_split_295_97.png,2,1,1,0,0.713028,test
100137,tumor_slide_002_split_217_80.png,2,1,1,0,0.802109,test
100157,tumor_slide_002_split_219_46.png,2,1,1,0,0.868195,test
100169,tumor_slide_002_split_288_103.png,2,1,1,0,0.760559,test


In [15]:
meta_df.shape

(19304, 7)

In [0]:
# Cast the integer labels to strings. The generator built from this dataframe uses
# class_mode='binary' with y_col='label'; Keras is expected to require string
# values there - confirm against the flow_from_dataframe documentation.
meta_df['label'] = meta_df['label'].astype(str)

In [17]:
# Image data generator for inference: the only transform is the 1/255 rescale
# (no augmentation is configured)
datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

# Read the images listed in meta_df from <IMG_DIR>/test/slide.
# shuffle=False keeps the batches in dataframe order, which the later
# assignment of predictions back onto meta_df relies on.
test_generator = datagen.flow_from_dataframe(
    dataframe=meta_df,
    directory=os.path.join(IMG_DIR, 'test', 'slide'),
    x_col='file_name',
    y_col='label',
    target_size=(256, 256),
    class_mode='binary',
    batch_size=64,
    shuffle=False,
)

Found 19304 images belonging to 2 classes.


### Build model and run inference

In [0]:
from model_build import build_vgg16_single_input

In [0]:
# Build the network with the helper imported from model_build; the 256x256,
# 3-channel input shape matches the generator's target_size of (256, 256).
model = build_vgg16_single_input(input_shape=(256, 256, 3))

In [20]:
# Configure the model: Adam optimizer, binary cross-entropy loss, accuracy metric
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

# Show the layer-by-layer architecture and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 8, 8, 512)         14714688  
_________________________________________________________________
flatten (Flatten)            (None, 32768)             0         
_________________________________________________________________
dropout (Dropout)            (None, 32768)             0         
_________________________________________________________________
dense (Dense)                (None, 128)               4194432   
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 129       
Total params: 18,909,249
Trainable params: 4,194,561
Non-trainable params: 14,714,688
_________________________________________________________________


In [0]:
# Load the downloaded weights file into the model.
model.load_weights(BEST_MODEL_WEIGHT)

In [22]:
%%time

# Run the model over the batches produced by test_generator and collect its outputs.
y_pred_prob = model.predict_generator(test_generator)

CPU times: user 2min 48s, sys: 55 s, total: 3min 43s
Wall time: 3min 52s


In [0]:
# Attach the predictions to the metadata rows as a flat 1-D column.
# This is a positional assignment: it relies on test_generator having been
# created from meta_df with shuffle=False so prediction order matches row order.
meta_df['y_pred_prob'] = y_pred_prob.flatten()

In [0]:
# Save the metadata together with the y_pred_prob column as a pickle file
# in the working directory.
meta_df.to_pickle('zoom_1_256_256_test_inference_result_single_input.pkl')