<a href="https://colab.research.google.com/github/liminal-learner/cancer_detection/blob/master/notebooks/3_Deploy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deploy

In [0]:
# Colab magic: select TensorFlow 1.x before anything imports TensorFlow/Keras.
% tensorflow_version 1.x

import os
import sys
import zipfile

import matplotlib.pyplot as plt
from google.colab import files
from sklearn.metrics import roc_curve, auc, roc_auc_score


In [2]:
# To import src for this project into colab: 
ROOT_DIR = '/content'
os.chdir(ROOT_DIR)
!rm -rf cancer_detection
! ls -l | head -10

!git clone https://github.com/liminal-learner/cancer_detection.git

PROJ_ROOT = os.path.join(ROOT_DIR, 'cancer_detection')

os.sys.path.append(PROJ_ROOT)

total 16
drwxr-xr-x 3 root root 4096 Nov  5 03:09 competitions
-rw-r--r-- 1 root root   64 Nov  5 03:09 kaggle.json
drwxr-xr-x 2 root root 4096 Nov  5 04:07 models
drwxr-xr-x 1 root root 4096 Oct 25 16:58 sample_data
Cloning into 'cancer_detection'...
remote: Enumerating objects: 94, done.[K
remote: Counting objects: 100% (94/94), done.[K
remote: Compressing objects: 100% (75/75), done.[K
remote: Total 231 (delta 40), reused 18 (delta 5), pack-reused 137
Receiving objects: 100% (231/231), 62.65 MiB | 9.96 MiB/s, done.
Resolving deltas: 100% (81/81), done.


In [3]:
# Make the project's src/ directory importable. These imports cannot live in
# the top import cell because the package only exists after the clone above.
src_dir = os.path.join(PROJ_ROOT, "src")
# sys.path is the documented API (os.sys is an undocumented alias); the guard
# keeps the cell idempotent when it is re-run:
if src_dir not in sys.path:
    sys.path.append(src_dir)

from data.DataGenerator import DataGenerator
from model.ModelContainer import ModelContainer

Using TensorFlow backend.


## Define parameters & download the data from Kaggle:

In [8]:
# Use this to upload the kaggle.json from your local machine:
# (interactive Colab upload; the file is saved into the current working
# directory, as the "Saving kaggle.json to kaggle.json" output shows)
uploaded = files.upload()

Saving kaggle.json to kaggle.json


In [9]:
# List the working directory to confirm the upload and the checkout are present:
! ls -l | head -10

total 12
drwxr-xr-x 6 root root 4096 Nov  5 03:05 cancer_detection
-rw-r--r-- 1 root root   64 Nov  5 03:09 kaggle.json
drwxr-xr-x 1 root root 4096 Oct 25 16:58 sample_data


In [11]:
!cp kaggle.json '/root/.kaggle/'
!chmod 600 '/root/.kaggle/kaggle.json'
!kaggle config set -n path -v '/content/'
!kaggle competitions download histopathologic-cancer-detection

- path is now set to: /content/
Downloading sample_submission.csv.zip to /content/competitions/histopathologic-cancer-detection
  0% 0.00/1.33M [00:00<?, ?B/s]
100% 1.33M/1.33M [00:00<00:00, 44.9MB/s]
Downloading train_labels.csv.zip to /content/competitions/histopathologic-cancer-detection
 98% 5.00M/5.10M [00:00<00:00, 20.6MB/s]
100% 5.10M/5.10M [00:00<00:00, 20.3MB/s]
Downloading test.zip to /content/competitions/histopathologic-cancer-detection
 99% 1.30G/1.30G [00:28<00:00, 53.9MB/s]
100% 1.30G/1.30G [00:28<00:00, 48.7MB/s]
Downloading train.zip to /content/competitions/histopathologic-cancer-detection
100% 4.96G/4.98G [02:54<00:00, 26.6MB/s]
100% 4.98G/4.98G [02:54<00:00, 30.7MB/s]


In [0]:
# Column names and image / split settings passed to the DataGenerator:
unique_identifier, target_class_column = 'id', 'label'
image_size, n_channels = (96, 96), 3
validation_frac = 0.2

# Where the Kaggle CLI placed the competition files, and the directories the
# archives are unpacked into:
DATA_DIR = os.path.join(ROOT_DIR, 'competitions/histopathologic-cancer-detection/')
train_path, test_path = (os.path.join(DATA_DIR, sub) for sub in ('train/', 'test/'))
# train_labels.csv.zip is extracted into a directory named train_labels.csv,
# hence the repeated path component:
train_labels_path = os.path.join(DATA_DIR, 'train_labels.csv/train_labels.csv')

In [0]:
# Unzip every downloaded archive into a directory named after the archive
# (e.g. train.zip -> train/):
os.chdir(DATA_DIR)

for path_to_zip_file in os.listdir():
    # The directory also holds non-archives (already-extracted folders, a
    # models/ directory); opening those as zip files would raise, so only
    # real *.zip archives are processed. This also makes the cell re-runnable.
    if not path_to_zip_file.endswith('.zip') or not zipfile.is_zipfile(path_to_zip_file):
        continue
    new_directory = os.path.splitext(path_to_zip_file)[0]
    os.makedirs(new_directory, exist_ok = True)
    # The with-statement closes the archive; no explicit close() is needed.
    with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
        zip_ref.extractall(new_directory)

In [5]:
# Inspect the data directory to check that each archive was unpacked:
os.chdir(DATA_DIR)
! ls -l | head -10

total 6617084
drwxr-xr-x 2 root root       4096 Nov  5 04:13 models
drwxr-xr-x 2 root root       4096 Nov  5 03:13 sample_submission.csv
-rw-r--r-- 1 root root    1394488 Nov  5 03:09 sample_submission.csv.zip
drwxr-xr-x 2 root root    4329472 Nov  5 03:16 test
-rw-r--r-- 1 root root 1401100547 Nov  5 03:10 test.zip
drwxr-xr-x 2 root root   16723968 Nov  5 03:15 train
drwxr-xr-x 2 root root       4096 Nov  5 03:13 train_labels.csv
-rw-r--r-- 1 root root    5352900 Nov  5 03:09 train_labels.csv.zip
-rw-r--r-- 1 root root 5346961539 Nov  5 03:13 train.zip


## Prepare the data & load the model:

In [6]:
# Build the project's data generators from the unpacked Kaggle files.
# Arguments are positional, in the order the DataGenerator is called with here.
data = DataGenerator(
    train_path,
    train_labels_path,
    test_path,
    unique_identifier,
    image_size,
    n_channels,
    target_class_column,
    validation_frac,
)

Found 176020 validated image filenames belonging to 2 classes.
Found 44005 validated image filenames belonging to 2 classes.
Found 57458 validated image filenames.


In [7]:
# Create the container first, from the current directory, exactly as before.
models = ModelContainer()

# The saved model is addressed relative to the project checkout, so switch
# there before loading it:
os.chdir(PROJ_ROOT)
baseline_path = os.path.join('models', 'baseline.h5')
models.load_model(baseline_path)


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


## Score the test set:


In [8]:
# Score the test generator with the loaded 'baseline' model; the results are
# read back from models.predictions when building the submission.
models.make_predictions('baseline', data.test_generator)



## Submit to Kaggle:

In [0]:
# Predictions were made in the order of the test_df using the test generator.
# Work on a copy: assigning into data.test_df directly would add a column to,
# and rewrite the ids of, the frame owned by the DataGenerator, leaving hidden
# state behind when cells are re-run.
submission = data.test_df.copy()
submission['label'] = models.predictions
# Take out the extension for the submission:
submission['id'] = submission['id'].apply(lambda x: x.split(".")[0])

# Written to the current working directory:
submission.to_csv("submission.csv", index = False, header = True)


In [10]:
# Preview the first rows to sanity-check the id / label columns:
submission.head()

Unnamed: 0,id,label
0,f57e0469a2627e63cf87c71acba7f4b82796f85a,0.192206
1,230c6b79f156c23b0642d0856263879b293daf8e,0.06503
2,6126f9be3558f75cb8de32d67de5856f7e6dd753,0.733952
3,1a2d90bc7b16165e72551d25995f5b8d08828144,0.871041
4,be7e1682edfc32bb84e17083f4b775eda91ef07f,0.045973


In [0]:
# Download the submission file from the Colab runtime to the local machine:
files.download("submission.csv")

In [11]:
# Submit submission.csv to the competition through the Kaggle CLI
# (-c competition, -f file, -m submission message):
!kaggle competitions submit -c histopathologic-cancer-detection -f submission.csv -m "Message"

100% 2.84M/2.84M [00:07<00:00, 412kB/s]
Successfully submitted to Histopathologic Cancer Detection

## Feature map analysis:
**TODO:** the feature map analysis has not been written yet.