In [12]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import pickle
from tqdm import tqdm
import matplotlib.pyplot as plt
import tensorflow as tf
import openslide
from openslide import OpenSlide
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split

In [13]:
# Root folder of the resized dataset. The trailing slash matters: other cells
# build paths from it by plain string concatenation.
DATA_DIR = '../input/mayo-clinic-resized-5gb/data/'
# Display the entries found directly under the dataset root.
os.listdir(path=DATA_DIR)

['train_imgs', 'train.csv']

In [14]:
# Load the training metadata table stored at the dataset root.
train_df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
# Preview the first rows.
train_df.head(n=5)

Unnamed: 0,image_id,center_id,patient_id,image_num,label
0,006388_0,11,006388,0,CE
1,008e5c_0,11,008e5c,0,CE
2,00c058_0,11,00c058,0,LAA
3,01adc5_0,11,01adc5,0,LAA
4,026c97_0,4,026c97,0,CE


In [15]:
# Path of each training image: dataset root + image folder + image id.
train_df["file_path"] = [
    DATA_DIR + "train_imgs/" + image_id for image_id in train_df["image_id"]
]
# Binary target: 1 for the "CE" label, 0 for anything else.
train_df["target"] = [int(label == "CE") for label in train_df["label"]]
train_df.head(n=5)

Unnamed: 0,image_id,center_id,patient_id,image_num,label,file_path,target
0,006388_0,11,006388,0,CE,../input/mayo-clinic-resized-5gb/data/train_im...,1
1,008e5c_0,11,008e5c,0,CE,../input/mayo-clinic-resized-5gb/data/train_im...,1
2,00c058_0,11,00c058,0,LAA,../input/mayo-clinic-resized-5gb/data/train_im...,0
3,01adc5_0,11,01adc5,0,LAA,../input/mayo-clinic-resized-5gb/data/train_im...,0
4,026c97_0,4,026c97,0,CE,../input/mayo-clinic-resized-5gb/data/train_im...,1


In [16]:
%%time
def preprocess(image_path, region=(1000, 1000), size=(5000, 5000), target_size=(256, 256)):
    """Read one fixed crop from a slide and return it as a resized array.

    Args:
        image_path: Path of a slide file that OpenSlide can open.
        region: Location of the crop, passed to ``read_region`` unchanged.
        size: Size of the crop, read at level 0.
        target_size: Spatial size handed to ``tf.image.resize``.

    Returns:
        The resized crop converted to a NumPy array. With the defaults the
        spatial size is 256x256, which is what the model cell declares as its
        input (``input_shape=(256, 256, 4)``).
    """
    # Release the slide handle as soon as the crop has been read; the loading
    # loop opens one slide per row of the metadata table.
    with OpenSlide(image_path) as slide:
        crop = slide.read_region(region, 0, size)

    # Previously this function resized an undefined ``img`` (UnboundLocalError)
    # and returned the raw crop instead of the resized array.
    # NOTE(review): read_region is documented to return an RGBA PIL image, so
    # the array should have 4 channels — confirm against the OpenSlide docs.
    resized = tf.image.resize(np.array(crop), target_size)
    return np.array(resized)

# Preprocess every training image in table order; tqdm shows progress.
x_train = [preprocess(path) for path in tqdm(train_df['file_path'])]

100%|██████████| 754/754 [00:05<00:00, 135.48it/s]

CPU times: user 3.2 s, sys: 1.84 s, total: 5.04 s
Wall time: 5.57 s





In [17]:
# Stack the per-image results into a single array and pair them with the labels.
# NOTE(review): this cell rebinds x_train/y_train, so re-running it without
# first re-running the loading cell splits already-split data against the full
# label column and fails with mismatched lengths.
x_train = np.array(x_train)
y_train = train_df['target']
# Fixed seed so the hold-out set (and therefore the validation metrics) is the
# same on every run; stratify so both parts keep the label ratio of the full set.
x_train, x_test, y_train, y_test = train_test_split(
    x_train,
    y_train,
    test_size=0.1,
    random_state=42,
    stratify=y_train,
)

In [18]:
# One output unit: the final sigmoid layer emits a single value per image.
num_classes = 1

# Small CNN assembled layer by layer: input rescaling, three conv/pool stages
# with growing filter counts, then a dense head.
model = Sequential()
model.add(layers.Rescaling(1./255, input_shape=(256, 256, 4)))

model.add(layers.Conv2D(16, 3, padding='same', activation='relu'))
model.add(layers.MaxPooling2D())

model.add(layers.Conv2D(32, 3, padding='same', activation='relu'))
model.add(layers.MaxPooling2D())

model.add(layers.Conv2D(64, 3, padding='same', activation='relu'))
model.add(layers.MaxPooling2D())

model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(num_classes, activation='sigmoid'))

In [19]:
# Binary cross-entropy pairs with the single sigmoid output; accuracy is
# tracked alongside the loss.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [20]:
# Train for 10 epochs, evaluating on the held-out split after each one.
# The returned object is kept in `history`.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=10,
    verbose=1,
    validation_data=(x_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [21]:
# Test metadata comes from the original competition dataset, not the resized one.
test_df = pd.read_csv('../input/mayo-clinic-strip-ai/test.csv')
# Each test image is a .tif named after its image id.
test_df["file_path"] = [
    "../input/mayo-clinic-strip-ai/test/" + image_id + ".tif"
    for image_id in test_df["image_id"]
]

In [22]:
# Preview the test table, including the file_path column added above.
test_df.head(n=5)

Unnamed: 0,image_id,center_id,patient_id,image_num,file_path
0,006388_0,11,006388,0,../input/mayo-clinic-strip-ai/test/006388_0.tif
1,008e5c_0,11,008e5c,0,../input/mayo-clinic-strip-ai/test/008e5c_0.tif
2,00c058_0,11,00c058,0,../input/mayo-clinic-strip-ai/test/00c058_0.tif
3,01adc5_0,11,01adc5,0,../input/mayo-clinic-strip-ai/test/01adc5_0.tif


In [39]:
%%time
def preprocess(image_path, region=(1000, 1000), size=(5000, 5000), target_size=(256, 256)):
    """Read one fixed crop from a slide and return it as a resized array.

    Args:
        image_path: Path of a slide file that OpenSlide can open.
        region: Location of the crop, passed to ``read_region`` unchanged.
        size: Size of the crop, read at level 0.
        target_size: Spatial size handed to ``tf.image.resize``.

    Returns:
        The resized crop converted to a NumPy array. With the defaults the
        spatial size is 256x256, which is what the model cell declares as its
        input (``input_shape=(256, 256, 4)``).
    """
    # Release the slide handle as soon as the crop has been read.
    with OpenSlide(image_path) as slide:
        crop = slide.read_region(region, 0, size)

    # read_region hands back an in-memory image, not encoded JPEG bytes, so it
    # must not go through tf.image.decode_jpeg. The crop is also not rescaled
    # to [0, 1] or cut to 3 channels here: the model applies Rescaling(1./255)
    # itself and declares a 4-channel input.
    # NOTE(review): read_region is documented to return an RGBA PIL image, so
    # the array should have 4 channels — confirm against the OpenSlide docs.
    resized = tf.image.resize(np.array(crop), target_size)
    return np.array(resized)

CPU times: user 7 µs, sys: 0 ns, total: 7 µs
Wall time: 13.1 µs


In [None]:
# Preprocess the TEST images. Iterating train_df here would yield one
# prediction per training row, which cannot be lined up with test_df.
test1 = np.array([preprocess(path) for path in tqdm(test_df['file_path'])])

# Predict on one stacked array rather than a Python list of per-image arrays.
preds = model.predict(test1)

In [None]:
# Start the submission from the patient ids, one row per test image.
sub = pd.DataFrame(test_df["patient_id"].copy())
# Flatten the predictions to one value per row before storing them as a column.
# NOTE(review): presumably preds has shape (n, 1) because the model ends in a
# single sigmoid unit — confirm.
sub["CE"] = np.asarray(preds, dtype=np.float64).ravel()
# Keep values inside [0, 1]; vectorized equivalent of the two per-element lambdas.
sub["CE"] = sub["CE"].clip(lower=0, upper=1)
# The two class columns are complementary.
sub["LAA"] = 1 - sub["CE"]

# A patient can have several images: average their rows into one per patient.
sub = sub.groupby("patient_id").mean()
sub = sub[["CE", "LAA"]].round(6).reset_index()
sub

In [None]:
# Write the per-patient table to disk without the positional index column.
sub.to_csv(path_or_buf="submission.csv", index=False)