In [2]:
import keras
from keras.models import Sequential
from PIL import Image
import numpy as np

Using TensorFlow backend.


In [3]:
import pandas as pd

In [4]:
import keras.backend as K
# Configure the Keras backend to use the 'channels_last' image data format.
K.set_image_data_format(data_format='channels_last')

In [5]:
# Translated original note: "can only be loaded once at a time".
# NOTE(review): presumably this means the cell should be run only once per
# kernel session -- confirm with the author.
# VGG16 with ImageNet weights, without the fully connected top, for 64x64x3 inputs.
base_model = keras.applications.vgg16.VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(64, 64, 3),
)

In [6]:
# Print the layer-by-layer table (type, output shape, parameter count) of the base network.
base_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 64, 64, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 64, 64, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 64, 64, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 32, 32, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 32, 32, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 32, 32, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 16, 16, 128)       0         
__________

In [8]:
from keras.layers import GlobalAveragePooling2D, Dense
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

# Build the classification head on top of the output of the last VGG16
# convolution layer ("block5_conv3"): global average pooling, a 512-unit
# ReLU layer, then the softmax output.
features = base_model.get_layer("block5_conv3").output
features = GlobalAveragePooling2D()(features)
features = Dense(512, activation='relu')(features)

# Three output classes.
predictions = Dense(3, activation='softmax')(features)

# This is the model that gets trained.
model = Model(inputs=base_model.input, outputs=predictions)

# Fine-tune only the block4/block5 convolutions. Keras layers are trainable
# by default, so merely setting these layers to True would leave the whole
# base network trainable; every other pretrained layer is frozen explicitly.
# The new Dense head is not part of base_model and therefore stays trainable.
fine_tuned_layers = {"block4_conv1", "block4_conv2", "block4_conv3",
                     "block5_conv1", "block5_conv2", "block5_conv3"}
for layer in base_model.layers:
    layer.trainable = layer.name in fine_tuned_layers

# Compile after changing the trainable flags so that they take effect.
model.compile(optimizer=Adam(lr=0.00001), loss='categorical_crossentropy', metrics=["accuracy"])

In [9]:
# Print the layer-by-layer table (type, output shape, parameter count) of the assembled model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 64, 64, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 64, 64, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 64, 64, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 32, 32, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 32, 32, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 32, 32, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 16, 16, 128)       0         
__________

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [12]:
# Training image array stored as .npy.
# NOTE(review): the file name suggests unsegmented images resized to 64x64 -- confirm.
train_npy_path = '/home/Kaggle_Cervical_Cancer_Screening/Rory/Dev-ipynb/resize_data/ALL_TRAIN_NOSEG_64.npy'
train_data = np.load(train_npy_path)

In [13]:
# Test image array stored as .npy.
# NOTE(review): the file name suggests unsegmented images resized to 64x64 -- confirm.
test_npy_path = '/home/Kaggle_Cervical_Cancer_Screening/Rory/Dev-ipynb/resize_data/ALL_TEST_NOSEG_64.npy'
test_data = np.load(test_npy_path)

In [14]:
# Metadata tables for the training and test sets; later cells read the
# 'type' column of `train` and the 'image' column of `test`.
train_csv_path = '/home/Kaggle_Cervical_Cancer_Screening/Rory/Dev-ipynb/resize_data/TRAIN.csv'
test_csv_path = '/home/Kaggle_Cervical_Cancer_Screening/Rory/Dev-ipynb/resize_data/TEST.csv'
train = pd.read_csv(train_csv_path)
test = pd.read_csv(test_csv_path)

In [15]:
# Values of the 'image' column of the test table, used later as the image_name column.
test_id = test['image'].values

In [16]:
# Encode the string labels in train['type'] as integer class ids.
le = LabelEncoder()
train_target = le.fit_transform(train['type'].values)

# Hold out 40% of the training data for validation; fixed seed for a repeatable split.
(x_train, x_val_train,
 y_train, y_val_train) = train_test_split(
    train_data,
    train_target,
    test_size=0.4,
    random_state=17,
)

In [17]:
# One-hot encode the integer labels of both splits (3 classes).
# NOTE(review): y_train / y_val_train are rebound from themselves, so this cell
# should not be re-run without re-running the split cell first.
num_classes = 3
y_train, y_val_train = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_val_train)
)

In [18]:
# Cast every image array to float32 and divide by 255.
# NOTE(review): x_train / x_val_train are rebound from themselves, so re-running
# this cell divides them by 255 again.
x_train = x_train.astype('float32') / 255
x_val_train = x_val_train.astype('float32') / 255

x_test = test_data.astype('float32') / 255

In [19]:
from keras.preprocessing.image import ImageDataGenerator

In [20]:
# Early-stopping callback that monitors the validation loss with a patience of 2 epochs.
earlystopping = EarlyStopping(
    monitor='val_loss',
    patience=2,
)

In [21]:
# Augmentation settings: rotation_range of 180 plus horizontal and vertical flips.
# Width/height shifts (shift = 0.2) were commented out in the original and stay disabled.
augmentation_options = dict(
    rotation_range=180,
    horizontal_flip=True,
    vertical_flip=True,
    data_format="channels_last",
)
datagen = ImageDataGenerator(**augmentation_options)

# NOTE(review): fit() is documented as needed only for feature-wise statistics
# (centering, std normalization, ZCA); none are enabled here, so this call is
# probably redundant -- confirm before removing.
datagen.fit(x_train)

In [None]:
# Shuffled, augmented batches of 128 samples drawn from the training split.
train_batches = datagen.flow(x_train, y_train, batch_size=128, shuffle=True)

# Train for up to 12 epochs of 500 generator steps each, validating on the
# held-out split; the early-stopping callback may end training sooner.
model.fit_generator(
    train_batches,
    steps_per_epoch=500,
    epochs=12,
    verbose=1,
    validation_data=(x_val_train, y_val_train),
    callbacks=[earlystopping],
)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
108/500 [=====>........................] - ETA: 86s - loss: 0.4501 - acc: 0.8144 

In [20]:
# Sanity check: display the shape of the scaled test array.
x_test.shape

(512, 64, 64, 3)

In [22]:
# Softmax outputs of the model for every test image (one row per image).
pred = model.predict(x=x_test)

In [23]:
# Sanity check: display the shape of the prediction array.
pred.shape

(512, 3)

In [24]:
# Wrap the prediction array in a DataFrame with one named column per output.
# NOTE(review): assumes the model's output order matches Type_1..Type_3 -- confirm against le.classes_.
type_columns = ['Type_1', 'Type_2', 'Type_3']
df = pd.DataFrame(data=pred, columns=type_columns)

In [25]:
# Attach the test image identifiers as the image_name column.
df = df.assign(image_name=test_id)

In [26]:
# Reorder the columns so that image_name comes first, followed by the three type columns.
submission_columns = ['image_name', 'Type_1', 'Type_2', 'Type_3']
df = df.loc[:, submission_columns]

In [27]:
# Display the assembled prediction table for a visual check.
df

Unnamed: 0,image_name,Type_1,Type_2,Type_3
0,477.jpg,9.286057e-02,0.907096,4.304782e-05
1,178.jpg,1.437187e-04,0.000131,9.997252e-01
2,335.jpg,6.101811e-06,0.000045,9.999491e-01
3,324.jpg,2.558109e-02,0.974397,2.195847e-05
4,98.jpg,5.380234e-04,0.999458,3.566464e-06
5,184.jpg,3.472408e-08,0.000001,9.999986e-01
6,330.jpg,9.424442e-01,0.046124,1.143161e-02
7,170.jpg,8.147195e-05,0.997341,2.577674e-03
8,163.jpg,1.743213e-03,0.987509,1.074773e-02
9,344.jpg,9.993266e-01,0.000673,4.024411e-07


In [28]:
# Write the prediction table to CSV without the DataFrame index column.
submission_path = '/home/Kaggle_Cervical_Cancer_Screening/submission/submission_LB_vgg16_3.csv'
df.to_csv(submission_path, index=False)