## Vgg16

### 資料沒有做 Segmentation

In [1]:
import keras
from keras.models import Sequential
from PIL import Image
import cv2
import numpy as np
import pandas as pd
from collections import defaultdict
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


### 處理資料

In [2]:
from IPython.display import display
def show(X):
    display(Image.fromarray(X))
    
def rotation(image, angle):
    assert image.shape[-1] == 3
    num_rows, num_cols = image.shape[:2]
    rotation_matrix = cv2.getRotationMatrix2D((num_cols/2, num_rows/2), angle, 1)
    img_rotation = cv2.warpAffine(image, rotation_matrix, (num_cols, num_rows))
    return img_rotation

### 增加資料旋轉 + 鏡射

In [3]:
train_data = np.load('/home/Kaggle_Cervical_Cancer_Screening/Rory/Dev-ipynb/resize_data/ALL_TRAIN_NOSEG_64.npy')
test_data = np.load('/home/Kaggle_Cervical_Cancer_Screening/Rory/Dev-ipynb/resize_data/ALL_TEST_NOSEG_64.npy')
train = pd.read_csv('/home/Kaggle_Cervical_Cancer_Screening/Rory/Dev-ipynb/resize_data/TRAIN.csv')
test = pd.read_csv('/home/Kaggle_Cervical_Cancer_Screening/Rory/Dev-ipynb/resize_data/TEST.csv')
test_id = test.image.values

In [4]:
train_data_ud = train_data[:,::-1,:,:]
train_data_rl = train_data[:,:,::-1,:]

rot_num = 4
rot_imgs = defaultdict(list)
for i in range(train_data.shape[0]):
    for rot in range(int(360/rot_num),360,int(360/rot_num)):
        rot_img = rotation(train_data[i,:,:,:], rot)
        rot_imgs[rot].append(rot_img)
        
le = LabelEncoder()
train_target = le.fit_transform(train['type'].values)

train_X = np.concatenate([train_data, train_data_ud, train_data_rl,\
                          np.array(rot_imgs[90]), np.array(rot_imgs[180]),\
                          np.array(rot_imgs[270])])

train_Y = np.concatenate([train_target for i in range(6)])

random_index = np.random.choice(range(train_X.shape[0]), train_X.shape[0], replace = False)

train_X = train_X[random_index]
train_Y = train_Y[random_index]

x_train, x_val_train, y_train, y_val_train =\
train_test_split(train_X, train_Y, test_size = 0.2, random_state=17)

num_classes = 3
y_train = keras.utils.to_categorical(y_train, num_classes)
y_val_train = keras.utils.to_categorical(y_val_train, num_classes)

In [5]:
x_train = x_train.astype('float32')
x_val_train = x_val_train.astype('float32')

x_train /= 255
x_val_train /= 255

x_test = test_data.astype('float32')
x_test /= 255

# Vgg16

In [6]:
import keras.backend as K
K.set_image_data_format('channels_last')

In [7]:
base_model = keras.applications.vgg16.VGG16(weights = 'imagenet',  include_top = False, input_shape=(64,64,3))

In [8]:
base_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 64, 64, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 64, 64, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 64, 64, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 32, 32, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 32, 32, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 32, 32, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 16, 16, 128)       0         
__________

### 微調最後幾層

In [9]:
from keras.layers import GlobalAveragePooling2D, Dense
from keras.models import Model
from keras.optimizers import Adam

_ = base_model.get_layer("block5_conv3").output
_ = GlobalAveragePooling2D()(_)
_ = Dense(512, activation='relu')(_)

# 3 個輸出
predictions = Dense(3, activation='softmax')(_)
model = Model(inputs=base_model.input, outputs=predictions)

for n in ["block5_conv1", "block5_conv2", "block5_conv3"]:
    model.get_layer(n).trainable = True
    
model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=["accuracy"])

In [10]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 64, 64, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 64, 64, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 64, 64, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 32, 32, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 32, 32, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 32, 32, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 16, 16, 128)       0         
__________

### Vgg16 容易 overfitting, 所以 epochs 只設成 6

In [11]:
model.fit(x_train, y_train, validation_data=(x_val_train, y_val_train), epochs = 6, batch_size = 128)

Train on 39417 samples, validate on 9855 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.History at 0x7f7b50b58320>

### 最後幾層有過度配飾的情況，val_loss增加

In [14]:
pred = model.predict(x_test)

In [15]:
df = pd.DataFrame(pred, columns=['Type_1','Type_2','Type_3'])
df['image_name'] = test_id
df = df[['image_name','Type_1','Type_2','Type_3']]

In [16]:
df.head()

Unnamed: 0,image_name,Type_1,Type_2,Type_3
0,477.jpg,0.95306,0.046637,0.000303
1,178.jpg,2.4e-05,0.00054,0.999436
2,335.jpg,8.7e-05,0.003787,0.996126
3,324.jpg,0.001374,0.99773,0.000896
4,98.jpg,7e-05,0.999923,7e-06


In [17]:
sample_submission =\
pd.read_csv('/home/Kaggle_Cervical_Cancer_Screening/submission/sample_submission.csv')

In [18]:
for name in sample_submission['image_name']:
    #print(name)
    #sub[name] = df[df['image_name'] == name][['Type_1','Type_2','Type_3']].values.tolist()[0]
    #print(name)
    Type_1 = df.loc[df['image_name'] == name, 'Type_1']
    Type_2 = df.loc[df['image_name'] == name, 'Type_2']
    Type_3 = df.loc[df['image_name'] == name, 'Type_3']
    
    Type_1_index = Type_1.index[0]
    Type_2_index = Type_2.index[0]
    Type_3_index = Type_3.index[0]
    
    sample_submission.loc[sample_submission['image_name'] == name, 'Type_1'] = Type_1[Type_1_index]
    
    sample_submission.loc[sample_submission['image_name'] == name, 'Type_2'] = Type_2[Type_2_index]
    
    sample_submission.loc[sample_submission['image_name'] == name, 'Type_3'] = Type_3[Type_3_index]

In [19]:
sample_submission.head(10)

Unnamed: 0,image_name,Type_1,Type_2,Type_3
0,0.jpg,0.001208,0.925197,0.073595
1,1.jpg,0.006894,0.191041,0.802065
2,2.jpg,0.94999,0.042796,0.007214
3,3.jpg,0.001868,0.978593,0.019539
4,4.jpg,0.000173,0.021611,0.978216
5,5.jpg,0.000189,0.999741,7e-05
6,6.jpg,0.000128,0.999379,0.000493
7,7.jpg,0.001082,0.942255,0.056663
8,8.jpg,0.000749,0.999154,9.6e-05
9,9.jpg,0.000523,0.998609,0.000868


In [20]:
True_outcome = pd.read_csv('/home/dataset/solution_stg1_release.csv')

In [21]:
True_outcome.head(10)

Unnamed: 0,image_name,Type_1,Type_2,Type_3
0,0.jpg,0,1,0
1,1.jpg,0,1,0
2,2.jpg,1,0,0
3,3.jpg,0,0,1
4,4.jpg,0,0,1
5,5.jpg,0,1,0
6,6.jpg,0,1,0
7,7.jpg,0,1,0
8,8.jpg,0,1,0
9,9.jpg,0,1,0
