In [12]:
import os
import cv2
import glob
from glob import glob
import numpy as np
import pandas as pd
import keras
import tensorflow as tf
import matplotlib.pyplot
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from multiprocessing import Pool, cpu_count
from PIL import ImageFilter, ImageStat, Image, ImageDraw
from IPython.display import display
from IPython.display import SVG, display
from collections import defaultdict

In [3]:
basepath = '/home/dataset/train/'

all_cervix_images = []

for path in sorted(glob(basepath + "*")):
    #print(path)
    cervix_type = path.split("/")[-1]
    #print(cervix_type)
    cervix_images = sorted(glob(basepath + cervix_type + "/*"))
    #print(cervix_images)
    all_cervix_images = all_cervix_images + cervix_images

all_cervix_images = pd.DataFrame({'imagepath': all_cervix_images})
all_cervix_images['filetype'] = all_cervix_images.apply(lambda row: row.imagepath.split(".")[-1], axis=1)
all_cervix_images['type'] = all_cervix_images.apply(lambda row: row.imagepath.split("/")[-2], axis=1)
all_cervix_images.head()

Unnamed: 0,imagepath,filetype,type
0,/home/dataset/train/Type_1/0.jpg,jpg,Type_1
1,/home/dataset/train/Type_1/10.jpg,jpg,Type_1
2,/home/dataset/train/Type_1/1013.jpg,jpg,Type_1
3,/home/dataset/train/Type_1/1014.jpg,jpg,Type_1
4,/home/dataset/train/Type_1/1019.jpg,jpg,Type_1


In [14]:
def im_multi(path):
    try:
        #print(path)
        im_stats_im_ = cv2.imread(path)
        #print(im_stats_im_.size)
        return [path, {'size': im_stats_im_.size}]
    except:
        print("Error path:%s"%(path))
        return [path, {'size': [0,0]}]
    
def im_stats(im_stats_df):
    im_stats_d = {}
    p = Pool(cpu_count())
    ret = p.map(im_multi, im_stats_df['path'])
    for i in range(len(ret)):
        #print(ret[i][0])
        im_stats_d[ret[i][0]] = ret[i][1]
    im_stats_df['size'] = im_stats_df['path'].map(lambda x: ' '.join([str(s) for s in str(im_stats_d[x]['size'])]))
    return im_stats_df

def get_im_cv2(args):
    img = cv2.imread(args[0])
    resized = cv2.resize(img, (args[1], args[1]), cv2.INTER_LINEAR)
    return [args[0], resized]

def image_features(paths, dim):
    imf_d = {}
    p = Pool(cpu_count())
    dims = [dim for i in range(len(paths.tolist()))]
    params = list(zip(paths.tolist(), dims))
    ret = p.map(get_im_cv2, params)
    for i in range(len(ret)):
        imf_d[ret[i][0]] = ret[i][1]
    ret = []
    fdata = [imf_d[f] for f in paths]
    fdata = np.array(fdata, dtype = np.uint8)
    #fdata = fdata.transpose((0, 3, 1, 2)) # channels 放到前面
    #fdata = fdata.astype('float32') 
    #fdata = fdata / 255
    return fdata

def rotation(image, angle):
    assert image.shape[-1] == 3
    num_rows, num_cols = image.shape[:2]
    rotation_matrix = cv2.getRotationMatrix2D((num_cols/2, num_rows/2), angle, 1)
    img_rotation = cv2.warpAffine(image, rotation_matrix, (num_cols, num_rows))
    return img_rotation


In [6]:
import glob
train = glob.glob('/home/dataset/train/**/*.jpg')# + glob.glob('/data/kaggle/additional/**/*.jpg')
train =\
pd.DataFrame([[p.split('/')[4],p.split('/')[5],p] for p in train], columns = ['type','image','path'])

test = glob.glob('/home/dataset/test/*.jpg')
test = pd.DataFrame([[p.split('/')[4],p] for p in test], columns = ['image','path'])

test_id = test.image.values

In [7]:
train_ = im_stats(train)
test_ = im_stats(test)

In [35]:
dim = 32
train_1 = train_[train_['size'] != '0 0'].reset_index(drop=True) #remove bad images
test_1 = test_[test_['size'] != '0 0'].reset_index(drop=True)    #remove bad images

train_data = image_features(train_1['path'], dim)
test_data = image_features(test_1['path'], dim)

train_data = train_data[:,:,:,::-1]
test_data = test_data[:,:,:,::-1]

le = LabelEncoder()
train_target = le.fit_transform(train['type'].values)

In [36]:
rot_num = 4
rot_imgs = defaultdict(list)
for i in range(train_data.shape[0]):
    #print(i)
    #showX(train_data[i,:,:,:], dim)
    for rot in range(int(360/rot_num),360,int(360/rot_num)):
        #print(rot)
        rot_img = rotation(train_data[i,:,:,:], rot)
        rot_imgs[rot].append(rot_img)
        #showX(rot_img, dim)
    #break

In [37]:
train_Y = np.concatenate([train_target for i in range(4)])

In [38]:
train_X = np.concatenate([train_data,np.array(rot_imgs[90]),np.array(rot_imgs[180]),np.array(rot_imgs[270])])

In [39]:
train_data.shape, train_X.shape, train_Y.shape

((1481, 32, 32, 3), (5924, 32, 32, 3), (5924,))

In [40]:
from keras.layers import Dense, Activation, Conv2D, MaxPool2D, Reshape
from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Flatten, Activation
from keras.layers.convolutional import Convolution2D, ZeroPadding2D, MaxPooling2D
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
from sklearn.model_selection import train_test_split

batch_size = 32
num_classes = 3
epochs = 10
data_augmentation = True

In [41]:
x_train, x_val_train, y_train, y_val_train =\
train_test_split(train_X, train_Y, test_size=0.1, random_state=17)

y_train = keras.utils.to_categorical(y_train, num_classes)
y_val_train = keras.utils.to_categorical(y_val_train, num_classes)

#train_data = train_data.reshape(-1,3,32*32).shape

x_train.shape,x_val_train.shape,y_train.shape, y_val_train.shape

((5331, 32, 32, 3), (593, 32, 32, 3), (5331, 3), (593, 3))

In [42]:
model = Sequential()
model.add(Reshape((32,32,3), input_shape=x_train.shape[1:]))
model.add(Conv2D(32, (3, 3), padding='same',data_format = "channels_last"))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size = (2, 2), data_format = "channels_last"))

model.add(Conv2D(64, (3, 3), padding='same',  data_format = "channels_last"))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size = (2, 2), data_format = "channels_last"))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

In [43]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_1 (Reshape)          (None, 32, 32, 3)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 32, 32)        896       
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 16, 16, 64)        18496     
_________________________________________________________________
activation_2 (Activation)    (None, 16, 16, 64)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 8, 8, 64)          0         
__________

In [44]:
opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

x_train = x_train.astype('float32')
x_val_train = x_val_train.astype('float32')

x_train /= 255
x_val_train /= 255

x_test = test_data.astype('float32')
x_test /= 255

In [45]:
if not data_augmentation:
    print('Not using data augmentation.')
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True)
else:
    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        featurewise_center=False,              # set input mean to 0 over the dataset
        samplewise_center=False,               # set each sample mean to 0
        featurewise_std_normalization=False,   # divide inputs by std of the dataset
        samplewise_std_normalization=False,    # divide each input by its std
        zca_whitening=False,                   # apply ZCA whitening
        rotation_range = 180,                  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,                 # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,                # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,                  # randomly flip images
        vertical_flip=False,
        data_format = "channels_last")         # randomly flip images

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(datagen.flow(x_train, y_train,
                                     batch_size = batch_size),
                        steps_per_epoch = x_train.shape[0],# // batch_size,
                        epochs = epochs,
                        validation_data=(x_val_train, y_val_train))

Using real-time data augmentation.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [46]:
pred = model.predict_proba(x_test)

 32/512 [>.............................] - ETA: 0s

In [47]:
df = pd.DataFrame(pred, columns=['Type_1','Type_2','Type_3'])

In [48]:
df['image_name'] = test_id

NameError: name 'test_id' is not defined

In [None]:
df = df[['image_name','Type_1','Type_2','Type_3']]

In [None]:
df.to_csv('submission_rot90.csv', index=False)