In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import os, shutil
import zipfile

In [None]:
labels_csv = pd.read_csv('train.csv')

In [None]:
labels_csv

In [None]:
df = labels_csv['Target']

In [None]:
df = df.str.split(' ')

In [None]:
# Multi-hot encode every sample: one 28-slot vector per row, with a 1 at each
# class index listed in that row's split Target tokens.
labels = []
for target_tokens in df:
    encoded = np.zeros(28)
    for class_id in map(int, target_tokens):
        encoded[class_id] = 1
    labels.append(encoded)

In [None]:
# Rebind labels_csv to the Id column followed by the 28 indicator columns
# (pd.DataFrame(labels) names them 0..27). The 'Target' column is not kept, so
# the cells that read labels_csv['Target'] need a fresh read_csv before re-running.
labels_csv = pd.concat([labels_csv['Id'], pd.DataFrame(labels)], axis=1)

In [None]:
labels_csv

In [None]:
# Start from a clean extraction target. Only remove the directory when it is
# actually there, so the cell also works on a fresh checkout; real deletion
# problems (e.g. permissions) still raise.
if os.path.isdir('train'):
    shutil.rmtree('train')

In [None]:
# Unpack the training archive into ./train.
with zipfile.ZipFile('train.zip', 'r') as archive:
    archive.extractall('train')

In [None]:
os.chdir('train')

In [None]:
os.getcwd()

In [None]:
colors = ['red', 'green', 'blue', 'yellow']

In [None]:
# Give every sample its own folder holding its four colour-channel PNGs
# (<Id>_<colour>.png). Runs relative to the current working directory.
# The cell is safe to re-run: existing folders are reused and files that were
# already moved are skipped, while a channel file that is missing from both
# places still raises from shutil.move.
for sample_id in labels_csv['Id']:
    os.makedirs(sample_id, exist_ok=True)
    for color in colors:
        img = str(sample_id) + '_' + str(color) + '.png'
        if os.path.exists(os.path.join(sample_id, img)):
            continue  # moved by an earlier (partial) run
        shutil.move(img, sample_id)

In [None]:
os.chdir('..')

In [None]:
x = labels_csv['Id']

In [None]:
y = pd.DataFrame(labels_csv.iloc[:, 1:])

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 12.5 % of the samples for validation. The split is seeded (same seed
# value the directory iterators use) so that every run produces the same
# partition; this notebook later resumes from a saved checkpoint, and an
# unseeded split would let samples trained on in one run show up as validation
# samples in the next. NOTE(review): confirm the checkpoint was produced with
# this same split.
x_train, x_valid, y_train, y_valid = train_test_split(
    x, y, test_size=0.125, random_state=42
)

In [None]:
os.chdir('train')

In [None]:
# Create the two split directories inside the current working directory.
# exist_ok keeps the cell re-runnable after a partial run.
for split_dir in ('train', 'valid'):
    os.makedirs(split_dir, exist_ok=True)

In [None]:
x_train

In [None]:
y_train

In [None]:
x_valid

In [None]:
y_valid

In [None]:
# Move each sample folder into its split directory. Folders that are no longer
# at the top level (already moved) are skipped. A failed move is reported and
# the loop continues, but only filesystem errors are caught, so Ctrl-C and
# programming errors are no longer silently swallowed.
for split_ids, split_dir in ((x_train, 'train'), (x_valid, 'valid')):
    for sample_id in split_ids:
        if not os.path.isdir(sample_id):
            continue
        try:
            shutil.move(sample_id, split_dir)
        except (shutil.Error, OSError) as err:
            print('Could not move {} into {}: {}'.format(sample_id, split_dir, err))

In [None]:
os.chdir('..')

In [None]:
os.chdir('train')

In [None]:
os.getcwd()

In [None]:
# Side length in pixels: used as the iterators' target_size and as the height
# and width of the model input.
input_size = 352
# Batch size of the directory iterators. The batch generators further down
# reshape each such batch to (4, input_size, input_size), i.e. they treat one
# iterator batch as the four channel images of a single sample folder.
img_group = 4

In [None]:
import keras

In [None]:
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# One independent ImageDataGenerator per data split; the only preprocessing
# configured is scaling pixel values by 1/255.
train_datagen, valid_datagen, test_datagen = (
    ImageDataGenerator(rescale=1./255) for _ in range(3)
)

In [None]:
# Iterate the sample folders under train/ in a fixed order. Downstream code
# treats every batch of `img_group` grayscale images as the channel set of one
# sample, so shuffling stays disabled.
train_generator = train_datagen.flow_from_directory(
    "train/",
    batch_size=img_group,
    shuffle=False,
    seed=42,
    color_mode="grayscale",
    target_size=(input_size, input_size),
    class_mode="categorical",
)

In [None]:
# Folder (sample Id) names known to the training iterator, in the key order of
# its class_indices mapping; used later to pull the label rows in that order.
train_labels = list(train_generator.class_indices)

In [None]:
# Iterate the sample folders under valid/ in a fixed order. Downstream code
# treats every batch of `img_group` grayscale images as the channel set of one
# sample, so shuffling stays disabled.
valid_generator = valid_datagen.flow_from_directory(
    "valid/",
    batch_size=img_group,
    shuffle=False,
    seed=42,
    color_mode="grayscale",
    target_size=(input_size, input_size),
    class_mode="categorical",
)

In [None]:
# Folder (sample Id) names known to the validation iterator, in the key order
# of its class_indices mapping; used later to pull the label rows in that order.
valid_labels = list(valid_generator.class_indices)

In [None]:
os.getcwd()

In [None]:
from keras.layers import Conv2D, LeakyReLU, Concatenate, ReLU, Input, Dense, Dropout, BatchNormalization
from keras.layers import GlobalAveragePooling2D, Flatten, MaxPooling2D, Activation, ZeroPadding2D
from keras.optimizers import Adam, SGD
from keras.models import Model
from keras import metrics
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.applications.inception_v3 import InceptionV3
from keras import backend as K
import tensorflow as tf

In [None]:
model_base = InceptionV3(include_top = False, weights='imagenet', input_shape=(input_size, input_size, 3))

In [None]:
model_base.summary()

In [None]:
from imgaug import augmenters as iaa
def augment(image):
    """Run ``image`` through a randomly chosen imgaug transform.

    A new pipeline is assembled on every call. Its single step is a ``OneOf``
    that picks between ``Fliplr(0.5)``, ``Crop(percent=(0, 0.1))`` and an
    ``Affine`` with per-axis scale in (0.8, 1.2), rotate in (-180, 180) and
    shear in (-2, 2).

    Returns whatever ``augment_image`` produces for ``image``.
    """
    flip = iaa.Fliplr(0.5)
    crop = iaa.Crop(percent=(0, 0.1))
    affine = iaa.Affine(
        scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
        rotate=(-180, 180),
        shear=(-2, 2),
    )
    pipeline = iaa.Sequential([iaa.OneOf([flip, crop, affine])], random_order=True)
    return pipeline.augment_image(image)

In [None]:
def ProteinDataGenerator(generator, label, batch_size, augment_tf=None):
    """Yield endless ``(images, targets)`` batches for training/validation.

    Parameters
    ----------
    generator : object with a ``next()`` method
        Each ``next()`` call must return a tuple whose first element can be
        reshaped to ``(4, input_size, input_size)`` -- here, one iterator batch
        of four grayscale images belonging to a single sample.
    label : pandas.DataFrame
        One row per sample, in the same order the generator yields samples.
        The first column is dropped; the remaining columns are the targets.
    batch_size : int
        Number of samples per yielded batch.
    augment_tf : truthy or None
        When truthy, every assembled image is passed through ``augment``.

    Yields
    ------
    (numpy.ndarray, pandas.DataFrame)
        ``float16`` images of shape ``(batch_size, input_size, input_size, 3)``
        and the matching target rows. The label cursor wraps around the end of
        ``label`` as often as needed, so every batch has exactly ``batch_size``
        target rows even when ``batch_size`` exceeds ``len(label)``.

    Raises
    ------
    ValueError
        If ``label`` has no rows (raised on the first ``next()``).
    """
    n_labels = len(label)
    if n_labels == 0:
        raise ValueError("label must contain at least one row")

    index = 0
    while True:
        batch = []
        for _ in range(batch_size):
            planes = generator.next()[0].reshape(4, input_size, input_size)

            # Planes 2, 1, 0 become the three image channels; plane 3 is not
            # used here. Presumably the files arrive in alphabetical order
            # (blue, green, red, yellow), making this R, G, B -- TODO confirm.
            image = np.dstack([planes[2], planes[1], planes[0]])

            if augment_tf:
                image = augment(image)

            batch.append(image)

        batch = np.asarray(batch, dtype=np.float16)

        # Positional row numbers for this batch, wrapping past the last row.
        rows = np.arange(index, index + batch_size) % n_labels
        y = label.iloc[rows, 1:]
        index = (index + batch_size) % n_labels

        yield batch, y

In [None]:
def f1(y_true, y_pred):
    """Mean per-column F1 score of rounded predictions, usable as a Keras metric.

    Predictions are rounded to 0/1; true positives, false positives and false
    negatives are summed along axis 0; precision, recall and F1 are then
    computed element-wise with ``K.epsilon()`` added to every denominator.
    Any NaN F1 entry is replaced by 0 before averaging.

    Assumes ``y_true`` and ``y_pred`` are (batch, classes) tensors, so that
    axis 0 is the batch axis -- TODO confirm against the caller.

    Returns a scalar tensor: the mean of the per-column F1 values.
    """
    y_pred = K.round(y_pred)

    tp = K.sum(K.cast(y_true * y_pred, 'float'), axis=0)
    fp = K.sum(K.cast((1 - y_true) * y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true * (1 - y_pred), 'float'), axis=0)

    precision = tp / (tp + fp + K.epsilon())
    recall = tp / (tp + fn + K.epsilon())
    score = 2 * precision * recall / (precision + recall + K.epsilon())

    # tf.is_nan exists at top level only in TensorFlow 1.x; later releases
    # expose it as tf.math.is_nan. Use whichever this installation provides.
    is_nan = getattr(tf, 'is_nan', None) or tf.math.is_nan
    score = tf.where(is_nan(score), tf.zeros_like(score), score)
    return K.mean(score)

In [None]:
# Label rows for the training samples, reordered to follow the folder order
# reported by the iterator (train_labels). Done as a single indexed lookup
# instead of appending one row at a time: that was quadratic in the number of
# samples and depends on DataFrame.append, which pandas 2.x no longer has.
# .loc raises KeyError if a folder name has no row in labels_csv.
train_label = (
    labels_csv.set_index('Id', drop=False)  # keep 'Id' as the first column
    .loc[train_labels]
    .reset_index(drop=True)
)

In [None]:
# Label rows for the validation samples, reordered to follow the folder order
# reported by the iterator (valid_labels). Done as a single indexed lookup
# instead of appending one row at a time: that was quadratic in the number of
# samples and depends on DataFrame.append, which pandas 2.x no longer has.
# .loc raises KeyError if a folder name has no row in labels_csv.
valid_label = (
    labels_csv.set_index('Id', drop=False)  # keep 'Id' as the first column
    .loc[valid_labels]
    .reset_index(drop=True)
)

In [None]:
train_label

In [None]:
valid_label

In [None]:
# Dropout rate applied in front of each dense layer of the classifier head.
drop = 0.3
# Number of samples per batch requested from ProteinDataGenerator during fitting.
batch_size = 16

In [None]:
for layer in model_base.layers:
    layer.trainable = True

In [None]:
# Classifier: input normalisation -> pretrained backbone -> pooled, flattened
# features -> two Dropout/BatchNorm/Dense/LeakyReLU stages -> 28 sigmoid outputs.
init = Input((input_size, input_size, 3))

x = BatchNormalization()(init)
x = model_base(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(3, 3))(x)
x = Flatten()(x)

# The two hidden stages differ only in width. Layers are still created in the
# same order as before, which keeps the layer sequence of the model unchanged.
for units in (1024, 512):
    x = Dropout(drop)(x)
    x = BatchNormalization()(x)
    x = Dense(units)(x)
    x = LeakyReLU(0.1)(x)

x = Dropout(drop)(x)
x = Dense(28)(x)
x = Activation('sigmoid')(x)

model = Model(init, x)

In [None]:
# Resume from the checkpoint stored one level above the current working
# directory. The file is addressed by relative path instead of chdir-ing up
# and back, so a failed load can no longer leave the notebook in the parent
# directory for all following cells.
model.load_weights(os.path.join(os.pardir, "InceptionV3_352x352_RGB_400.h5"))
print("Loaded model from disk")

In [None]:
model.compile(loss='binary_crossentropy', optimizer=Adam(lr=0.0001), metrics=[f1, 'acc', 'categorical_accuracy'])

In [None]:
model.summary()

In [None]:
# Endless batch streams: augmented for training, untouched for validation.
train_batches = ProteinDataGenerator(train_generator, train_label, batch_size, augment_tf=True)
valid_batches = ProteinDataGenerator(valid_generator, valid_label, batch_size, augment_tf=False)

# 85 training batches and 60 validation batches per epoch, for 100 epochs.
model.fit_generator(
    train_batches,
    validation_data=valid_batches,
    steps_per_epoch=85,
    validation_steps=60,
    epochs=100,
)

In [None]:
os.chdir('..')

In [None]:
model.save_weights("InceptionV3_352x352_RGB_500.h5")
print("Saved model to disk")

In [None]:
os.getcwd()

In [None]:
#shutil.rmtree('test')

In [None]:
#with zipfile.ZipFile('test.zip', 'r') as zip_ref:
#    zip_ref.extractall('test')

In [None]:
submit = pd.read_csv('sample_submission.csv')

In [None]:
#os.chdir('test')
#colors = ['red', 'green', 'blue', 'yellow']
#for i in submit['Id']:
#    os.mkdir(i)
#    for j in colors:
#        img = str(i) + '_' + str(j) +'.png'
#        path = i
#        shutil.move(img, path)
#os.chdir('..')

In [None]:
# Iterate the sample folders under test/ in a fixed order. Downstream code
# treats every batch of `img_group` grayscale images as the channel set of one
# sample, so shuffling stays disabled.
test_generator = test_datagen.flow_from_directory(
    "test/",
    batch_size=img_group,
    shuffle=False,
    seed=42,
    color_mode="grayscale",
    target_size=(input_size, input_size),
    class_mode="categorical",
)

In [None]:
prediction = []

In [None]:
def TestProteinGenerator(generator, batch_size):
    """Assemble one batch of 3-channel test images from ``generator``.

    For each of the ``batch_size`` samples, one ``generator.next()`` result is
    reshaped to ``(4, input_size, input_size)`` and combined into an
    ``(input_size, input_size, 3)`` image:

    * channel 0: two thirds of plane 2 plus one third of plane 3
    * channel 1: plane 1
    * channel 2: two thirds of plane 0 plus one third of plane 3

    NOTE(review): the training generator stacks planes 2, 1, 0 without mixing
    in plane 3 -- confirm that this difference is intended.

    Returns the images stacked into a single numpy array.
    """
    def _next_image():
        planes = generator.next()[0].reshape(4, input_size, input_size)
        fourth_share = planes[3] / 3
        return np.dstack([
            2 * planes[2] / 3 + fourth_share,
            planes[1],
            2 * planes[0] / 3 + fourth_share,
        ])

    images = [_next_image() for _ in range(batch_size)]
    return np.asarray(images)

In [None]:
# One forward pass per submission row: pull the next single-sample batch from
# the test iterator and store the model output for it.
for _ in tqdm(submit['Id']):
    prediction.append(model.predict(TestProteinGenerator(test_generator, 1)))

In [None]:
pred = np.vstack(prediction)

In [None]:
prediction = []

In [None]:
# Turn every row of scores into the space-separated list of class indices
# whose score reaches the 0.2 threshold (an empty string when none does).
# Iterates over the rows of `pred` itself rather than a fixed row count, and
# rebuilds the list from scratch so the cell gives the same result on re-run.
prediction = [
    ' '.join(str(class_id) for class_id in np.flatnonzero(scores >= 0.2))
    for scores in pred
]

In [None]:
# prediction[k] was produced from the k-th sample folder of test_generator, so
# pair the predictions with the iterator's folder names and look each
# submission Id up by name. This mirrors how the training labels are aligned
# via class_indices and does not rely on the CSV row order matching the
# iterator's folder order.
test_ids = list(test_generator.class_indices)
submit['Predicted'] = submit['Id'].map(dict(zip(test_ids, prediction)))
if submit['Predicted'].isna().any():
    raise ValueError('Some submission Ids have no matching prediction')

In [None]:
submit.to_csv('submit_InceptionV3.csv', index=False)