### Load the data

In [2]:
import numpy as np
from numpy import expand_dims
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import ImageDataGenerator
from matplotlib import pyplot
import pandas as pd
import os
import imgaug as ia
import imgaug.augmenters as iaa
from PIL import Image

In [3]:
# Directory holding the training images whose file names are listed in train.csv.
path = r"D:\train_images"

# The file's own header row is skipped and replaced by explicit column names.
df = pd.read_csv(
    'train.csv',
    delimiter=',',
    header=None,
    skiprows=1,
    names=['ID', 'image id', 'text', 'label', 'label_num'],
)
display(df.head())

Unnamed: 0,ID,image id,text,label,label_num
0,1,image_2455.jpg,- It is not our fight - Are we not part of thi...,troll,2
1,2,image_3701.jpg,THAT'S THE DIFFERENCE BETWEEN YOU AND ME YOU...,none,0
2,3,image_4166.png,- WHAT DO THE TITANIC AND THE SIXTH SENSE HAVE...,none,0
3,4,448_image_batch_2.png,"""COME ON MAN, YOU KNOW THE THING.\r\nJUST ASK ...",troll,2
4,5,653_image_batch_2.png,"""Those who believe without reason cannot be co...",none,0


In [4]:
# Shuffle the rows once with a fixed seed so that the positional train/test
# split derived from this frame is identical after every kernel restart.
# Permuting row positions (instead of permuting the frame's values as a raw
# array) also keeps each column's original dtype.
SHUFFLE_SEED = 42
shuffled_positions = np.random.RandomState(SHUFFLE_SEED).permutation(len(df))
shuffle_df = df.iloc[shuffled_positions].reset_index(drop=True)

# Rename to identifier-friendly column names and keep only what is used later.
shuffle_df.columns = ['id','image_id','text','label','class']
shuffle_df = shuffle_df[['image_id','text','class']]

In [5]:
# Positional 85/15 cut of the shuffled labels: the first M rows are the
# training labels, everything after them the test labels.
M = int(0.85 * len(df))
y_train = shuffle_df['class'][:M]
y_test = shuffle_df['class'][M:]

In [6]:
def make_dataset(df, train_fraction=0.85, image_size=(224, 224), image_dir=None):
    """Load the images listed in ``df`` and split them into train/test arrays.

    The first ``int(train_fraction * len(df))`` rows (in positional order)
    form the training split and the remaining rows the test split, so ``df``
    should already be shuffled if a random split is wanted.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs an ``image_id`` column (image file names) and a ``class``
        column (labels).
    train_fraction : float, default 0.85
        Share of rows assigned to the training split.
    image_size : tuple of int, default (224, 224)
        ``(width, height)`` every image is resized to.
    image_dir : str, optional
        Directory containing the image files. When omitted, the
        notebook-level ``path`` variable is used.

    Returns
    -------
    x_train, y_train, x_test, y_test
        ``x_*`` are float arrays of shape ``(n, height, width, 3)`` holding
        the pixel values exactly as decoded (no rescaling); ``y_*`` are the
        matching slices of ``df['class']``.
    """
    root = path if image_dir is None else image_dir
    width, height = image_size

    def load_split(names):
        """Decode the given image files into one (len(names), height, width, 3) array."""
        pixels = np.zeros(shape=(len(names), height, width, 3))
        for i, name in enumerate(names):
            # The context manager releases the file handle right away, and
            # convert('RGB') gives grayscale, palette, CMYK and alpha images
            # a uniform 3-channel layout (manual stacking/slicing does not).
            with Image.open(os.path.join(root, name)) as image:
                pixels[i] = np.asarray(image.convert('RGB').resize((width, height)))
            if i % 100 == 0:
                print(i)
        return pixels

    M = int(train_fraction * df.shape[0])
    names = df['image_id'].tolist()

    x_train = load_split(names[:M])
    x_test = load_split(names[M:])
    # Positional slicing keeps the labels aligned with the images even when
    # the frame's index is not 0..n-1.
    y_train = df['class'].iloc[:M]
    y_test = df['class'].iloc[M:]
    return x_train, y_train, x_test, y_test

In [7]:
# Build the image arrays and label slices from the shuffled frame.
x_train, y_train, x_test, y_test = make_dataset(df=shuffle_df)

0
100




200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
0
100
200


In [8]:
# Sanity check: report the shape of every split, images before labels.
for split in (x_train, y_train, x_test, y_test):
    print(split.shape)

(1692, 224, 224, 3)
(1692,)
(299, 224, 224, 3)
(299,)


In [9]:
#with open('x_train.npy', 'wb') as f:
#    np.save(f, x_train)
#with open('x_test.npy', 'wb') as f:
#    np.save(f, x_test)

In [10]:
# Cast both image arrays to single precision.
x_train = np.asarray(x_train).astype(np.float32)
x_test = np.asarray(x_test).astype(np.float32)

In [11]:
from keras.utils import to_categorical

# One-hot encode the labels.
# - The width is fixed to the number of classes the network predicts (its
#   softmax layer has 3 units); left to inference, a split that happens to
#   miss the highest label would come out narrower than the other one.
# - The builtin `int` replaces `np.int`, which was only an alias for it and
#   was removed in NumPy 1.24.
NUM_CLASSES = 3
y_train = np.asarray(to_categorical(y_train, num_classes=NUM_CLASSES)).astype(int)
y_test = np.asarray(to_categorical(y_test, num_classes=NUM_CLASSES)).astype(int)

### Running the model with data augmentation

In [13]:
from keras.models import Model,Sequential
from keras.layers import Input,BatchNormalization, Conv2D, SeparableConv2D, MaxPooling2D, GlobalAveragePooling2D, Activation, Flatten, Dropout, Dense, Concatenate, Add, UpSampling2D, LeakyReLU
from keras.preprocessing.image import ImageDataGenerator
import keras
from keras.layers import Dense, Conv2D, MaxPool2D , Flatten
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.optimizers import Adam

In [14]:
# Random augmentation applied to the training batches: small rotations,
# zooms and shifts plus flips along both axes; pixels uncovered by a
# transform are filled from the nearest edge.
aug = ImageDataGenerator(
    rotation_range=12.5,
    width_shift_range=0.12,
    height_shift_range=0.12,
    zoom_range=0.13,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode="nearest",
)

In [15]:
# VGG16-style convolutional stack, described as (filters, number of 3x3
# "same"-padded ReLU convolutions) per stage. Every stage is closed by a
# 2x2 max-pool with stride 2.
conv_stages = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

model = Sequential()
for stage_index, (stage_filters, conv_count) in enumerate(conv_stages):
    for conv_index in range(conv_count):
        conv_kwargs = dict(filters=stage_filters, kernel_size=(3,3), padding="same", activation="relu")
        if stage_index == 0 and conv_index == 0:
            # Only the very first layer declares the input shape.
            conv_kwargs["input_shape"] = (224,224,3)
        model.add(Conv2D(**conv_kwargs))
    model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))

In [16]:
# Classifier head: flatten the feature maps, pass them through two
# 4096-unit ReLU layers and finish with a 3-way softmax.
model.add(Flatten())
for _ in range(2):
    model.add(Dense(units=4096, activation="relu"))
model.add(Dense(units=3, activation="softmax"))

In [17]:
# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras releases reject.
opt = Adam(learning_rate=0.001)
model.compile(optimizer=opt, loss=keras.losses.categorical_crossentropy, metrics=['accuracy'])

In [18]:
# Keep the best model seen so far, judged by validation accuracy.
# save_freq='epoch' makes the callback run once per epoch, when validation
# metrics exist; an integer save_freq counts *batches*, and at batch end
# `val_accuracy` is not available for the best-only comparison.
checkpoint = ModelCheckpoint("vgg16_1.h5", monitor='val_accuracy', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', save_freq='epoch')
# Stop once validation accuracy has not improved for 20 epochs. The monitored
# key must match the logged metric name: with metrics=['accuracy'] that is
# 'val_accuracy' (the same key the checkpoint uses), not 'val_acc'.
early = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=20, verbose=1, mode='auto')


In [19]:
# Train on augmented batches.
# - `Model.fit` accepts generators directly; `fit_generator` is deprecated.
# - `steps_per_epoch` is left unset so each epoch runs through the whole
#   augmented iterator. The previous value of 2 meant only 2 * 16 = 32 images
#   per epoch; expect each epoch to take correspondingly longer.
# - `validation_steps` is dropped: the validation data is a plain array pair,
#   which is evaluated in full.
hist = model.fit(
    aug.flow(x_train, y_train, batch_size=16),
    validation_data=(x_test, y_test),
    epochs=32,
    callbacks=[checkpoint, early],
)



Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32


### Predicting for the test data

In [40]:
# Directory holding the images whose file names are listed in test.csv.
test_path = r"D:\test_images"

# The file's own header row is skipped; the final column names are supplied
# directly instead of being assigned in a second step.
test_df = pd.read_csv(
    'test.csv',
    delimiter=',',
    header=None,
    skiprows=1,
    names=['id', 'image_id', 'text', 'label', 'class'],
)
#test_df = test_df[['image_id','text','class']]

In [51]:
def make_test_dataset(df, image_size=(224, 224), image_dir=None):
    """Load every image listed in ``df`` into a single array.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs an ``image_id`` column holding the image file names. No label
        column is required.
    image_size : tuple of int, default (224, 224)
        ``(width, height)`` every image is resized to.
    image_dir : str, optional
        Directory containing the image files. When omitted, the
        notebook-level ``test_path`` variable is used.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(df), height, width, 3)`` with the pixel
        values exactly as decoded (no rescaling), in the row order of ``df``.
    """
    root = test_path if image_dir is None else image_dir
    width, height = image_size
    names = df['image_id'].tolist()

    x_test = np.zeros(shape=(len(names), height, width, 3))
    # Iterate over the frame's actual rows (not a fixed count) and store each
    # image in the array that is returned.
    for i, name in enumerate(names):
        # The context manager releases the file handle right away, and
        # convert('RGB') gives grayscale, palette, CMYK and alpha images a
        # uniform 3-channel layout.
        with Image.open(os.path.join(root, name)) as image:
            x_test[i] = np.asarray(image.convert('RGB').resize((width, height)))
        if i % 100 == 0:
            print(i)
    return x_test

In [52]:
# Decode all test images into one array for prediction.
test_data = make_test_dataset(df=test_df)

0
100
200
300
400
500


In [53]:
# Class probabilities for every test image (one row per image).
probabilities = model.predict(x=test_data)

KeyboardInterrupt: 

In [None]:
# Show the raw output of model.predict for the test images.
print(probabilities)

In [None]:
# Build the submission frame in one step: the predicted label of each image
# is the index of its highest probability (argmax returns the first maximum,
# the same choice as taking the first position equal to the row maximum).
# Creating the frame once replaces the row-by-row DataFrame.append loop;
# `append` was removed in pandas 2.0 and re-copied the frame on every call.
predicted_labels = np.argmax(probabilities, axis=1)
final_df = pd.DataFrame({
    'ID': test_df['id'].to_numpy()[:len(predicted_labels)],
    'label_num': predicted_labels,
})

In [None]:
# Render the ID / label_num prediction table.
display(final_df)