In [1]:
import numpy as np 
import pandas as pd
import os, zipfile

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image

from keras.preprocessing.image import load_img, img_to_array
from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


<h1>Preprocessing</h1>

In [2]:
# read in data
train_df = pd.read_csv('./train_onelabel.csv')
train_images_paths = ['./train_images/' + p for p in train_df.image]
train_labels = train_df['class'].values

In [3]:
# create image matrix
train_images = (load_img(p, target_size=(64, 64)) for p in train_images_paths)
train_images = [img_to_array(img)[:, :, [0]] for img in train_images]
train_images = np.array(train_images)
train_images.shape

(24204, 64, 64, 1)

In [4]:
# preprocessing of labels
classes = len(np.unique(train_labels))
train_labels = to_categorical(train_labels, classes)

In [5]:
# split data into test and train set
x_train, x_val, y_train,  y_val = train_test_split(train_images, train_labels, test_size=0.2)

In [6]:
# normalize image data

x_train = x_train.astype('float32')
x_val = x_val.astype('float32')
x_train /= 255
x_val /= 255

<h1>Model</h1>

In [9]:
# Importing the Keras libraries and packages
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout

In [18]:
# Initialising the CNN
classifier = Sequential()

In [19]:
# Step 1 - Convolution ---TODO: see input specification
classifier.add(Conv2D(32, (3, 3), input_shape = (64, 64, 1),padding='same', activation = 'relu'))

In [20]:
# Step 2 - Pooling
classifier.add(MaxPooling2D(pool_size = (2, 2)))

In [21]:
# Adding a second convolutional layer+pooling layer
classifier.add(Conv2D(32, (3, 3), activation = 'relu', padding='same'))
classifier.add(MaxPooling2D(pool_size = (2, 2)))

In [22]:
# Step 3 - Flattening
classifier.add(Flatten())

In [23]:
#Step 4 - Create a fully connected neural network
'''its good practise to pick a number of power of two-experimenting output_dim
128 hidden nodes in hidden layers-by experimenting'''
 
#output_dim = number of nodes in the hidden layer
classifier.add(Dense(units= 128, activation = 'relu'))

#Dropout (p was used in the old API, changed it to the new standard)
classifier.add(Dropout(rate = 0.2))

#second hidden layer
classifier.add(Dense(units = 128, activation = 'relu'))


#output layer
classifier.add(Dense(units = 121, activation = 'softmax')) #We have 121 categories of plankton!!

In [24]:
# Compiling the CNN
classifier.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

In [25]:
# fit model on training data
classifier.fit(x_train, y_train, batch_size=80, nb_epoch=25, verbose=1)



Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x21b28903a20>

In [26]:
score = classifier.evaluate(x_val, y_val, verbose=0)

In [27]:
score

[1.6934762700139381, 0.53232803144772123]

In [17]:
score

[1.6067611749840531, 0.55009295603776365]

<h1>Save Model</h1>

In [18]:
from keras.models import model_from_json

# serialize model to JSON
best_model_classification_json = classifier.to_json()
with open("best_model_4.json", "w") as json_file:
    json_file.write(best_model_classification_json)
# serialize weights to HDF5
classifier.save_weights("model_best.h5")
print("Saved model to disk")

Saved model to disk


<h1>Load Model</h1>

In [12]:
from keras.models import model_from_json

# load json and create model
json_file = open('best_model_3.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
classifier = model_from_json(loaded_model_json)
# load weights into new model
classifier.load_weights("model_best.h5")
print("Loaded model from disk")

Loaded model from disk


<h1>Prediction - First Approach</h1>

In [28]:
# read in data
test_df = pd.read_csv('./sample.csv')
test_images_paths = ['./test_images/' + p for p in test_df['image']]
test_images_names = [p for p in test_df['image']]

In [29]:
# create image matrix
test_images = (load_img(p, target_size=(64, 64)) for p in test_images_paths)
test_images = [img_to_array(img)[:, :, [0]] for img in test_images]
test_images = np.array(test_images)
test_images.shape

(6132, 64, 64, 1)

In [30]:
# normalize image data
test_images = test_images.astype('float32')
test_images /= 255

In [31]:
result = classifier.predict(test_images)

In [32]:
labels = []

for count, i in enumerate(result):
    labels.append(np.argmax(result[count]))

In [33]:
from collections import OrderedDict

#Create Final csv for the competition
csv_list =  pd.DataFrame( OrderedDict( { 'image': pd.Series(test_images_names), 'class': pd.Series(labels) } ) )
csv_list.to_csv('sample3.csv',index=False)

In [34]:
results1 = pd.read_csv('./sample1.csv')

In [35]:
results1

Unnamed: 0,image,class
0,90715.jpg,17
1,159631.jpg,101
2,4294.jpg,34
3,56548.jpg,61
4,120979.jpg,40
5,139460.jpg,24
6,78510.jpg,89
7,102841.jpg,34
8,84208.jpg,17
9,9591.jpg,53


<h1>Prediction - Second Approach </h1>

In [59]:
import os
import numpy as np
from keras.preprocessing import image
from collections import OrderedDict

def find_category(path):
    
    test_image = image.load_img(path, target_size = (32, 32),grayscale = True)
    test_image = image.img_to_array(test_image)
  
    test_image = np.expand_dims(test_image, axis = 0)
    result = classifier.predict(test_image)
    
    #training_set.class_indices
    
    
    category = int(np.where(result[0]!=0)[0][0])
    return category
    

images = []
categories = []
path = 'test_images/'

for count,i in enumerate(os.listdir(path)):
    
    images.append(str(i)) #save image name
    
    image_path = path+str(i)

    category = find_category(image_path)
    
    categories.append(category) #saves the category of the image

In [60]:
images = []
categories = []
path = 'test_images/'

for count,i in enumerate(os.listdir(path)):
    
    images.append(str(i)) #save image name
    
    image_path = path+str(i)

    category = find_category(image_path)
    
    categories.append(category) #saves the category of the image

In [61]:
#Create Final csv for the competition
csv_list =  pd.DataFrame( OrderedDict( { 'image': pd.Series(images), 'class': pd.Series(categories) } ) )
csv_list.to_csv('sample2.csv',index=False)

In [65]:
results2 = pd.read_csv('./sample2.csv')

In [72]:
comparison = results1.merge(results2, on='image')

In [81]:
print('Number of different results: ' + str(len(comparison[comparison['class_x'] != comparison['class_y']])))ka

Number of different results: 5738
