In [1]:
from sklearn.datasets import load_files
from keras.utils import np_utils
import numpy as np
import pandas as pd
from glob import glob

Using TensorFlow backend.


In [2]:
#function to load the dataset
def load_dataset(path, num_classes=8):
    """Collect image file paths and one-hot encoded labels from a folder tree.

    Parameters
    ----------
    path : str
        Root directory passed to scikit-learn's ``load_files``.
    num_classes : int, optional
        Number of columns in the one-hot encoding. Defaults to 8, the
        value that was previously hard-coded.

    Returns
    -------
    fish_files : numpy.ndarray
        Array of the file paths reported by ``load_files``.
    fish_target : numpy.ndarray
        One-hot encoded targets produced by ``np_utils.to_categorical``.
    """
    # Only 'filenames' and 'target' are used below, so skip reading the raw
    # bytes of every image into memory (load_content defaults to True).
    data = load_files(path, load_content=False)
    fish_files = np.array(data['filenames'])
    fish_target = np_utils.to_categorical(np.array(data['target']), num_classes)
    return fish_files, fish_target

In [3]:
# load the training file paths together with their one-hot targets
train_files, train_targets = load_dataset('data/train')

# load the test file paths; the targets returned for them are discarded
test_files, _ = load_dataset('data/test')

# print the number of samples in the training and test sets
print ("There are %d images in training dataset"%len(train_files))
# this line reports the test files, so the message must say "test set"
print ("There are %d images in the test set"%len(test_files))

There are 3777 images in training dataset
There are 13153 images in the training set


In [4]:
from keras.preprocessing import image
from tqdm import tqdm

#converting image to tensor
def path_to_tensor(img_path, target_size=(224, 224)):
    """Load one image file and return it as a 4-D tensor with a batch axis.

    Parameters
    ----------
    img_path : str
        Path of the image file to load.
    target_size : tuple of int, optional
        ``(height, width)`` the image is resized to while loading.
        Defaults to ``(224, 224)``, the size that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        The image array with a leading axis of length 1, e.g. shape
        ``(1, 224, 224, 3)`` for the default size.
    """
    # load the image, resized to target_size
    img = image.load_img(img_path, target_size=target_size)
    # convert the image to a 3-D tensor (height, width, channels)
    x = image.img_to_array(img)
    # add the leading batch axis so tensors can be stacked later
    return np.expand_dims(x, axis=0)

def paths_to_tensor(img_paths):
    """Convert every path in ``img_paths`` with ``path_to_tensor`` and stack the results.

    A tqdm progress bar is shown while the paths are processed. The
    per-image tensors are stacked along the first axis with ``np.vstack``.
    """
    stacked_inputs = []
    for single_path in tqdm(img_paths):
        stacked_inputs.append(path_to_tensor(single_path))
    return np.vstack(stacked_inputs)

In [6]:
from PIL import ImageFile

# enable Pillow's LOAD_TRUNCATED_IMAGES switch before any image is decoded
ImageFile.LOAD_TRUNCATED_IMAGES = True

# build the test tensor as float32 and divide the pixel values by 255
test_tensors = paths_to_tensor(test_files).astype(np.float32) / 255.0

100%|███████████████████████████████████████████████████████████████████████████| 13153/13153 [02:09<00:00, 101.20it/s]


In [7]:
# build the training tensor as float32 and divide the pixel values by 255
train_tensors = paths_to_tensor(train_files).astype(np.float32) / 255.0

100%|██████████████████████████████████████████████████████████████████████████████| 3777/3777 [01:37<00:00, 42.39it/s]


In [8]:
# dimensions of the training tensor
print(train_tensors.shape)

(3777, 224, 224, 3)


# Model-5, Refining Model-4

In [9]:
from keras.layers import Dense, Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dropout
from keras.models import Sequential

model5 = Sequential()

# Model architecture: three Conv -> MaxPool -> Dropout stages.
# Each entry is (number of conv filters, dropout probability of the stage).
conv_stages = [(32, 0.5), (64, 0.4), (128, 0.2)]

for stage_index, (n_filters, drop_rate) in enumerate(conv_stages):
    conv_kwargs = dict(filters=n_filters, kernel_size=2, strides=1, activation='relu')
    if stage_index == 0:
        # only the first layer of a Sequential model declares the input shape
        conv_kwargs['input_shape'] = (224, 224, 3)
    # convolution layer
    model5.add(Conv2D(**conv_kwargs))
    # max pooling layer to reduce the dimensionality
    model5.add(MaxPooling2D(pool_size=2, strides=2))
    # dropout layer, turning off each node with probability drop_rate
    model5.add(Dropout(drop_rate))

# global average pooling over the spatial axes of the last feature maps
model5.add(GlobalAveragePooling2D())
# fully connected softmax layer with 8 nodes (number of fish classes)
model5.add(Dense(8, activation='softmax'))
# print the summary of the architecture
model5.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 223, 223, 32)      416       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 111, 111, 32)      0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 111, 111, 32)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 110, 110, 64)      8256      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 55, 55, 64)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 55, 55, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 54, 54, 128)       32896     
__________

### Compiling the Model-5

In [10]:
# Adam optimizer, categorical cross-entropy loss, accuracy reported as a metric
model5.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

### Training Model-5

In [11]:
from keras.callbacks import ModelCheckpoint, EarlyStopping

import os

#number of epochs
epochs = 10
batch_size = 20
#split the training data into training and validation datasets (30% for validation and 70 % for training).
validation_split = 0.3
# print the progress: Keras documents verbose as 0 (silent), 1 (progress bar)
# or 2 (one line per epoch); 2 reports loss and accuracy for every epoch.
verbose = 2

# make sure the checkpoint directory exists before Keras writes into it
checkpoint_path = 'saved_models/weights.best.model5.hdf5'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

#checkpointer saves the weight of the best model only
checkpointer_5 = ModelCheckpoint(filepath=checkpoint_path, verbose=1, save_best_only=True)

model5.fit(train_tensors, train_targets, batch_size=batch_size, epochs=epochs,
           callbacks=[checkpointer_5], validation_split=validation_split, verbose=verbose)

Train on 2643 samples, validate on 1134 samples
Epoch 1/10

Epoch 00001: val_loss improved from inf to 1.75660, saving model to saved_models/weights.best.model5.hdf5
Epoch 2/10

Epoch 00002: val_loss improved from 1.75660 to 1.70973, saving model to saved_models/weights.best.model5.hdf5
Epoch 3/10

Epoch 00003: val_loss improved from 1.70973 to 1.67688, saving model to saved_models/weights.best.model5.hdf5
Epoch 4/10

Epoch 00004: val_loss improved from 1.67688 to 1.58472, saving model to saved_models/weights.best.model5.hdf5
Epoch 5/10

Epoch 00005: val_loss improved from 1.58472 to 1.56420, saving model to saved_models/weights.best.model5.hdf5
Epoch 6/10

Epoch 00006: val_loss improved from 1.56420 to 1.50906, saving model to saved_models/weights.best.model5.hdf5
Epoch 7/10

Epoch 00007: val_loss did not improve from 1.50906
Epoch 8/10

Epoch 00008: val_loss improved from 1.50906 to 1.44786, saving model to saved_models/weights.best.model5.hdf5
Epoch 9/10

Epoch 00009: val_loss did n

<keras.callbacks.History at 0x1d5910ef978>

## Predictions for Model-5

### Loading the best weights of Model-5

In [12]:
# restore the weights written by the training checkpoint
best_weights_path = 'saved_models/weights.best.model5.hdf5'
model5.load_weights(best_weights_path)

### Predictions

In [13]:
# Predict the whole test set with one batched call instead of one
# predict() call per image. A length-1 middle axis is inserted so the result
# keeps the (n_images, 1, n_classes) layout of the former list of
# (1, n_classes) arrays.
model5_prediction = model5.predict(test_tensors)[:, np.newaxis, :]

### Processing the Predictions

In [14]:
# inspect the prediction stored for the first test image
print(model5_prediction[0])

[[0.2684233  0.10738349 0.07769796 0.04475707 0.07960093 0.09364016
  0.11497399 0.2135231 ]]


In [15]:
# class labels, in the column order used for the prediction table
class_labels = ['ALB','BET','DOL','LAG','NoF','OTHER','SHARK','YFT']

# Bring model5_prediction into the (1, n_images, n_classes) layout for easy handling.
# reshape is used instead of swapaxes so that re-running this cell leaves an
# already converted array unchanged.
model5_prediction = np.asarray(model5_prediction).reshape(1, -1, len(class_labels))

# creating a pandas dataframe with Model-5's predictions, one row per image
df_pred_model5 = pd.DataFrame(model5_prediction[0], columns=class_labels)

# first five rows of the df_pred_model5 dataframe
print(df_pred_model5[:5])

        ALB       BET       DOL       LAG       NoF     OTHER     SHARK  \
0  0.268423  0.107383  0.077698  0.044757  0.079601  0.093640  0.114974   
1  0.374453  0.074432  0.053887  0.017832  0.186082  0.065478  0.002076   
2  0.354573  0.046298  0.024999  0.031291  0.228334  0.159255  0.001658   
3  0.396436  0.076942  0.036188  0.072826  0.132634  0.120565  0.042135   
4  0.374415  0.057608  0.033026  0.033885  0.201088  0.130594  0.003302   

        YFT  
0  0.213523  
1  0.225760  
2  0.153592  
3  0.122275  
4  0.166083  


In [16]:
import os

# extracting the name of each image from its path; basename is used so the
# result does not depend on the length of the directory prefix
image_names = [os.path.basename(file_path) for file_path in test_files]

# adjusting the filename of the image to match the submission guidelines:
# names starting with "image_" get the "test_stg2/" prefix, others stay as is
image_names = [
    "test_stg2/" + name if name.startswith("image_") else name
    for name in image_names
]

In [17]:
#adding image names to our dataframe
df_pred_model5['image'] = image_names

#moving the 'image' column to the front; DataFrame.reindex_axis is deprecated
#and no longer exists in pandas >= 1.0, reindex(columns=...) replaces it
df_pred_model5 = df_pred_model5.reindex(columns=['image','ALB','BET','DOL','LAG','NoF','OTHER','SHARK','YFT'])

#printing the first five rows of dataframe
print(df_pred_model5[:5])

  """


                       image       ALB       BET       DOL       LAG  \
0  test_stg2/image_10973.jpg  0.268423  0.107383  0.077698  0.044757   
1  test_stg2/image_00175.jpg  0.374453  0.074432  0.053887  0.017832   
2  test_stg2/image_09645.jpg  0.354573  0.046298  0.024999  0.031291   
3              img_02920.jpg  0.396436  0.076942  0.036188  0.072826   
4  test_stg2/image_09349.jpg  0.374415  0.057608  0.033026  0.033885   

        NoF     OTHER     SHARK       YFT  
0  0.079601  0.093640  0.114974  0.213523  
1  0.186082  0.065478  0.002076  0.225760  
2  0.228334  0.159255  0.001658  0.153592  
3  0.132634  0.120565  0.042135  0.122275  
4  0.201088  0.130594  0.003302  0.166083  


## .csv file for submission

In [18]:
# write the prediction table without the index column
submission_path = 'submission5.csv'
df_pred_model5.to_csv(submission_path, index=False)

---

# Public Score - 1.50961 and Private Score - 1.77940