In [1]:
from sklearn.datasets import load_files
from keras.utils import np_utils
import numpy as np
import pandas as pd
from glob import glob

Using TensorFlow backend.


In [2]:
#function to load the dataset
def load_dataset(path, num_classes=8):
    """Return the file paths and one-hot encoded targets found under ``path``.

    Parameters
    ----------
    path : str
        Root folder handed to sklearn's ``load_files``.
    num_classes : int, optional
        Width of the one-hot encoding. Defaults to 8, the value this
        notebook previously hard-coded.

    Returns
    -------
    tuple
        ``(fish_files, fish_target)``: a numpy array of file paths and the
        matching one-hot matrix built by ``np_utils.to_categorical``.
    """
    # Only the paths and targets are used below, so skip reading every
    # file's content into memory.
    data = load_files(path, load_content=False)
    fish_files = np.array(data['filenames'])
    fish_target = np_utils.to_categorical(np.array(data['target']), num_classes)
    return fish_files,fish_target

In [3]:
#let's load the training data
train_files, train_targets = load_dataset('data/train')

#let's load the testing data (the targets returned for it are discarded)
test_files, _ = load_dataset('data/test')

#print the number of samples in the training and test sets
print ("There are %d images in training dataset"%len(train_files))
#this line reports the test files, so it must not be labelled as training
print ("There are %d images in the test set"%len(test_files))

There are 3777 images in training dataset
There are 13153 images in the training set


In [4]:
from keras.preprocessing import image
from tqdm import tqdm

#turning a single image file into a tensor
def path_to_tensor(img_path):
    """Load the image at ``img_path`` and return it with a leading axis of length 1."""
    # read the image, resized to 224x224
    pil_img = image.load_img(img_path, target_size=(224,224))
    # PIL image -> 3-D array
    pixels = image.img_to_array(pil_img)
    # prepend one axis so the 3-D array becomes 4-D
    return pixels[np.newaxis, ...]

def paths_to_tensor(img_paths):
    """Run ``path_to_tensor`` on every path (with a tqdm progress bar) and stack the results."""
    per_image = []
    for single_path in tqdm(img_paths):
        per_image.append(path_to_tensor(single_path))
    return np.vstack(per_image)

In [5]:
from PIL import ImageFile                            
# let PIL load truncated image files -- NOTE(review): intent inferred from the flag name
ImageFile.LOAD_TRUNCATED_IMAGES = True

#preprocessing the data: test images as float32, divided by 255
test_tensors = paths_to_tensor(test_files).astype(np.float32) / 255.0

100%|███████████████████████████████████████████████████████████████████████████| 13153/13153 [02:06<00:00, 104.15it/s]


In [6]:
#same preprocessing for the training images: float32, divided by 255
train_tensors = paths_to_tensor(train_files).astype(np.float32) / 255.0

100%|██████████████████████████████████████████████████████████████████████████████| 3777/3777 [01:38<00:00, 38.29it/s]


In [7]:
#shape of the training tensor array
print(np.shape(train_tensors))

(3777, 224, 224, 3)


# Model 2 using transfer learning, Extracted VGG-19 features

In [8]:
from keras.applications.vgg19 import VGG19
from keras.preprocessing import image
from keras.applications.vgg19 import preprocess_input
from keras.models import Model
from keras.layers import Input
import numpy as np

#Building a VGG19 feature extractor with weights pretrained on ImageNet
#defining the input shape
input_tensor = Input(shape=(224,224,3))
#loading VGG19 without its top layers.
#NOTE: this call used to pass the Python builtin `max` (not the string 'max')
#as `pooling`, so no global pooling layer was added and the extracted features
#have shape (7, 7, 512) per image. pooling=None states that explicitly; the
#later cells (squeeze + GlobalMaxPooling2D) rely on this shape.
base_model = VGG19(input_tensor=input_tensor, weights='imagenet', include_top=False, pooling=None)
#taking the output of the last layer of the base model (no layer is removed here)
output = base_model.get_layer(index = -1).output
#defining the model
VGG19_model2 = Model(base_model.input, output)
VGG19_model2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

## Extracting VGG19 features for training and testing datasets

In [9]:
#one batched predict() call per dataset instead of one call per image;
#the added axis keeps the per-sample (1, 7, 7, 512) layout that the
#following cells expect. Lower batch_size if memory is tight.
VGG19_features = np.expand_dims(VGG19_model2.predict(train_tensors, batch_size=32), axis=1)

VGG19_features_test = np.expand_dims(VGG19_model2.predict(test_tensors, batch_size=32), axis=1)

In [10]:
#report the shapes of the extracted train and test feature collections
print ("Shape of VGG_19_features: {0}".format(np.shape(VGG19_features)))

print ("Shape of VGG_19_features_test: {0}".format(np.shape(VGG19_features_test)))

Shape of VGG_19_features: (3777, 1, 7, 7, 512)
Shape of VGG_19_features_test: (13153, 1, 7, 7, 512)


### Pre-processing the features

In [11]:
#VGG19_features has 5 dimensions (N, 1, 7, 7, 512); drop the length-1 axis to get a 4-D array
squeezed_VGG19_train = np.squeeze(VGG19_features, axis=1)
#squeezing the test features the same way
squeezed_VGG19_test = np.squeeze(VGG19_features_test, axis=1)

#confirm the resulting shapes
print ("Shape of squeezed_VGG19_train: {0}".format(np.shape(squeezed_VGG19_train)))
print ("Shape of squeezed_VGG_19_test: {0}".format(np.shape(squeezed_VGG19_test)))


Shape of squeezed_VGG19_train: (3777, 7, 7, 512)
Shape of squeezed_VGG_19_test: (13153, 7, 7, 512)


### Defining the Model architecture

In [12]:
from keras.models import Sequential
from keras.layers import MaxPooling2D, GlobalMaxPooling2D, Dense

#classifier head on top of the extracted VGG19 features:
# - GlobalMaxPooling2D whose input shape matches one sample of squeezed_VGG19_train
# - a fully connected layer of 1024 units with relu activation
# - a softmax layer with 8 units, one per class of fish
fish_model = Sequential([
    GlobalMaxPooling2D(input_shape=squeezed_VGG19_train.shape[1:]),
    Dense(1024, activation='relu'),
    Dense(8, activation='softmax'),
])
fish_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
global_max_pooling2d_1 (Glob (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              525312    
_________________________________________________________________
dense_2 (Dense)              (None, 8)                 8200      
Total params: 533,512
Trainable params: 533,512
Non-trainable params: 0
_________________________________________________________________


### Compiling the Model 2

In [13]:
#compile with the rmsprop optimizer and categorical cross-entropy loss, tracking accuracy
fish_model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

### Training Model 2

In [15]:
#training fish_model on the training dataset
import os
from keras.callbacks import ModelCheckpoint

#make sure the checkpoint folder exists before the callback writes into it
os.makedirs('saved_models', exist_ok=True)

#checkpointer for saving only best weights
checkpointer_VGG = ModelCheckpoint(filepath='saved_models/weights.best.VGG19.hdf5', verbose=1, save_best_only=True)

#30% of the training samples are held out for validation
fish_model.fit(squeezed_VGG19_train,train_targets,validation_split=0.3,batch_size=20,
               epochs=10,callbacks=[checkpointer_VGG],verbose=1)

Train on 2643 samples, validate on 1134 samples
Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.95650, saving model to saved_models/weights.best.VGG19.hdf5
Epoch 2/10

Epoch 00002: val_loss did not improve from 0.95650
Epoch 3/10

Epoch 00003: val_loss did not improve from 0.95650
Epoch 4/10

Epoch 00004: val_loss did not improve from 0.95650
Epoch 5/10

Epoch 00005: val_loss improved from 0.95650 to 0.50554, saving model to saved_models/weights.best.VGG19.hdf5
Epoch 6/10

Epoch 00006: val_loss improved from 0.50554 to 0.42070, saving model to saved_models/weights.best.VGG19.hdf5
Epoch 7/10

Epoch 00007: val_loss did not improve from 0.42070
Epoch 8/10

Epoch 00008: val_loss did not improve from 0.42070
Epoch 9/10

Epoch 00009: val_loss did not improve from 0.42070
Epoch 10/10

Epoch 00010: val_loss did not improve from 0.42070


<keras.callbacks.History at 0x18849ab2908>

## Predictions for Model-2

### Loading the best saved weights of Model 2

In [16]:
#restore the weights stored at the path the training checkpoint callback writes to
fish_model.load_weights('saved_models/weights.best.VGG19.hdf5')

### Predictions

In [17]:
#making the predictions from fish_model in one batched call instead of one
#predict() call per sample; the added axis keeps the per-sample (1, 8) layout
#that the following cells expect
fish_model_prediction = np.expand_dims(fish_model.predict(squeezed_VGG19_test), axis=1)

### Processing the Predictions

In [20]:
#inspect the prediction stored at index 1
print(fish_model_prediction[1])

[[9.8164302e-01 6.0717302e-04 1.7280490e-05 2.2413282e-05 9.4259111e-03
  1.9288070e-03 2.4875710e-03 3.8676704e-03]]


In [21]:
#check the overall shape of the collected predictions
print(np.shape(fish_model_prediction))

(13153, 1, 8)


In [22]:
#swapping the first two axes for easier handling: (13153, 1, 8) -> (1, 13153, 8)
#NOTE: this overwrites its own input, so running the cell a second time swaps the axes back
fish_model_prediction = np.swapaxes(fish_model_prediction,0,1)

In [23]:
import pandas as pd

#building a pandas dataframe from the fish_model predictions, one column per class
df_pred_fish_model = pd.DataFrame(
    fish_model_prediction[0],
    columns=['ALB','BET','DOL','LAG','NoF','OTHER','SHARK','YFT'],
)

In [24]:
#preview the first five rows of the prediction dataframe
print(df_pred_fish_model[:5])

        ALB       BET           DOL           LAG       NoF     OTHER  \
0  0.826058  0.000039  3.081456e-06  1.138450e-04  0.013480  0.005857   
1  0.981643  0.000607  1.728049e-05  2.241328e-05  0.009426  0.001929   
2  0.799668  0.000053  2.480575e-05  9.625995e-05  0.195594  0.003104   
3  0.999282  0.000030  9.633685e-07  6.915273e-05  0.000335  0.000255   
4  0.924001  0.000009  4.823132e-07  3.318170e-07  0.074761  0.000472   

          SHARK       YFT  
0  2.155311e-05  0.154428  
1  2.487571e-03  0.003868  
2  4.579341e-05  0.001415  
3  1.255303e-09  0.000027  
4  8.517122e-05  0.000670  


In [25]:
import os

#extracting the bare file name of each image from its full path.
#os.path.basename does not depend on the length of the directory prefix,
#unlike a fixed-offset slice of the path string.
image_names = [os.path.basename(test_file) for test_file in test_files]

In [26]:
#adjusting the filename of the image to match the submission guidelines:
#names that start with "image_" get the "test_stg2/" prefix, all others stay as-is.
#No hard-coded sample count, and re-running the cell does not prefix a name twice.
image_names = [
    "test_stg2/" + name if name.startswith("image_") else name
    for name in image_names
]

In [27]:
#adding image names to our dataframe (one name per row, in row order)
df_pred_fish_model['image'] = image_names

#putting the image column first; reindex(columns=...) is used because
#DataFrame.reindex_axis is deprecated and absent from newer pandas releases
df_pred_fish_model = df_pred_fish_model.reindex(columns=['image','ALB','BET','DOL','LAG','NoF','OTHER','SHARK','YFT'])

#printing the first five rows of dataframe
print(df_pred_fish_model[:5])

  """


                       image       ALB       BET           DOL           LAG  \
0  test_stg2/image_10973.jpg  0.826058  0.000039  3.081456e-06  1.138450e-04   
1  test_stg2/image_00175.jpg  0.981643  0.000607  1.728049e-05  2.241328e-05   
2  test_stg2/image_09645.jpg  0.799668  0.000053  2.480575e-05  9.625995e-05   
3              img_02920.jpg  0.999282  0.000030  9.633685e-07  6.915273e-05   
4  test_stg2/image_09349.jpg  0.924001  0.000009  4.823132e-07  3.318170e-07   

        NoF     OTHER         SHARK       YFT  
0  0.013480  0.005857  2.155311e-05  0.154428  
1  0.009426  0.001929  2.487571e-03  0.003868  
2  0.195594  0.003104  4.579341e-05  0.001415  
3  0.000335  0.000255  1.255303e-09  0.000027  
4  0.074761  0.000472  8.517122e-05  0.000670  


In [28]:
#show the last ten rows of the prediction dataframe
df_pred_fish_model.tail(10)

Unnamed: 0,image,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT
13143,test_stg2/image_05875.jpg,0.912021,2.139378e-07,3.573953e-07,1.752325e-08,0.087614,5e-06,4.643796e-07,0.000359
13144,test_stg2/image_04374.jpg,0.488202,0.0006991809,0.0001621282,0.0001277162,0.219567,0.003448,7.379734e-07,0.287794
13145,test_stg2/image_07892.jpg,0.127718,5.56411e-07,4.340842e-10,1.050562e-09,0.133792,3e-06,6.480554e-10,0.738486
13146,test_stg2/image_09226.jpg,0.976595,8.894766e-05,0.0002095772,5.158936e-06,0.001163,0.006884,1.366547e-06,0.015053
13147,test_stg2/image_04860.jpg,0.951323,0.0008440098,8.432475e-06,4.594551e-05,0.040608,9.3e-05,1.667168e-06,0.007076
13148,img_07578.jpg,0.003271,0.004056724,0.04595753,6.440025e-08,0.006294,0.000336,4.800255e-08,0.940084
13149,test_stg2/image_03265.jpg,0.047379,9.768966e-06,1.16375e-06,2.680112e-06,0.951973,0.000233,9.491364e-07,0.0004
13150,test_stg2/image_09846.jpg,0.273156,0.0002245731,5.897219e-05,1.383662e-05,0.718948,0.002872,4.906206e-05,0.004679
13151,test_stg2/image_10800.jpg,0.717296,0.0005683791,3.363607e-08,1.123596e-09,0.251141,3e-06,9.802219e-09,0.030992
13152,test_stg2/image_02733.jpg,0.202337,0.0001552632,2.199913e-05,3.599256e-06,0.792443,0.001206,0.000106271,0.003727


##  .csv file for submission

In [29]:
#write the predictions to submission2.csv without the dataframe index column
df_pred_fish_model.to_csv('submission2.csv',index=False)

---

# Public Score - 1.64077 and Private Score - 4.04953