In [2]:
import pickle
import numpy as np

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import keras
from keras import layers

tf.keras.backend.clear_session()  # For easy reset of notebook state.

# from google.colab import drive
# drive.mount('/content/drive')
# base_folder = '/content/drive/My Drive/Colab Notebooks/Shopee2020/comp2 - product detection'
# data_folder = '/content/drive/My Drive/Colab Notebooks/Shopee2020/comp2 - product detection/new_train'

data_folder ='train/train'
# save_folder = '../input/shopee-product-detection-student'

#base_folder = '../input/shopee'
# data_folder ='../input/shopee/train_new/train'
# save_folder = '../input/shopee'


#this pretrained model takes input as the one below
input_shape = (224,224,3)

Using TensorFlow backend.


In [3]:
from keras.preprocessing.image import ImageDataGenerator

In [4]:
# Set up the data generators to read from our data_folder
bs = 128 # The batch size is 128

# An object that applies transformations to the images before they are consumed by the model
# These transformations include (1) preprocessing, like rescaling or normalization (2) data augmentation
datagen = ImageDataGenerator(
        rescale=1./255, # divide each pixel value by 255. Each pixel is in the range 0-255, so after division it is in 0-1
        rotation_range=20, # rotate the image between -20 to +20 degrees
        width_shift_range=0.2, # translate the image left-right for 20% of the image's width
        height_shift_range=0.2, # same, for up-down and height
        zoom_range=0.2,
        horizontal_flip=True,
        validation_split=0.2)
print('Making training data generator...')
train_gen = datagen.flow_from_directory(
        data_folder,
        target_size=input_shape[:2],
        batch_size=bs,
        subset='training')
print('Making validation data generator...')
val_gen = datagen.flow_from_directory(
        data_folder,
        target_size=input_shape[:2],
        batch_size=bs,
        subset='validation')

Making training data generator...
Found 56726 images belonging to 42 classes.
Making validation data generator...
Found 14161 images belonging to 42 classes.


In [5]:
#!pip install image-classifiers==1.0.0b1

# installing efficientNet source code
!pip install -U git+https://github.com/qubvel/efficientnet

#!pip install -U efficientnet

Collecting git+https://github.com/qubvel/efficientnet
  Cloning https://github.com/qubvel/efficientnet to /private/var/folders/ch/wctq_gw94hnbp931rddqjk780000gn/T/pip-req-build-s6fe97mm
  Running command git clone -q https://github.com/qubvel/efficientnet /private/var/folders/ch/wctq_gw94hnbp931rddqjk780000gn/T/pip-req-build-s6fe97mm
Building wheels for collected packages: efficientnet
  Building wheel for efficientnet (setup.py) ... [?25ldone
[?25h  Created wheel for efficientnet: filename=efficientnet-1.1.0-py3-none-any.whl size=18326 sha256=3aee5b709574a035013223371650aec862182e56d381aa4e894d5ea1b3dac404
  Stored in directory: /private/var/folders/ch/wctq_gw94hnbp931rddqjk780000gn/T/pip-ephem-wheel-cache-hf42f2vy/wheels/11/69/85/814d64d694c96db0eef17b718042d644a1e54f113920481920
Successfully built efficientnet
Installing collected packages: efficientnet
  Attempting uninstall: efficientnet
    Found existing installation: efficientnet 1.1.0
    Uninstalling efficientnet-1.1.0:
   

In [6]:
import efficientnet.keras as enet

In [7]:
from keras.layers import Flatten

from keras.models import Sequential
from keras.layers import Dense

new_model = Sequential()
new_model.add(enet.EfficientNetB0(include_top=False, input_shape=(224,224,3), pooling='avg', weights='imagenet'))
#new_model.add(Flatten())
new_model.add(Dense(42, activation='softmax', input_dim=2048))

In [8]:
new_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnet-b0 (Model)      (None, 1280)              4049564   
_________________________________________________________________
dense_1 (Dense)              (None, 42)                53802     
Total params: 4,103,366
Trainable params: 4,061,350
Non-trainable params: 42,016
_________________________________________________________________


In [9]:
new_model.compile(optimizer='adam',
                  loss = 'categorical_crossentropy',
                  metrics=['categorical_accuracy'])

In [10]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import os
model_context = 'inceptionv3'

model_checkpoint = ModelCheckpoint('{}-best_val_loss.h5'.format(model_context),save_weights_only=False,monitor='val_loss',mode='auto',save_best_only=True)

# If the validation loss doesn't improve for 20 epochs, stop training
earlystopping = EarlyStopping(monitor='val_loss', patience=20)

# If the validation loss doesn't improve for 5 epochs, reduce the learning rate to 0.2 times it's previous value
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5)

In [11]:
epochs = 5
new_model.fit(
    train_gen,
    steps_per_epoch=train_gen.n // bs,
    epochs=epochs,
    validation_data=val_gen,
    validation_steps=val_gen.n // bs,
    callbacks=[model_checkpoint, earlystopping, reduce_lr],
    verbose =1)

Epoch 1/5
  5/443 [..............................] - ETA: 9:03:43 - loss: 3.5759 - categorical_accuracy: 0.1156

KeyboardInterrupt: 

In [None]:
new_model.save('efficientnetb0_3epoch.h5')

# load pretrained model
##### (the model is trained above)

In [None]:
#training_model = tf.keras.models.load_model('..input/epochh5/new_model_kaggle_1epoch.h5')

# predictions

In [12]:
from tensorflow.keras.preprocessing.image import load_img, img_to_array

def run_image_on_model(img_path, model, label_map):
  pil_img = load_img(img_path)
  pil_img = pil_img.resize( input_shape[:2] )
  img_arr = img_to_array(pil_img)
  # Remember to normalize the image values the same way you did when you trained the model
  img_arr = img_arr / 255.
  # We need to wrap this in an np.array with dimensions (b,H,W,C). Currently, the shape is only (H,W,C)
  img_arr = np.array( [img_arr] )
  pred = model.predict(img_arr, batch_size=1)[0]
  pred_idx = np.argmax(pred)
  return label_map[pred_idx]

In [13]:
# Construct a reverse mapping
label_map = {v:k for k,v in train_gen.class_indices.items()}
label_map

{0: '00',
 1: '01',
 2: '02',
 3: '03',
 4: '04',
 5: '05',
 6: '06',
 7: '07',
 8: '08',
 9: '09',
 10: '10',
 11: '11',
 12: '12',
 13: '13',
 14: '14',
 15: '15',
 16: '16',
 17: '17',
 18: '18',
 19: '19',
 20: '20',
 21: '21',
 22: '22',
 23: '23',
 24: '24',
 25: '25',
 26: '26',
 27: '27',
 28: '28',
 29: '29',
 30: '30',
 31: '31',
 32: '32',
 33: '33',
 34: '34',
 35: '35',
 36: '36',
 37: '37',
 38: '38',
 39: '39',
 40: '40',
 41: '41'}

In [14]:
# test the function to see if it works, should print the category
run_image_on_model('../input/shopeetest/test/0004b03ad7eabfb3989727c461310a84.jpg', new_model, label_map)

'12'

In [15]:
# The generator's internal labeling 
print(train_gen.class_indices)

{'00': 0, '01': 1, '02': 2, '03': 3, '04': 4, '05': 5, '06': 6, '07': 7, '08': 8, '09': 9, '10': 10, '11': 11, '12': 12, '13': 13, '14': 14, '15': 15, '16': 16, '17': 17, '18': 18, '19': 19, '20': 20, '21': 21, '22': 22, '23': 23, '24': 24, '25': 25, '26': 26, '27': 27, '28': 28, '29': 29, '30': 30, '31': 31, '32': 32, '33': 33, '34': 34, '35': 35, '36': 36, '37': 37, '38': 38, '39': 39, '40': 40, '41': 41}


In [17]:
test_folder = '../input/shopee-product-detection-student/test/test/test'

In [18]:
import pandas as pd
import os

# create another dataframe to store results

resultdf = pd.DataFrame(os.listdir(test_folder), columns =['filename'])
resultdf

Unnamed: 0,filename
0,70f33346d45bf086f0401ab0c574b6c3.jpg
1,26d5d0e72e79acb152600fe84d948875.jpg
2,b213ae298a91f74f813031c038fadf15.jpg
3,a4a5fd79014fa1346bbab3be76f2173a.jpg
4,fc1c5b59905930eb8a13a72249040cc2.jpg
...,...
12187,4f1e71f00628a1350723fce9999aaaf3.jpg
12188,8ba3dc5a082c3b6c3fb0e3aa0de5508b.jpg
12189,de8b3f896d9035ca0ee05cb1c7071469.jpg
12190,618e3c20756112f0c703f8d158793f7e.jpg


In [22]:
# add 'test/test/' to the filename, that will be the path for the images
resultdf['path'] = resultdf['filename'].map(lambda x: '../input/shopee-product-detection-student/test/test/test/' + str(x))
resultdf

Unnamed: 0,filename,path
0,70f33346d45bf086f0401ab0c574b6c3.jpg,../input/shopee-product-detection-student/test...
1,26d5d0e72e79acb152600fe84d948875.jpg,../input/shopee-product-detection-student/test...
2,b213ae298a91f74f813031c038fadf15.jpg,../input/shopee-product-detection-student/test...
3,a4a5fd79014fa1346bbab3be76f2173a.jpg,../input/shopee-product-detection-student/test...
4,fc1c5b59905930eb8a13a72249040cc2.jpg,../input/shopee-product-detection-student/test...
...,...,...
12187,4f1e71f00628a1350723fce9999aaaf3.jpg,../input/shopee-product-detection-student/test...
12188,8ba3dc5a082c3b6c3fb0e3aa0de5508b.jpg,../input/shopee-product-detection-student/test...
12189,de8b3f896d9035ca0ee05cb1c7071469.jpg,../input/shopee-product-detection-student/test...
12190,618e3c20756112f0c703f8d158793f7e.jpg,../input/shopee-product-detection-student/test...


In [23]:
resultdf['category'] = resultdf['path'].map(lambda x: run_image_on_model(x, new_model, label_map))
resultdf

Unnamed: 0,filename,path,category
0,70f33346d45bf086f0401ab0c574b6c3.jpg,../input/shopee-product-detection-student/test...,18
1,26d5d0e72e79acb152600fe84d948875.jpg,../input/shopee-product-detection-student/test...,05
2,b213ae298a91f74f813031c038fadf15.jpg,../input/shopee-product-detection-student/test...,27
3,a4a5fd79014fa1346bbab3be76f2173a.jpg,../input/shopee-product-detection-student/test...,10
4,fc1c5b59905930eb8a13a72249040cc2.jpg,../input/shopee-product-detection-student/test...,02
...,...,...,...
12187,4f1e71f00628a1350723fce9999aaaf3.jpg,../input/shopee-product-detection-student/test...,05
12188,8ba3dc5a082c3b6c3fb0e3aa0de5508b.jpg,../input/shopee-product-detection-student/test...,02
12189,de8b3f896d9035ca0ee05cb1c7071469.jpg,../input/shopee-product-detection-student/test...,03
12190,618e3c20756112f0c703f8d158793f7e.jpg,../input/shopee-product-detection-student/test...,24


In [24]:
# make a copy in case anything goes wrong
# we will use the copy version
resultdf_copy = resultdf.copy()
resultdf_copy

Unnamed: 0,filename,path,category
0,70f33346d45bf086f0401ab0c574b6c3.jpg,../input/shopee-product-detection-student/test...,18
1,26d5d0e72e79acb152600fe84d948875.jpg,../input/shopee-product-detection-student/test...,05
2,b213ae298a91f74f813031c038fadf15.jpg,../input/shopee-product-detection-student/test...,27
3,a4a5fd79014fa1346bbab3be76f2173a.jpg,../input/shopee-product-detection-student/test...,10
4,fc1c5b59905930eb8a13a72249040cc2.jpg,../input/shopee-product-detection-student/test...,02
...,...,...,...
12187,4f1e71f00628a1350723fce9999aaaf3.jpg,../input/shopee-product-detection-student/test...,05
12188,8ba3dc5a082c3b6c3fb0e3aa0de5508b.jpg,../input/shopee-product-detection-student/test...,02
12189,de8b3f896d9035ca0ee05cb1c7071469.jpg,../input/shopee-product-detection-student/test...,03
12190,618e3c20756112f0c703f8d158793f7e.jpg,../input/shopee-product-detection-student/test...,24


In [25]:
# we do no need path in the csv
del resultdf_copy['path']
resultdf_copy

Unnamed: 0,filename,category
0,70f33346d45bf086f0401ab0c574b6c3.jpg,18
1,26d5d0e72e79acb152600fe84d948875.jpg,05
2,b213ae298a91f74f813031c038fadf15.jpg,27
3,a4a5fd79014fa1346bbab3be76f2173a.jpg,10
4,fc1c5b59905930eb8a13a72249040cc2.jpg,02
...,...,...
12187,4f1e71f00628a1350723fce9999aaaf3.jpg,05
12188,8ba3dc5a082c3b6c3fb0e3aa0de5508b.jpg,02
12189,de8b3f896d9035ca0ee05cb1c7071469.jpg,03
12190,618e3c20756112f0c703f8d158793f7e.jpg,24


In [26]:
# test csv has 12196 rows but test folder has 12192 pictures
# kaggle say follow the test csv, so need to remove the extra 6 filenames in resultdf_copy
# will perform a merge
test = pd.read_csv('../input/shopee-product-detection-student/test.csv')
test

Unnamed: 0,filename,category
0,fd663cf2b6e1d7b02938c6aaae0a32d2.jpg,43
1,c7fd77508a8c355eaab0d4e10efd6b15.jpg,43
2,127f3e6d6e3491b2459812353f33a913.jpg,43
3,5ca4f2da11eda083064e6c36f37eeb81.jpg,43
4,46d681a542f2c71be017eef6aae23313.jpg,43
...,...,...
12181,5ba958eacb23cd7d1673bad4dae55784.jpg,43
12182,efbe41a1c2b666b70e337e438559808b.jpg,43
12183,79fdaa5ac5ba10dbe8004cabd8c35eb3.jpg,43
12184,ac3d136124617637a05ba66694e381ef.jpg,43


In [27]:
del test['category']
test

Unnamed: 0,filename
0,fd663cf2b6e1d7b02938c6aaae0a32d2.jpg
1,c7fd77508a8c355eaab0d4e10efd6b15.jpg
2,127f3e6d6e3491b2459812353f33a913.jpg
3,5ca4f2da11eda083064e6c36f37eeb81.jpg
4,46d681a542f2c71be017eef6aae23313.jpg
...,...
12181,5ba958eacb23cd7d1673bad4dae55784.jpg
12182,efbe41a1c2b666b70e337e438559808b.jpg
12183,79fdaa5ac5ba10dbe8004cabd8c35eb3.jpg
12184,ac3d136124617637a05ba66694e381ef.jpg


In [28]:
finalresult = pd.merge(resultdf_copy, test, on = 'filename')
finalresult

Unnamed: 0,filename,category
0,70f33346d45bf086f0401ab0c574b6c3.jpg,18
1,26d5d0e72e79acb152600fe84d948875.jpg,05
2,b213ae298a91f74f813031c038fadf15.jpg,27
3,a4a5fd79014fa1346bbab3be76f2173a.jpg,10
4,fc1c5b59905930eb8a13a72249040cc2.jpg,02
...,...,...
12181,4f1e71f00628a1350723fce9999aaaf3.jpg,05
12182,8ba3dc5a082c3b6c3fb0e3aa0de5508b.jpg,02
12183,de8b3f896d9035ca0ee05cb1c7071469.jpg,03
12184,618e3c20756112f0c703f8d158793f7e.jpg,24


In [29]:
finalresult.to_csv('gpu_b0_3.csv', index=False)