<a href="https://colab.research.google.com/github/raywu60kg/kaggles/blob/master/Plant-seedlings-classification/Plant_seedlings_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Plant seedlings classification
A simple demo of image classification using TensorFlow 2 transfer learning. The image preprocessing is from [here](https://www.kaggle.com/gaborvecsei/plant-seedlings-fun-with-computer-vision).

## Load data

In [1]:
# Upload our Kaggle API token (kaggle.json) into the Colab session
from google.colab import files
uploaded = files.upload()

Saving kaggle.json to kaggle.json


In [2]:
import json
# Parse the uploaded token file into a dict.
with open('kaggle.json') as f:
  kaggle_api = json.loads(f.read())

In [3]:
import os
# Expose the token fields as the KAGGLE_USERNAME / KAGGLE_KEY environment variables.
for env_name, token_field in (('KAGGLE_USERNAME', 'username'), ('KAGGLE_KEY', 'key')):
  os.environ[env_name] = kaggle_api[token_field]

In [4]:
# Reinstall the kaggle client to avoid the "Too many requests" error
!pip uninstall -y kaggle
!pip install --upgrade pip
!pip install kaggle

Found existing installation: kaggle 1.5.6
Uninstalling kaggle-1.5.6:
  Successfully uninstalled kaggle-1.5.6
Requirement already up-to-date: pip in /usr/local/lib/python3.6/dist-packages (20.2.2)
Processing /root/.cache/pip/wheels/01/3e/ff/77407ebac3ef71a79b9166a8382aecf88415a0bcbe3c095a01/kaggle-1.5.6-py3-none-any.whl
Installing collected packages: kaggle
Successfully installed kaggle-1.5.6


In [5]:
!kaggle competitions download -c plant-seedlings-classification

Downloading plant-seedlings-classification.zip to /content
 99% 1.67G/1.69G [00:29<00:00, 72.2MB/s]
100% 1.69G/1.69G [00:29<00:00, 60.4MB/s]


In [6]:
!unzip plant-seedlings-classification.zip >>  /dev/null

In [7]:
!ls

kaggle.json			    sample_submission.csv  train
plant-seedlings-classification.zip  test


## Tensorflow version

In [8]:
%tensorflow_version 2.x

In [9]:
import tensorflow as tf

In [10]:
print(tf.__version__)

2.3.0


## Check gpu


In [11]:
# Abort early unless TensorFlow reports the first GPU.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
  print('Found GPU at: {}'.format(device_name))
else:
  raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


In [12]:
import timeit

# Same GPU check as above, but print a hint about the notebook settings
# before raising.
device_name = tf.test.gpu_device_name()
if not device_name == '/device:GPU:0':
  print(
      '\n\nThis error most likely means that this notebook is not '
      'configured to use a GPU.  Change this in Notebook Settings via the '
      'command palette (cmd/ctrl-shift-P) or the Edit menu.\n\n')
  raise SystemError('GPU device not found')

def cpu():
  """Run one benchmark convolution on the CPU.

  Draws a random (100, 100, 100, 3) batch, applies a freshly created
  Conv2D layer (32 filters, kernel size 7) and returns the sum of the result.
  """
  with tf.device('/cpu:0'):
    batch = tf.random.normal((100, 100, 100, 3))
    conv = tf.keras.layers.Conv2D(32, 7)
    return tf.math.reduce_sum(conv(batch))

def gpu():
  """Run one benchmark convolution on the first GPU.

  Draws a random (100, 100, 100, 3) batch, applies a freshly created
  Conv2D layer (32 filters, kernel size 7) and returns the sum of the result.
  """
  with tf.device('/device:GPU:0'):
    batch = tf.random.normal((100, 100, 100, 3))
    conv = tf.keras.layers.Conv2D(32, 7)
    return tf.math.reduce_sum(conv(batch))
  
# Warm up by running each op once; see: https://stackoverflow.com/a/45067900
cpu()
gpu()

# Time ten calls per device and report the totals.
print('Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images '
      '(batch x height x width x channel). Sum of ten runs.')
print('CPU (s):')
cpu_time = timeit.timeit(cpu, number=10)
print(cpu_time)
print('GPU (s):')
gpu_time = timeit.timeit(gpu, number=10)
print(gpu_time)
print('GPU speedup over CPU: {}x'.format(int(cpu_time/gpu_time)))

Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images (batch x height x width x channel). Sum of ten runs.
CPU (s):
3.3598774740003137
GPU (s):
0.1065169869998499
GPU speedup over CPU: 31x


## Image mask

In [13]:
import cv2
import numpy as np

In [14]:
def create_mask_for_plant(image):
    """Build a mask selecting the plant-coloured pixels of `image`.

    The image is converted with COLOR_BGR2HSV (so BGR channel order is
    assumed), thresholded to hue 60 +/- 35, saturation 100..255 and value
    50..255, and the mask is then morphologically closed with an 11x11
    elliptical kernel.

    Returns the mask produced by `cv2.morphologyEx`.
    """
    hue_center, hue_tolerance = 60, 35
    hsv_floor = np.array([hue_center - hue_tolerance, 100, 50])
    hsv_ceiling = np.array([hue_center + hue_tolerance, 255, 255])

    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    in_range = cv2.inRange(hsv_image, hsv_floor, hsv_ceiling)

    closing_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))
    return cv2.morphologyEx(in_range, cv2.MORPH_CLOSE, closing_kernel)

In [15]:
def segment_plant(image):
    """Keep only the pixels of `image` selected by `create_mask_for_plant`."""
    return cv2.bitwise_and(image, image, mask=create_mask_for_plant(image))

In [16]:
def sharpen_image(image):
    """Sharpen `image` by unsharp masking.

    Returns 1.5 * image - 0.5 * blurred, where `blurred` is a Gaussian blur
    of `image` with sigma 3 (kernel size (0, 0) is passed to OpenCV).
    """
    blurred = cv2.GaussianBlur(image, (0, 0), 3)
    image_weight, blur_weight, offset = 1.5, -0.5, 0
    return cv2.addWeighted(image, image_weight, blurred, blur_weight, offset)

## Image Generator

In [17]:
# Training configuration shared by the generator, model and training cells.
train_data_dir = '/content/train'  # root folder read by flow_from_directory (training and validation subsets)
batch_size = 32  # images per batch; also used to derive the steps per epoch
img_width, img_height = 299, 299  # size every image is resized to before entering the model
nb_epochs = 30  # number of training epochs

In [18]:
# Both generators must use the same split fraction so that the 'training' and
# 'validation' subsets of the directory stay disjoint.
validation_fraction = 0.2

# Training images get heavy random augmentation.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=360,
    width_shift_range=0.5,
    height_shift_range=0.5,
    shear_range=0.5,
    zoom_range=0.5,
    validation_split=validation_fraction,
    preprocessing_function=sharpen_image)

# Validation images only get the deterministic preprocessing (sharpen +
# rescale). Augmenting them would make val_loss noisy, and val_loss is what
# the save_best_only checkpoint relies on.
validation_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    validation_split=validation_fraction,
    preprocessing_function=sharpen_image)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training') # set as training data

validation_generator = validation_datagen.flow_from_directory(
    train_data_dir, # same directory as training data
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation') # set as validation data

Found 3803 images belonging to 12 classes.
Found 947 images belonging to 12 classes.


## Model

### Import Pretrain Model

In [19]:
# ImageNet-pretrained InceptionResNetV2 without its classification head;
# pooling='avg' makes it output one feature vector per image.
# input_shape is (height, width, channels), the same order as the generators'
# target_size=(img_height, img_width).
pretrained_model = tf.keras.applications.inception_resnet_v2.InceptionResNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(img_height, img_width, 3),
    pooling='avg')

In [20]:
# summary() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" line.
pretrained_model.summary()

Model: "inception_resnet_v2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 299, 299, 3) 0                                            
__________________________________________________________________________________________________
conv2d_22 (Conv2D)              (None, 149, 149, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 149, 149, 32) 96          conv2d_22[0][0]                  
__________________________________________________________________________________________________
activation (Activation)         (None, 149, 149, 32) 0           batch_normalization[0][0]        
________________________________________________________________________________

### Freeze Pretrain Model

In [21]:
# Mark every layer except the last 19 as non-trainable, listing each frozen layer.
for frozen_layer in pretrained_model.layers[:-19]:
    frozen_layer.trainable = False
    print(frozen_layer.name)

input_1
conv2d_22
batch_normalization
activation
conv2d_23
batch_normalization_1
activation_1
conv2d_24
batch_normalization_2
activation_2
max_pooling2d
conv2d_25
batch_normalization_3
activation_3
conv2d_26
batch_normalization_4
activation_4
max_pooling2d_1
conv2d_30
batch_normalization_8
activation_8
conv2d_28
conv2d_31
batch_normalization_6
batch_normalization_9
activation_6
activation_9
average_pooling2d
conv2d_27
conv2d_29
conv2d_32
conv2d_33
batch_normalization_5
batch_normalization_7
batch_normalization_10
batch_normalization_11
activation_5
activation_7
activation_10
activation_11
mixed_5b
conv2d_37
batch_normalization_15
activation_15
conv2d_35
conv2d_38
batch_normalization_13
batch_normalization_16
activation_13
activation_16
conv2d_34
conv2d_36
conv2d_39
batch_normalization_12
batch_normalization_14
batch_normalization_17
activation_12
activation_14
activation_17
block35_1_mixed
block35_1_conv
block35_1
block35_1_ac
conv2d_43
batch_normalization_21
activation_21
conv2d_41
co

### Fully-Connected

In [22]:
def fully_connected_layer(model, dense_array):
  """Append one BatchNormalization -> Dense(relu) -> Dropout(0.5) group per entry.

  Args:
    model: object exposing `add(layer)`; it is extended in place.
    dense_array: iterable with the number of units of each Dense layer.

  Returns:
    The same `model` object that was passed in.
  """
  layers = tf.keras.layers
  for units in dense_array:
    group = (
        layers.BatchNormalization(),
        layers.Dense(units, activation='relu'),
        layers.Dropout(0.5),
    )
    for new_layer in group:
      model.add(new_layer)
  return model


In [23]:
# Backbone -> three BatchNorm/Dense/Dropout groups -> 12-way softmax output.
model = fully_connected_layer(
    tf.keras.Sequential([pretrained_model]),
    [1024, 512, 256])
model.add(tf.keras.layers.Dense(12, activation='softmax'))

In [24]:
# summary() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" line.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inception_resnet_v2 (Functio (None, 1536)              54336736  
_________________________________________________________________
batch_normalization_203 (Bat (None, 1536)              6144      
_________________________________________________________________
dense (Dense)                (None, 1024)              1573888   
_________________________________________________________________
dropout (Dropout)            (None, 1024)              0         
_________________________________________________________________
batch_normalization_204 (Bat (None, 1024)              4096      
_________________________________________________________________
dense_1 (Dense)              (None, 512)               524800    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0

### Learning Rate 

In [25]:
# Adam with a staircase exponential decay: the learning rate starts at 1e-4
# and is multiplied by 0.95 every 100 optimizer steps.
initial_learning_rate = 0.0001
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=100,
    decay_rate=0.95,
    staircase=True)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
    loss='categorical_crossentropy',
    metrics=['accuracy'])

### Callback

In [26]:
# Write the model to disk only when it improves on the best result so far.
checkpointer = tf.keras.callbacks.ModelCheckpoint(
    filepath='/content/my_model.hdf5',
    save_best_only=True)

## Training

In [27]:
# Clear the Keras session, then train. The step counts use integer division,
# so the partial batch left over after dividing by batch_size is not counted.
tf.keras.backend.clear_session()
model.fit(
  x=train_generator,
  epochs=nb_epochs,
  steps_per_epoch=train_generator.samples // batch_size,
  validation_data=validation_generator,
  validation_steps=validation_generator.samples // batch_size,
  callbacks=[checkpointer])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7f4779db40f0>

## Predict

In [47]:
# Reload the best model: restore the weights from the checkpoint file that the
# ModelCheckpoint callback (save_best_only=True) wrote during training.
model.load_weights('/content/my_model.hdf5')

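Move the test images into their own subdirectory, because the TensorFlow image generator expects the images to sit inside a subfolder of the directory it reads.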

In [29]:
!mkdir test_gerator

In [30]:
!mv test test_gerator/

In [31]:
# Test-time pipeline: same sharpening and rescaling as training, no augmentation.
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    preprocessing_function=sharpen_image) 

# batch_size=1 together with shuffle=False yields the images one by one in a
# fixed order, so predictions can be matched with `test_generator.filenames`.
test_generator = test_datagen.flow_from_directory(
    '/content/test_gerator',
    target_size=(img_height, img_width),
    batch_size=1,
    shuffle=False,
    class_mode="categorical") # test images; the single 'test' subfolder is seen as one class

Found 794 images belonging to 1 classes.


In [32]:
filenames = test_generator.filenames
nb_samples = len(filenames)

In [33]:
# Reset the generator before predicting, then run one step per test image
# (the generator uses batch_size=1, so steps == number of files).
test_generator.reset()
predictions = model.predict(test_generator, use_multiprocessing=True,steps=nb_samples)



In [34]:
predictions

array([[9.88525595e-10, 5.98207706e-08, 2.33160016e-08, ...,
        5.19408104e-07, 9.99999404e-01, 3.08425854e-12],
       [2.64385335e-07, 1.47544870e-06, 1.22355814e-05, ...,
        1.08760503e-06, 5.18491674e-07, 1.16207870e-02],
       [3.97577189e-08, 1.61837566e-08, 6.86743533e-08, ...,
        5.61156988e-09, 8.67154459e-10, 9.99876738e-01],
       ...,
       [1.60388055e-07, 1.23968078e-07, 4.02669485e-07, ...,
        5.45521210e-08, 3.20831628e-09, 9.99954581e-01],
       [9.23709148e-14, 1.00000000e+00, 1.85182390e-11, ...,
        1.43150669e-13, 2.04974504e-09, 6.78593873e-14],
       [1.37055827e-06, 1.76721642e-08, 1.12117114e-07, ...,
        1.94251393e-09, 1.24717090e-08, 5.79198165e-08]], dtype=float32)

In [35]:
np.shape(predictions) 

(794, 12)

In [36]:
# Pick the highest-probability class for every test image.
prediction_class_index = predictions.argmax(axis=1)
print(prediction_class_index)

[10  5 11  3 11  6  3  5  6  6  5 10 11  8 11  5  8  8  3  9  3 10  3 11
 11  7  8  8  3 11  3  9  6 11  6  6  0  6  3  7  6  4  3 10 11 11  7  8
  7 10  6 11  2  6  7  5  1 10  2  6 11  2  1  4  7  5  6  6  9  8 10  3
  8  3  3  4  6  8  8  6 11  3  3  4  9 11  2  8  5  6  1  6  5  0  3  6
 10  8  7 10 11 11  8 11 11  5  3  8  2  2  6  1  3  3  3 11  2  1 11  3
  3 11  2  6  7  4  1  7 11  3 11 11  3  3  5  4  5  1  6  3  6 10  9  3
  5 11  4 10 11  1  8  6  7  8  6 10  3  5  6  2  1  6  8  2  3  0  8  4
  3  3  3  7  9 11  3 10  6  2  6  6  6 10  6  6  8  6  3  5  5 10  6  1
  8  4  1  6  4  3  6  3  2  6  1  1 10  2  5  3  5  3  5  1  5  6 10  2
  7  6 10  3  8  6  3 10  4 11  5  3  3  7  9  2  9  2  3  6  6  2 10  1
 11  3  7 10  2  3  6  5 10  5  3  6  7  3  3  4  2  1 10  3  6  7  3  7
 10  2  5  9  8  9  1  6  6 10  3 10  8  3 11  2  6  6  7 11  7  6  3  3
  7  3  6  8  1  7 10 11  6  0 10  5  9  3  2 10 11 11  1  8  4  2 11  6
  8  4  9  6 11  1  6 10 11  1  1 10 10 10 11 11 10

## Submit

In [37]:
# Class name -> class index mapping used by the training generator.
labels = train_generator.class_indices
print(labels)

{'Black-grass': 0, 'Charlock': 1, 'Cleavers': 2, 'Common Chickweed': 3, 'Common wheat': 4, 'Fat Hen': 5, 'Loose Silky-bent': 6, 'Maize': 7, 'Scentless Mayweed': 8, 'Shepherds Purse': 9, 'Small-flowered Cranesbill': 10, 'Sugar beet': 11}


In [38]:
# Invert the mapping: class index -> class name.
index2label = {index: name for name, index in labels.items()}
print(index2label)

{0: 'Black-grass', 1: 'Charlock', 2: 'Cleavers', 3: 'Common Chickweed', 4: 'Common wheat', 5: 'Fat Hen', 6: 'Loose Silky-bent', 7: 'Maize', 8: 'Scentless Mayweed', 9: 'Shepherds Purse', 10: 'Small-flowered Cranesbill', 11: 'Sugar beet'}


In [39]:
!ls

kaggle.json    plant-seedlings-classification.zip  test_gerator
my_model.hdf5  sample_submission.csv		   train


In [40]:
import pandas as pd

In [41]:
# Submission sample from kaggle
pd.read_csv('/content/sample_submission.csv')

Unnamed: 0,file,species
0,0021e90e4.png,Sugar beet
1,003d61042.png,Sugar beet
2,007b3da8b.png,Sugar beet
3,0086a6340.png,Sugar beet
4,00c47e980.png,Sugar beet
...,...,...
789,fea355851.png,Sugar beet
790,fea3da57c.png,Sugar beet
791,fef2ade8c.png,Sugar beet
792,ff65bc002.png,Sugar beet


In [42]:
# Strip the directory part from each generator path (e.g. 'test/0021e90e4.png')
# to get the bare file name the submission expects. basename avoids the magic
# slice length that was tied to the folder being called 'test'.
col_file = [os.path.basename(path) for path in test_generator.filenames]

In [43]:
# Translate each predicted class index into its species name.
col_species = [index2label[class_index] for class_index in prediction_class_index]

In [44]:
submission = pd.DataFrame({'file':col_file, 'species':col_species})
submission

Unnamed: 0,file,species
0,0021e90e4.png,Small-flowered Cranesbill
1,003d61042.png,Fat Hen
2,007b3da8b.png,Sugar beet
3,0086a6340.png,Common Chickweed
4,00c47e980.png,Sugar beet
...,...,...
789,fea355851.png,Loose Silky-bent
790,fea3da57c.png,Sugar beet
791,fef2ade8c.png,Sugar beet
792,ff65bc002.png,Charlock


In [45]:
submission.to_csv('/content/submission.csv', index=False)

In [46]:
!kaggle competitions submit  -f submission.csv -m 'test_submisstion' plant-seedlings-classification

100% 22.3k/22.3k [00:05<00:00, 4.16kB/s]
Successfully submitted to Plant Seedlings Classification