# Introduction

1. This kernel is a detailed guide to transfer learning on the pollen grain classification problem.

2. The aim of this kernel is to show how to use pre-trained CNN models for transfer learning.

3. The dataset was taken from Kaggle and contains 805 pollen images of 23 pollen types. [Link](https://www.kaggle.com/andrewmvd/pollen-grain-image-classification)


In [None]:
import numpy as np
import pandas as pd
import os

In [None]:
# Directory that holds the pollen-grain image files; its entries are listed
# with os.listdir in the next cell.
work_dir = '../input/pollen-grain-image-classification'

In [None]:
# Names of every entry in the dataset directory. os.listdir returns them in
# arbitrary order and does not filter by file type.
image_name = os.listdir(work_dir)

In [None]:
def _class_label(file_name):
    """Return the part of *file_name* before its first space or underscore."""
    return file_name.replace(' ', '_').split('_', 1)[0]


# One class label per image, in the same order as image_name.
image_type = [_class_label(file_name) for file_name in image_name]

In [None]:
# Display the extracted class labels as the cell output.
image_type

In [None]:
# Table of (file name, class label) pairs consumed by flow_from_dataframe.
df = pd.DataFrame({'image_name': image_name, 'image_type': image_type})

# ImageDataGenerator's validation_split carves the validation subset out of a
# contiguous slice of the rows, so the row order decides which images (and
# which classes) end up in it. os.listdir gives no ordering guarantee, so
# sort first for reproducibility and then shuffle with a fixed seed so that
# both subsets contain a representative mix of classes.
df = (
    df.sort_values('image_name')
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

In [None]:
# Preview the first rows of the file-name / class-label table.
df.head()

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Random augmentations applied by the generator: rotation, shear, zoom and
# horizontal flip.
augmentation_options = dict(
    rotation_range=90,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
)

# Pixel values are rescaled by 1/255 and 10% of the rows are reserved for the
# 'validation' subset.
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.1,
    **augmentation_options
)

In [None]:
# Settings shared by the training and validation iterators.
flow_options = dict(
    directory=work_dir,
    x_col='image_name',
    y_col='image_type',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

# Training images go through the augmenting generator.
train_generator = datagen.flow_from_dataframe(
    df, subset='training', **flow_options
)

# Validation images must not be randomly augmented, otherwise the validation
# metrics are measured on distorted images and vary from run to run. Use a
# generator that only rescales. Its validation_split must stay equal to the
# one passed to `datagen` so the two subsets remain disjoint.
valid_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.1)
valid_generator = valid_datagen.flow_from_dataframe(
    df, subset='validation', **flow_options
)

In [None]:
from tensorflow.keras.layers import Conv2D,MaxPool2D,Dropout,Flatten,Dense,GlobalAveragePooling2D
from tensorflow.keras.models import Sequential,Model

# Creating a Simple CNN Model

In [None]:
# Simple CNN baseline: three 3x3 convolutions with two max-pool/dropout
# stages, followed by two dense layers and a 23-way softmax output.
model = Sequential([
    # Convolutional feature extractor (input: 224x224 RGB images).
    Conv2D(50, kernel_size=(3, 3), strides=(1, 1), padding='same',
           activation='relu', input_shape=(224, 224, 3)),
    Conv2D(75, kernel_size=(3, 3), strides=(1, 1), padding='same',
           activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),
    Conv2D(125, kernel_size=(3, 3), strides=(1, 1), padding='same',
           activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Classifier head.
    Flatten(),
    Dense(500, activation='relu'),
    Dropout(0.4),
    Dense(250, activation='relu'),
    Dropout(0.3),
    Dense(23, activation='softmax'),
])


In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss (the
# generators yield 'categorical' labels), accuracy as the reported metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Number of batches per pass over each subset. Round up (ceiling division)
# so the final, partially filled batch is included: floor division would
# silently drop those images from every epoch and from every validation
# pass, and would yield 0 steps for a subset smaller than one batch.
train_steps = (train_generator.n + train_generator.batch_size - 1) // train_generator.batch_size
valid_steps = (valid_generator.n + valid_generator.batch_size - 1) // valid_generator.batch_size

In [None]:
# Train the simple CNN for 50 epochs, evaluating on the validation iterator
# after each epoch; the returned History object is kept for plotting.
history = model.fit(
    train_generator,
    epochs=50,
    steps_per_epoch=train_steps,
    validation_steps=valid_steps,
    validation_data=valid_generator,
)

In [None]:
import matplotlib.pyplot as plt

# Plot the training and validation curves of the simple CNN, one figure per
# tracked metric.
for metric in ('accuracy', 'loss'):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    # The second curve comes from validation_data, so it is labelled
    # 'validation' rather than 'test'.
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

# Creating a Model Using InceptionV3

In [None]:
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
# Alias under a model-specific name so it is clear which network's
# preprocessing function is passed to the data generator.
inception_preprocess_input = preprocess_input

In [None]:
# Generator for the InceptionV3 pipeline: images are passed through the
# InceptionV3 preprocessing function (no rescale or augmentation options are
# set) and 10% of the rows are reserved for the 'validation' subset.
inception_datagen = ImageDataGenerator(validation_split=0.1, preprocessing_function=inception_preprocess_input)

In [None]:
# Settings shared by both InceptionV3 iterators; only `subset` differs.
inception_flow_options = dict(
    directory='../input/pollen-grain-image-classification',
    x_col='image_name',
    y_col='image_type',
    target_size=(331, 331),
    batch_size=32,
    class_mode='categorical',
)

# Training subset of the dataframe.
inception_train_generator = inception_datagen.flow_from_dataframe(
    df, subset='training', **inception_flow_options
)

# Validation subset of the dataframe.
inception_valid_generator = inception_datagen.flow_from_dataframe(
    df, subset='validation', **inception_flow_options
)

In [None]:
# InceptionV3 with ImageNet weights and without its top classification layers
# (include_top=False); the input shape matches the 331x331 RGB images
# produced by the generators above.
inception_base = InceptionV3(include_top=False,weights='imagenet',input_shape=(331,331,3))

In [None]:
# Freeze the pre-trained base so that only the layers added on top of it are
# trained.
inception_base.trainable = False

In [None]:
# Transfer-learning model: the InceptionV3 base followed by global average
# pooling, two dense layers with dropout, and a 23-way softmax output.
inception_model = Sequential([
    inception_base,
    GlobalAveragePooling2D(),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dropout(0.3),
    Dense(23, activation='softmax'),
])

In [None]:
# Print the layer-by-layer summary of the transfer-learning model.
inception_model.summary()

In [None]:
# Same training configuration as the simple CNN: Adam optimizer, categorical
# cross-entropy loss, accuracy as the reported metric.
inception_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Number of batches per pass over each InceptionV3 subset (rebinds the
# train_steps / valid_steps names used for the simple CNN). Round up
# (ceiling division) so the final, partially filled batch is included:
# floor division would silently drop those images from every epoch and from
# every validation pass, and would yield 0 steps for a subset smaller than
# one batch.
train_steps = (inception_train_generator.n + inception_train_generator.batch_size - 1) // inception_train_generator.batch_size
valid_steps = (inception_valid_generator.n + inception_valid_generator.batch_size - 1) // inception_valid_generator.batch_size

In [None]:
# Train the layers on top of the frozen InceptionV3 base for 20 epochs,
# evaluating on the validation iterator after each epoch; the returned
# History object is kept for plotting.
inception_history = inception_model.fit(
    inception_train_generator,
    epochs=20,
    steps_per_epoch=train_steps,
    validation_steps=valid_steps,
    validation_data=inception_valid_generator,
)

In [None]:
# Plot the training and validation curves of the InceptionV3 model, one
# figure per tracked metric.
for metric in ('accuracy', 'loss'):
    plt.plot(inception_history.history[metric])
    plt.plot(inception_history.history['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    # The second curve comes from validation_data, so it is labelled
    # 'validation' rather than 'test'.
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

# Summary
1. Pretrained models can be used to boost accuracy.
2. In our case, InceptionV3 reaches 80%+ accuracy in just 6 epochs, whereas our simple CNN model reaches only around 68% accuracy after 50 epochs.