# Image feature Extraction Transfer Learning

    * Tapan Kumar Patro
    * tapankumarpatro05@gmail.com

## Work Items:
    1.Image Load
    2.Feature Extraction for all Image
    3.Save into a pickle file
    4.Load the pickle file
    5.Extract feature from given image
    6.Find the minimum distance and return the images


## Importing Libraries

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
# from google_images_download import google_images_download

try:
  # The %tensorflow_version magic only works in colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf

import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [None]:
tf.__version__

In [None]:
import os
from tqdm import tqdm
from tensorflow.keras import models, layers
from tensorflow.keras.models import Model
from tensorflow.keras.layers import BatchNormalization, Activation, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import Dense,AveragePooling2D,BatchNormalization,Conv2D,Input,Flatten,Activation,concatenate,Dropout,GlobalAveragePooling2D, GlobalMaxPooling2D
from time import time
from datetime import datetime
from tensorflow.python.keras.callbacks import TensorBoard
import cv2

## Setup Google Colab for importing Dataset

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Navigating to Dataset folder in my drive
path = 'drive/My Drive/PocketApps/Avantari/dataset'
os.chdir(path)


In [None]:
!pwd

In [None]:
# !ls

In [None]:
# Creating a list of all images in the current working directory.
# A bare os.listdir() also returns non-image entries (for example the
# "precompute_img_features.pickle" file this notebook later writes into the
# same directory), which Image.open cannot read, so keep image files only.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.webp', '.tif', '.tiff')
all_images = [
    entry for entry in os.listdir()
    if os.path.isfile(entry) and entry.lower().endswith(IMAGE_EXTENSIONS)
]

In [None]:
# Defining Image Size given in requirement
IMAGE_SIZE = 512

In [None]:
from PIL import Image, ImageOps

In [None]:
# Showing 3 sample images side by side to visualize the dataset
for i, val in enumerate(all_images[10:13]):
    plt.subplot(1, 3, i+1)
    # The context manager closes the image file once imshow has consumed it
    with Image.open(val) as image_data:
        plt.imshow(image_data)
# Render once, after every subplot has been drawn. Calling plt.show() inside
# the loop flushes the figure after each image, so the 1x3 grid is never
# displayed as a whole.
plt.show()

## Create the base model from the pre-trained convnets

Create the base model from the **MobileNet V2** model developed at Google, and pre-trained on the ImageNet dataset, a large dataset of 1.4M images and 1000 classes of web images.

First, pick which intermediate layer of MobileNet V2 will be used for feature extraction. A common practice is to use the output of the very last layer before the flatten operation, the so-called "bottleneck layer". The reasoning here is that the following fully-connected layers will be too specialized to the task the network was trained on, and thus the features learned by these layers won't be very useful for a new task. The bottleneck features, however, retain much generality.

Let's instantiate a MobileNet V2 model pre-loaded with weights trained on ImageNet. By specifying the `include_top=False` argument, we load a network that doesn't include the classification layers at the top, which is ideal for feature extraction.

In [None]:
# Build the pre-trained backbone used for feature extraction

# Input shape: square, 3-channel images of side IMAGE_SIZE
IMG_SHAPE = (IMAGE_SIZE, IMAGE_SIZE, 3)

# MobileNet V2 with ImageNet weights and without its classification head
mobilenet_options = dict(
    input_shape=IMG_SHAPE,
    include_top=False,
    weights='imagenet',
)
base_model = tf.keras.applications.MobileNetV2(**mobilenet_options)

## Feature extraction
You will freeze the first 20 layers of the convolutional base created in the previous step and leave the remaining layers trainable; the resulting network is then used as a feature extractor.

In [None]:
# base_model.trainable = False

In [None]:
# Index of the first base-model layer that stays trainable
fine_tune_at = 20

# Mark the whole backbone trainable first; the leading layers are frozen below
base_model.trainable = True

# Report how many layers the base model has
layer_count = len(base_model.layers)
print("Number of layers in the base model: ", layer_count)

# Freeze every layer that comes before index `fine_tune_at`
for frozen_layer in base_model.layers[:fine_tune_at]:
    frozen_layer.trainable = False

## Creating sequential model


In [None]:
# Feature extractor: MobileNetV2 backbone followed by a small head
# NOTE(review): no training call is visible in this notebook, so the Conv2D
# head presumably runs with its initial weights - confirm this is intended.
head_layers = [
    tf.keras.layers.Conv2D(64, 3, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    # Global average pooling collapses the spatial axes into one feature vector
    tf.keras.layers.GlobalAveragePooling2D(),
]
model = tf.keras.Sequential([base_model, *head_layers])

### Compile the model

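You must compile the model before training it. The cell below uses the Adam optimizer with a categorical cross-entropy loss and an accuracy metric. Note that the cells in this notebook only call `model.predict` for feature extraction, so the loss does not influence the extracted features.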

In [None]:
# Configure optimizer, loss and metric on the Sequential model
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adam(),
)

In [None]:
# Summary of the new Model
model.summary()

In [None]:
print('Number of trainable variables = {}'.format(len(model.trainable_variables)))

In [None]:
path+"/"+all_images[0]

In [None]:
# Run the first image through the model to check the feature pipeline
image = Image.open(all_images[0])
# Add a leading batch axis, since the model consumes batches of images
image = np.asarray(image)[np.newaxis, ...]
# Rescale pixel values to [-1, 1] (assuming 8-bit input in [0, 255])
image = image / 127.5 - 1.0
# The forward pass yields the feature array for this one-image batch
feature = model.predict(image)

In [None]:
print(feature[0])

In [None]:
import pickle
from tqdm.notebook import tqdm

In [None]:
# Creating function for doing the feature extraction
def cal_feature(image_data):
    """Extract the feature array of a single image with the global ``model``.

    Args:
        image_data: Path or file object accepted by ``PIL.Image.open``.

    Returns:
        The array returned by ``model.predict`` for a batch that holds only
        this image.
    """
    # The context manager closes the file deterministically. convert("RGB")
    # loads the pixels and forces three channels, so grayscale, palette and
    # RGBA files match the 3-channel input the model was built with.
    with Image.open(image_data) as source:
        rgb_image = source.convert("RGB")
    # Add the leading batch axis expected by model.predict
    batch = np.expand_dims(rgb_image, axis=0)
    # Rescale 8-bit pixel values from [0, 255] to [-1, 1]
    batch = batch / 127.5 - 1.0
    return model.predict(batch)

# Created function for saving the extracted feature 
def pickle_stuff(filename, stuff):
    """Serialize ``stuff`` to ``filename`` with pickle.

    Args:
        filename: Destination path; an existing file is overwritten.
        stuff: Any picklable object.
    """
    # The context manager closes the file even when pickle.dump raises
    with open(filename, "wb") as save_stuff:
        pickle.dump(stuff, save_stuff)


In [None]:
# Computing features for all images

# Iterate over `all_images`: the previously used name `all_images_listed` is
# not defined anywhere in this notebook and raised a NameError.
# Each record keeps the file name next to the features extracted from it.
precompute_features = [
    {"name": image_name, "features": cal_feature(image_name)}
    for image_name in tqdm(all_images)
]


In [None]:
# Saving the Computed features for all images into pickle file
pickle_stuff("precompute_img_features.pickle", precompute_features)

# The features are now saved; next, load them and find similar images

In [None]:
# Loading pickle file

def load_stuff(filename):
    """Load and return the object pickled in ``filename``.

    Args:
        filename: Path of a file written with ``pickle.dump``.

    Returns:
        The unpickled object.
    """
    # NOTE: unpickling can execute arbitrary code; only load files that this
    # notebook (or another trusted source) produced.
    # The context manager closes the file even when pickle.load raises.
    with open(filename, "rb") as saved_stuff:
        return pickle.load(saved_stuff)

In [None]:
precompute_features = load_stuff("precompute_img_features.pickle")

In [None]:
# How the pickle file looks like ?

precompute_features[:1]

In [None]:
import scipy as sp
from scipy import spatial
from scipy.spatial import distance
from heapq import nsmallest

In [None]:
# Finding Similar Images
def find_similar_image(path, count):
    """Return the names of the ``count`` stored images closest to a query image.

    The query image is run through ``cal_feature`` and compared, by Euclidean
    distance, against every record of the global ``precompute_features``.

    Args:
        path: Image path (or file object) handed to ``cal_feature``.
        count: Maximum number of image names to return.

    Returns:
        A list of image names ordered from the smallest to the largest
        distance. The list is empty when ``precompute_features`` is empty or
        ``count`` is not positive.
    """
    # Nothing to compare against: avoid min() failing on an empty list
    if not precompute_features:
        return []

    # The features come from model.predict on a one-image batch, so they may
    # carry a leading batch axis; euclidean() expects 1-D vectors, hence ravel.
    feature = np.ravel(cal_feature(path))
    distances = [
        distance.euclidean(np.ravel(each_image_data.get("features")), feature)
        for each_image_data in precompute_features
    ]

    min_distance_value = min(distances)
    print("The lowest distance for given Image {}".format(min_distance_value))
    min_distance_index = distances.index(min_distance_value)
    print("The lowest distance for given Image index {}".format(min_distance_index))
    print("The lowest distance for given Image name {}".format(precompute_features[min_distance_index].get("name")))

    # Rank (index, distance) pairs so that images with equal distances keep
    # their own index; looking distances up with list.index() would map every
    # tie to the same (first) image. `count` replaces the hard-coded 3.
    closest = nsmallest(count, enumerate(distances), key=lambda pair: pair[1])
    return [precompute_features[index].get("name") for index, _ in closest]
        
    

In [None]:
image_list = find_similar_image(all_images[11], 3)

In [None]:
image_list

## Let's take a sample image

In [None]:
image_data = Image.open(all_images[11])
plt.imshow(image_data)
plt.show()

## Here are the similar images it returns

In [None]:
# Display every returned image in its own figure
for img in image_list:
    image_data = Image.open(img)
    plt.imshow(image_data)
    plt.show()