# Transfer Learning

---

## Description
This script uses the pretrained VGG16, InceptionV3, ResNet50 and MobileNet models for classification. We look for features that are common in images of earthquakes and, based on the collective probabilities of the top 5 predicted features, decide whether an image is useful to us, i.e. whether it is relevant or irrelevant (e.g. a meme). Useful images are moved to the __filtered__ directory.

In [1]:
import keras
import numpy as np
from keras.applications import vgg16, inception_v3, resnet50, mobilenet
 
# Instantiate the four classifiers consulted by the filtering step below.
# Every constructor is asked for the same pretrained weight set.
PRETRAINED_WEIGHTS = 'imagenet'

vgg_model = vgg16.VGG16(weights=PRETRAINED_WEIGHTS)                     # VGG16
inception_model = inception_v3.InceptionV3(weights=PRETRAINED_WEIGHTS)  # Inception V3
resnet_model = resnet50.ResNet50(weights=PRETRAINED_WEIGHTS)            # ResNet50
mobilenet_model = mobilenet.MobileNet(weights=PRETRAINED_WEIGHTS)       # MobileNet

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels.h5
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels.h5
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.6/mobilenet_1_0_224_tf.h5


In [2]:
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.applications.imagenet_utils import decode_predictions
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
def processImage(base_dir, img_url, show=False):
    """Load one image from disk and return it as a single-image batch.

    Parameters
    ----------
    base_dir : str
        Directory that contains the image. A trailing path separator is
        optional; the path is assembled with ``os.path.join``.
    img_url : str
        File name of the image relative to ``base_dir``.
    show : bool, optional
        When True, additionally draw the loaded image with ``plt.imshow``.
        Disabled by default: this function is called once per file and per
        model in the batch loop, and drawing on every call keeps adding image
        artists to the current axes.

    Returns
    -------
    numpy.ndarray
        The array produced by ``img_to_array`` with a new leading axis, i.e.
        laid out as (batchsize, height, width, channels) with batchsize 1.

    Notes
    -----
    The image is always requested at 224x224.
    NOTE(review): some networks (e.g. Inception V3) are documented with a
    different default input size -- confirm that 224x224 is intended for all
    models this batch is fed to.
    """
    # Local import: in this notebook `os` is only imported by a later cell.
    import os

    # Join with the platform separator rather than concatenating strings, so
    # that "some/dir" and "some/dir/" both resolve to "some/dir/<img_url>".
    filename = os.path.join(base_dir, img_url)

    # Load the image in PIL format at the requested target size.
    original = load_img(filename, target_size=(224, 224))
    if show:
        plt.imshow(original)

    # PIL stores images as (width, height, channel); the NumPy array is
    # (height, width, channel).
    numpy_image = img_to_array(original)

    # The networks expect (batchsize, height, width, channels), so add the
    # batch dimension at axis 0.
    return np.expand_dims(numpy_image, axis=0)

In [5]:
def predict(base_dir, img_url, model_preprocess, model_obj, model_name):
    """Classify a single image with one network and return the decoded labels.

    Parameters
    ----------
    base_dir, img_url
        Forwarded unchanged to ``processImage`` to obtain the image batch.
    model_preprocess
        Object exposing ``preprocess_input``; the caller passes the
        ``keras.applications`` module that matches ``model_obj``.
    model_obj
        Object exposing ``predict``; the caller passes a loaded model.
    model_name
        Human-readable name of the network. It does not take part in the
        computation.

    Returns
    -------
    The value returned by ``decode_predictions`` for the network output.
    It is called with its defaults, which (per the original author's note)
    yields the top 5 predictions.
    """
    raw_batch = processImage(base_dir, img_url)

    # Hand the model-specific preprocessing a copy, so the batch returned by
    # processImage is never modified in place.
    network_input = model_preprocess.preprocess_input(raw_batch.copy())

    # Predicted probabilities for each class.
    class_scores = model_obj.predict(network_input)

    # Turn the probabilities into class labels.
    return decode_predictions(class_scores)

In [7]:
import os
from os import listdir
from os.path import isfile, join

def getFilesInDir(sourceDir):
    """Return the names of the entries directly inside ``sourceDir`` that are files.

    Sub-directories are left out. The names are returned without the
    directory prefix, in the order ``listdir`` reports them.
    """
    file_names = []
    for entry in listdir(sourceDir):
        if isfile(join(sourceDir, entry)):
            file_names.append(entry)
    return file_names
    



In [None]:
sourceDir = 'SMERP-2018-Dataset/nepal-quake-2015-images'
filterDir = "SMERP-2018-Dataset/filtered/"

onlyfiles = getFilesInDir(sourceDir)

# Class names that mark an image as relevant: an image is kept as soon as any
# model lists one of these among its decoded predictions.
labels = ['knee_pad', 'stretcher', 'crash_helmet', 'cliff', 'ambulance', 'lakeside', 'half_track', 'garbage_truck', 'fire_engine', 'patio', 'plow', 'barrow', 'nail', 'hatchet', 'lumbermill', 'chain_saw', 'wood', 'military_uniform', 'rock', 'cobra', 'assault_rifle', 'syringe', 'mask', 'lifeboat', 'mountain', 'prison', 'swab', 'crutch', 'jinrikisha', 'hen-of-the-woods', 'tractor', 'snake', 'dwelling', 'church', 'monastery', 'band_aid', 'bath_towel', 'airliner', 'aircraft_carrier', 'shovel']
# Set copy for constant-time membership tests; `labels` itself stays a list.
relevantLabels = set(labels)

# (preprocessing module, loaded model, display name) for every classifier used.
classifiers = [
    (vgg16, vgg_model, "VGG"),
    (inception_v3, inception_model, "Inception"),
    (resnet50, resnet_model, "Resnet"),
    (mobilenet, mobilenet_model, "Mobilenet"),
]

# predict() forwards the directory to processImage(), which combines it with
# the file name. Passing the directory WITH a trailing separator yields a
# valid path whether that combination is done by concatenation or by joining.
sourceDirPrefix = join(sourceDir, "")

# os.rename does not create the destination directory.
os.makedirs(filterDir, exist_ok=True)

counter = 0
skipped = []  # (file name, exception) for every image that could not be handled
for img in onlyfiles:
    counter += 1
    try:
        # Collect the lower-cased class names predicted by all models.
        predictedLabels = set()
        for preprocessModule, model, modelName in classifiers:
            decoded = predict(sourceDirPrefix, img, preprocessModule, model, modelName)
            predictedLabels.update(entry[1].lower() for entry in decoded[0])

        # Move the image to the filtered folder -- exactly once. Renaming per
        # matching rank would fail on the second match because the file has
        # already been moved.
        if predictedLabels & relevantLabels:
            os.rename(join(sourceDir, img), join(filterDir, img))
    except Exception as err:
        # Best effort: one unreadable or unclassifiable file must not abort the
        # run, but it is recorded instead of being dropped silently.
        # KeyboardInterrupt is deliberately not caught so the loop can be stopped.
        skipped.append((img, err))

    if counter % 50 == 0:
        print(str(counter)+" files processed")

if skipped:
    print(str(len(skipped)) + " files skipped; first error: "
          + str(skipped[0][0]) + ": " + repr(skipped[0][1]))

50 files processed
100 files processed
150 files processed
200 files processed
250 files processed
300 files processed
350 files processed
400 files processed
450 files processed
500 files processed
550 files processed
600 files processed
650 files processed
700 files processed
750 files processed
800 files processed
850 files processed
900 files processed
950 files processed
1000 files processed
1050 files processed
1100 files processed
1150 files processed
1200 files processed
1250 files processed
1300 files processed
1350 files processed
1400 files processed
1450 files processed
1500 files processed
1550 files processed
1600 files processed
1650 files processed
1700 files processed
1750 files processed
1800 files processed
1850 files processed
1900 files processed
1950 files processed
2000 files processed
2050 files processed
2100 files processed
2150 files processed
2200 files processed
2250 files processed
2300 files processed
2350 files processed
2400 files processed
2450 files p