In [1]:
import numpy as np
import keras
import os
import tensorflow as tf
import pandas as pd

from keras.preprocessing import image
from keras.applications.mobilenet import MobileNet
from keras.applications.vgg16 import VGG16
from keras.applications.inception_v3 import InceptionV3

from keras.models import Model
from keras.layers import Dense, Dropout, GlobalAveragePooling2D
from keras.models import Sequential
from keras import backend as K

import matplotlib.pyplot as plt

from PIL import Image

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def preprocess(img):
    """
    Reshape an image to 224 x 224 x 3, so that it can be understood by our model
    
    Args:
        image: an arbitary height x width x 3 image
    Output:
        returns the converted image
    """
    
    width, height = img.shape[0], img.shape[1]
    
    img = image.array_to_img(img, scale=False)
    
    desired_width, desired_height = 224, 224
    
    if width < desired_width:
        desired_width = width
    if height < desired_height:
        desired_height = height
    
    start_x = np.maximum(0, int((width-desired_width)//2))
    
    img = img.crop((start_x, np.maximum(0, height-desired_height), start_x + desired_width, height))
    
    img = img.resize((224, 224))
    
    img = image.img_to_array(img)
    
    return img / 255  
  

In [3]:
def get_images(path, correct_images, is_train=True):
    """
    Get all the images
    
    Args:
        path               : folder path to load images from 
        correct_images     : image names which have protest label set
        is_train           : to identify if the images to load are training images or test images
    Output:
        returns an array of processed images 
    """
    
    files = os.listdir(path)
    
    files = list(map(lambda x: os.path.join(path, x), files)) # load all the images
    
    if is_train:
        filtered_files = [image for image in files if image in correct_images]
    else:
        filtered_files = [image for image in files if image in correct_images]
    
    
    train_images = filtered_files
    
    images = []
    for item in train_images:
        img = image.load_img(item)
        img = image.img_to_array(img)
        img = preprocess(img)
        images.append(img)
    
    return images

In [4]:
def convert_labels_to_numpy_array(items): 
    """
    Filter all labels based on the protest parameter
    
    Args:
        items: a dataframe object of csv file
    Output:
        returns an array of numpy arrays    
    """
    values = []
    
    for index, row in items.iterrows():

        cats = {
                'protest': True,
                'violence': True,
                'sign': True,
                'photo': True,
                'fire': True,
                'police': True,
                'children': True,
                'group_20': True,
                'group_100': True,
                'flag': True,
                'night': True,
                'shouting': True
               }

        if row['protest']:
            
            array = []
            
            for cat in cats:
                f = float(row[cat])
                array.append(f)
            
            narray = np.array(array)
        
            values.append(narray)

    return values

In [5]:
def get_labels(path):
    """
    Load the csv file and return an array of numpy arrays of it's objects
    
    Args:
        path: a relative path to load csv file from
    Output:
        returns an array of the numpy objects
    """
    labels = pd.read_csv(path)
    
    train_labels = convert_labels_to_numpy_array(labels)  
    
    return train_labels

In [6]:
def reshape_images(images, size):
    """
    Reshape the images array adding one more dimension as batch size
    
    Args:
        images : list of images
        size   : batch size
    Output:
        returns the numpy array of reshaped objects
    """
    return np.asarray(images).reshape(size,224,224,3)

In [7]:
def get_model(base_model):
    """
    Get the base model and modify it 
    
    Args:
        base_model: It is the object of our pretrained model
    Output:
        returns the new_model by adding new layers and setting first few layers as 
        non trainable
    """
    bottleneck_input = base_model.get_layer(index=0).input
    bottleneck_output = base_model.get_layer(index=-2).output
    
    bottleneck_model = Model(inputs=bottleneck_input, outputs=bottleneck_output)
    
    for layer in bottleneck_model.layers:
        
        layer.trainable = False
    
    new_model = Sequential()
    
    new_model.add(bottleneck_model)
    
    new_model.add(Dense(12))
    
    new_model.add(Dropout(0.5))
    
    new_model.add(Dense(12, input_dim=2048, activation='softmax'))
    
    return new_model

In [8]:
# initialize our base model
base_model = InceptionV3()

# get the new modified model
new_model = get_model(base_model)

# compile the model with optimizer, loss function and add a metrics for accuracy
new_model.compile(optimizer='rmsprop', loss='mean_absolute_error', metrics=['accuracy'])

In [9]:
# training labels file path
training_labels_path = "training.csv"

# create a dataframe object by loading that file using pandas
df = pd.read_csv(training_labels_path)

# store the image names which have protest label set
train_image_names = {}

# iterate over the file and modify the file name to "type/filename". Here type is either train or test.
# this will help us in further mapping labels and images with each other
for index, item in df.iterrows():
    
    if item['protest']:
        name = "train/" + item['fname']
        train_image_names[name] = True
        
# test labels file path
test_labels_path = "test.csv"

# read the test csv file
test_df = pd.read_csv(test_labels_path)

# store the test image names which have protest label set
test_image_names = {}

# iterate over the file and modify the file name to "type/filename". Here type is either train or test.
# this will help us in further mapping labels and images with each other
for index, item in test_df.iterrows():
    
    if item['protest']:
        name = "test/" + item['fname']
        
        test_image_names[name] = True

# path of training images folder
training_images_path = "train/"

# path of test images folder
test_images_path = "test/"

# get all the training labels
train_labels = get_labels(training_labels_path)

# get all the training images
train_images = get_images(training_images_path, train_image_names, True)

# get all the test labels
test_labels = get_labels(test_labels_path)

# get all the test images
test_images = get_images(test_images_path, test_image_names, False)

# size of our training images dataset
train_size = len(train_images)

# size of our test images dataset
test_size = len(test_images)

# reshaped train images
reshaped_train_images = reshape_images(train_images, train_size)

# reshaped test images
reshaped_test_images = reshape_images(test_images, test_size)

In [12]:
# fit our training images dataset
new_model.fit(reshaped_train_images, np.array(train_labels), epochs=2, shuffle=True)

# save the state of our model, so that we won't need to run it again from scratch in future
new_model.save('news_image_with_dropout_with_dense_two.h5')

Epoch 1/2
Epoch 2/2


In [13]:
# predict label of our test images
predict = new_model.predict(reshaped_test_images, verbose=1)



In [14]:
# calculate accuracy of our model
new_model.evaluate(reshaped_test_images, np.array(test_labels), verbose=0)

[0.21317748070984277, 1.0]

In [None]:
new_model.evaluate(reshaped_test_images, np.array(test_labels), verbose=0)