In [18]:
import numpy as np
import keras
import os
import tensorflow as tf
import pandas as pd

from keras.preprocessing import image
from keras.applications.mobilenet import MobileNet
from keras.applications.vgg16 import VGG16
from keras.applications.inception_v3 import InceptionV3

from keras.models import Model
from keras.layers import Dense, Dropout, GlobalAveragePooling2D
from keras.models import Sequential
from keras import backend as K

from PIL import Image

In [3]:
def preprocess(img):
    """
    Reshape an image to 224 x 224 x 3, so that it can be understood by our model
    
    Args:
        image: an arbitary height x width x 3 image
    Output:
        returns the converted image
    """
    
    width, height = img.shape[0], img.shape[1]
    
    img = image.array_to_img(img, scale=False)
    
    desired_width, desired_height = 224, 224
    
    if width < desired_width:
        desired_width = width
    if height < desired_height:
        desired_height = height
    
    start_x = np.maximum(0, int((width-desired_width)//2))
    
    img = img.crop((start_x, np.maximum(0, height-desired_height), start_x + desired_width, height))
    
    img = img.resize((224, 224))
    
    img = image.img_to_array(img)
    
    return img / 255  
  

In [4]:
def get_images(path, correct_images, is_train=True):
    """
    Get all the images
    
    Args:
        path               : folder path to load images from 
        correct_images     : image names which have protest label set
        is_train           : to identify if the images to load are training images or test images
    Output:
        returns an array of processed images 
    """
    
    files = os.listdir(path)
    
    files = list(map(lambda x: os.path.join(path, x), files)) # load all the images
    
    if is_train:
        filtered_files = [image for image in files if image in correct_images]
    else:
        filtered_files = [image for image in files if image in correct_images]
    
    
    train_images = filtered_files
    
    images = []
    for item in train_images:
        img = image.load_img(item)
        img = image.img_to_array(img)
        img = preprocess(img)
        images.append(img)
    
    return images

In [5]:
def convert_labels_to_numpy_array(items): 
    """
    Filter all labels based on the protest parameter
    
    Args:
        items: a dataframe object of csv file
    Output:
        returns an array of numpy arrays    
    """
    values = []
    
    for index, row in items.iterrows():

        cats = {
                'protest': True,
                'violence': True,
                'sign': True,
                'photo': True,
                'fire': True,
                'police': True,
                'children': True,
                'group_20': True,
                'group_100': True,
                'flag': True,
                'night': True,
                'shouting': True
               }

        if row['protest']:
            
            array = []
            
            for cat in cats:
                f = float(row[cat])
                array.append(f)
            
            narray = np.array(array)
        
            values.append(narray)

    return values

In [6]:
def get_labels(path):
    """
    Load the csv file and return an array of numpy arrays of it's objects
    
    Args:
        path: a relative path to load csv file from
    Output:
        returns an array of the numpy objects
    """
    labels = pd.read_csv(path)
    
    train_labels = convert_labels_to_numpy_array(labels)  
    
    return train_labels

In [7]:
def reshape_images(images, size):
    """
    Reshape the images array adding one more dimension as batch size
    
    Args:
        images : list of images
        size   : batch size
    Output:
        returns the numpy array of reshaped objects
    """
    return np.asarray(images).reshape(size,224,224,3)

In [16]:
def get_model(base_model):
    """
    Get the base model and modify it 
    
    Args:
        base_model: It is the object of our pretrained model
    Output:
        returns the new_model by adding new layers and setting first few layers as 
        non trainable
    """
    bottleneck_input = base_model.get_layer(index=0).input
    bottleneck_output = base_model.get_layer(index=-2).output
    
    bottleneck_model = Model(inputs=bottleneck_input, outputs=bottleneck_output)
    
    for layer in bottleneck_model.layers:
        
        layer.trainable = False
    
    new_model = Sequential()
    
    new_model.add(bottleneck_model)
    
    new_model.add(Dense(7))
    
    new_model.add(Dropout(1))
    
    new_model.add(Dense(12, input_dim=2048, activation='softmax'))
    
    return new_model

In [19]:
# initialize our base model
base_model = InceptionV3()

# get the new modified model
new_model = get_model(base_model)

# compile the model with optimizer, loss function and add a metrics for accuracy
new_model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

In [12]:
# training labels file path
training_labels_path = "training.csv"

# create a dataframe object by loading that file using pandas
df = pd.read_csv(training_labels_path)

print(df)

# store the image names which have protest label set
train_image_names = {}

# iterate over the file and modify the file name to "type/filename". Here type is either train or test.
# this will help us in further mapping labels and images with each other
for index, item in df.iterrows():
    
    if item['protest']:
        name = "train/" + item['fname']
        train_image_names[name] = True
        
# test labels file path
test_labels_path = "test.csv"

# read the test csv file
test_df = pd.read_csv(test_labels_path)

# store the test image names which have protest label set
test_image_names = {}

# iterate over the file and modify the file name to "type/filename". Here type is either train or test.
# this will help us in further mapping labels and images with each other
for index, item in test_df.iterrows():
    
    if item['protest']:
        name = "test/" + item['fname']
        
        test_image_names[name] = True

# path of training images folder
training_images_path = "train/"

# path of test images folder
test_images_path = "test/"

# get all the training labels
train_labels = get_labels(training_labels_path)

# get all the training images
train_images = get_images(training_images_path, train_image_names, True)

# get all the test labels
test_labels = get_labels(test_labels_path)

# get all the test images
test_images = get_images(test_images_path, test_image_names, False)

# size of our training images dataset
train_size = len(train_images)

# size of our test images dataset
test_size = len(test_images)

# reshaped train images
reshaped_train_images = reshape_images(train_images, train_size)

# reshaped test images
reshaped_test_images = reshape_images(test_images, test_size)

                 fname  protest     violence sign photo fire police children  \
0      train-00000.jpg        0            -    -     -    -      -        -   
1      train-00001.jpg        0            -    -     -    -      -        -   
2      train-00002.jpg        0            -    -     -    -      -        -   
3      train-00003.jpg        0            -    -     -    -      -        -   
4      train-00004.jpg        0            -    -     -    -      -        -   
5      train-00005.jpg        1  0.348705716    1     0    0      0        0   
6      train-00006.jpg        0            -    -     -    -      -        -   
7      train-00007.jpg        0            -    -     -    -      -        -   
8      train-00008.jpg        0            -    -     -    -      -        -   
9      train-00009.jpg        0            -    -     -    -      -        -   
10     train-00010.jpg        1  0.153150543    1     1    0      0        0   
11     train-00011.jpg        0         

{'test/test-00000.jpg': True, 'test/test-00002.jpg': True, 'test/test-00010.jpg': True, 'test/test-00013.jpg': True, 'test/test-00020.jpg': True, 'test/test-00028.jpg': True, 'test/test-00029.jpg': True, 'test/test-00030.jpg': True, 'test/test-00037.jpg': True, 'test/test-00041.jpg': True, 'test/test-00047.jpg': True, 'test/test-00049.jpg': True, 'test/test-00057.jpg': True, 'test/test-00058.jpg': True, 'test/test-00075.jpg': True, 'test/test-00078.jpg': True, 'test/test-00084.jpg': True, 'test/test-00090.jpg': True, 'test/test-00091.jpg': True, 'test/test-00092.jpg': True, 'test/test-00097.jpg': True, 'test/test-00102.jpg': True, 'test/test-00106.jpg': True, 'test/test-00110.jpg': True, 'test/test-00115.jpg': True, 'test/test-00116.jpg': True, 'test/test-00120.jpg': True, 'test/test-00124.jpg': True, 'test/test-00126.jpg': True, 'test/test-00128.jpg': True, 'test/test-00130.jpg': True, 'test/test-00133.jpg': True, 'test/test-00136.jpg': True, 'test/test-00138.jpg': True, 'test/test-00

[[[[0.3019608  0.36078432 0.        ]
   [0.3529412  0.40784314 0.0627451 ]
   [0.33333334 0.38431373 0.0627451 ]
   ...
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]

  [[0.5137255  0.54509807 0.2509804 ]
   [0.3254902  0.36078432 0.05098039]
   [0.34901962 0.38431373 0.05882353]
   ...
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]

  [[0.8039216  0.8156863  0.60784316]
   [0.5294118  0.54901963 0.28627452]
   [0.3764706  0.40392157 0.09019608]
   ...
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]

  ...

  [[0.72156864 0.7607843  0.7254902 ]
   [0.6313726  0.6901961  0.6784314 ]
   [0.42352942 0.53333336 0.54901963]
   ...
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]

  [[0.7137255  0.74509805 0.7019608 ]
   [0.7

In [21]:
# fit our training images dataset
new_model.fit(reshaped_train_images, np.array(train_labels), epochs=5, shuffle=True)

# save the state of our model, so that we won't need to run it again from scratch in future
new_model.save('news_image_with_dropout.h5')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [30]:
# predict label of our test images
predict = new_model.predict(reshaped_test_images, verbose=1)



In [31]:
# calculate accuracy of our model
new_model.evaluate(reshaped_test_images, np.array(test_labels), verbose=0)

RuntimeError: You must compile a model before training/testing. Use `model.compile(optimizer, loss)`.