In [83]:
import numpy as np
import keras
import os
import tensorflow as tf
import pandas as pd

from keras.preprocessing import image
from keras.applications.mobilenet import MobileNet
from keras.applications.vgg16 import VGG16
from keras.applications.inception_v3 import InceptionV3

from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Sequential
from keras import backend as K

from PIL import Image

In [84]:
def preprocess(img):
    """Crop an image array to at most 224x224, resize to 224x224, scale by 1/255.

    The crop window is centred horizontally and anchored to the bottom edge
    of the picture. Images smaller than 224 pixels along an axis are not
    cropped along that axis; the final resize brings every image to 224x224.

    Args:
        img: image array whose first axis indexes rows (height) and whose
            second axis indexes columns (width).
            NOTE(review): assumes a channels-last (rows, columns, channels)
            layout, as produced by the caller's ``image.img_to_array`` call
            -- confirm if the Keras image data format is ever changed.

    Returns:
        The output of ``image.img_to_array`` for the resized crop, divided
        by 255.
    """
    # Array axes are (rows, columns), i.e. (height, width). PIL's crop box is
    # (left, upper, right, lower) in x/y order, so the two must not be mixed
    # up: treating shape[0] as the width mis-crops every non-square image.
    height, width = img.shape[0], img.shape[1]

    img = image.array_to_img(img, scale=False)

    # Never ask for a window larger than the picture itself.
    crop_width = min(224, width)
    crop_height = min(224, height)

    left = max(0, (width - crop_width) // 2)   # centred horizontally
    upper = max(0, height - crop_height)       # anchored to the bottom edge

    img = img.crop((left, upper, left + crop_width, height))

    img = img.resize((224, 224))

    img = image.img_to_array(img)

    return img / 255
  

In [85]:
def get_images(path, correct_images, is_train=True):
    """Load and preprocess the images in ``path`` that are listed in ``correct_images``.

    Images are returned in the iteration order of ``correct_images`` (for a
    dict: insertion order), not in ``os.listdir`` order. ``os.listdir`` gives
    no ordering guarantee, so relying on it would pair images with labels
    that were produced in a different (CSV row) order.

    Args:
        path: directory that contains the image files.
        correct_images: container of the file paths to load. Each entry must
            be spelled exactly as ``os.path.join(path, file_name)``; entries
            with no matching file in ``path`` are skipped.
        is_train: accepted for backward compatibility only; it does not
            change the result (both branches of the original code were
            identical).

    Returns:
        A list with one preprocessed image per selected file.
    """
    available = {os.path.join(path, file_name) for file_name in os.listdir(path)}

    # dict.fromkeys keeps first-seen order and drops repeats, so each file is
    # loaded at most once regardless of the container type passed in.
    selected = [
        file_path
        for file_path in dict.fromkeys(correct_images)
        if file_path in available
    ]

    images = []
    for file_path in selected:
        img = image.load_img(file_path)
        img = image.img_to_array(img)
        images.append(preprocess(img))

    return images

In [86]:
def convert_labels_to_numpy_array(items):
    """Turn the protest rows of a label table into float vectors.

    Args:
        items: DataFrame with (at least) the twelve category columns listed
            below.

    Returns:
        A list with one 1-D NumPy array per row whose 'protest' value is
        truthy. Each array holds that row's category values converted with
        ``float``, in the order of ``categories``. Rows with a falsy
        'protest' value are left out.
    """
    # Column order here defines the position of each value in the vector.
    categories = (
        'protest',
        'violence',
        'sign',
        'photo',
        'fire',
        'police',
        'children',
        'group_20',
        'group_100',
        'flag',
        'night',
        'shouting',
    )

    return [
        np.array([float(record[category]) for category in categories])
        for _, record in items.iterrows()
        if record['protest']
    ]

In [87]:
def get_labels(path):
    """Read the label CSV at ``path`` and convert it to label vectors.

    Args:
        path: location of the CSV file, passed to ``pd.read_csv``.

    Returns:
        Whatever ``convert_labels_to_numpy_array`` returns for the loaded
        table; every row of the file is passed on, nothing is truncated here.
    """
    return convert_labels_to_numpy_array(pd.read_csv(path))

In [88]:
def reshape_images(images, size):
    """Pack ``images`` into a single array of shape ``(size, 224, 224, 3)``.

    Args:
        images: sequence of image arrays (anything ``np.asarray`` accepts)
            holding ``size * 224 * 224 * 3`` values in total.
        size: number of images, used as the first dimension of the result.

    Returns:
        NumPy array of shape ``(size, 224, 224, 3)``.
    """
    stacked = np.asarray(images)
    return stacked.reshape((size, 224, 224, 3))

In [89]:
def get_model(base_model):
    """Build a 12-output head on top of a frozen, truncated ``base_model``.

    The feature extractor spans from the input of ``base_model``'s first
    layer to the output of its second-to-last layer. Every layer in it is
    marked non-trainable, so only the new Dense head can be trained.

    The head uses a sigmoid activation. The 12 targets built from the label
    CSV are independent values (several can be set for the same image) and
    the model is compiled with ``binary_crossentropy``; a softmax would force
    the 12 outputs to sum to 1 and so could never match such targets.

    Args:
        base_model: Keras model that supports ``get_layer(index=...)``.

    Returns:
        An uncompiled ``Sequential`` model: the frozen feature extractor
        followed by ``Dense(12, activation='sigmoid')``.
    """
    feature_extractor = Model(
        inputs=base_model.get_layer(index=0).input,
        outputs=base_model.get_layer(index=-2).output,
    )

    # Freeze the pretrained part so training only updates the new head.
    for layer in feature_extractor.layers:
        layer.trainable = False

    new_model = Sequential()
    new_model.add(feature_extractor)

    # NOTE(review): input_dim=2048 is carried over unchanged; presumably it is
    # the feature width of the InceptionV3 base used in this notebook --
    # confirm before passing a different base model.
    new_model.add(Dense(12, input_dim=2048, activation='sigmoid'))

    return new_model

In [90]:
# Build the classifier: InceptionV3 created with its default arguments is
# wrapped by get_model, then compiled for training.
# NOTE(review): preprocess() feeds 224x224 images scaled to [0, 1]; confirm
# that this matches the input size and value range this InceptionV3 build
# expects.
base_model = InceptionV3()

new_model = get_model(base_model)

new_model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [91]:
# Locations of the label tables and image folders.
training_labels_path = "training.csv"
training_images_path = "train/"

test_labels_path = "test.csv"
test_images_path = "test/"


def _protest_image_names(labels_df, image_dir):
    """Map ``image_dir + fname`` to True for each row with a truthy 'protest' value."""
    names = {}
    for _, row in labels_df.iterrows():
        if row['protest']:
            names[image_dir + row['fname']] = True
    return names


# Paths of the images to load, in the same row order as the label tables.
df = pd.read_csv(training_labels_path)
train_image_names = _protest_image_names(df, training_images_path)

test_df = pd.read_csv(test_labels_path)
test_image_names = _protest_image_names(test_df, test_images_path)

# Labels and preprocessed images for both splits.
train_labels = get_labels(training_labels_path)
train_images = get_images(training_images_path, train_image_names, True)

test_labels = get_labels(test_labels_path)
test_images = get_images(test_images_path, test_image_names, False)

train_size = len(train_images)
test_size = len(test_images)

reshaped_train_images = reshape_images(train_images, train_size)
reshaped_test_images = reshape_images(test_images, test_size)

# Sanity checks, placed last so every variable above is defined even if one
# fires: a missing image file or a repeated fname would leave the images and
# labels with different lengths.
assert len(train_labels) == train_size, (
    "training set: %d label rows but %d images loaded" % (len(train_labels), train_size)
)
assert len(test_labels) == test_size, (
    "test set: %d label rows but %d images loaded" % (len(test_labels), test_size)
)

In [None]:
# Train the new head for 5 epochs on the preprocessed training images,
# with shuffling enabled.
new_model.fit(
    x=reshaped_train_images,
    y=np.array(train_labels),
    epochs=5,
    shuffle=True,
)

Epoch 1/5

In [None]:
# Run the trained model on the test images and keep its outputs in `predict`.
predict = new_model.predict(
    x=reshaped_test_images,
    verbose=1,
)

In [None]:
# Evaluate the model on the test split; the call's return value is shown as
# the cell output.
new_model.evaluate(
    x=reshaped_test_images,
    y=np.array(test_labels),
    verbose=0,
)

In [None]:
# base_model = InceptionV3(weights='imagenet', include_top=False)

In [None]:
# x = base_model.output

# x = GlobalAveragePooling2D()(x)

# x = Dense(2048, activation='relu')(x)

# # and a logistic layer -- let's say we have 10 classes
# predictions = Dense(10, activation='softmax')(x)

# model = Model(inputs=base_model.input, outputs=predictions)

# for layer in base_model.layers:
#     layer.trainable = False
    
# model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

# model.fit(fimages, train_labels,
#                     steps_per_epoch=10000, epochs=10)

