# UCLA Results Validation

This notebook is aimed at attemping to validate the results achieved by UCLA in their paper

In [86]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [87]:
%reload_ext autoreload

In [88]:
from protestDB import cursor
from protestDB import models
from lib import analysis_utils as au
import pandas as pd
import random
import os
from PIL import Image
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.preprocessing.image import ImageDataGenerator
import scipy
from keras.models import Model
from keras import backend as K
from keras.applications import ResNet50
from keras.layers import Dense, Input, Flatten
import h5py

### Retrieve a sample of 500 images

In [89]:
# Take a sample of 500 images and their respective labels
pc = cursor.ProtestCursor()
imgs = pc.query(models.Images).filter(models.Images.source == 'UCLA')


Get the labels

In [90]:

# values of violence = 2 are gonna be masked
headers = ["fname", "protest", "violence", "sign", "photo", "fire", "police", "children", "group_20", "group_100", "flag", "night", "shouting"]
df = pd.DataFrame(columns=headers)
indx_non_violence = []

for indx, im in enumerate(imgs[:200]):
    tags = im.tags
    values = {}
    values['fname'] = im.name
    
    label = im.labels
    if len(label) == 1:
        values['violence'] = label[0].label
    elif len(label) == 0:
        values['violence'] = 0
        indx_non_violence.append(indx) # save the index of a image without label for later removing value
    else:
        raise ValueError("there should not be more than one label")
    
    for tag in tags:
        if tag.tagName == 'ucla-test': continue # ignore this
        values[tag.tagName] = 1
    
    df = df.append(values, ignore_index=True)




In [91]:
# Apply a cut point and normalize violence scores
cutpoint = 0.6

#cutpoint 
ix_large = df[df['violence'] > cutpoint].index
df.loc[ix_large, 'violence'] = cutpoint

# normalize
v = np.matrix(df['violence'])
scaler = MinMaxScaler()
df['violence'] = scaler.fit_transform(v.T)

# mask values that did not had violence labels
df.loc[indx_non_violence, 'violence'] = -1

In [92]:
df = df.fillna(0)
df

Unnamed: 0,fname,protest,violence,sign,photo,fire,police,children,group_20,group_100,flag,night,shouting
0,test-00000.jpg,1,0.632513,1,0.0,0.0,0.0,0.0,1,1,0.0,1,0.0
1,test-00001.jpg,0,2.000000,0,0.0,0.0,0.0,0.0,0,0,0.0,0,0.0
2,test-00002.jpg,1,0.370518,1,0.0,0.0,0.0,0.0,0,0,0.0,0,0.0
3,test-00003.jpg,0,2.000000,0,0.0,0.0,0.0,0.0,0,0,0.0,0,0.0
4,test-00004.jpg,0,2.000000,0,0.0,0.0,0.0,0.0,0,0,0.0,0,0.0
5,test-00005.jpg,0,2.000000,0,0.0,0.0,0.0,0.0,0,0,0.0,0,0.0
6,test-00006.jpg,0,2.000000,0,0.0,0.0,0.0,0.0,0,0,0.0,0,0.0
7,test-00007.jpg,0,2.000000,0,0.0,0.0,0.0,0.0,0,0,0.0,0,0.0
8,test-00008.jpg,0,2.000000,0,0.0,0.0,0.0,0.0,0,0,0.0,0,0.0
9,test-00009.jpg,0,2.000000,0,0.0,0.0,0.0,0.0,0,0,0.0,0,0.0


Get the images

In [93]:
image_dir = "../images/"
np_imgs = []
for indx, row in df.iterrows():
    path = os.path.join(image_dir, row['fname'])
    img = Image.open(path)
    img_array = np.array(img)
    np_imgs.append(img_array)
np_imgs = np.array(np_imgs)

In [94]:
np_imgs.shape

(200,)

In [95]:
# Resize images
new_shape = (224,224,3)
np_imgs_resized = np.empty(shape=(np_imgs.shape[0],)+new_shape)
for idx in range(np_imgs.shape[0]):
    np_imgs_resized[idx] = scipy.misc.imresize(np_imgs[idx], new_shape)

`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
  """


In [96]:
np_imgs_resized.shape

(200, 224, 224, 3)

Select training and validation sets

In [97]:
train_X = np_imgs_resized[:100]; test_X = np_imgs_resized[100:200]
print("train and test image shapes", train_X.shape, test_X.shape)

train_protest = np.array(df['protest'])[0:100] ;test_protest = np.array(df['protest'])[100:200]
print("protest train  and test shapes", train_protest.shape, test_protest.shape)

train_violence = np.array(df['violence'])[0:100] ;test_violence = np.array(df['violence'])[100:200]
print("violence train  and test shapes", train_violence.shape, test_violence.shape)

train_visual = np.array(df.iloc[:,3:])[0:100] ;test_visual = np.array(df.iloc[:,3:])[100:200]
print("violence train  and test shapes", train_visual.shape, test_visual.shape)

train and test image shapes (100, 224, 224, 3) (100, 224, 224, 3)
protest train  and test shapes (100,) (100,)
violence train  and test shapes (100,) (100,)
violence train  and test shapes (100, 10) (100, 10)


### Some tests

Functions

In [98]:
def generate_data_generator(generator, imgs, protest, violence):
    genImgs = generator.flow(imgs, seed=7)
    genProtest = generator.flow(protest, seed=7)
    genViolence = generator.flow(violence, seed=7)
    while True:
            x = genImgs.next()
            y_protest = genProtest.next()
            y_violence = genViolence.next()
            yield x, [y_protest, y_violence]
            


In [99]:
# test generator
violence_values = np.array(df['violence'])
generator = ImageDataGenerator(vertical_flip=True)

generator.fit(np_imgs_resized)
gen_iter = generator.flow(np_imgs_resized, np.array([violence_values,violence_values]).reshape(200,2) )
for i in range(5):
    n = gen_iter.next()
    print(n[0].shape)
    print(n[1].shape)

(32, 224, 224, 3)
(32, 2)
(32, 224, 224, 3)
(32, 2)
(32, 224, 224, 3)
(32, 2)
(32, 224, 224, 3)
(32, 2)
(32, 224, 224, 3)
(32, 2)


### Modeling

In [100]:
n_images = np_imgs_resized.shape[0]

In [101]:
# testing it works
resnet_model = ResNet50(include_top=False, weights = None, input_shape = (224,224,3))
features = resnet_model.predict(np_imgs_resized[0].reshape(1,224,224,3))
print(features.shape)

(1, 1, 1, 2048)


In [102]:
img_input = Input(shape=(224,224,3), name='img_input')
resnet_model = ResNet50(include_top=False, weights = None) (img_input)
flatten = Flatten()(resnet_model)
protest_out = Dense(1, activation='sigmoid', name='protest_out')(flatten)
visual_out = Dense(10, activation='sigmoid', name='visual_out')(flatten)
violence_out = Dense(1, activation='linear', name='violence_out')(flatten)

model = Model(inputs= img_input, outputs=[protest_out, visual_out, violence_out])
model.compile(optimizer='rmsprop',
              loss={'protest_out': 'binary_crossentropy', 
                    'visual_out': 'binary_crossentropy', 
                    'violence_out': 'mean_squared_error'},
              loss_weights={'protest_out': 1., 
                            'visual_out': 1, 
                            'violence_out': 1 })
model.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
img_input (InputLayer)          (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
resnet50 (Model)                multiple             23587712    img_input[0][0]                  
__________________________________________________________________________________________________
flatten_4 (Flatten)             (None, 2048)         0           resnet50[1][0]                   
__________________________________________________________________________________________________
protest_out (Dense)             (None, 1)            2049        flatten_4[0][0]                  
__________________________________________________________________________________________________
visual_out

In [103]:
model.fit({'img_input': train_X},
          {'protest_out': train_protest, 'visual_out': train_visual, 'violence_out': train_violence},
          epochs=50, batch_size=32, validation_split=0.5)

Train on 50 samples, validate on 50 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50


KeyboardInterrupt: 