# Crime Detection
This notebook implements a method to detect abnormal movements to catch various types of crimes in progress.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2

import datetime
import sys
import os

from tqdm import tqdm_notebook as tqdm

## Common parameters

In [2]:
# number of coordinates
number_of_coordinates = 17  # MS COCO

# input shape of 1 pose/frame
input_shape = (2, number_of_coordinates)

# length of a training sequence in frames
number_of_frames = 300

## Model definition

In [3]:
import keras
from keras.models import Model, Sequential
from keras.layers import Dense, Input

Using TensorFlow backend.


### Autoencoder

In [4]:
# TODO: extend with variational layers for better estimations of "unknown" positions
def get_autoencoder(input_dimension, sizes, activation="relu", is_variational=False, verbose=False):
    inputs = Input(shape=input_dimension, name="encoder_input")
    layer = inputs
    for i, size in enumerate(sizes):
        layer = Dense(size, activation=activation, name="encoder_" + str(i))(layer)
    encoder = Model(inputs=inputs, outputs=layer)
    
    if verbose:
        encoder.summary()
        
    bottleneck_dimension = tuple(list(input_dimension[:-1]) + [sizes[-1]])
    
    encoded_inputs = Input(shape=bottleneck_dimension, name="decoder_input")
    layer = encoded_inputs
    for i, size in enumerate(reversed(sizes[:-1])):
        layer = Dense(size, activation=activation, name="decoder_" + str(i))(layer)
    outputs = Dense(input_dimension[1], activation="linear")(layer)
    decoder = Model(inputs=encoded_inputs, outputs=outputs)
    
    if verbose:
        decoder.summary()
        
    model = Model(inputs=encoder.inputs, outputs=decoder(encoder.outputs))
    
    if verbose:
        model.summary()
        
    return model, encoder, decoder

In [5]:
# layer_sizes = [30, 20, 10]
layer_sizes = [256, 128, 64, 10]
autoencoder, encoder, decoder = get_autoencoder(input_shape, layer_sizes, is_variational=False, verbose=True)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_input (InputLayer)   (None, 2, 17)             0         
_________________________________________________________________
encoder_0 (Dense)            (None, 2, 256)            4608      
_________________________________________________________________
encoder_1 (Dense)            (None, 2, 128)            32896     
_________________________________________________________________
encoder_2 (Dense)            (None, 2, 64)             8256      
_________________________________________________________________
encoder_3 (Dense)            (None, 2, 10)             650       
Total params: 46,410
Trainable params: 46,410
Non-trainable params: 0
_________________________________________________________________
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
decode

### Time-distributed recurrent layer for coordinates coming from separate autoencoders for each time step

In [6]:
from keras.layers import Reshape
from keras.layers.recurrent import LSTM
from keras.layers.wrappers import TimeDistributed

# test_autoencoder_only = True
test_autoencoder_only = False

def get_sequence_model(inner_model, frame_dimension, number_of_frames=30, lstm_activation="tanh", verbose=False):

    input_dimension = tuple([number_of_frames] + list(frame_dimension))
    inputs = Input(shape=input_dimension)

    # wrap an autoencoder inside a time-distributed layer for each time-step
    repeating_model = TimeDistributed(inner_model)(inputs)
    
    # reshape coordinates from (X, Y) to (X concat Y) for LSTM
    repeating_shape = repeating_model.get_shape().as_list()
    repeating_shape = tuple(repeating_shape[1:-2] + [repeating_shape[-2] * repeating_shape[-1]])
    
    sequence_layer = Reshape(repeating_shape)(repeating_model)
    

#     outputs = lstm_layer
    if test_autoencoder_only:
        outputs = Reshape(input_dimension)(sequence_layer)  # to debug autoencoder
    else:
        # put each autoencoder output to a LSTM with as many units as frames in a sequence
        lstm_layer = LSTM(units=2 * number_of_coordinates, activation=lstm_activation, return_sequences=True)(sequence_layer)
#         lstm_layer = LSTM(units=2 * number_of_coordinates, activation=lstm_activation, return_sequences=True)(lstm_layer)
#         lstm_layer = LSTM(units=2 * number_of_coordinates, activation=lstm_activation, return_sequences=True)(lstm_layer)
#         lstm_layer = LSTM(units=2 * number_of_coordinates, activation=lstm_activation, return_sequences=True)(lstm_layer)
#         lstm_layer = LSTM(units=2 * number_of_coordinates, return_sequences=True, activation="linear")(lstm_layer)
        outputs = Reshape(input_dimension)(lstm_layer)
        outputs = Dense(number_of_coordinates, activation="linear")(outputs)  # regression layer

    model = Model(inputs=inputs, outputs=outputs)
    if verbose:
        model.summary()

    return model

In [7]:
model = get_sequence_model(autoencoder, (input_shape), number_of_frames=number_of_frames, verbose=True)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 300, 2, 17)        0         
_________________________________________________________________
time_distributed_1 (TimeDist (None, 300, 2, 17)        92827     
_________________________________________________________________
reshape_1 (Reshape)          (None, 300, 34)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 300, 34)           9384      
_________________________________________________________________
reshape_2 (Reshape)          (None, 300, 2, 17)        0         
_________________________________________________________________
dense_2 (Dense)              (None, 300, 2, 17)        306       
Total params: 102,517
Trainable params: 102,517
Non-trainable params: 0
_________________________________________________________________


## Data preparation

In [8]:
sequences_df = pd.read_csv("sequences.csv", sep=",", header=0, index_col=None)
print(len(sequences_df), "records")
sequences_df.head(10)

356460 records


Unnamed: 0,sequence_id,step,frame,image,idx,score,x0,y0,c0,x1,...,c13,x14,y14,c14,x15,y15,c15,x16,y16,c16
0,0,0,0,001_00001.png,1,2.442609,70.526169,197.410065,0.830597,74.095299,...,0.005064,84.802704,210.794312,0.022095,89.264114,193.840927,0.00665,95.510101,239.347382,0.006375
1,0,1,1,001_00002.png,1,2.442101,70.526924,197.422592,0.830311,74.097595,...,0.004961,84.809608,210.812622,0.022006,89.272949,193.851929,0.00661,95.521622,239.377991,0.006519
2,0,2,2,001_00003.png,1,2.446754,70.57283,197.566177,0.835175,74.161758,...,0.004928,84.928528,211.024643,0.022225,89.414688,193.977249,0.006271,95.695312,239.736038,0.006782
3,0,3,3,001_00004.png,1,2.447801,70.581573,197.574753,0.83555,74.172974,...,0.005162,84.947159,211.042496,0.022532,89.436409,193.983353,0.006302,95.721344,239.773666,0.006585
4,0,4,4,001_00005.png,1,2.447962,70.584229,197.580795,0.835475,74.175385,...,0.005158,84.948845,211.047623,0.022524,89.43779,193.989639,0.006308,95.722305,239.77684,0.006585
5,0,5,5,001_00006.png,1,2.448433,70.515854,197.582993,0.836206,74.11586,...,0.00521,84.915863,211.082993,0.022336,89.415863,193.982986,0.006287,95.715866,239.88298,0.006673
6,0,6,6,001_00007.png,1,2.448457,70.515572,197.58284,0.836206,74.115509,...,0.00521,84.915337,211.082626,0.022336,89.415268,193.982895,0.006287,95.715164,239.882156,0.006673
7,0,7,7,001_00008.png,1,2.448457,70.515572,197.58284,0.836206,74.115509,...,0.00521,84.915337,211.082626,0.022336,89.415268,193.982895,0.006287,95.715164,239.882156,0.006673
8,0,8,8,001_00009.png,1,2.44854,70.51564,197.58223,0.836206,74.115555,...,0.00521,84.915268,211.081894,0.022336,89.415146,193.982346,0.006287,95.714996,239.881134,0.006673
9,0,9,9,001_00010.png,1,2.448607,70.514908,197.587784,0.836206,74.114944,...,0.00521,84.91507,211.087936,0.022336,89.415123,193.987747,0.006287,95.715187,239.888275,0.006673


### Extract all sequences with at least ${number_of_frames} steps

In [9]:
# get all counts
counts_df = sequences_df.groupby(["sequence_id"], as_index=False).count().loc[:, ["sequence_id", "step"]]
counts_df.head(10)

Unnamed: 0,sequence_id,step
0,0,544
1,1,544
2,2,35
3,3,6
4,4,51
5,5,6
6,6,3
7,7,395
8,8,6
9,9,3


In [10]:
# extract all sequence IDs with at least ${number_of_frames} steps
training_sequences_df = counts_df.loc[counts_df["step"] >= number_of_frames].loc[:, ["sequence_id"]]
training_sequences_df.head(10)

Unnamed: 0,sequence_id
0,0
1,1
7,7
10,11
12,13
15,16
18,19
19,21
20,22
21,23


In [11]:
print("Found", len(training_sequences_df), "suitable sequences for training.")

Found 269 suitable sequences for training.


In [12]:
# join selected sequences and original dataset
training_df = pd.merge(training_sequences_df, sequences_df, how="inner", on=["sequence_id"])
training_df.sort_values(["sequence_id", "step"], ascending=True, inplace=True)
training_df.head(10)

Unnamed: 0,sequence_id,step,frame,image,idx,score,x0,y0,c0,x1,...,c13,x14,y14,c14,x15,y15,c15,x16,y16,c16
0,0,0,0,001_00001.png,1,2.442609,70.526169,197.410065,0.830597,74.095299,...,0.005064,84.802704,210.794312,0.022095,89.264114,193.840927,0.00665,95.510101,239.347382,0.006375
1,0,1,1,001_00002.png,1,2.442101,70.526924,197.422592,0.830311,74.097595,...,0.004961,84.809608,210.812622,0.022006,89.272949,193.851929,0.00661,95.521622,239.377991,0.006519
2,0,2,2,001_00003.png,1,2.446754,70.57283,197.566177,0.835175,74.161758,...,0.004928,84.928528,211.024643,0.022225,89.414688,193.977249,0.006271,95.695312,239.736038,0.006782
3,0,3,3,001_00004.png,1,2.447801,70.581573,197.574753,0.83555,74.172974,...,0.005162,84.947159,211.042496,0.022532,89.436409,193.983353,0.006302,95.721344,239.773666,0.006585
4,0,4,4,001_00005.png,1,2.447962,70.584229,197.580795,0.835475,74.175385,...,0.005158,84.948845,211.047623,0.022524,89.43779,193.989639,0.006308,95.722305,239.77684,0.006585
5,0,5,5,001_00006.png,1,2.448433,70.515854,197.582993,0.836206,74.11586,...,0.00521,84.915863,211.082993,0.022336,89.415863,193.982986,0.006287,95.715866,239.88298,0.006673
6,0,6,6,001_00007.png,1,2.448457,70.515572,197.58284,0.836206,74.115509,...,0.00521,84.915337,211.082626,0.022336,89.415268,193.982895,0.006287,95.715164,239.882156,0.006673
7,0,7,7,001_00008.png,1,2.448457,70.515572,197.58284,0.836206,74.115509,...,0.00521,84.915337,211.082626,0.022336,89.415268,193.982895,0.006287,95.715164,239.882156,0.006673
8,0,8,8,001_00009.png,1,2.44854,70.51564,197.58223,0.836206,74.115555,...,0.00521,84.915268,211.081894,0.022336,89.415146,193.982346,0.006287,95.714996,239.881134,0.006673
9,0,9,9,001_00010.png,1,2.448607,70.514908,197.587784,0.836206,74.114944,...,0.00521,84.91507,211.087936,0.022336,89.415123,193.987747,0.006287,95.715187,239.888275,0.006673


In [13]:
print(len(training_df), "training records")

182981 training records


## Put individual sequences into a dictionary

In [14]:
coordinate_columns = ['x', 'y']
coordinate_columns = [x + str(i) for x in coordinate_columns for i in range(number_of_coordinates)]
print(coordinate_columns)

['x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9', 'x10', 'x11', 'x12', 'x13', 'x14', 'x15', 'x16', 'y0', 'y1', 'y2', 'y3', 'y4', 'y5', 'y6', 'y7', 'y8', 'y9', 'y10', 'y11', 'y12', 'y13', 'y14', 'y15', 'y16']


In [15]:
# dictionary containing a list of tuples of a list of coordinates and a bounding box for each step
training_sequences = {}

# bounding box of the whole sequence for repositioning/augmentation
sequence_boundaries = {}

LIMIT = 1E10

total = 0

progress_bar = tqdm(total=len(training_df))

previous_sequence_id = training_df.iloc[0]["sequence_id"]

min_x = min_y = LIMIT
max_x = max_y = -LIMIT

for i, record in training_df.iterrows():
    coordinates = record[coordinate_columns].values
#     print(coordinates)
    bounding_box = [np.min(coordinates[:number_of_coordinates]),  # min x, top left
                    np.min(coordinates[number_of_coordinates:]),  # min y
                    np.max(coordinates[:number_of_coordinates]),  # max x, bottom right
                    np.max(coordinates[number_of_coordinates:])]  # max y
#     print(bounding_box)
    sequence_id = record['sequence_id']

    if sequence_id != previous_sequence_id:
        sequence_boundaries[previous_sequence_id] = [min_x, min_y, max_x, max_y]
        min_x = min_y = LIMIT
        max_x = max_y = -LIMIT
        previous_sequence_id = sequence_id
        
    if sequence_id not in training_sequences:
        training_sequences[sequence_id] = []
    steps = training_sequences[sequence_id]
    steps.append((coordinates, bounding_box))
    
    min_x, min_y = min(min_x, bounding_box[0]), min(min_y, bounding_box[1])
    max_x, max_y = max(max_x, bounding_box[2]), max(max_y, bounding_box[3])
    
    total += 1
    
    progress_bar.update(1)
    
sequence_boundaries[previous_sequence_id] = [min_x, min_y, max_x, max_y]

progress_bar.close()

print(len(training_sequences.keys()),"training sequences with", total, "steps and", len(sequence_boundaries), "boundaries")

HBox(children=(IntProgress(value=0, max=182981), HTML(value='')))


269 training sequences with 182981 steps and 269 boundaries


## Augmenting generator

In [16]:
from keras.utils import Sequence

class Single_Track_Generator(Sequence):
    
    def __init__(self, sequences, sequence_boundaries, number_of_frames, batch_size=1, 
                 randomized=True, randomize_positions=False, randomize_geometry=False):
        """
        Initializes generator
        
        Arguments:
        sequences -- training sequences dictionary, sequence_id -> [(coordinates, bounding box) for each step]
        sequence_boundaries -- dictionary with bounding boxes of whole sequences, sequence_id -> bounding box
        number_of_frames -- number of frames for a single training sample, e.g. 300 frames
        batch_size -- training batch size
        randomized -- if true, order of sequences is randomized in each epoch
        """
        self.sequences = sequences
        self.number_of_frames = number_of_frames
        # TODO: allow/test batch_size > 1
        self.batch_size = batch_size
        self.ids = list(sequences.keys())
        self.dimension = len(sequences[self.ids[0]][0][0]) // 2  # length of coordinates
        self.randomized = randomized
        if self.randomized:
            self.ids = np.random.permutation(self.ids)
        self.index = 0
        self.length = None
        self.steps = {}  # index to sequence_id + offset for each training step

    def __len__(self):
        """
        Returns number of batches during a single full pass
        """
        offset = 0
        if self.length is None:
            self.length = 0
            for id in self.ids:
                number_of_subsequences = len(self.sequences[id]) - self.number_of_frames + 1
                self.length += number_of_subsequences
                # for each possible subsequence of a sequence add a new (id, starting step) record
                # to allow random batch retrieval with arbitrary size
                for i in range(number_of_subsequences):
                    self.steps[offset] = (id, i)  # (sequence_id, offset)
                    offset += 1
            self.length = np.floor(self.length / float(self.batch_size))  # skip last incomplete batch
            self.length = int(self.length)
                
        return self.length

    def __getitem__(self, idx):
        """
        Receives a new training batch with index idx
        """
        x = np.ndarray((self.batch_size, self.number_of_frames, 2, self.dimension), dtype=np.float32)
        index = idx * self.batch_size
        for j in range(self.batch_size):
            for i in range(self.number_of_frames):
                sequence, step = self.steps[index]
                coordinates = self.augment_coordinates(self.sequences[sequence][step][0][:])
                x[j, i, :, :] = np.reshape(coordinates, (2, self.dimension))
        
        return np.array([x, x])
    
    def augment_coordinates(self, coordinates):
        """
        Augments coordinates for training
        Arguments:
        coordinates -- all x coordinates followed by all y coordinates
        """
        # TODO: finish random coordinate augmentation
        # horizontal flipping
        # aspect ratio adjustment (constant or changing vertical size)
        # move coordinates anywhere on the "screen" based on whole sequence's bounding box
        # normalize coordinates to [0, 1]x[0, 1]
        result = coordinates
        return result
    
    def on_epoch_end(self):
        """
        Called on epoch end. If randomized is set to true, it shuffles sequence IDs for the next pass
        """
        if self.randomized:
            self.ids = np.random.permutation(self.ids)
        self.index = 0
        self.length = None
        self.__len__()
        

## Train the model

### Hyperparameters

### Training

In [None]:
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, TensorBoard
from keras.optimizers import Adam

# take only a first few sequences to have reasonable training time on a single GPU for testing/demo purposes
generator_sequences = training_sequences
ids = list(training_sequences.keys())[:20]
generator_sequences = {id: training_sequences[id] for id in ids}

generator = Single_Track_Generator(generator_sequences, sequence_boundaries, number_of_frames, BATCH_SIZE)

optimizer = Adam(lr=LEARNING_RATE)
model.compile(optimizer=optimizer, loss="mean_squared_error")

reduce_lr = ReduceLROnPlateau(monitor="loss", factor=0.1, patience=3, verbose=1, mode="min", min_lr=MINIMAL_LEARNING_RATE)  # Hmm, doesn't work with LSTM - why?
early_stopping = EarlyStopping(monitor="loss", patience=8, verbose=1)
checkpoint = ModelCheckpoint(BEST_MODEL_NAME, monitor="loss", verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1)

history = model.fit_generator(generator,
                              epochs=EPOCHS,
                              workers=WORKERS,
#                               callbacks=[early_stopping, checkpoint],
                              callbacks=[reduce_lr, early_stopping, checkpoint],
                              verbose=1,
                             )

Epoch 1/100

Epoch 00001: loss improved from inf to 17409.09570, saving model to crime_detection_best_model.h5
Epoch 2/100

Epoch 00002: loss improved from 17409.09570 to 10807.76309, saving model to crime_detection_best_model.h5
Epoch 3/100

Epoch 00003: loss improved from 10807.76309 to 10285.84162, saving model to crime_detection_best_model.h5
Epoch 4/100

Epoch 00004: loss improved from 10285.84162 to 8503.13080, saving model to crime_detection_best_model.h5
Epoch 5/100

Epoch 00005: loss improved from 8503.13080 to 6928.01842, saving model to crime_detection_best_model.h5
Epoch 6/100

Epoch 00006: loss improved from 6928.01842 to 6072.69465, saving model to crime_detection_best_model.h5
Epoch 7/100

Epoch 00007: loss improved from 6072.69465 to 5581.34085, saving model to crime_detection_best_model.h5
Epoch 8/100

Epoch 00008: loss improved from 5581.34085 to 5283.36814, saving model to crime_detection_best_model.h5
Epoch 9/100

Epoch 00009: loss improved from 5283.36814 to 5082.8

### Training progress

In [None]:
print("Available statistics:", ", ".join(list(history.history.keys())))
# accuracy
if 'acc' in history.history.keys():
    plt.plot(history.history['acc'])
    if 'val_acc' in history.history.keys():
        plt.plot(history.history['val_acc'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
    
# loss
if 'loss' in history.history.keys():
    plt.plot(history.history['loss'])
    if 'val_loss' in history.history.keys():
        plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

### Evaluate best model

In [None]:
from keras.models import load_model

def get_training_instance(sequences, id, index=0, dimension=number_of_coordinates, length=number_of_frames):
    sequence = sequences[id]
    result = np.ndarray((length, 2, dimension))
    for i in range(length):
        coordinates = sequence[index + i][0]
        result[i, :, :] = np.reshape(coordinates, (2, dimension))
    return result

best_model = load_model(BEST_MODEL_NAME)

instance = get_training_instance(generator_sequences, ids[0])
print("Original coordinates:")
print(instance)

prediction = best_model.predict(np.array([instance]))
print()
print("Predicted coordinates:")
print(prediction)

print()
mse = ((instance - prediction)**2).mean()
rmse = np.sqrt(mse)
print("Error (MSE):", mse)
print("On average, predicted coordinates were off by {0:.2f} across all frames in sequence".format(rmse))