In [1]:
import itertools
import argparse
import numpy as np
import pandas as pd
import sklearn
import sklearn.metrics
import time
import os
import sys
import gc
import tempfile
import keras.optimizers
import random
import cv2
from math import ceil
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
import ast
import h5py
from keras.utils import np_utils
from itertools import tee
import keras.backend as K
import tensorflow as tf
from tensorflow.python.framework.graph_util import convert_variables_to_constants
from IPython.display import display, clear_output

Using TensorFlow backend.


# noscope/DataUtils

In [2]:
def pairwise(iterable):
    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)

def nth_elem(list, n):
    return np.array([list[i] for i in xrange(0, len(list), n)])

# def get_labels(csv_fname, limit=None, interval=1, start=0, labels=['person', 'bus', 'car']):
#     df = pd.read_csv(csv_fname)
#     df = df[df['frame'] >= start]
#     df = df[df['frame'] < start + limit]
#     df['frame'] -= start
#     df = df[df['object_name'].isin(labels)]
#     groups = df.set_index('frame')
#     return groups

def get_raw_counts(csv_fname, OBJECTS=['person'], limit=None, interval=1, start=0):
    labels = get_labels(csv_fname, interval=interval, limit=limit, start=start)
    counts = np.zeros( (len(labels), len(OBJECTS)), dtype='uint8' )
    for i, label in enumerate(labels):
        for j, obj in enumerate(OBJECTS):
            counts[i, j] = sum(map(lambda x: 1 if x['object_name'] == obj else 0, label))
    return counts

# FIXME: efficiency
def get_counts(csv_fname, OBJECTS=['person'], limit=None, interval=1, start=0):
    labels = get_labels(csv_fname, interval=interval, limit=limit, start=start)
    counts = np.zeros( (len(labels), len(OBJECTS)), dtype='float' )
    for i, label in enumerate(labels):
        for j, obj in enumerate(OBJECTS):
            counts[i, j] = max([0] + \
                    map(lambda x: x['confidence'] if x['object_name'] == obj else 0, label))
    return counts

def get_differences(csv_fname, OBJECT, limit=None, interval=1, delay=1):
    def sym_diff(first, second):
        first_objs = set(x['object_name'] for x in first if x['object_name'] == OBJECT)
        second_objs = set(x['object_name'] for x in second if x['object_name'] == OBJECT)
        return len(first_objs.symmetric_difference(second_objs)) > 0

    labels = get_labels(csv_fname, limit=limit, interval=interval, start=delay)
    return np.array([1 if sym_diff(labels[i], labels[i-delay]) else 0 for i in xrange(delay, limit, interval)])

def get_binary(csv_fname, OBJECTS=['person'], limit=None, start=0, WINDOW=30):
    df = pd.read_csv(csv_fname)
    df = df[df['object_name'].isin(OBJECTS)]
    groups = df.set_index('frame')
    counts = map(lambda i: i in groups.index, range(start, limit + start))
    counts = np.array(counts)

    smoothed_counts = np.convolve(np.ones(WINDOW), np.ravel(counts), mode='same') > WINDOW * 0.7
    print np.sum(smoothed_counts != counts), np.sum(smoothed_counts)
    smoothed_counts = smoothed_counts.reshape(len(counts), 1)
    counts = smoothed_counts
    return counts

def smooth_binary(counts):
    for i in xrange(1, len(counts) - 1):
        if counts[i][0] > 0:
            continue
        if counts[i - 1][0] > 0 and counts[i + 1][0] > 0:
            counts[i][0] = 1
    return counts

# Given X_train, X_test, center both by the X_train mean
def center_data(X_train, X_test):
    mean = np.mean(X_train, axis=0)
    return X_train - mean, X_test - mean

# Convert (frames, counts) into test, train
def to_test_train(all_frames, all_counts,
                  regression=False, center=True, dtype='float32', train_ratio=0.6):
    assert len(all_frames) == len(all_counts), 'Frame length should equal counts length'

    def split(arr):
        # 250 -> 100, 50, 100
        ind = int(len(arr) * train_ratio)
        if ind > 100000:
            ind = len(arr) - 100000
        return arr[:ind], arr[ind:]

    nb_classes = all_counts.max() + 1
    X = all_frames
    if regression:
        Y = np.array(all_counts)
    else:
        Y = np_utils.to_categorical(all_counts, nb_classes)

    if center:
        X_train, X_test = center_data(*split(X))
        X_train = X_train.astype(dtype)
        X_test = X_test.astype(dtype)
    else:
        X_train, X_test = split(X)
    Y_train, Y_test = split(Y)
    return X_train, X_test, Y_train, Y_test

def read_coco_dataset(coco_dir, object, resol=50):
    def read_hdf5_file(coco_dir, object, resol, data_type):
        fname = '%s/%s_%d_%s2014.h5' % (coco_dir, object, resol, data_type)
        h5f = h5py.File(fname, 'r')
        X = h5f['images'][:]
        Y = h5f['labels'][:].astype('uint8')
        # shuffle X and Y in unison
        rng_state = np.random.get_state()
        np.random.shuffle(X)
        np.random.set_state(rng_state)
        np.random.shuffle(Y)
        h5f.close()
        return X, Y

    X_train, Y_train = read_hdf5_file(coco_dir, object, resol, 'train')
    X_val, Y_val = read_hdf5_file(coco_dir, object, resol, 'val')

    assert np.max(Y_train) == np.max(Y_val)
    nb_classes = np.max(Y_train) + 1
    Y_train = np_utils.to_categorical(Y_train, nb_classes)
    Y_val = np_utils.to_categorical(Y_val, nb_classes)

    return X_train, Y_train, X_val, Y_val

def get_data(csv_fname, video_fname, binary=False, num_frames=None,
             regression=False, OBJECTS=['person'], resol=(50, 50),
             center=True, dtype='float32', train_ratio=0.6):
    def print_class_numbers(Y, nb_classes):
        classes = Y.argmax(axis=-1)
        for i in xrange(nb_classes):
            print 'class %d: %d' % (i, np.sum(classes == i))

    print '\tParsing %s, extracting %s' % (csv_fname, str(OBJECTS))
    if binary:
        all_counts = get_binary(csv_fname, limit=num_frames, OBJECTS=OBJECTS)
    else:
        all_counts = get_counts(csv_fname, limit=num_frames, OBJECTS=OBJECTS)
    print '\tRetrieving all frames from %s' % video_fname
    all_frames = get_all_frames(
            len(all_counts), video_fname, scale=resol, dtype=dtype)
    print '\tSplitting data into training and test sets'
    X_train, X_test, Y_train, Y_test = to_test_train(
            all_frames, all_counts, regression=regression,
            center=center, dtype=dtype, train_ratio=train_ratio)
    if regression:
        nb_classes = 1
        print '(train) mean, std: %f, %f' % \
            (np.mean(Y_train), np.std(Y_train))
        print '(test) mean, std: %f %f' % \
            (np.mean(Y_test), np.std(Y_test))
    else:
        nb_classes = all_counts.max() + 1
        print '(train) positive examples: %d, total examples: %d' % \
            (np.count_nonzero(Y_train.argmax(axis=-1)),
             len(Y_train))
        print_class_numbers(Y_train, nb_classes)
        print '(test) positive examples: %d, total examples: %d' % \
            (np.count_nonzero(Y_test.argmax(axis=-1)),
             len(Y_test))
        print_class_numbers(Y_test, nb_classes)

    print 'shape of image: ' + str(all_frames[0].shape)
    print 'number of classes: %d' % (nb_classes)

    data = (X_train, Y_train, X_test, Y_test)
    return data, nb_classes

def get_class_weights(Y_train, class_weight_factor=1.0):
    n_classes = max(Y_train) + 1
    class_multiplier = np.array([1.0*class_weight_factor, 1.0/class_weight_factor])
    class_weights = float(len(Y_train)) / (n_classes*np.bincount(Y_train)*class_multiplier)
    return dict(zip(range(n_classes), class_weights))

def output_csv(csv_fname, stats, headers):
    df = pd.DataFrame(stats, columns=headers)
    df.to_csv(csv_fname, index=False)

def confidences_to_csv(csv_fname, confidences, OBJECT):
    col_names = ['frame', 'labels']
    labels = map(lambda conf: [{'confidence': conf, 'object_name': OBJECT}],
                 confidences)
    # because past fuccboi DK make yolo_standalone 1-indexed
    frames = range(1, len(confidences) + 1)
    output_csv(csv_fname, zip(frames, labels), col_names)

# noscope/VideoUtils

In [3]:
def VideoIterator(video_fname, scale=None, start=0, frameset=None):
    cap = cv2.VideoCapture(video_fname)
    # Seeks to the Nth frame. The next read is the N+1th frame
    # In OpenCV 2.4, this is cv2.cv.CAP_PROP_POS_FRAMES (I think)
    cap.set(cv2.CAP_PROP_POS_FRAMES, start - 1)
    frame = 0
    frame_ind = -1
    if scale is not None:
        try:
            len(scale)
            resol = scale
            scale = None
        except:
            resol = None
    while frame is not None:
        frame_ind += 1
#         _, frame = cap.read()
        reteval = cap.grab()
        if not reteval:
            print 'Cannot grab next frame: ', frame_ind 
        if frameset is not None and frame_ind not in frameset:
            continue
        _, frame = cap.retrieve()
#         print 'video iter: ', frame_ind
        if scale is not None:
            frame = cv2.resize(frame, None, fx=scale, fy=scale, interpolation=cv2.INTER_NEAREST)
        elif resol is not None:
            frame = cv2.resize(frame, resol, interpolation=cv2.INTER_NEAREST)
        yield frame_ind, frame

def VideoHistIterator(video_fname, scale=None, start=0):
    from noscope.filters import ColorHistogram
    vid_it = VideoIterator(video_fname, scale=scale, start=start)
    frame = 0
    while frame is not None:
        frame_ind, frame = vid_it.next()
        hist = ColorHistogram.compute_histogram(frame)
        yield frame_ind, frame, hist

def get_all_frames(num_frames, video_fname, scale=None, start=0, dtype='float32', frameset=None):
    if video_fname[-4:] == '.bin':
        RESOL = (50, 50) # FIXME
        FRAME_SIZE = RESOL[0] * RESOL[0] * 3
        f = open(video_fname, 'rb')
        f.seek(start * FRAME_SIZE)
        frames = np.fromfile(f, dtype='uint8', count=num_frames * FRAME_SIZE)
        frames = frames.reshape((num_frames, RESOL[0], RESOL[1], 3))
        return frames.astype('float32') / 255.

    vid_it = VideoIterator(video_fname, scale=scale, start=start, frameset=frameset)

    _, frame = vid_it.next()
    frames = np.zeros( tuple([num_frames] + list(frame.shape)), dtype=dtype )
    frames[0, :] = frame

    for i in xrange(1, num_frames):
        _, frame = vid_it.next()
        frames[i, :] = frame

    if dtype == 'float32':
        frames /= 255.0

    return frames

# noscope/Models.py

In [4]:
computed_metrics = ['accuracy', 'mean_squared_error']

# In case we want more callbacks
def get_callbacks(model_fname, patience=2):
    return [ModelCheckpoint(model_fname)]
    return [EarlyStopping(monitor='loss',     patience=patience, min_delta=0.00001),
            EarlyStopping(monitor='val_loss', patience=patience + 2, min_delta=0.0001),
            ModelCheckpoint(model_fname, save_best_only=True)]

def get_loss(regression):
    if regression:
        return 'mean_squared_error'
    else:
        return 'categorical_crossentropy'

def get_optimizer(regression, nb_layers, lr_mult=1):
    if regression:
        return keras.optimizers.RMSprop(lr=0.001 / (1.5 * nb_layers) * lr_mult)
    else:
        return keras.optimizers.RMSprop(lr=0.001 * lr_mult)# / (5 * nb_layers))


def generate_conv_net_base(
        input_shape, nb_classes,
        nb_dense=128, nb_filters=32, nb_layers=1, lr_mult=1,
        kernel_size=(3, 3), stride=(1, 1),
        regression=False):
    assert nb_layers >= 0
    assert nb_layers <= 3
    model = Sequential()
    model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1],
                            border_mode='same',
                            input_shape=input_shape,
                            subsample=stride,
                            activation='relu'))
    model.add(Convolution2D(nb_filters, 3, 3, border_mode='same', activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
#     model.add(Dropout(0.25))

    if nb_layers > 1:
        model.add(Convolution2D(nb_filters * 2, 3, 3, border_mode='same', activation='relu'))
        model.add(Convolution2D(nb_filters * 2, 3, 3, border_mode='same', activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
#         model.add(Dropout(0.25))

    if nb_layers > 2:
        model.add(Convolution2D(nb_filters * 4, 3, 3, border_mode='same', activation='relu'))
        model.add(Convolution2D(nb_filters * 4, 3, 3, border_mode='same', activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
#         model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(nb_dense, activation='relu'))
#     model.add(Dropout(0.5))
    model.add(Dense(nb_classes))
    if not regression:
        model.add(Activation('softmax'))

    loss = get_loss(regression)
    model.compile(loss=loss,
                  optimizer=get_optimizer(regression, nb_layers, lr_mult=lr_mult),
                  metrics=computed_metrics)
    return model


def generate_conv_net(input_shape, nb_classes,
                      nb_dense=128, nb_filters=32, nb_layers=1, lr_mult=1,
                      regression=False):
    return generate_conv_net_base(
            input_shape, nb_classes,
            nb_dense=nb_dense, nb_filters=nb_filters, nb_layers=nb_layers, lr_mult=lr_mult,
            regression=regression)

# Data takes form (X_train, Y_train, X_test, Y_test)
def run_model(model, data, batch_size=32, nb_epoch=1, patience=2,
        validation_data=(None, None)):
    X_train, Y_train, X_test, Y_test = data
    print ('training samples: %d/%d, testing samples: %d/%d' % (
            np.count_nonzero(Y_train.argmax(axis=-1)), X_train.shape[0],
            np.count_nonzero(Y_test.argmax(axis=-1)), X_test.shape[0]))
    temp_fname = tempfile.mkstemp(suffix='.hdf5', dir='/tmp/')[1]

    # 50k should be a reasonable validation split
    if validation_data[0] is None:
        validation_split = 0.33333333
        if len(Y_train) * validation_split > 50000.0:
            validation_split = 50000.0 / float(len(Y_train))
#         print validation_split

        begin_train = time.time()
        print 'shape: ', X_train.shape, Y_train.shape, X_test.shape, Y_test.shape
        model.fit(X_train, Y_train,
                  batch_size=batch_size,
                  nb_epoch=nb_epoch,
                  # validation_split=validation_split,
                  # validation_data=(X_test, Y_test),
                  shuffle=True,
                  class_weight='auto',
                  callbacks=get_callbacks(temp_fname, patience=patience))
        train_time = time.time() - begin_train
    else:
        begin_train = time.time()
        model.fit(X_train, Y_train,
                  batch_size=batch_size,
                  nb_epoch=nb_epoch,
                  validation_data=validation_data,
                  shuffle=True,
                  class_weight='auto',
                  callbacks=get_callbacks(temp_fname, patience=patience)
                 )
        train_time = time.time() - begin_train

    model.load_weights(temp_fname)
    os.remove(temp_fname)

    return train_time


# def get_labels(model, X_test, batch_size=256, get_time=False):
#     begin = time.time()
#     ## Alternate way to compute the classes
#     # proba = model.predict(X_test, batch_size=batch_size, verbose=0)
#     # predicted_labels = np_utils.probas_to_classes(proba)
#     predicted_labels = model.predict_classes(X_test, batch_size=batch_size, verbose=0)
#     end = time.time()
#     if get_time:
#         return predicted_labels, end - begin
#     else:
#         return predicted_labels


def stats_from_proba(proba, Y_test):
    # Binary and one output
    if proba.shape[1] == 1:
        proba = np.concatenate([1 - proba, proba], axis=1)
    if len(Y_test.shape) == 1:
        Y_test = np.transpose(np.array([1 - Y_test, Y_test]))
    predicted_labels = proba.argmax(axis=-1)

    true_labels = Y_test.argmax(axis=-1)
    precision, recall, fbeta, support = sklearn.metrics.precision_recall_fscore_support(
            predicted_labels, true_labels)
    accuracy = sklearn.metrics.accuracy_score(predicted_labels, true_labels)

    num_penalties, thresh_low, thresh_high = \
        StatsUtils.yolo_oracle(Y_test[:, 1], proba[:, 1])
    windowed_acc, windowed_supp = StatsUtils.windowed_accuracy(predicted_labels, Y_test)

    metrics = {'precision': precision,
               'recall': recall,
               'fbeta': fbeta,
               'support': support,
               'accuracy': accuracy,
               'penalities': num_penalties,
               'windowed_accuracy': windowed_acc,
               'windowed_support': windowed_supp}
    return metrics


def evaluate_model_regression(model, X_test, Y_test, batch_size=256):
    begin = time.time()
    raw_predictions = model.predict(X_test, batch_size=batch_size, verbose=0)
    end = time.time()
    mse = sklearn.metrics.mean_squared_error(Y_test, raw_predictions)

    Y_classes = Y_test > 0.2 # FIXME
    Y_classes = np.concatenate([1 - Y_classes, Y_classes], axis=1)

    best = {'accuracy': 0}
    for cutoff in np.arange(0.01, 0.75, 0.01):
        predictions = raw_predictions > cutoff # FIXME
        proba = np.concatenate([1 - predictions, predictions], axis=1)
        metrics = stats_from_proba(proba, Y_classes)
        metrics['cutoff'] = cutoff
        print 'Cutoff: %f, metrics: %s' % (cutoff, str(metrics))
        if metrics['accuracy'] > best['accuracy']:
            best = metrics

    metrics = best
    metrics['mse'] = mse
    metrics['test_time'] = end - begin
    return metrics

def evaluate_model(model, X_test, Y_test, batch_size=256):
    predicted_labels, test_time = get_labels(model, X_test, batch_size, True)
    true_labels = Y_test.argmax(axis=-1)

    confusion = sklearn.metrics.confusion_matrix(true_labels, predicted_labels)

    # Minor smoothing to prevent division by 0 errors
    TN = float(confusion[0][0]) + 1
    FN = float(confusion[1][0]) + 1
    TP = float(confusion[1][1]) + 1
    FP = float(confusion[0][1]) + 1
    metrics = {'recall': TP / (TP + FN),
               'specificity': TN / (FP + TN),
               'precision': TP / (TP + FP),
               'npv':  TN / (TN + FN),
               'fpr': FP / (FP + TN),
               'fdr': FP / (FP + TP),
               'fnr': FN / (FN + TP),
               'accuracy': (TP + TN) / (TP + FP + TN + FN),
               'f1': (2 * TP) / (2 * TP + FP + FN),
               'test_time': test_time}
    return metrics


def learn_and_eval(model, data, nb_epoch=2, batch_size=128,
        validation_data=(None, None)):
    X_train, Y_train, X_test, Y_test = data
    train_time = run_model(model, data, nb_epoch=nb_epoch,
            batch_size=batch_size, validation_data=validation_data)
    metrics = evaluate_model(model, X_test, Y_test, batch_size=batch_size)
    return train_time, metrics


# NOTE: assumes first two parameters are: (image_size, nb_classes)
def try_params(model_gen, params, data,
               output_dir, base_fname, model_name, OBJECT,
               regression=False, nb_epoch=2, validation_data=(None, None)):
    def metrics_names(metrics):
        return sorted(metrics.keys())
    def metrics_to_list(metrics):
        return map(lambda key: metrics[key], metrics_names(metrics))

    summary_csv_fname = os.path.join(
            output_dir, base_fname + '_' + model_name + '_summary.csv')

    X_train, Y_train, X_test, Y_test = data
    to_write = []
    for param in params:
        param_base_fname = base_fname + '_' + model_name + '_' + '_'.join(map(str, param[2:]))
        model_fname = os.path.join(
                output_dir, param_base_fname + '.h5')
        csv_fname = os.path.join(
                output_dir, param_base_fname + '.csv')

        # Make, train, and evaluate the model
        model = model_gen(*param, regression=regression)
        if regression:
            train_time = run_model(model, data, nb_epoch=nb_epoch,
                    validation_data=validation_data)
            metrics = evaluate_model_regression(model, X_test, Y_test)
        else:
            train_time, metrics = learn_and_eval(model, data,
                    validation_data=validation_data)

        # Output predictions and save the model
        # Redo some computation to save my sanity
        conf1 = model.predict(X_train, batch_size=256, verbose=0)
        conf2 = model.predict(X_test,  batch_size=256, verbose=0)
        conf = np.concatenate([conf1, conf2])
        if len(conf.shape) > 1:
            assert len(conf.shape) == 2
            assert conf.shape[1] <= 2
            if conf.shape[1] == 2:
                conf = conf[:, 1]
            else:
                conf = np.ravel(conf)
        confidences_to_csv(csv_fname, conf, OBJECT)
        model.save(model_fname)

        to_write.append(list(param[2:]) + [train_time] + metrics_to_list(metrics))
        print ('params: ', param)
        print ('training time: ', train_time)
        print ('metrics: ', metrics)
        print
    print to_write
    # First two params don't need to be written out
    param_column_names = map(lambda i: 'param' + str(i), xrange(len(params[0]) - 2))
    column_names = param_column_names + ['train_time'] + metrics_names(metrics)
    output_csv(summary_csv_fname, to_write, column_names)

# exp/shuffled_small_cnn

In [5]:
# def to_test_train(avg_fname, all_frames, all_counts, train_ratio=0.6):
#     assert len(all_frames) == len(all_counts), 'Frame length should equal counts length'

#     nb_classes = all_counts.max() + 1
#     X = all_frames

#     mean = np.mean(X, axis=0)
#     np.save(avg_fname, mean)

#     N = 150000
#     # N = 500000

#     '''pos_inds = np.random.permutation(np.where(all_counts.ravel() == 0))
#     pos_inds = pos_inds[0, 0 : N/2]
#     neg_inds = np.random.permutation(np.where(all_counts.ravel() == 1))
#     neg_inds = neg_inds[0, 0 : N/2]
#     print pos_inds.shape
#     print neg_inds.shape
#     p = np.concatenate([pos_inds, neg_inds])
#     np.random.shuffle(p)'''

#     p = np.random.permutation(len(all_counts))
#     p = p[0:N]

#     Y = np_utils.to_categorical(all_counts, nb_classes)
#     X, Y = X[p], Y[p]
#     X -= mean

#     def split(arr):
#         # 250 -> 100, 50, 100
#         ind = int(len(arr) * train_ratio)
#         if ind > 50000:
#             ind = len(arr) - 50000
#         return arr[:ind], arr[ind:]

#     X_train, X_test = split(X)
#     Y_train, Y_test = split(Y)

#     return X_train, X_test, Y_train, Y_test

# def get_data(csv_fname, video_fname, avg_fname,
#              num_frames=None, start_frame=0,
#              OBJECTS=['person'], resol=(50, 50),
#              center=True, dtype='float32', train_ratio=0.6):
#     def print_class_numbers(Y, nb_classes):
#         classes = Y.argmax(axis=-1)
#         for i in xrange(nb_classes):
#             print 'class %d: %d' % (i, np.sum(classes == i))

#     print '\tParsing %s, extracting %s' % (csv_fname, str(OBJECTS))
#     all_counts = get_binary(csv_fname, limit=num_frames, OBJECTS=OBJECTS, start=start_frame)
#     print '\tRetrieving all frames from %s' % video_fname
#     all_frames = get_all_frames(
#             len(all_counts), video_fname, scale=resol, start=start_frame)
#     print '\tSplitting data into training and test sets'
#     X_train, X_test, Y_train, Y_test = to_test_train(avg_fname, all_frames, all_counts)

#     nb_classes = all_counts.max() + 1
#     print '(train) positive examples: %d, total examples: %d' % \
#         (np.count_nonzero(Y_train.argmax(axis=-1)),
#          len(Y_train))
#     print_class_numbers(Y_train, nb_classes)
#     print '(test) positive examples: %d, total examples: %d' % \
#         (np.count_nonzero(Y_test.argmax(axis=-1)),
#          len(Y_test))
#     print_class_numbers(Y_test, nb_classes)

#     print 'shape of image: ' + str(all_frames[0].shape)
#     print 'number of classes: %d' % (nb_classes)

#     data = (X_train, Y_train, X_test, Y_test)
#     return data, nb_classes

# def get_csv_adaptively(csv_fname, num_frames, OBJ, interval=1, start=0):
#     df = pd.read_csv(csv_fname)
#     df = df[df['object_name'].isin([OBJ])]
#     groups = df.set_index('frame')
#     end = start + num_frames * interval
#     res = {i: i in groups.index for i in range(start, end, interval)}
#     return res

# def get_data_adaptively(csv_fname, video_fname, avg_fname,
#              num_frames=None, start_frame=0,
#              OBJECTS=['person'], resol=(50, 50),
#              center=True, dtype='float32', train_ratio=0.6, burst=0, interval=1, decoded_images=False):
#     def print_class_numbers(Y, nb_classes):
#         classes = Y.argmax(axis=-1)
#         for i in xrange(nb_classes):
#             print 'class %d: %d' % (i, np.sum(classes == i))
#     print '\tParsing %s, extracting %s' % (csv_fname, str(OBJECTS))
#     all_counts = get_csv_adaptively(csv_fname, num_frames, OBJECTS[0],
#                                     start=start_frame, burst=burst, interval=interval)
#     print '\tCutting frames %d %d' % (len(all_counts.keys()), num_frames)
#     all_counts = sorted(all_counts.items(), key=lambda x: x[0])[:num_frames]
#     all_indexs = [x[0] for x in all_counts]
#     all_labels = np.array([x[1] for x in all_counts])
#     print '\tRetrieving all frames from %s' % video_fname
#     if not decoded_images:
#         all_frames = get_all_frames(
#             num_frames, video_fname, scale=resol, start=start_frame, frameset=all_indexs)
#     else:
#         all_frames = get_frames_from_images(all_indexs, video_fname, resol=resol, start=start_frame)
#     print '\tSplitting data into training and test sets'
#     X_train, X_test, Y_train, Y_test = to_test_train(avg_fname, all_frames, all_labels)

#     nb_classes = all_labels.max() + 1
#     print '(train) positive examples: %d, total examples: %d' % \
#         (np.count_nonzero(Y_train.argmax(axis=-1)),
#          len(Y_train))
#     print_class_numbers(Y_train, nb_classes)
#     print '(test) positive examples: %d, total examples: %d' % \
#         (np.count_nonzero(Y_test.argmax(axis=-1)),
#          len(Y_test))
#     print_class_numbers(Y_test, nb_classes)

#     print 'shape of image: ' + str(all_frames[0].shape)
#     print 'number of classes: %d' % (nb_classes)

#     data = (X_train, Y_train, X_test, Y_test)
#     return data, nb_classes

def to_test_train(all_frames, all_labels, avg_num=[], nb_classes=2):
    assert len(all_frames) == len(all_labels), 'Frame length should equal counts length'
    X = all_frames
    if len(avg_num) == 0:
        mean = np.mean(X, axis=0)
    else:
        mean = avg_num
    Y = np_utils.to_categorical(all_labels, nb_classes)
    X -= mean

    return X, Y, mean

def get_csv_samples(csv_fname, OBJ):
    df = pd.read_csv(csv_fname)
    df = df[df['object_name'].isin([OBJ])]
    groups = df.set_index('frame')
    return groups.index

def get_labels(csv_res, frameset):
    ret = [t in csv_res for t in frameset]
    print ('reading label sum: %d, pos: %d' % (len(ret), sum(ret)))
    return np_utils.to_categorical(ret, 2)

def get_frames_from_images(frameset, video_fname, resol=(50, 50), start=0, dtype='float32'):
    if frameset == None:
        print 'ERROR: frameset is none!'
        sys.exit()
    print ('reading images... %d') % (len(frameset))
    frames = np.zeros( tuple([len(frameset)] + list(resol) + [3]), dtype=dtype )
    for i in range(len(frameset)):
#         if i % 1000 == 0:
#             sys.stdout.write("\033[F") # Cursor up one line
#             clear_output(wait=True)
#             display('Iteration '+str(i)+' Score: '+str(uniform(0, 1)))
#             display('\timages read %d/%d' % (i, len(frameset))
        img_path = os.path.join(video_fname, str(frameset[i] + 1).zfill(7) + '.jpg')
        frame = cv2.imread(img_path)
        frame = cv2.resize(frame, resol, interpolation=cv2.INTER_NEAREST)
        frames[i, :] = frame

    if dtype == 'float32':
        frames /= 255.0

    return frames

def get_train_test_data(csv_fname, video_fname, train_nums, OBJ, pos_train_ratio=None):
    total_sample = range(0, 3000000)
    total_sample_num = len(total_sample)
    print ('reading csv file: %s %s' % (csv_fname, OBJ))
    csv_ret = get_csv_samples(csv_fname, OBJ)
    total_pos_sample = [t for t in total_sample if t in csv_ret]
    total_pos_sample_num = len(total_pos_sample)
    total_neg_sample = [t for t in total_sample if t not in csv_ret]
    total_neg_sample_num = len(total_neg_sample)
    print ('total sample: %d, positive sample: %d' % (total_sample_num, total_pos_sample_num))
    if pos_train_ratio != None:
        pos_train_nums = int(train_nums * pos_train_ratio)
    else:
        pos_train_nums = int(train_nums * (float(total_pos_sample_num) / total_sample_num))
    print ('training sample: %d, positive training sample: %d' % (train_nums, pos_train_nums))
#     print csv_ret
    
    # get training samples
    pos_frame_ids = np.random.permutation(total_pos_sample)
    pos_frame_ids = pos_frame_ids[0:pos_train_nums]
    neg_frame_ids = np.random.permutation(total_neg_sample)
    neg_frame_ids = neg_frame_ids[0:train_nums - pos_train_nums]
    train_frame_ids = np.concatenate((pos_frame_ids, neg_frame_ids), axis=0)
    train_frame_ids = sorted(train_frame_ids)
    print ('reading training image files...%d' % (len(train_frame_ids)))
    train_frames = get_frames_from_images(train_frame_ids, video_fname)
    train_labels = np.array([i in csv_ret for i in train_frame_ids])
    X_train, Y_train, mean = to_test_train(train_frames, train_labels)
    
    # get testing samples
    test_frame_ids = range(0, total_sample_num, 30)
    print ('reading testing image files...%d' % (len(test_frame_ids)))
    test_frames = get_frames_from_images(test_frame_ids, video_fname)
    test_labels = [i in total_pos_sample for i in test_frame_ids]
#     print len(test_frames), len(test_labels)
    X_test, Y_test, _ = to_test_train(test_frames, test_labels, avg_num=mean)
    print ('testing sample: %d, positive testing sample: %d' %
           (len(test_frames), np.count_nonzero(Y_test.argmax(axis=-1))))
    return [X_train, Y_train, X_test, Y_test]

# Run Run Run!

In [6]:
# data_path = '/host/hybridvs_data/'
# # video_name = 'coral-reef-long'
# # OBJECT = 'person'
# video_name = 'jackson-town-square'
# OBJECT = 'bus'
# csv_in = os.path.join(data_path, 'csv', video_name + '.csv')
# video_in = os.path.join(data_path, 'videos/scaled_50X50', video_name + '.mp4')
# output_dir = os.path.join(data_path, 'cnn-models')
# avg_fname = video_name + '.npy'
# start_frame = 0
# resol = (50, 50)
# objects = [OBJECT]
# split_image_path = '/host/mengwei/frames_50X50_jackson-town-square/'
# num_frames = 100000

In [7]:
# print 'Preparing data....'
# gc.collect()
# data = get_train_test_data(
#         csv_in, split_image_path, num_frames, OBJECT, pos_train_ratio=0.4)

In [8]:
def train_model(model, data, batch_size=128,
              regression=False, nb_epoch=2, validation_data=(None, None)):
    train_time = run_model(model, data, nb_epoch=nb_epoch,
            batch_size=batch_size, validation_data=validation_data)
    return train_time, model
# def test_model_two_thre(model, data, batch_size=128, error_rate=1):
#     X_train, Y_train, X_test, Y_test = data
#     probs = model.predict(X_test, batch_size=batch_size, verbose=0)
#     pos_samples = np.count_nonzero(Y_test.argmax(axis=-1))
#     error_tolerate = pos_samples * (error_rate / 100.0)
#     total_test_num = probs.shape[0]
#     print ('test num: %d, pos num: %d, tolerate: %d' % (total_test_num, pos_samples, error_tolerate))
#     combined_res = np.column_stack((probs[:,1], Y_test[:,1]))
#     combined_res = combined_res[combined_res[:,0].argsort()]
#     print combined_res
#     # get T_low
#     _error = 0
#     for i in range(total_test_num):
#         _error += combined_res[i,1]
#         if _error > error_tolerate:
#             _error -= 1
#             T_low = combined_res[i - 1, 0]
#             pos_low = i - 1
#             break
#     # get T_high
#     _error = 0
#     for i in reversed(range(total_test_num)):
#         _error += (1 - combined_res[i,1])
#         if _error > error_tolerate:
#             _error -= 1
#             T_high = combined_res[i + 1, 0]
#             pos_high = i + 1
#             break
#     filter_ratio = 1 - float(pos_high - pos_low) / total_test_num
#     return filter_ratio, float(_error) / pos_samples, T_low, T_high
# return list(threshold, recall, precision)
def test_model_one_thre(model, data, batch_size=128):
    _, _, X_test, Y_test = data
    probs = model.predict(X_test, batch_size=128, verbose=0)
    pos_samples = np.count_nonzero(Y_test.argmax(axis=-1))
    total_test_num = probs.shape[0]
    print ('test num: %d, pos num: %d' % (total_test_num, pos_samples))
    combined_res = np.column_stack((probs[:,1], Y_test[:,1]))
    combined_res = combined_res[combined_res[:,0].argsort()]
    print combined_res
    # recall = TP / (TP + FN)
    # precision = TP / (TP + FP)
    ret = []
    FN = 0
    for i in range(total_test_num - 1):
        FN += combined_res[i,1]
        TP = pos_samples - FN
        FP = total_test_num - i - 1 - TP
        recall = float(TP) / (TP + FN)
        precision = float(TP) / (TP + FP)
#         print FN, TP, FP
        if len(ret) > 0 and ret[-1][1] == recall:
            ret[-1][2] = max(precision, ret[-1][2])
        else:
            ret.append([combined_res[i,0], recall, precision])
    return ret
# def try_model(model, data, OBJECT):
#     train_time, model = train_model(model, data, OBJECT)
#     filter_ratio, error, T_low, T_high = test_model(model, data)
#     print train_time, filter_ratio, error, T_low, T_high
def try_model_one_thre(model, data, nb_epoch=2, batch_size=128):
    train_time, model = train_model(model, data, nb_epoch=nb_epoch, batch_size=batch_size)
    ret = test_model_one_thre(model, data)
    return ret

In [9]:
# Prepare the data for training and testing
# data = [X_train, Y_train, X_test, Y_test]
# X_train and X_test: np.array([N X 50 X 50 X 3]), using the function get_frames_from_images above
# Y_train and Y_test: np.array([N X 2]), the second column is the chance of positive object,
# for example: Y_train=np.array([[0, 1], [1, 0]]), the first sample contains the object, the second doesn't

resol=(50, 50)
#frameset=None
# Data comes from ILSVRC2014, in /media/teddyxu/blue-2tb
# n02924116 are bus images, can change as needed
pos_names = os.listdir("/host/hybridvs_data/ILSVRC2014_DET_train/n02924116/")
frames = np.zeros(tuple([1000] + list(resol) + [3]), dtype='float32')

# Read positive images from the disk
for i in range(0, len(pos_names)):
    #print len(pos_names)
    pos_img_path = os.path.join('/host/hybridvs_data/ILSVRC2014_DET_train/n02924116/', pos_names[i])
    #print pos_img_path
    frame = cv2.imread(pos_img_path)
    frame = cv2.resize(frame, resol, interpolation=cv2.INTER_NEAREST)
    frames[i,:] = frame
    
#pos_frames /= 255.0

# Read negative images from the disk
neg_dir = os.listdir("/host/hybridvs_data/ILSVRC2014_DET_train/")
for i in range(0, 500):
    # True labels
    if neg_dir[i] == 'n02924116':
        continue
    # False Labels, Randomly picked from other directories
    neg_names = os.listdir(os.path.join('/host/hybridvs_data/ILSVRC2014_DET_train/', neg_dir[i]))
    neg_img_path = os.path.join('/host/hybridvs_data/ILSVRC2014_DET_train/', neg_dir[i], neg_names[0])
    #print neg_img_path
    frame = cv2.imread(neg_img_path)
    frame = cv2.resize(frame, resol, interpolation=cv2.INTER_NEAREST)
    frames[i + 500,:] = frame

frames /= 255.0

Y = []
for i in range(0, 1000):
    if i < 500:
        Y.append([0,1])
    else:
        Y.append([1,0])

#print Y

list_seq = range(0, 1000)
#print list_seq
random.shuffle(list_seq)
#print list_seq

#print len(frames)

X_train = np.random.rand(1000, 50, 50, 3)
Y_train = np.random.randint(2, size=(1000, 2))
X_test = np.random.rand(1000, 50, 50, 3)
Y_test = np.random.randint(2, size=(1000, 2))

# First 800 samples as training set
for i in range(0, 800):
    X_train[i] = frames[list_seq[i]]
    Y_train[i] = Y[list_seq[i]]

# First 200 samples as training set
for i in range(0, 200):
    X_test[i] = frames[list_seq[i + 800]]
    Y_test[i] = Y[list_seq[i + 800]]
    
data = [X_train, Y_train, X_test, Y_test]

In [11]:
# after data prepared, just run this cell
K.clear_session()
param = ((50, 50, 3), 2, 128, 32, 2)
model = generate_conv_net_base(*param, regression=False)
accuracy = try_model_one_thre(model, data, nb_epoch=5)
RECALL_LIST = [0.99, 0.95, 0.90]
for r in RECALL_LIST:
    temp = [a for a in accuracy if a[1] >= r]
    item = max(temp, key=lambda x: x[2])
    print 'precision', r, item
model.save('/host/hybridvs_data/models/imagenet_init_model.h5')



training samples: 450/1000, testing samples: 285/1000
shape:  (1000, 50, 50, 3) (1000, 2) (1000, 50, 50, 3) (1000, 2)




Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
test num: 1000, pos num: 285
[[0.25309771 0.        ]
 [0.25553218 0.        ]
 [0.29276204 0.        ]
 ...
 [0.87391955 1.        ]
 [0.88470554 1.        ]
 [0.88616997 1.        ]]
precision 0.99 [0.46174463629722595, 0.9929824561403509, 0.29915433403805497]
precision 0.95 [0.5027193427085876, 0.9719298245614035, 0.302071973827699]
precision 0.9 [0.5027193427085876, 0.9719298245614035, 0.302071973827699]
