In [None]:
from PIL import Image
import cv2
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import os
import tensorflow as tf
import math
import datetime

import caffe
from sklearn.cross_validation import train_test_split

# Segmented video location
set_1_folder = "../segmented_set1/"
# os.listdir() returns entries in arbitrary order; sort them so the videos are
# loaded in the same order on every run.
set_1_files = sorted(os.listdir(set_1_folder))
# Side lengths, in pixels, that every frame is resized to before training.
height = 50
width = 50

# Avi files have the following naming convention:
# file-id_sequence-number_activity-class

def get_frames_and_labels(f):
    """Read every frame of a video file and label each frame with the file's class.

    The class label is the first character of the last underscore-separated
    field of the file name (``file-id_sequence-number_activity-class``), so
    only single-digit class ids are supported.

    Parameters
    ----------
    f : str
        Path to the video file.

    Returns
    -------
    frames : list
        Frames in the order returned by ``cv2.VideoCapture.read``.
    labels : list of int
        The file's class label, repeated once per frame.

    Raises
    ------
    ValueError
        If the label character is not a digit.
    IndexError
        If the last underscore-separated field of the file name is empty.
    """
    # Parse the label from the file name only (before opening the capture), so
    # underscores in parent directories cannot end up in the parsed field and a
    # badly named file does not leave an open capture behind.
    label = int(os.path.basename(f).split("_")[-1][0])

    cap = cv2.VideoCapture(f)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(frame)
    finally:
        # Always release the capture, even if reading raises.
        cap.release()

    labels = [label] * len(frames)
    return frames, labels

def _resize_and_scale(frames):
    """Resize each frame to (height, width) and divide its uint8 pixels by 255."""
    return [np.array(Image.fromarray(img.astype(np.uint8)).resize((width, height),
                                                                    Image.ANTIALIAS)) / 255.0
            for img in frames]


def _bgr_to_rgb(images):
    """Convert each image to float32 and swap its channel order from BGR to RGB."""
    return [cv2.cvtColor(img.astype(np.float32), cv2.COLOR_BGR2RGB) for img in images]


# Collect the frames and per-frame labels of every video in the folder.
set_1_frames = []
set_1_labels = []
for f in set_1_files:
    frames, labels = get_frames_and_labels(os.path.join(set_1_folder, f))
    # extend() appends in place; `a = a + b` re-copied the whole list per file.
    set_1_frames.extend(frames)
    set_1_labels.extend(labels)

# Fixed random_state so the split is reproducible across kernel restarts.
# NOTE(review): the split is per frame, so frames of one video can land in both
# the train and the test set -- confirm this is intended.
train_frames, test_frames, train_labels, test_labels = train_test_split(
    set_1_frames, set_1_labels, test_size=0.1, random_state=0)

resized = _resize_and_scale(train_frames)
restructured = _bgr_to_rgb(resized)
plt.imshow(restructured[0])
print(restructured[0].shape)

test_resized = _resize_and_scale(test_frames)

X_train = np.array(restructured)
y_train = np.array(train_labels)
X_test = np.array(_bgr_to_rgb(test_resized))
y_test = np.array(test_labels)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

In [None]:
# Mean value of each of the three channels over the whole training array.
channel_means = []
for channel in range(3):
    channel_means.append(np.mean(X_train[..., channel]))
# Shaped (1, 1, 3) so the means broadcast over a single image.
mean_imgs = np.array(channel_means).reshape([1, 1, 3])
subtracted = X_train[0] - mean_imgs
plt.imshow(subtracted)

In [None]:
def lrelu(x, leak=0.2, name="lrelu"):
    """Leaky rectified linear unit.

    Computed as ``f1 * x + f2 * |x|`` with ``f1 = (1 + leak) / 2`` and
    ``f2 = (1 - leak) / 2``, which equals ``x`` for non-negative inputs and
    ``leak * x`` for negative ones.

    Parameters
    ----------
    x : Tensor
        Input the nonlinearity is applied to.
    leak : float, optional
        Slope used for negative inputs.
    name : str, optional
        Variable scope the ops are created under.

    Returns
    -------
    Tensor
        Result of applying the nonlinearity to `x`.
    """
    with tf.variable_scope(name):
        linear_coeff = (1 + leak) * 0.5
        abs_coeff = (1 - leak) * 0.5
        return linear_coeff * x + abs_coeff * abs(x)

def max_pool_2x2(x):
    """Max-pool `x` with window and stride [1, 2, 2, 1] and 'SAME' padding."""
    pool_window = [1, 2, 2, 1]
    pool_strides = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=pool_window, strides=pool_strides,
                          padding='SAME')
    
def unpool(value, name='unpool'):
    """N-dimensional version of the unpooling operation from
    https://www.robots.ox.ac.uk/~vgg/rg/papers/Dosovitskiy_Learning_to_Generate_2015_CVPR_paper.pdf

    Each spatial dimension is doubled by interleaving the input with zeros.

    :param value: A Tensor of shape [b, d0, d1, ..., dn, ch]
    :param name: Currently unused; kept so existing callers keep working.
    :return: A Tensor of shape [b, 2*d0, 2*d1, ..., 2*dn, ch]
    """
    sh = value.get_shape().as_list()
    # Number of spatial dimensions (everything between batch and channels).
    dim = len(sh[1:-1])
    out = (tf.reshape(value, [-1] + sh[-dim:]))
    for i in range(dim, 0, -1):
        # The axis is passed first, matching the legacy tf.concat signature
        # used throughout this notebook.
        out = tf.concat(i, [out, tf.zeros_like(out)])
    out_size = [-1] + [s * 2 for s in sh[1:-1]] + [sh[-1]]
    out = tf.reshape(out, out_size)
    return out

def autoencoder(input_shape=[None, height, width, 3],
                n_filters=[3, 30, 50, 50],
                filter_sizes=[5, 10, 10, 10],
                corruption=False):
    """Build a convolutional autoencoder with tied weights.

    The encoder is a stack of stride-1 'SAME' convolutions followed by a
    sigmoid. The decoder walks the encoder layers in reverse and reuses each
    encoder kernel in a transposed convolution, with its own bias.

    Parameters
    ----------
    input_shape : list, optional
        Shape of the input placeholder. Either 4-d
        ``[batch, height, width, channels]`` or 2-d ``[batch, features]``,
        where ``features`` must equal ``side * side * n_filters[0]``.
    n_filters : list, optional
        ``n_filters[0]`` is the number of input channels (used only to
        reshape 2-d input); ``n_filters[1:]`` are the output channel counts of
        the successive encoder layers.
    filter_sizes : list, optional
        Square kernel size of each encoder layer. Needs at least
        ``len(n_filters) - 1`` entries; extra entries are ignored.
    corruption : bool, optional
        Currently unused; no input corruption is applied.

    Returns
    -------
    dict
        ``'x'``: input placeholder; ``'z'``: output of the last encoder layer;
        ``'y'``: reconstruction; ``'cost'``: sum of squared differences
        between the reconstruction and the input; ``'encoder'``: list with the
        output tensor of every encoder layer; ``'decoder'``: the same tensor
        as ``'y'``.

    Raises
    ------
    ValueError
        If ``filter_sizes`` has too few entries, or the input is neither 4-d
        nor a 2-d shape that can be reshaped to a square image.
    """
    n_layers = len(n_filters) - 1
    if len(filter_sizes) < n_layers:
        raise ValueError(
            'filter_sizes must provide a size for each of the %d layers' % n_layers)

    # %%
    # input to the network
    x = tf.placeholder(tf.float32, input_shape, name='x')

    # %%
    # ensure 2-d is converted to square tensor.
    if len(x.get_shape()) == 2:
        n_features = x.get_shape().as_list()[1]
        n_channels = n_filters[0]
        # The flat feature axis holds side * side * channels values, so the
        # channel count has to be divided out before taking the square root.
        if n_features % n_channels != 0:
            raise ValueError('Unsupported input dimensions')
        x_dim = np.sqrt(n_features // n_channels)
        if x_dim != int(x_dim):
            raise ValueError('Unsupported input dimensions')
        x_dim = int(x_dim)
        x_tensor = tf.reshape(
            x, [-1, x_dim, x_dim, n_channels])
    elif len(x.get_shape()) == 4:
        x_tensor = x
    else:
        raise ValueError('Unsupported input dimensions')
    current_input = x_tensor

    # %%
    # Build the encoder
    encoder_weights = []
    encoder_ops = []
    shapes = []
    for layer_i, n_output in enumerate(n_filters[1:]):
        n_input = current_input.get_shape().as_list()[3]
        # Remember each layer's input shape; the decoder restores it later.
        shapes.append(current_input.get_shape().as_list())
        W = tf.Variable(
            tf.random_uniform([
                filter_sizes[layer_i],
                filter_sizes[layer_i],
                n_input, n_output],
                -1.0 / math.sqrt(n_input),
                1.0 / math.sqrt(n_input)))
        b = tf.Variable(tf.zeros([n_output]))
        encoder_weights.append(W)
        output = tf.nn.sigmoid(
            tf.add(tf.nn.conv2d(
                current_input, W, strides=[1, 1, 1, 1], padding='SAME'), b))
        encoder_ops.append(output)
        current_input = output

    # %%
    # store the latent representation
    z = current_input
    encoder_weights.reverse()
    shapes.reverse()

    # %%
    # Build the decoder using the same weights
    for layer_i, shape in enumerate(shapes):
        W = encoder_weights[layer_i]
        # The bias is sized to the kernel's input-channel axis, which is the
        # output channel count of the transposed convolution.
        b = tf.Variable(tf.zeros([W.get_shape().as_list()[2]]))
        output = tf.nn.sigmoid(
            tf.add(tf.nn.conv2d_transpose(
                current_input, W,
                # The batch size is read at run time so any batch size works.
                tf.pack([tf.shape(x)[0], shape[1], shape[2], shape[3]]),
                strides=[1, 1, 1, 1], padding='SAME'), b))
        current_input = output

    decoder = current_input

    # %%
    # now have the reconstruction through the network
    y = current_input
    # cost function measures pixel-wise difference
    cost = tf.reduce_sum(tf.square(y - x_tensor))

    # %%
    return {'x': x, 'z': z, 'y': y, 'cost': cost,
            "encoder": encoder_ops, "decoder": decoder}


# %%
def test_ut(X_train, X_test, n_filters, filter_sizes):
    """Train an autoencoder on `X_train` and plot reconstructions of `X_test`.

    Training uses RMSProp (learning rate 0.01) for 100 epochs over full
    batches of 50 samples. The cost of the last batch is printed every 10
    epochs. Up to 10 test images are then plotted above their
    reconstructions.

    Parameters
    ----------
    X_train : ndarray
        Training images, indexed by sample along the first axis.
    X_test : ndarray
        Test images; each must be reshapeable to ``(height, width, 3)``.
    n_filters : list
        Passed through to ``autoencoder``.
    filter_sizes : list
        Passed through to ``autoencoder``.

    Returns
    -------
    dict
        The dict returned by ``autoencoder`` plus a ``'session'`` entry
        holding the open ``tf.Session`` the model was trained in. The caller
        owns that session; it is not closed here.

    Raises
    ------
    ValueError
        If `X_train` contains no samples.
    """
    n_train = X_train.shape[0]
    if n_train == 0:
        raise ValueError('X_train is empty')

    ae = autoencoder(n_filters=n_filters, filter_sizes=filter_sizes)

    # %%
    learning_rate = 0.01
    optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(ae['cost'])

    # %%
    # We create a session to use the graph
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.initialize_all_variables())

    # %%
    # Fit all training data
    batch_size = 50
    n_epochs = 100
    step_size = 10
    # Only full batches are used, as before. With fewer than `batch_size`
    # samples the whole set forms one batch, so `train` is always bound when
    # the cost is reported below.
    n_batches = max(1, n_train // batch_size)
    for epoch_i in range(n_epochs):
        for batch_i in range(n_batches):
            train = X_train[batch_i * batch_size:(batch_i + 1) * batch_size]
            sess.run(optimizer, feed_dict={ae['x']: train})
        if epoch_i % step_size == 0:
            # Formatted explicitly so the line reads the same under Python 2,
            # where print(a, b, c) would print a tuple.
            print("%s %s %s" % (datetime.datetime.now(), epoch_i,
                                sess.run(ae['cost'], feed_dict={ae['x']: train})))

    # %%
    # Plot example reconstructions
    n_examples = min(10, X_test.shape[0])
    if n_examples > 0:
        test_xs = X_test[:n_examples]
        recon = sess.run(ae['y'], feed_dict={ae['x']: test_xs})
        print(recon.shape)
        # squeeze=False keeps `axs` two-dimensional even for a single column.
        fig, axs = plt.subplots(2, n_examples, figsize=(10, 2), squeeze=False)
        for example_i in range(n_examples):
            # Top row: original image.
            axs[0][example_i].imshow(
                np.reshape(test_xs[example_i, :], (height, width, 3)))
            axs[0][example_i].axis("off")

            # Bottom row: reconstruction.
            axs[1][example_i].imshow(
                np.reshape(recon[example_i, ...], (height, width, 3)))
            axs[1][example_i].axis("off")

        fig.show()
        plt.draw()

    ae["session"] = sess

    return ae

In [None]:
# Train with three output channels per layer and 5x5 kernels.
ae = test_ut(
    X_train,
    X_test,
    n_filters=[3, 3, 3, 3],
    filter_sizes=[5, 5, 5, 5],
)

In [None]:
# Train again with 3x3 kernels. This rebinds `ae`, so the cells below use this model.
ae = test_ut(
    X_train,
    X_test,
    n_filters=[3, 3, 3, 3],
    filter_sizes=[3, 3, 3, 3],
)

In [None]:
# Label arrays for the train/test split, one class id per frame.
y_train = np.array(train_labels)
y_test = np.array(test_labels)
for label_array in (y_train, y_test):
    print(label_array.shape)

In [None]:
# Run the training images through the trained model and collect the output of
# every encoder layer. Fetching all layers in one run() call avoids repeating
# the forward pass once per layer.
train = X_train
sess = ae["session"]
layers = sess.run(ae["encoder"], feed_dict={ae['x']: train})
# Flatten each layer to one row per image and place the layers side by side.
# np.hstack needs a real sequence (not a generator), hence the list.
ravels = [layer.reshape(layer.shape[0], -1) for layer in layers]
combined = np.hstack(ravels)
print(combined.shape)

In [None]:
# Scale and visualize the embedding vectors
def plot_embedding(X, y, title=None):
    """Plot a 2-D embedding, drawing each point as its label.

    Every column of `X` is min-max scaled to [0, 1] first. A column whose
    values are all equal is mapped to 0 rather than dividing by zero.

    Parameters
    ----------
    X : ndarray
        Embedding with one row per sample; columns 0 and 1 are plotted.
    y : sequence
        Label of each row of `X`. It is drawn as text and, divided by 10,
        selects the colour from the ``Set1`` colour map.
    title : str, optional
        Figure title; omitted when None.
    """
    x_min, x_max = np.min(X, 0), np.max(X, 0)
    span = x_max - x_min
    # Avoid 0/0 for constant columns.
    span = np.where(span == 0, 1, span)
    X = (X - x_min) / span

    plt.figure(figsize=(20, 10))
    plt.subplot(111)
    for i in range(X.shape[0]):
        plt.text(X[i, 0], X[i, 1], str(y[i]),
                 color=plt.cm.Set1(y[i] / 10.),
                 fontdict={'weight': 'bold', 'size': 12})

    # Hide the axis ticks.
    plt.xticks([])
    plt.yticks([])
    if title is not None:
        plt.title(title)

from sklearn.manifold import TSNE

# Suppress scientific notation when NumPy prints small floats.
np.set_printoptions(suppress=True)

# Embed the per-image feature rows in two dimensions with a fixed seed.
vectorized_imgs = combined
tsne = TSNE(random_state=0, n_components=2)
X_tsne = tsne.fit_transform(vectorized_imgs)

In [None]:
# Draw the 2-D t-SNE embedding, one label glyph per training frame.
plot_embedding(X_tsne, y_train)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# 3-nearest-neighbour classifier fitted on the 2-D t-SNE coordinates.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_tsne, y_train)
knn

In [None]:
# NOTE: scored on the same samples the classifier was fitted on, so this is
# training accuracy, not an estimate of accuracy on unseen frames.
knn.score(X_tsne, y_train)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# 3-nearest-neighbour classifier fitted directly on the stacked feature rows.
knn = KNeighborsClassifier(n_neighbors=3).fit(combined, y_train)
knn

In [None]:
# NOTE: scored on the same samples the classifier was fitted on (training accuracy).
knn.score(combined, y_train)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Keep only as many labels as there are feature rows.
y = y_train[:combined.shape[0]]
knn = KNeighborsClassifier(n_neighbors=3).fit(combined, y)
# Print the time at which fitting finished.
print(str(datetime.datetime.now()))

In [None]:
# Predict a class for every feature row, then print the time at which
# prediction finished.
y_pred = knn.predict(combined)
finished_at = datetime.datetime.now()
print(str(finished_at))

In [None]:
from sklearn.metrics import confusion_matrix
import itertools
import numpy as np

# NOTE(review): nothing is drawn on this figure -- a second plt.figure() call
# later in this cell opens the figure the confusion matrix is plotted on.
plt.figure(figsize=(30, 30))

# Tick labels for the confusion-matrix axes.
# Presumably listed in class-id order -- TODO confirm against the file naming.
class_names = ["Hand Shaking", "Hugging", "Kicking", "Pointing", "Punching", "Pushing"]

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Print and plot a confusion matrix on the current figure.

    Parameters
    ----------
    cm : ndarray
        Square confusion matrix; rows are true labels, columns predictions.
    classes : sequence of str
        Tick labels for both axes.
    normalize : bool, optional
        When True, every row is divided by its sum before printing and
        plotting. Rows that sum to zero are left as zeros. When False the
        matrix is shown as given.
    title : str, optional
        Title of the plot.
    cmap : matplotlib colormap, optional
        Colour map used for the cells.
    """
    # Normalize only when asked to, and before anything is drawn, so the
    # printed message, the printed matrix and the plot all agree.
    if normalize:
        row_sums = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / np.where(row_sums == 0, 1, row_sums)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than half the maximum get white text, the rest black.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Values are truncated to four characters to fit inside a cell.
        plt.text(j, i, str(cm[i, j])[:4],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

# Confusion matrix over the first 1000 labels and predictions only.
n_eval = 1000
true_labels = y_train[:combined.shape[0]][:n_eval]
cnf_matrix = confusion_matrix(true_labels, y_pred[:n_eval])
np.set_printoptions(precision=2)

# Plot normalized confusion matrix
plt.figure()
plot_confusion_matrix(
    cnf_matrix,
    classes=class_names,
    normalize=True,
    title='Normalized confusion matrix for UT Interaction',
)

plt.show()

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Baseline: flatten every training image into a single row of values.
X_train_ravelled = np.array([np.ravel(image) for image in X_train])
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train_ravelled, y_train)
knn

In [None]:
# NOTE: scored on the same samples the classifier was fitted on (training accuracy).
knn.score(X_train_ravelled, y_train)

In [None]:
# (number of images, values per flattened image)
X_train_ravelled.shape

In [None]:
# Suppress scientific notation when NumPy prints small floats.
np.set_printoptions(suppress=True)
# 2-D t-SNE embedding of the flattened images, with a fixed seed.
tsne = TSNE(random_state=0, n_components=2)
X_tsne_2 = tsne.fit_transform(X_train_ravelled)

In [None]:
# Draw the t-SNE embedding of the flattened images, one label glyph per frame.
plot_embedding(X_tsne_2, y_train)

In [None]:
# Drop the autoencoder dict.
# NOTE(review): `sess`, bound in an earlier cell, still references the
# TensorFlow session, so this alone does not release it; call sess.close() if
# the intent is to free its resources.
del ae

# just-knn

In [None]:
# From here on X_train holds one flattened row per image.
X_train = X_train_ravelled = np.array([np.ravel(image) for image in X_train])

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# 3-nearest-neighbour classifier on the flattened training rows.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
knn

In [None]:
# NOTE: scored on the same samples the classifier was fitted on (training accuracy).
knn.score(X_train, y_train)

# just k-means

In [None]:
from sklearn.cluster import KMeans

# Keep only the first 5000 samples and their labels for the steps below.
n_samples = 5000
X_train, y_train = X_train[:n_samples], y_train[:n_samples]

# Six clusters, with a fixed seed for repeatable centroids.
kmeans = KMeans(n_clusters=6, random_state=0).fit(X_train)

def cluster_acc(Y_pred, Y):
    """
    Finds the cluster accuracy

    Counts how often each predicted cluster id co-occurs with each true
    label, then picks the one-to-one cluster-to-label assignment that
    maximizes the number of matched samples.

    Parameters
    ----------
    Y_pred : array-like of int
        Predicted cluster id of every sample (non-negative).
    Y : array-like of int
        True label of every sample (non-negative), same length as `Y_pred`.

    Returns
    -------
    accuracy : float
        Fraction of samples matched under the best assignment.
    w : ndarray of int64, shape (D, D)
        Count matrix with ``w[c, l]`` the number of samples in cluster ``c``
        with label ``l``, where ``D`` is the largest id in either input plus
        one.
    """
    # Imported locally so the function carries its own dependency.
    from scipy.optimize import linear_sum_assignment
    Y_pred = np.array(Y_pred)
    Y = np.array(Y)
    D = max(Y_pred.max(), Y.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    # np.add.at accumulates repeated (cluster, label) pairs; plain fancy-index
    # `+=` would count each distinct pair only once.
    np.add.at(w, (Y_pred, Y), 1)
    # The solver minimizes cost, so counts are flipped into costs.
    rows, cols = linear_sum_assignment(w.max() - w)
    return w[rows, cols].sum() * 1.0 / Y_pred.size, w

# Cluster index the fitted k-means model assigns to each training sample.
y_pred = kmeans.predict(X_train)
# Displays the (accuracy, count matrix) tuple returned by cluster_acc.
cluster_acc(y_pred, y_train)

# just t-SNE

In [None]:
from sklearn.manifold import TSNE

# Seeded like the other t-SNE runs in this notebook so the embedding, and the
# score computed from it, can be reproduced.
tsne = TSNE(n_components=2, random_state=0)
np.set_printoptions(suppress=True)
X_tsne = tsne.fit_transform(X_train)

from sklearn.neighbors import KNeighborsClassifier
# 3-nearest-neighbour classifier fitted on the 2-D embedding.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_tsne, y_train)

In [None]:
# NOTE: scored on the same embedded samples the classifier was fitted on
# (training accuracy); the labels are cut to the number of embedded rows.
knn.score(X_tsne, y_train[:X_tsne.shape[0]])