# Import, define, and mount

In [None]:
try:
    import tflearn    # try to import tflearn
except ImportError:
    !pip3 install -q tflearn # install tflearn first
    import tflearn # then import it

In [None]:
!apt-get install -y -q ffmpeg
!pip3 install -q scikit-video
import skvideo.io # converts video to frames

In [None]:
import matplotlib.pyplot as plt # used for plotting images
import numpy as np # to do math functions
from glob import glob # helps find files in a folder
import os, sys  # to interact with filesystem
import cv2
import tensorflow as tf  # tensorflow during training
from tflearn.activations import relu # rectified linear activation function

# normalizes data 
from tflearn.layers.normalization import batch_normalization as bn
from scipy.misc import imread, bytescale, imresize  # image manipulation functions
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d, global_avg_pool
from tflearn.layers.estimator import regression  # trainer for the network

# turns scalar label into vector where appropriate class is value 1 and others 0
from tflearn.data_utils import to_categorical  

# makes new images from the ones we have by flipping them, rotating, etc.
from tflearn.data_augmentation import ImageAugmentation

# to visualize the data space
from tensorflow.contrib.tensorboard.plugins import projector

In [None]:
    def install_tensorboard_dep():
        '''Installs tensorboard to be used in colab.'''
        if 'ngrok-stable-linux-amd64.zip' not in os.listdir(os.getcwd()):
            !wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
            !unzip ngrok-stable-linux-amd64.zip
            os.system('n')

In [None]:
            def montage(x, plot_shape=(15, 15), return_grid=False, cmap='viridis'):
                '''
                Takes in a 4-D tensor, x, of shape [NxHxWxC] and puts all of the images
                together in a single matrix to show as one image. If return_grid is True, 
                it will not show the matrix, but return it to be used in some other 
                operations.
                '''
                
                count = 0
                if not return_grid:
                    scale = [0, 255]
                else:
                    scale = [0, 1]
                
                
                if len(x.shape) == 4 and x.shape[-1] == 3:
                    num, m, n, c = x.shape
                else:
                    num, m, n = x.shape
                    c = 1
                    
                num = int(np.ceil(np.sqrt(num)))
                grid = np.zeros([num*m, num*n, c])
                
                if c == 1:
                    grid = grid[..., 0]
                
                for i in range(num):
                    for j in range(num):
                        if count < x.shape[0]:
                            if c == 1:
                                grid[i*m:i*m+m, j*n:j*n+n] = bytescale(x[count, ...], 
                                                                                                             low=scale[0], high=scale[1])
                            else:
                                grid[i*m:i*m+m, j*n:j*n+n, :] = bytescale(x[count, ...],
                                                                                                                    low=scale[0], high=scale[1])
                            count += 1 
                            
                if return_grid:
                    return grid
                else:
                    fig = plt.figure(figsize=plot_shape)
                    a1 = fig.add_subplot(111)
                    a1.set_xticks(np.arange(-0.5, num*n, n))
                    a1.set_yticks(np.arange(-0.5, num*m, m))
                    a1.set_yticklabels([])
                    a1.set_xticklabels([])
                    a1.imshow(grid, cmap=cmap)
                    plt.show()

In [None]:
def start_tensorboard():
    '''
    Starts tensorboard on colaboratory and prints its public ngrok URL.

    Tensorboard (port 6006) and an ngrok tunnel to it are launched in the
    background. ngrok's local API on port 4040 is then polled for the tunnel
    list, because the tunnel is usually not up yet right after launch; the
    public URL of the first tunnel is printed as soon as it is available.
    If no tunnel shows up after the retries a hint is printed instead.

    Expects the ngrok binary to be present in the current working directory.
    '''
    import json
    import time
    import urllib.request

    LOG_DIR = '/tmp/tflearn_logs'    # where the log files will go
    shell = get_ipython()
    shell.system_raw('tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'.format(LOG_DIR))
    shell.system_raw('./ngrok http 6006 &')

    for _ in range(10):
        try:
            with urllib.request.urlopen('http://localhost:4040/api/tunnels') as response:
                tunnels = json.loads(response.read().decode('utf-8'))['tunnels']
            public_url = tunnels[0]['public_url']
        except (OSError, ValueError, KeyError, IndexError):
            # API not reachable yet, or no tunnel registered yet -- wait and retry
            time.sleep(1)
        else:
            print(public_url)
            return

    print('ngrok did not report a tunnel; check that ./ngrok exists and run this again')

In [None]:
def residual_block(incoming, n_filters, filter_size, n_blocks, stride=1):
    '''
    Stacks n_blocks residual units on top of incoming and returns the result.

    Each unit is conv -> batch norm -> relu -> conv -> batch norm, added to a
    shortcut of the unit's input and passed through a relu.

    incoming: input tensor; its last dimension is read as the channel count.
    n_filters: number of filters of every convolution in the stack.
    filter_size: filter size of the two main convolutions of each unit.
    n_blocks: how many units to stack; with 0 the input is returned unchanged.
    stride: stride of the first convolution of the first unit only; all
        later units use stride 1.
    '''
    for block in range(n_blocks):
        # only the first unit is allowed to downsample
        block_stride = stride if block == 0 else 1
        in_channels = incoming.get_shape().as_list()[-1]

        # convolution 1
        conv1 = conv_2d(incoming, n_filters, filter_size, activation='linear', strides=block_stride)
        conv1_norm = relu(bn(conv1))

        # convolution 2
        conv2 = conv_2d(conv1_norm, n_filters, filter_size, activation='linear', strides=1)
        conv2_norm = bn(conv2)

        # bring the shortcut to the same size as conv2_norm before adding:
        # pool when this unit downsamples, 1x1 conv when the channel count changes
        shortcut = incoming
        if block_stride > 1:
            shortcut = max_pool_2d(shortcut, 3, block_stride)
        if in_channels != n_filters:
            shortcut = conv_2d(shortcut, n_filters, 1, activation='linear', strides=1)

        # elementwise addition followed by relu; the result feeds the next unit
        incoming = relu(shortcut + conv2_norm)

    # returned only after every unit has been built
    return incoming

In [None]:
def _stack_past_frames(frames, num_frames, frame_gap):
    '''Stacks each frame with its (num_frames - 1) predecessors, frame_gap apart, on a new last axis.'''
    span = (num_frames - 1) * frame_gap    # how far back the oldest frame of a stack lies
    n_stacks = frames.shape[0] - span
    if n_stacks <= 0:
        # clip too short to build a single complete stack
        return np.zeros((0,) + frames.shape[1:] + (num_frames,))
    # channel k holds the frame k*frame_gap steps before the current one
    return np.stack([frames[span - k*frame_gap:span - k*frame_gap + n_stacks]
                     for k in range(num_frames)], axis=-1)


def video_loader():
    '''
    Loads every .avi file in the current directory as stacks of grayscale frames.

    Each video is read, resized to 240x320 when needed and averaged over its
    colour channels. Every stack holds a frame plus the frames 3 and 6 steps
    before it, in that order, along the last axis. Only frames that have both
    predecessors start a stack, so a video with T frames gives T-6 stacks and
    shorter videos give none. Videos that raise ValueError while being read
    are reported and skipped.

    Returns an array of shape [num_stacks, 240, 320, 3] with the stacks of
    all videos concatenated (zero stacks when nothing could be loaded).
    '''
    vid_names = glob('*.avi') # get all filenames in this location in .avi
    print(vid_names)
    num_past_frames = 3    # frames per stack
    frame_gap = num_past_frames    # distance between the frames of a stack
    # start with an empty array so the result has the right shape without videos
    stacks = [np.zeros([0, 240, 320, num_past_frames])]

    for vid_name in vid_names:
        try:
            vid = skvideo.io.vread(vid_name) # read in the video
        except ValueError as err:
            print('skipping {}: {}'.format(vid_name, err))
            continue
        print(vid.shape)

        # if video is not correct size, resize it frame by frame
        if vid.shape[1] != 240 or vid.shape[2] != 320:
            vid = np.array([imresize(frame, [240, 320]) for frame in vid])

        gray = np.mean(vid, 3)    # make all frames grayscale
        stacks.append(_stack_past_frames(gray, num_past_frames, frame_gap))

    # single concatenation instead of growing the array once per video
    return np.concatenate(stacks, 0)

In [None]:
# navigate through filesystem to data folders
os.chdir('/home/rachel_wong/Documents/HMDB51(2)')

In [None]:
os.listdir() # list all files and folder in this location

# Input training data

In [None]:
os.chdir('climb_stairs')

In [None]:
# call the function to load the images in this directory
imgs_train = video_loader()

In [None]:
climbstairs_group_imgs_train = imgs_train.shape[0]
print(imgs_train.shape)

In [None]:
labels_train = np.zeros([climbstairs_group_imgs_train, ]) # number of rows, number of columns
print(labels_train.shape)

In [None]:
labels_train[climbstairs_group_imgs_train:] = 0. # we know that halfway it switches from non-violent to violent so we want it labeled 1 for the bottom half

In [None]:
climb_stairs_img_train = imgs_train[0, ...]    # get first stack
for i in range(3):    #loop through the five frames in that stack
    plt.imshow(climb_stairs_img_train[..., i], cmap='gray')    # plot that number frame in the stack
    plt.grid(False) # get rid of grid
    plt.show()

In [None]:
os.chdir('..') # go back a folder
os.chdir('run') # go into run folder

In [None]:
imgs_train = np.concatenate((imgs_train, video_loader()), 0) # load for the run folder and then add onto climb_stairs images

In [None]:
run_group_imgs_train = imgs_train.shape[0]
print(imgs_train.shape)

In [None]:
labels_train = np.zeros([run_group_imgs_train, ]) # number of rows, number of columns
print(labels_train.shape)

In [None]:
labels_train[climbstairs_group_imgs_train:run_group_imgs_train] = 1. # we know that halfway it switches from non-violent to violent so we want it labeled 1 for the bottom half

In [None]:
run_img_train = imgs_train[0, ...]        # get first stack
for i in range(3):        #loop through the five frames in that stack
        plt.imshow(run_img_train[..., i], cmap='gray')        # plot that number frame in the stack
        plt.grid(False) # get rid of grid
        plt.show()

In [None]:
os.chdir('..') # go back a folder
os.chdir('ride_bike') # go into walk folder

In [None]:
imgs_train = np.concatenate((imgs_train, video_loader()), 0) # load for the walk folder and then add onto climb_stairs images

In [None]:
ridebike_group_imgs_train = imgs_train.shape[0]
print(imgs_train.shape)

In [None]:
labels_train = np.zeros([ridebike_group_imgs_train, ]) # number of rows, number of columns
print(labels_train.shape)

In [None]:
labels_train[run_group_imgs_train:ridebike_group_imgs_train] = 2. # we know that halfway it switches from non-violent to violent so we want it labeled 1 for the bottom half

In [None]:
ride_bike_img_train = imgs_train[0, ...]        # get first stack
for i in range(3):        #loop through the five frames in that stack
        plt.imshow(ride_bike_img_train[..., i], cmap='gray')        # plot that number frame in the stack
        plt.grid(False) # get rid of grid
        plt.show()

In [None]:
print(imgs_train.shape)

In [None]:
from tflearn.data_utils import to_categorical # makes the multi-column labels for us
labels_train = to_categorical(labels_train, 3)  # to_categorical(which column to put the 1, number of classes);for number of labels, make the same amount of columns

In [None]:
print(labels_train.shape)

# Input test data

In [None]:
os.chdir('..')
os.chdir('climb_stairs_test')

In [None]:
# call the function to load the images in this directory
imgs_test = video_loader()

In [None]:
climbstairs_group_imgs_test = imgs_test.shape[0]
print(imgs_test.shape)

In [None]:
labels_test = np.zeros([climbstairs_group_imgs_test, ])# number of rows, number of columns
print(labels_test.shape)

In [None]:
labels_test[climbstairs_group_imgs_test:] = 0. # we know that halfway it switches from non-violent to violent so we want it labeled 1 for the bottom half

In [None]:
climb_stairs_img_test = imgs_test[0, ...]        # get first stack
for i in range(3):        #loop through the five frames in that stack
        plt.imshow(climb_stairs_img_test[..., i], cmap='gray')        # plot that number frame in the stack
        plt.grid(False) # get rid of grid
        plt.show()

In [None]:
os.chdir('..') # go back a folder
os.chdir('run_test') # go into run folder

In [None]:
imgs_test = np.concatenate((imgs_test, video_loader()), 0) # load for the run folder and then add onto climb_stairs images

In [None]:
run_group_imgs_test = imgs_test.shape[0]
print(imgs_test.shape)

In [None]:
labels_test = np.zeros([run_group_imgs_test, ]) # number of rows, number of columns
print(labels_test.shape)

In [None]:
labels_test[climbstairs_group_imgs_test:run_group_imgs_test] = 1. # we know that halfway it switches from non-violent to violent so we want it labeled 1 for the bottom half

In [None]:
run_img_test = imgs_test[0, ...]        # get first stack
for i in range(3):        #loop through the five frames in that stack
        plt.imshow(run_img_test[..., i], cmap='gray')        # plot that number frame in the stack
        plt.grid(False) # get rid of grid
        plt.show()

In [None]:
os.chdir('..') # go back a folder
os.chdir('ride_bike_test') # go into walk folder

In [None]:
imgs_test = np.concatenate((imgs_test, video_loader()), 0) # load for the walk folder and then add onto climb_stairs images

In [None]:
ridebike_group_imgs_test = imgs_test.shape[0]
print(imgs_test.shape)

In [None]:
labels_test = np.zeros([ridebike_group_imgs_test, ]) # number of rows, number of columns
print(labels_test.shape)

In [None]:
labels_test[run_group_imgs_test:ridebike_group_imgs_test] = 2. # we know that halfway it switches from non-violent to violent so we want it labeled 1 for the bottom half

In [None]:
ride_bike_img_test = imgs_test[0, ...]        # get first stack
for i in range(3):        #loop through the five frames in that stack
        plt.imshow(ride_bike_img_test[..., i], cmap='gray')        # plot that number frame in the stack
        plt.grid(False) # get rid of grid
        plt.show()

In [None]:
print(imgs_test.shape)

In [None]:
from tflearn.data_utils import to_categorical # makes the multi-column labels for us
labels_test = to_categorical(labels_test, 3)  # to_categorical(which column to put the 1, number of classes);for number of labels, make the same amount of columns

In [None]:
print(labels_test.shape)

# Training vs testing for x = imgs and y = labels

In [None]:
# shapes of the training images/labels followed by the testing images/labels
dataset_shapes = (imgs_train.shape, labels_train.shape, imgs_test.shape, labels_test.shape)
print(*dataset_shapes)
# first 10 training labels -- each row has a 1 in the column of its class
print(labels_train[:10])

# ResNet and TensorFlow

In [None]:
# input layer: batches of 240 x 320 stacks with 3 frames as channels
input_layer = input_data(shape=[None, 240, 320, 3])

# stem convolution: 32 filters of size 7, stride 2 --- 120 x 160 x 32
conv1 = conv_2d(input_layer, 32, 7, strides=2, activation='relu')

# max pooling, size 3 and stride 2 --- 60 x 80 x 32
pool1 = max_pool_2d(conv1, 3, 2)

# stage 1: 3 residual units requested, 32 filters, no downsampling --- 60 x 80 x 32
residual_block1 = residual_block(pool1, n_filters=32, filter_size=3, n_blocks=3)

# stage 2: 4 residual units requested, 64 filters, stride 2 --- 30 x 40 x 64
residual_block2 = residual_block(residual_block1, n_filters=64, filter_size=3,
                                 n_blocks=4, stride=2)

# stage 3: 6 residual units requested, 128 filters, stride 2 --- 15 x 20 x 128
residual_block3 = residual_block(residual_block2, n_filters=128, filter_size=3,
                                 n_blocks=6, stride=2)

# stage 4: 3 residual units requested, 256 filters, stride 2
# NOTE(review): spatial size is 8 x 10 if tflearn pads with 'same' (its
# default, presumably) and 7 x 10 otherwise -- confirm
residual_block4 = residual_block(residual_block3, n_filters=256, filter_size=3,
                                 n_blocks=3, stride=2)

# global average pooling: one value per feature map --- 256
gap = global_avg_pool(residual_block4)

# output layer: softmax over the 3 classes
output_layer = fully_connected(gap, 3, activation='softmax')

In [None]:
network = regression(output_layer, optimizer='adam', 
                     loss='categorical_crossentropy', learning_rate=.0001)

In [None]:
# build the network based on the description above
model = tflearn.DNN(network, tensorboard_verbose=2)

In [None]:
# start tensorboard --- might need to run this code cell twice
# (presumably because the ngrok tunnel is not always up yet when its URL is
# queried -- TODO confirm)
# the order matters: the ngrok binary has to be downloaded before it is launched
install_tensorboard_dep()
start_tensorboard()

In [None]:
# training settings, kept together so they are easy to tweak
fit_options = dict(
    n_epoch=25,  # number of passes over the entire training set
    batch_size=20,  # go through the dataset 20 examples at a time
    shuffle=True,  # shuffle the stacks each epoch
    validation_set=(imgs_test, labels_test),  # validation stacks/labels
    show_metric=True,  # report accuracy along with the loss
    run_id='three_group_diff',  # name that will show up on tensorboard
)

# train on the training stacks and their one-hot labels
model.fit(imgs_train, labels_train, **fit_options)

# save the trained model under this name so it can be loaded later
model.save('three_group_diff_ResNet')

In [None]:
# open a new tensorflow session and run the initializer op for all global
# variables inside it
# NOTE(review): this session is created separately from `model`; whether the
# trained weights are expected to be visible in it cannot be told from here
sess = tf.Session()  # tensorflow session
sess.run(tf.global_variables_initializer())