In [18]:
from collections import Counter
from sklearn.model_selection import train_test_split
import numpy as np
import cv2
#from google.colab.patches import cv2_imshow
import tensorflow as tf
import matplotlib.pyplot as plt
import os
import glob
import gc
import math

In [19]:
def cyclical_learning_rate(epoch, lr):
    base_lr = 0.001  # The minimum learning rate
    max_lr = 0.01    # The maximum learning rate
    step_size = 3   # The number of epochs between each cycle

    cycle = math.floor(1 + epoch/(2*step_size))
    x = abs(epoch/step_size - 2*cycle + 1)
    new_lr = base_lr + (max_lr - base_lr) * max(0, (1 - x))

    return new_lr * (6 / (cycle + 5))

# Stanford 40
### Download the data
You can see the zip files if you click the Files tab (looks like a folder symbol on the left of the screen)

In [2]:
!wget http://vision.stanford.edu/Datasets/Stanford40_JPEGImages.zip
!wget http://vision.stanford.edu/Datasets/Stanford40_ImageSplits.zip

--2023-04-13 22:56:26--  http://vision.stanford.edu/Datasets/Stanford40_JPEGImages.zip
Resolving vision.stanford.edu (vision.stanford.edu)... 171.64.68.10
Connecting to vision.stanford.edu (vision.stanford.edu)|171.64.68.10|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 304771808 (291M) [application/zip]
Saving to: 'Stanford40_JPEGImages.zip'

     0K .......... .......... .......... .......... ..........  0%  156K 31m54s
    50K .......... .......... .......... .......... ..........  0%  146K 32m53s
   100K .......... .......... .......... .......... ..........  0%  288K 27m40s
   150K .......... .......... .......... .......... ..........  0%  312K 24m43s
   200K .......... .......... .......... .......... ..........  0%  328K 22m47s
   250K .......... .......... .......... .......... ..........  0% 4,67M 19m10s
   300K .......... .......... .......... .......... ..........  0%  333K 18m33s
   350K .......... .......... .......... .......... ..........  0% 7

### Unzip it

In [3]:
os.system("unzip Stanford40_JPEGImages.zip -d Stanford40/")
os.system("unzip Stanford40_ImageSplits.zip -d Stanford40/")

0

## Read the train and test splits, combine them and make better splits to help training networks easier.

In [4]:
keep_stanford40 = ["applauding", "climbing", "drinking", "jumping", "pouring_liquid", "riding_a_bike", "riding_a_horse", 
        "running", "shooting_an_arrow", "smoking", "throwing_frisby", "waving_hands"]
with open('Stanford40/ImageSplits/train.txt', 'r') as f:
    # We won't use these splits but split them ourselves
    sf_train_files = [file_name for file_name in list(map(str.strip, f.readlines())) if '_'.join(file_name.split('_')[:-1]) in keep_stanford40]
    sf_train_labels = ['_'.join(name.split('_')[:-1]) for name in sf_train_files]

with open('Stanford40/ImageSplits/test.txt', 'r') as f:
    # We won't use these splits but split them ourselves
    sf_test_files = [file_name for file_name in list(map(str.strip, f.readlines())) if '_'.join(file_name.split('_')[:-1]) in keep_stanford40]
    sf_test_labels = ['_'.join(name.split('_')[:-1]) for name in sf_test_files]

# Combine the splits and split for keeping more images in the training set than the test set.
sf_all_files = sf_train_files + sf_test_files
sf_all_labels = sf_train_labels + sf_test_labels
sf_train_validation_files, sf_test_files = train_test_split(sf_all_files, test_size=0.1, random_state=0, stratify=sf_all_labels)
sf_train_validation_labels = ['_'.join(name.split('_')[:-1]) for name in sf_train_validation_files]
sf_train_files, sf_validation_files = train_test_split(sf_train_validation_files, test_size=0.1, random_state=0, stratify=sf_train_validation_labels)

sf_train_labels = ['_'.join(name.split('_')[:-1]) for name in sf_train_files]
sf_train_labels = list(map(lambda x: keep_stanford40.index(x), sf_train_labels))
sf_test_labels = ['_'.join(name.split('_')[:-1]) for name in sf_test_files]
sf_test_labels = list(map(lambda x: keep_stanford40.index(x), sf_test_labels))
sf_validation_labels = ['_'.join(name.split('_')[:-1]) for name in sf_validation_files]
sf_validation_labels = list(map(lambda x: keep_stanford40.index(x), sf_validation_labels))

print(f'Train files ({len(sf_train_files)}):\n\t{sf_train_files}')
print(f'Train labels ({len(sf_train_labels)}):\n\t{map(str, sf_train_labels)}\n'\
      f'Train Distribution:{list(Counter(sorted(sf_train_labels)).items())}\n')
print(f'Test files ({len(sf_test_files)}):\n\t{sf_test_files}')
print(f'Test labels ({len(sf_test_labels)}):\n\t{map(str, sf_test_labels)}\n'\
      f'Test Distribution:{list(Counter(sorted(sf_test_labels)).items())}\n')
print(f'Validation files ({len(sf_validation_files)}):\n\t{sf_validation_files}')
print(f'Validation labels ({len(sf_validation_labels)}):\n\t{map(str, sf_validation_labels)}\n'\
      f'Validation Distribution:{list(Counter(sorted(sf_validation_labels)).items())}\n')

sf_action_categories = sorted(list(set(sf_train_labels)))
print(f'Action categories ({len(sf_action_categories)}):\n{sf_action_categories}')

Train files (2459):
	['riding_a_bike_140.jpg', 'running_196.jpg', 'jumping_005.jpg', 'riding_a_bike_282.jpg', 'climbing_130.jpg', 'running_168.jpg', 'waving_hands_168.jpg', 'riding_a_horse_161.jpg', 'climbing_048.jpg', 'pouring_liquid_053.jpg', 'riding_a_bike_292.jpg', 'running_039.jpg', 'drinking_220.jpg', 'drinking_005.jpg', 'smoking_030.jpg', 'riding_a_bike_286.jpg', 'drinking_025.jpg', 'applauding_215.jpg', 'smoking_117.jpg', 'waving_hands_196.jpg', 'applauding_140.jpg', 'riding_a_horse_292.jpg', 'throwing_frisby_013.jpg', 'smoking_172.jpg', 'applauding_005.jpg', 'climbing_210.jpg', 'pouring_liquid_104.jpg', 'riding_a_bike_128.jpg', 'waving_hands_041.jpg', 'riding_a_bike_083.jpg', 'shooting_an_arrow_048.jpg', 'drinking_034.jpg', 'drinking_223.jpg', 'drinking_100.jpg', 'running_188.jpg', 'pouring_liquid_031.jpg', 'smoking_184.jpg', 'jumping_225.jpg', 'jumping_272.jpg', 'riding_a_horse_069.jpg', 'jumping_273.jpg', 'smoking_178.jpg', 'climbing_253.jpg', 'jumping_185.jpg', 'pouring_liq

Resize images

In [5]:
sf_train_images = []
for img_nr in range(len(sf_train_files)):
  img = cv2.imread(f'Stanford40/JPEGImages/{sf_train_files[img_nr]}')
  img = cv2.resize(img, (224, 224), interpolation = cv2.INTER_AREA)
  cv2.imwrite(f'Stanford40/JPEGImages/{sf_train_files[img_nr]}', img)
  sf_train_images.append(img)
sf_train_images = np.array(sf_train_images) / 255.0
sf_train_labels = np.array(sf_train_labels)

sf_test_images = []
for img_nr in range(len(sf_test_files)):
  img = cv2.imread(f'Stanford40/JPEGImages/{sf_test_files[img_nr]}')
  img = cv2.resize(img, (224, 224), interpolation = cv2.INTER_AREA)
  cv2.imwrite(f'Stanford40/JPEGImages/{sf_test_files[img_nr]}', img)
  sf_test_images.append(img)
sf_test_images = np.array(sf_test_images) / 255.0
sf_test_labels = np.array(sf_test_labels)

sf_validation_images = []
for img_nr in range(len(sf_validation_files)):
  img = cv2.imread(f'Stanford40/JPEGImages/{sf_validation_files[img_nr]}')
  img = cv2.resize(img, (224, 224), interpolation = cv2.INTER_AREA)
  cv2.imwrite(f'Stanford40/JPEGImages/{sf_validation_files[img_nr]}', img)
  sf_validation_images.append(img)
sf_validation_images = np.array(sf_validation_images) / 255.0
sf_validation_labels = np.array(sf_validation_labels)



### Visualize a photo from the training files and also print its label

In [None]:
image_no = 232  # change this to a number between [0, 1200] and you can see a different training image
img = cv2.imread(f'Stanford40/JPEGImages/{sf_train_files[image_no]}')
print(f'An image with the label - {sf_train_labels[image_no]}')
#cv2_imshow(img)

Make Stanford CNN

In [22]:
#poging 1
filter_count = 32
kernel_size = (5, 5)
strides = (2)
batch_input_shape = (32, 224, 224, 3)

Base_Model = tf.keras.Sequential([  
    tf.keras.layers.Conv2D(filter_count, kernel_size, strides, padding='same', activation='relu'),
    tf.keras.layers.ReLU(),
    tf.keras.layers.BatchNormalization(),

    tf.keras.layers.Conv2D(filter_count, kernel_size, strides, padding='valid', activation='relu'),
    tf.keras.layers.ReLU(),
    tf.keras.layers.BatchNormalization(),
    
    tf.keras.layers.Conv2D(filter_count, kernel_size, strides, padding='valid', activation='relu'),
    tf.keras.layers.ReLU(),
    tf.keras.layers.BatchNormalization(),

    tf.keras.layers.Conv2D(filter_count, kernel_size, strides, padding='valid', activation='relu'),
    tf.keras.layers.ReLU(),
    tf.keras.layers.BatchNormalization(),

    tf.keras.layers.Conv2D(filter_count, kernel_size, strides, padding='valid', activation='relu'),
    tf.keras.layers.ReLU(),
    tf.keras.layers.BatchNormalization(),

    tf.keras.layers.Conv2D(filter_count, (3, 3), 2, padding='valid', activation='relu'),
    tf.keras.layers.ReLU(),
    tf.keras.layers.BatchNormalization(),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.50),
    tf.keras.layers.Dense(12, activation ='softmax')
])

opti = tf.keras.optimizers.Adam(learning_rate=0.001)

Base_Model.compile(optimizer=opti,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

Base_Model.build(batch_input_shape)
Base_Model.summary()

history_base_model = Base_Model.fit(sf_train_images, sf_train_labels, epochs=25, validation_data=(sf_validation_images, sf_validation_labels))

validate_loss, validate_acc = Base_Model.evaluate(sf_validation_images,  sf_validation_labels, verbose=2)

print(validate_loss)
print(validate_acc)

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_26 (Conv2D)          (32, 112, 112, 32)        2432      
                                                                 
 re_lu_12 (ReLU)             (32, 112, 112, 32)        0         
                                                                 
 batch_normalization_25 (Bat  (32, 112, 112, 32)       128       
 chNormalization)                                                
                                                                 
 conv2d_27 (Conv2D)          (32, 54, 54, 32)          25632     
                                                                 
 re_lu_13 (ReLU)             (32, 54, 54, 32)          0         
                                                                 
 batch_normalization_26 (Bat  (32, 54, 54, 32)         128       
 chNormalization)                                     

In [None]:
#poging 2
filter_count = 32
kernel_size = (5, 5)
strides = (2)
batch_input_shape = (32, 224, 224, 3)

Base_Model = tf.keras.Sequential([  
    tf.keras.layers.Conv2D(filter_count, kernel_size, 2, padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.BatchNormalization(),

    tf.keras.layers.Conv2D(filter_count, kernel_size, 3, padding='valid', activation='relu', kernel_regularizer=tf.keras.regularizers.L1(l=0.05)),
    tf.keras.layers.MaxPooling2D((3, 3)),
    tf.keras.layers.BatchNormalization(),
    
    tf.keras.layers.Conv2D(filter_count, kernel_size, strides, padding='valid', activation='relu'),
    #tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.BatchNormalization(),


    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.50),
    tf.keras.layers.Dense(12, activation ='softmax')
])

opti = tf.keras.optimizers.Adam(learning_rate=0.0001)

Base_Model.compile(optimizer=opti,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

Base_Model.build(batch_input_shape)
Base_Model.summary()

history_base_model = Base_Model.fit(sf_train_images, sf_train_labels, epochs=15, validation_data=(sf_validation_images, sf_validation_labels))

validate_loss, validate_acc = Base_Model.evaluate(sf_validation_images,  sf_validation_labels, verbose=2)

print(validate_loss)
print(validate_acc)

In [None]:
#poging 3
filter_count = 32
kernel_size = (3, 3)
strides = (2)
batch_input_shape = (32, 224, 224, 3)

Base_Model = tf.keras.Sequential([  
    tf.keras.layers.Conv2D(filter_count, kernel_size, 2, padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.BatchNormalization(),

    tf.keras.layers.Conv2D(filter_count, (5, 5), 3, padding='valid', activation='relu', kernel_regularizer=tf.keras.regularizers.L1(l=0.05)),
    tf.keras.layers.MaxPooling2D((3, 3)),
    tf.keras.layers.BatchNormalization(),
    
    tf.keras.layers.Conv2D(filter_count, (3, 3), strides, padding='valid', activation='relu'),
    #tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.BatchNormalization(),


    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Dense(12, activation ='softmax')
])

opti = tf.keras.optimizers.Adam(learning_rate=0.001)

Base_Model.compile(optimizer=opti,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

Base_Model.build(batch_input_shape)
Base_Model.summary()

history_base_model = Base_Model.fit(sf_train_images, sf_train_labels, epochs=15, validation_data=(sf_validation_images, sf_validation_labels))

validate_loss, validate_acc = Base_Model.evaluate(sf_validation_images,  sf_validation_labels, verbose=2)

print(validate_loss)
print(validate_acc)

In [52]:
#poging 4
filter_count = 32
kernel_size = (5, 5)
strides = (2)
batch_input_shape = (32, 224, 224, 3)

Base_Model = tf.keras.Sequential([  
    tf.keras.layers.Conv2D(filter_count, kernel_size, strides, padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D((3, 3)),
    tf.keras.layers.BatchNormalization(),

    tf.keras.layers.Conv2D(filter_count, kernel_size, 3, padding='valid', activation='relu'),
    tf.keras.layers.MaxPooling2D((3, 3)),
    tf.keras.layers.BatchNormalization(),
    
    #tf.keras.layers.Conv2D(filter_count, kernel_size, strides, padding='valid', activation='relu'),
    #tf.keras.layers.BatchNormalization(),


    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.50),
    tf.keras.layers.Dense(12, activation ='softmax')
])

Base_Model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

Base_Model.build(batch_input_shape)
Base_Model.summary()

history_base_model = Base_Model.fit(sf_train_images, sf_train_labels, epochs=15, validation_data=(sf_validation_images, sf_validation_labels))

validate_loss, validate_acc = Base_Model.evaluate(sf_validation_images,  sf_validation_labels, verbose=2)

Base_Model.save_weights('Weights/StanfordModel')

print(validate_loss)

print(validate_acc)

Model: "sequential_15"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_31 (Conv2D)          (32, 112, 112, 32)        2432      
                                                                 
 max_pooling2d_30 (MaxPoolin  (32, 37, 37, 32)         0         
 g2D)                                                            
                                                                 
 batch_normalization_27 (Bat  (32, 37, 37, 32)         128       
 chNormalization)                                                
                                                                 
 conv2d_32 (Conv2D)          (32, 11, 11, 32)          25632     
                                                                 
 max_pooling2d_31 (MaxPoolin  (32, 3, 3, 32)           0         
 g2D)                                                            
                                                     

In [25]:
#poging 5
filter_count = 32
kernel_size = (5, 5)
strides = (2)
batch_input_shape = (32, 224, 224, 3)

Base_Model = tf.keras.Sequential([
    tf.keras.layers.RandomContrast(0.05),
    tf.keras.layers.RandomFlip("horizontal"),
    #tf.keras.layers.RandomBrightness(0.01),
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.Conv2D(filter_count, kernel_size, strides, padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D((3, 3)),
    tf.keras.layers.BatchNormalization(),

    tf.keras.layers.Conv2D(filter_count, kernel_size, 3, padding='valid', activation='relu'),
    tf.keras.layers.MaxPooling2D((3, 3)),
    tf.keras.layers.BatchNormalization(),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.50),
    tf.keras.layers.Dense(12, activation ='softmax')
])

Base_Model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

Base_Model.build(batch_input_shape)
Base_Model.summary()

lr_scheduler = tf.keras.callbacks.LearningRateScheduler(cyclical_learning_rate)

history_base_model = Base_Model.fit(sf_train_images, sf_train_labels, epochs=25, validation_data=(sf_validation_images, sf_validation_labels))

validate_loss, validate_acc = Base_Model.evaluate(sf_validation_images,  sf_validation_labels, verbose=2)

Base_Model.save_weights('Weights/StanfordModel')

print(validate_loss)
print(validate_acc)

Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 random_contrast_5 (RandomCo  (32, 224, 224, 3)        0         
 ntrast)                                                         
                                                                 
 random_flip_5 (RandomFlip)  (32, 224, 224, 3)         0         
                                                                 
 random_rotation_5 (RandomRo  (32, 224, 224, 3)        0         
 tation)                                                         
                                                                 
 conv2d_36 (Conv2D)          (32, 112, 112, 32)        2432      
                                                                 
 max_pooling2d_18 (MaxPoolin  (32, 37, 37, 32)         0         
 g2D)                                                            
                                                     

In [24]:
#poging 5 with cyclical learning
filter_count = 32
kernel_size = (5, 5)
strides = (2)
batch_input_shape = (32, 224, 224, 3)

Base_Model = tf.keras.Sequential([
    tf.keras.layers.RandomContrast(0.05),
    tf.keras.layers.RandomFlip("horizontal"),
    #tf.keras.layers.RandomBrightness(0.01),
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.Conv2D(filter_count, kernel_size, strides, padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D((3, 3)),
    tf.keras.layers.BatchNormalization(),

    tf.keras.layers.Conv2D(filter_count, kernel_size, 3, padding='valid', activation='relu'),
    tf.keras.layers.MaxPooling2D((3, 3)),
    tf.keras.layers.BatchNormalization(),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.50),
    tf.keras.layers.Dense(12, activation ='softmax')
])

Base_Model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

Base_Model.build(batch_input_shape)
Base_Model.summary()

lr_scheduler = tf.keras.callbacks.LearningRateScheduler(cyclical_learning_rate)

history_base_model = Base_Model.fit(sf_train_images, sf_train_labels, epochs=25, validation_data=(sf_validation_images, sf_validation_labels), callbacks=[lr_scheduler])

validate_loss, validate_acc = Base_Model.evaluate(sf_validation_images,  sf_validation_labels, verbose=2)

Base_Model.save_weights('Weights/StanfordModel')

print(validate_loss)
print(validate_acc)

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 random_contrast_4 (RandomCo  (32, 224, 224, 3)        0         
 ntrast)                                                         
                                                                 
 random_flip_4 (RandomFlip)  (32, 224, 224, 3)         0         
                                                                 
 random_rotation_4 (RandomRo  (32, 224, 224, 3)        0         
 tation)                                                         
                                                                 
 conv2d_34 (Conv2D)          (32, 112, 112, 32)        2432      
                                                                 
 max_pooling2d_16 (MaxPoolin  (32, 37, 37, 32)         0         
 g2D)                                                            
                                                     

  output, from_logits = _get_logits(


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
9/9 - 0s - loss: 1.9013 - accuracy: 0.3321 - 121ms/epoch - 13ms/step
1.901329517364502
0.3321167826652527


In [11]:
try: del sf_train_images
except: print("sf_train_images is not defined")
try: del sf_train_labels
except: print("sf_train_labels is not defined")
try: del sf_validation_images
except: print("sf_validation_images is not defined")
try: del sf_validation_labels
except: print("sf_validation_labels is not defined")
try: del Base_Model
except: print("Base_Model is not defined")

gc.collect()

1687

# Human Motion Database 51 (HMDB51)
### Download the dataset

In [7]:
# Download HMDB51 data and splits from serre lab website
! wget http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/hmdb51_org.rar
! wget http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/test_train_splits.rar

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)

--2023-04-13 23:31:30--  http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/test_train_splits.rar
Resolving serre-lab.clps.brown.edu (serre-lab.clps.brown.edu)... 128.148.254.114
Connecting to serre-lab.clps.brown.edu (serre-lab.clps.brown.edu)|128.148.254.114|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/test_train_splits.rar [following]
--2023-04-13 23:31:30--  https://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/test_train_splits.rar
Connecting to serre-lab.clps.brown.edu (serre-lab.clps.brown.edu)|128.148.254.114|:443... connected.
HTTP request sent, awai

# Extract and organize the data.

In [8]:
os.system("mkdir video_data test_train_splits")
os.system("unrar e test_train_splits.rar test_train_splits")
os.system("del test_train_splits.rar")
os.system("unrar e hmdb51_org.rar")
os.system("del hmdb51_org.rar")
os.system("move *.rar video_data")
keep_hmdb51 = ["clap", "climb", "drink", "jump", "pour", "ride_bike", "ride_horse", 
        "run", "shoot_bow", "smoke", "throw", "wave"]
for files in os.listdir('video_data'):
    foldername = files.split('.')[0]
    if foldername in keep_hmdb51:
      # extract only the relevant classes for the assignment.
      os.system("mkdir video_data\\" + foldername)
      os.system("unrar e video_data\\"+ files + " video_data\\"+foldername)

os.system("del video_data\*.rar")


0

# Split the dataset into train and test 

In [9]:
TRAIN_TAG, TEST_TAG = 1, 2
hm_train_files, hm_test_files = [], []
hm_train_labels, hm_test_labels = [], []
split_pattern_name = f"*test_split1.txt"
split_pattern_path = os.path.join('test_train_splits', split_pattern_name)
annotation_paths = glob.glob(split_pattern_path)
for filepath in annotation_paths:
    class_name = '_'.join(filepath.split('\\')[-1].split('_')[:-2])
    if class_name not in keep_hmdb51:
        continue  # skipping the classes that we won't use.
    with open(filepath) as fid:
        lines = fid.readlines()
    for line in lines:
        video_filename, tag_string = line.split()
        tag = int(tag_string)
        if tag == TRAIN_TAG:
            hm_train_files.append(video_filename)
            hm_train_labels.append(class_name)
        elif tag == TEST_TAG:
            hm_test_files.append(video_filename)
            hm_test_labels.append(class_name)

hm_train_files, hm_val_files, hm_train_labels, hm_val_labels = train_test_split(hm_train_files, hm_train_labels, test_size=0.1)

print(f'Train files ({len(hm_train_files)}):\n\t{hm_train_files}')
print(f'Train labels ({len(hm_train_labels)}):\n\t{hm_train_labels}\n'\
      f'Train Distribution:{list(Counter(sorted(hm_train_labels)).items())}\n')
print(f'Test files ({len(hm_test_files)}):\n\t{hm_test_files}')
print(f'Test labels ({len(hm_test_labels)}):\n\t{hm_test_labels}\n'\
      f'Test Distribution:{list(Counter(sorted(hm_test_labels)).items())}\n')
print(f'Validation files ({len(hm_val_files)}):\n\t{hm_val_files}')
print(f'Validation labels ({len(hm_val_labels)}):\n\t{hm_val_labels}\n'\
      f'Validation Distribution:{list(Counter(sorted(hm_val_labels)).items())}\n')
action_categories = sorted(list(set(hm_train_labels)))
print(f'Action categories ({len(action_categories)}):\n{action_categories}')

hm_train_labels_nr = np.array(list(map(lambda x: action_categories.index(x), hm_train_labels)))
hm_test_labels_nr = np.array(list(map(lambda x: action_categories.index(x), hm_test_labels)))
hm_val_labels_nr = np.array(list(map(lambda x: action_categories.index(x), hm_val_labels)))

Train files (756):
	['TheLastManOnearth_drink_f_cm_np1_ri_med_12.avi', 'Newall_Green_High_Students_Waving_Goodbye_wave_u_cm_np1_fr_med_0.avi', 'Radfahren_um_die_Aggertalsperre_06_09_2009_ride_bike_f_cm_np1_ba_med_13.avi', 'WeddingCrashers_drink_h_cm_np1_le_goo_5.avi', 'RETURN_OF_THE_KING_drink_f_nm_np1_fr_med_9.avi', 'more_smoking_smoke_h_cm_np1_ri_med_1.avi', 'Gregoire_Airman_showreel_2008_jump_f_cm_np1_ri_bad_6.avi', '#437_How_To_Ride_A_Bike_ride_bike_f_cm_np1_ba_med_1.avi', 'After_work_smoke_in_the_garage_smoke_h_nm_np1_fr_bad_0.avi', 'Spectacular_tea_pouring_pour_u_cm_np1_ri_med_1.avi', 'Kletterwand_climb_f_cm_np1_ba_goo_2.avi', 'RETURN_OF_THE_KING_drink_u_cm_np2_fr_goo_2.avi', 'ChikiMovie_ride_horse_f_cm_np1_ri_med_3.avi', 'Goalkeeper_Felix_Schwake_jump_f_cm_np1_le_bad_5.avi', 'Return_of_the_King_1_smoke_h_nm_np1_ri_goo_3.avi', 'Sommerland_Syd_sprung_in_den_tod_jump_f_cm_np1_fr_bad_0.avi', 'Song_I_Can_Wave_My_Hands_-_Cullen_s_Abc_s_clap_u_cm_np1_fr_med_3.avi', 'Veoh_Alpha_Dog_1_th

In [10]:
hm_train_images = []
hm_train_flow = []

hm_test_images = []
hm_test_flow = []

hm_val_images = []
hm_val_flow = []

def calcFlow(path, fname):
  cap = cv2.VideoCapture(path)

  half_point = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) // 2

  cap.set(cv2.CAP_PROP_POS_FRAMES, half_point-1)
  ret, frame1 = cap.read()


  cap.set(cv2.CAP_PROP_POS_FRAMES, half_point)
  ret, frame2 = cap.read()

  prvs = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
  next = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
  flow = cv2.calcOpticalFlowFarneback(prvs, next, None, 0.5, 3, 15, 3, 5, 1.2, 0)
  mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
  hsv = np.zeros_like(frame1)
  hsv[..., 0] = ang*180/np.pi/2
  hsv[..., 1] = 255
  hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
  bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
  frame2 = cv2.resize(frame2, (224, 224), interpolation = cv2.INTER_AREA)
  bgr = cv2.resize(bgr, (224, 224), interpolation = cv2.INTER_AREA)
    
  cv2.imwrite('video_data\\images\\' + fname[:-3] + '.png', frame2)
  cv2.imwrite('video_data\\flow\\' + fname[:-3] + '_flow.png', bgr)

  return (frame2, bgr)

for i, train_file in enumerate(hm_train_files):
  path = 'video_data\\' + hm_train_labels[i] + '\\' + train_file
  img, flow = calcFlow(path, train_file)
  hm_train_images.append(img)
  hm_train_flow.append(flow)

hm_train_images = np.array(hm_train_images) / 255.0
hm_train_flow = np.array(hm_train_flow) / 255.0

print("done train files")

for j, test_file in enumerate(hm_test_files):
  path = 'video_data\\' + hm_test_labels[j] + '\\' + test_file
  img, flow = calcFlow(path, test_file)
  hm_test_images.append(img)
  hm_test_flow.append(flow)

hm_test_images = np.array(hm_test_images) / 255.0
hm_test_flow = np.array(hm_test_flow) / 255.0

print("done test files")

for k, val_file in enumerate(hm_val_files):
  path = 'video_data\\' + hm_val_labels[k] + '\\' + val_file
  img, flow = calcFlow(path, val_file)
  hm_val_images.append(img)
  hm_val_flow.append(flow)

hm_val_images = np.array(hm_val_images) / 255.0
hm_val_flow = np.array(hm_val_flow) / 255.0

print("done val files")


done train files
done test files
done val files


Pretrain Model

In [15]:
filter_count = 32
kernel_size = (5, 5)
strides = (2)
batch_input_shape = (32, 224, 224, 3)

Pre_Trained_Base_Model = tf.keras.Sequential([
    tf.keras.layers.RandomContrast(0.05),
    tf.keras.layers.RandomFlip("horizontal"),
    #tf.keras.layers.RandomBrightness(0.01),
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.Conv2D(filter_count, kernel_size, strides, padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D((3, 3)),
    tf.keras.layers.BatchNormalization(),

    tf.keras.layers.Conv2D(filter_count, kernel_size, 3, padding='valid', activation='relu'),
    tf.keras.layers.MaxPooling2D((3, 3)),
    tf.keras.layers.BatchNormalization(),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.50),
    tf.keras.layers.Dense(12, activation ='softmax')
])

Pre_Trained_Base_Model.load_weights('Weights/StanfordModel')

opti = tf.keras.optimizers.Adam(learning_rate=0.00001)

Pre_Trained_Base_Model.compile(optimizer=opti,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

Pre_Trained_Base_Model.build(batch_input_shape)
Pre_Trained_Base_Model.summary()

history_base_model = Pre_Trained_Base_Model.fit(np.array(hm_train_images), np.array(hm_train_labels_nr), epochs=15, validation_data=(np.array(hm_val_images), np.array(hm_val_labels_nr)))

validate_loss, validate_acc = Pre_Trained_Base_Model.evaluate(np.array(hm_val_images),  np.array(hm_val_labels_nr), verbose=2)

print(validate_loss)
print(validate_acc)


Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 random_contrast_1 (RandomCo  (32, 224, 224, 3)        0         
 ntrast)                                                         
                                                                 
 random_flip_1 (RandomFlip)  (32, 224, 224, 3)         0         
                                                                 
 random_rotation_1 (RandomRo  (32, 224, 224, 3)        0         
 tation)                                                         
                                                                 
 conv2d_10 (Conv2D)          (32, 112, 112, 32)        2432      
                                                                 
 max_pooling2d_10 (MaxPoolin  (32, 37, 37, 32)         0         
 g2D)                                                            
                                                      

# Optical Flow

In [16]:
filter_count = 32
kernel_size = (5, 5)
strides = (2)
batch_input_shape = (32, 224, 224, 3)

Optical_Flow_Model = tf.keras.Sequential([
    tf.keras.layers.RandomContrast(0.05),
    tf.keras.layers.RandomFlip("horizontal"),
    #tf.keras.layers.RandomBrightness(0.01),
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.Conv2D(filter_count, kernel_size, strides, activation="relu"),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Conv2D(filter_count, (3, 3), 1, activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.50),
    tf.keras.layers.Dense(12, activation ='softmax')
])

opti = tf.keras.optimizers.Adam(learning_rate=0.001)

Optical_Flow_Model.compile(optimizer=opti,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

Optical_Flow_Model.build(batch_input_shape)
Optical_Flow_Model.summary()

history_base_model = Optical_Flow_Model.fit(np.array(hm_train_flow), np.array(hm_train_labels_nr), epochs=15, validation_data=(np.array(hm_val_flow), np.array(hm_val_labels_nr)))

validate_loss, validate_acc = Optical_Flow_Model.evaluate(np.array(hm_val_flow),  np.array(hm_val_labels_nr), verbose=2)

print(validate_loss)
print(validate_acc)


Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 random_contrast_2 (RandomCo  (32, 224, 224, 3)        0         
 ntrast)                                                         
                                                                 
 random_flip_2 (RandomFlip)  (32, 224, 224, 3)         0         
                                                                 
 random_rotation_2 (RandomRo  (32, 224, 224, 3)        0         
 tation)                                                         
                                                                 
 conv2d_12 (Conv2D)          (32, 110, 110, 32)        2432      
                                                                 
 max_pooling2d_12 (MaxPoolin  (32, 55, 55, 32)         0         
 g2D)                                                            
                                                      

In [17]:
Pre_Trained_Base_Model.trainable = False
Optical_Flow_Model.trainable = False

fusionLayers = tf.keras.layers.Add()([
    Pre_Trained_Base_Model.output,
    Optical_Flow_Model.output
])

# for bonus connect with 1x1 convolutionial layer change this to conv layer
fusionLayers = tf.keras.layers.Dense(12, activation='softmax')(fusionLayers)

print(Pre_Trained_Base_Model.input_shape)
print(Optical_Flow_Model.input_shape)

fusionModel = tf.keras.models.Model([
    Pre_Trained_Base_Model.input,
    Optical_Flow_Model.input
], fusionLayers)

opti = tf.keras.optimizers.Adam(learning_rate=0.001)

fusionModel.compile(optimizer=opti, loss=tf.keras.losses.SparseCategoricalCrossentropy(), metrics=['accuracy'])

fusionHistory = fusionModel.fit([hm_train_images,
                                 hm_train_flow],
                                hm_train_labels_nr,
                                validation_data=([hm_val_images, hm_val_flow],
                                                 hm_val_labels_nr),
                                epochs=20)

(None, 224, 224, 3)
(None, 224, 224, 3)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
