# Implementing C3D Model for Video Classification

In [2]:
%load_ext autoreload
%autoreload 2

from tensorflow_docs.vis import embed
from tensorflow import keras
from imutils import paths

import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
import numpy as np
import imageio
import pickle
import glob
import cv2
import sys
import os
import time

from sklearn import metrics
from sklearn.model_selection import KFold, StratifiedKFold
import seaborn as sns

 
# adding video-download folder to the system path
sys.path.insert(0, '/workspace/youtube-humpback-whale-classifier/video-download')
 
# importing read_frames_hdf5 function
from hdf5_data_loading import read_frames_hdf5

#ngc workspace path (where we keep our data)
workspace_path = '/mount/data'

In [3]:
import wandb

# Open a Weights & Biases session for metric logging: authenticate, create a run
# inside the project, then give that run a readable display name.
wandb.login()
run = wandb.init(project="whale-classification-inception")
run.name = "feature-ext-limited-memory-growth"

wandb: Currently logged in as: micheller (epg). Use `wandb login --relogin` to force relogin


In [2]:
# Report how many GPUs TensorFlow can see (0 means no GPU is visible to this kernel).
gpu_count = len(tf.config.list_physical_devices('GPU'))
print("Num GPUs available: ", gpu_count)

Num GPUs available:  4


In [4]:
# Turn on memory growth so TensorFlow allocates GPU memory on demand
# rather than reserving it up front.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # The memory-growth setting has to be identical on every GPU.
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(f"{len(gpus)} Physical GPUs, {len(logical_gpus)} Logical GPUs")
    except RuntimeError as err:
        # Raised when the GPUs were already initialized before this cell ran;
        # memory growth can only be configured beforehand.
        print(err)

4 Physical GPUs, 4 Logical GPUs


2022-07-26 21:32:56.612441: I tensorflow/core/platform/cpu_feature_guard.cc:152] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-07-26 21:33:01.153726: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14649 MB memory:  -> device: 0, name: Tesla V100-SXM2-16GB-N, pci bus id: 0000:85:00.0, compute capability: 7.0
2022-07-26 21:33:01.155976: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 14649 MB memory:  -> device: 1, name: Tesla V100-SXM2-16GB-N, pci bus id: 0000:86:00.0, compute capability: 7.0
2022-07-26 21:33:01.158005: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/repli

## Load Dataset

In [5]:
# Read the table of downloaded videos from the workspace.
metadata_csv = f"{workspace_path}/downloaded_videos.csv"
data = pd.read_csv(metadata_csv)

In [6]:
# Separate the target column (`relevant`) from the remaining columns WITHOUT mutating
# `data`. Using `data.pop('relevant')` here removed the column in place, which made the
# following cell (it reads row['relevant']) and any re-run of this cell raise KeyError.
y = data['relevant']
X = data.drop(columns=['relevant'])

In [4]:
# Build the lookup {clip id: integer `relevant` flag}.
# The clip id is the renamed title with every '_' expanded to '_clip_' and the
# '.mp4' extension stripped.
labels = {}
for title, relevant in zip(data['renamed_title'], data['relevant']):
    clip = title.replace('_', '_clip_').replace('.mp4', '')
    labels[clip] = int(relevant)

In [5]:
from sklearn.model_selection import train_test_split

# `relevant` may already have been removed from `data` (and stored in `y`) by an earlier
# cell; popping it a second time raises KeyError. Only (re)derive X / y while the column
# is still present, and do it without mutating `data` so this cell can be re-run safely.
if 'relevant' in data.columns:
    y = data['relevant']
    X = data.drop(columns=['relevant'])

# Hold out 20% of the videos; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 42)

In [6]:
# Create the partition dict: clip ids of the train split and of the held-out split.

def _to_clip_id(title):
    """Convert a `renamed_title` file name into its clip id ('_' -> '_clip_', drop '.mp4')."""
    return title.replace('_', '_clip_').replace('.mp4', '')


partition = {
    'train': [_to_clip_id(title) for title in X_train.renamed_title.tolist()],
    'validation': [_to_clip_id(title) for title in X_test.renamed_title.tolist()],
}

In [None]:
import numpy as np

from keras.models import Sequential
from DataGenerator import DataGenerator

# Keyword arguments forwarded unchanged to both DataGenerator instances below.
# NOTE(review): what each key means is defined by the project-local DataGenerator class,
# which is not visible in this notebook — confirm against its constructor.
params = {'batch_size': 1, 
          'n_classes': 2,
          'shuffle': False}

# Inputs built in earlier cells:
#   partition -> {'train': [clip ids], 'validation': [clip ids]}
#   labels    -> {clip id: int label}

# One generator per partition; both share the same labels lookup and parameters.
training_generator = DataGenerator(partition['train'], labels, **params)
validation_generator = DataGenerator(partition['validation'], labels, **params)

# Disabled template for training a model directly from the generators.
# NOTE(review): `fit_generator` is deprecated in TF 2.x in favour of `fit` — confirm
# before reviving this snippet.
# # Design model
# model = Sequential()
# [...] # Architecture
# model.compile()

# # Train model on dataset
# model.fit_generator(generator=training_generator,
#                     validation_data=validation_generator,
#                     use_multiprocessing=True,
#                     workers=6)

In [3]:
from cnn import CNN

# Build the frame-level feature extractor from the project-local `cnn` module.
# NOTE(review): 224 is presumably the square input image size (frames are loaded as
# 224x224x3 elsewhere in this notebook) — confirm against CNN.__init__.
ConvNet = CNN(224)
feature_extractor = ConvNet.ResNet50()

In [None]:
# Extract features for the training clips straight from the data generator.
#
# `Model.predict_on_batch` only accepts one in-memory batch and has no `steps` argument,
# so generator input has to go through `Model.predict`. The number of steps follows the
# generator's own clip list and batch size instead of a hard-coded video count, and no
# output buffer is pre-allocated because `predict` returns a fresh array anyway.
# NOTE(review): assumes every batch yielded by DataGenerator already has the
# (batch, 224, 224, 3) layout the extractor expects — confirm against DataGenerator.
n_steps = len(partition['train']) // params['batch_size']

start = time.time()
features = feature_extractor.predict(training_generator, steps = n_steps)
stop = time.time()

print(f"Got features in {stop-start} seconds.")
features.shape

## Load Frames

In [None]:
# Read the frames of every video into one pre-allocated array and time the whole pass.
start = time.time()

N = X.shape[0] #number of videos in our dataset
videos = np.empty((N, 461, 224, 224, 3), dtype=np.float32)
labels = np.empty(N, dtype = np.uint8)

for idx, title in enumerate(X.renamed_title):
    # progress message every 50 videos
    if idx % 50 == 0:
        print(f'Loading frames for video {idx}...')

    clip_id = title.replace("_", "_clip_").replace(".mp4", "")
    # read_frames_hdf5 returns the frames array of shape (461, 224, 224, 3) and the per-frame labels
    clip_frames, clip_frame_labels = read_frames_hdf5(clip_id)

    videos[idx] = clip_frames
    # the label is assigned to the video as a whole, so the first frame's label is enough
    labels[idx] = clip_frame_labels[0]

stop = time.time()
print(f'Done loading frames in {stop-start} seconds.')
videos.shape

Loading frames for video 0...
Loading frames for video 50...
Loading frames for video 100...
Loading frames for video 150...
Loading frames for video 200...
Loading frames for video 250...
Done loading frames in 1168.3442537784576 seconds.


(364, 461, 224, 224, 3)

In [8]:
# Wrap the in-memory videos in a tf.data pipeline that is pinned to the CPU:
# slice per video -> batches of 32 videos -> cache -> prefetch one batch ahead.
with tf.device('/CPU:0'):
    # labelled variant, kept for reference:
    #     dataset = tf.data.Dataset.from_tensor_slices((videos, labels))
    dataset = (
        tf.data.Dataset.from_tensor_slices(videos)
        .batch(32)
        .cache()
        .prefetch(buffer_size=1)
    )

2022-07-26 21:59:09.406830: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 101036802048 exceeds 10% of free system memory.


In [9]:
# Display the dataset's element spec to check the shape and dtype of the batches.
dataset

<PrefetchDataset element_spec=TensorSpec(shape=(None, 461, 224, 224, 3), dtype=tf.float32, name=None)>

## Get Features

In [10]:
# Observed GPU memory use on GPU 0 (16160 MiB total):
#   with memory growth enabled the feature extractor occupies about 700 MiB,
#   without it about 15096 MiB, which nearly exhausts the device.
from cnn import CNN

# Optional CPU placement, left disabled:
# with tf.device('/CPU:0'):
# NOTE(review): 224 is presumably the square input image size — confirm against CNN.__init__.
ConvNet = CNN(224)
feature_extractor = ConvNet.ResNet50()

# Print the layer overview of the extractor.
feature_extractor.summary()

Model: "feature_extractor"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 tf.__operators__.getitem (S  (None, 224, 224, 3)      0         
 licingOpLambda)                                                 
                                                                 
 tf.nn.bias_add (TFOpLambda)  (None, 224, 224, 3)      0         
                                                                 
 resnet50 (Functional)       (None, 2048)              23587712  
                                                                 
Total params: 23,587,712
Trainable params: 23,534,592
Non-trainable params: 53,120
_________________________________________________________________


In [1]:
# Run the feature extractor over the whole tf.data pipeline.
# NOTE(review): the recorded NameError comes from executing this cell (execution count 1)
# before the cell that defines `feature_extractor`; it needs a top-to-bottom run.
# NOTE(review): `dataset` yields batches of shape (None, 461, 224, 224, 3) according to its
# element spec, while the extractor's input layer is (None, 224, 224, 3) according to its
# summary — this call will probably fail on that mismatch as well; confirm, and feed one
# video (461 frames) per batch if so.
results = feature_extractor.predict(dataset)
results

NameError: name 'feature_extractor' is not defined

In [10]:
# Extract features video by video and time the complete pass.
# with tf.device('/CPU:0'):
features = np.empty((N, 461, 2048), dtype=np.float32)
start = time.time()

for idx, clip_frames in enumerate(videos):
    # progress message every 50 videos
    if idx % 50 == 0:
        print(f"Video {idx}...")
    # one video of shape (461, 224, 224, 3) is passed to the extractor as a single batch
    features[idx] = feature_extractor.predict_on_batch(clip_frames)

stop = time.time()
print(f"Finished extracting features from all {len(videos)} videos in {stop-start} seconds.")
features.shape

Video 0...
Video 50...
Video 100...
Video 150...
Video 200...
Video 250...
Video 300...
Video 350...
Finished extracting features from all 364 videos in 450.8314187526703 seconds.


(364, 461, 2048)

## Split Data

In [11]:
# Sanity check before splitting: features were allocated as (N videos, 461 frames, 2048 values).
features.shape

(364, 461, 2048)

In [12]:
from sklearn.model_selection import train_test_split

# 80/20 split of the video table with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 42)

# The row labels of each split are used as positions into `features` / `labels`.
# NOTE(review): this relies on `X` carrying a default 0..N-1 index — confirm.
train_index = X_train.index.tolist()
test_index = X_test.index.tolist()

# Select the feature tensors belonging to each split.
train_features = features[train_index]
test_features = features[test_index]

# Select the matching labels and turn them into column vectors of shape (n_samples, 1).
train_labels = labels[train_index].reshape(-1, 1)
test_labels = labels[test_index].reshape(-1, 1)

In [13]:
# Show the shape of the train and test feature tensors, in that order.
for split_features in (train_features, test_features):
    print(split_features.shape)

(291, 461, 2048)
(73, 461, 2048)


## Train RNN Model

In [14]:
from keras.layers import Dense, Flatten, Conv3D, MaxPooling3D, Dropout, BatchNormalization

from tensorflow.keras import mixed_precision
# Switch the global Keras dtype policy to mixed precision before the models below are built.
# NOTE(review): the policy is global, so it presumably applies to every layer created after
# this cell runs — confirm that this is intended for all later models.
mixed_precision.set_global_policy('mixed_float16')

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPUs will likely run quickly with dtype policy mixed_float16 as they all have compute capability of at least 7.0


In [15]:
# Stacked bidirectional-LSTM classifier on the per-frame features (461 frames x 2048 values).
# recurrent_dropout does not allow training to use cuDNN kernel
features_input = keras.Input((461, 2048))
x = keras.layers.Bidirectional(keras.layers.LSTM(32, return_sequences=True, recurrent_dropout=0.5))(features_input)
x = keras.layers.Bidirectional(keras.layers.LSTM(32, return_sequences=True))(x)
x = keras.layers.Bidirectional(keras.layers.LSTM(32, return_sequences=True, recurrent_dropout=0.5))(x)
x = keras.layers.Bidirectional(keras.layers.LSTM(32))(x)

# 2 units because there are 2 class categories (0, 1).
# The output layer is pinned to float32: with the global 'mixed_float16' policy the softmax
# would otherwise be computed in float16, which is numerically unstable for the
# probabilities and the loss.
output = keras.layers.Dense(2, activation="softmax", dtype="float32")(x)
model = keras.Model(features_input, output)

model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Stop once validation accuracy has not improved by at least 0.01 for 3 epochs and
# roll back to the best weights seen.
my_callbacks = [keras.callbacks.EarlyStopping(monitor="val_accuracy",
                                              patience=3,
                                              mode="max",
                                              min_delta = 0.01,
                                              restore_best_weights=True)]

# 20% of the training videos are held out for validation.
history = model.fit(train_features,
                    train_labels,
                    validation_split = 0.2,
                    epochs = 15,
                    callbacks = my_callbacks,
                    verbose= 1)

print('Done training.')

# Final evaluation on the held-out test split.
loss, accuracy = model.evaluate(test_features, test_labels)
print(f"Test Metrics - Loss: {loss}, Accuracy: {accuracy}")

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Done training.
Test Metrics - Loss: 0.5353636145591736, Accuracy: 0.7397260069847107


In [16]:
# Same stacked bidirectional-LSTM classifier, but without recurrent_dropout so that
# training can use the cuDNN kernel.
features_input = keras.Input((461, 2048))
x = keras.layers.Bidirectional(keras.layers.LSTM(32, return_sequences=True))(features_input)
x = keras.layers.Bidirectional(keras.layers.LSTM(32, return_sequences=True))(x)
x = keras.layers.Bidirectional(keras.layers.LSTM(32, return_sequences=True))(x)
x = keras.layers.Bidirectional(keras.layers.LSTM(32))(x)

# 2 units because there are 2 class categories (0, 1).
# The output layer is pinned to float32: with the global 'mixed_float16' policy the softmax
# would otherwise be computed in float16, which is numerically unstable for the
# probabilities and the loss.
output = keras.layers.Dense(2, activation="softmax", dtype="float32")(x)
model = keras.Model(features_input, output)

model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Stop once validation accuracy has not improved by at least 0.01 for 3 epochs and
# roll back to the best weights seen.
my_callbacks = [keras.callbacks.EarlyStopping(monitor="val_accuracy",
                                              patience=3,
                                              mode="max",
                                              min_delta = 0.01,
                                              restore_best_weights=True)]

# 20% of the training videos are held out for validation.
history = model.fit(train_features,
                    train_labels,
                    validation_split = 0.2,
                    epochs = 15,
                    callbacks = my_callbacks,
                    verbose= 1)

print('Done training.')

# Final evaluation on the held-out test split.
loss, accuracy = model.evaluate(test_features, test_labels)
print(f"Test Metrics - Loss: {loss}, Accuracy: {accuracy}")

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Done training.
Test Metrics - Loss: 0.8408470749855042, Accuracy: 0.6849315166473389


## Trying to Distribute Data + Run RNN Model

In [19]:
# Create a mirrored (multi-GPU) strategy and hand it the batched training set.
strategy = tf.distribute.MirroredStrategy()

# NOTE(review): presumably 91 samples per replica x 4 GPUs — confirm.
global_batch_size = 91*4
train_pairs = tf.data.Dataset.from_tensor_slices((train_features, train_labels))
dataset = train_pairs.batch(global_batch_size)
dist_dataset = strategy.experimental_distribute_dataset(dataset)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')


2022-07-25 23:17:29.071859: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorSliceDataset/_2"
op: "TensorSliceDataset"
input: "Placeholder/_0"
input: "Placeholder/_1"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_FLOAT
      type: DT_UINT8
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 291
  }
}
attr {
  key: "is_files"
  value {
    b: false
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\027TensorSliceDataset:1273"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: 461
        }
        dim {
          size: 2048
        }
      }
      shape {
        dim {
          size: 1
        }
      }
    }
  }
}
experimental_type {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_DATASET
    args {
      type_id: TFT_P

In [17]:
# NOTE(review): this whole cell is disabled. It sketches the distributed variant of the
# training run: the model is built and compiled inside `strategy.scope()`, then trained
# on `dist_dataset`. Verify `steps_per_epoch` against the number of batches in the
# dataset before re-enabling it.
# with strategy.scope():
#     features_input       = keras.Input((461, 2048))
#     x                    = keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences=True))(features_input)
#     x                    = keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences=True))(x)
#     x                    = keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences=True))(x)
#     x                    = keras.layers.Bidirectional(keras.layers.LSTM(128))(x)

#     output               = keras.layers.Dense(2, activation="softmax")(x) #2 bc 2 class categories (0,1)
#     model                = keras.Model(features_input, output)

#     model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# history = model.fit(dist_dataset,
#                     epochs = 15,
#                     steps_per_epoch = 4,
#                     verbose= 1)

# print('Done training.')

# loss, accuracy = model.evaluate(test_features, test_labels)
# print(f"Test Metrics - Loss: {loss}, Accuracy: {accuracy}")

In [14]:
# Log training and validation metrics to W&B, one record per epoch.
# Each wandb.log call advances the run's step counter by default, so the four metrics of
# an epoch are sent in a single call; logging them from four separate loops spreads one
# epoch's metrics over different steps and misaligns the charts.
epoch_metrics = zip(history.history['loss'],
                    history.history['accuracy'],
                    history.history['val_loss'],
                    history.history['val_accuracy'])

for epoch, (train_loss, train_acc, val_loss, val_acc) in enumerate(epoch_metrics):
    wandb.log({'training_loss': train_loss,
               'training_accuracy': train_acc,
               'val_loss': val_loss,
               'val_accuracy': val_acc,
               'epoch': epoch})

print('Done Logging WandB metrics.')

Done Logging WandB metrics.


In [18]:
# wandb.finish()