In [1]:
cd set3

/media/datastorage/Phong/ufc101/set3


In [2]:
from tensorflow_docs.vis import embed
from tensorflow import keras
from imutils import paths

import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
import numpy as np
import imageio
import cv2
import os

In [3]:
IMG_SIZE = 224
BATCH_SIZE = 32
EPOCHS = 20

MAX_SEQ_LENGTH = 40
NUM_FEATURES = 2048

In [4]:
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

print(f"Total videos for training: {len(train_df)}")
print(f"Total videos for testing: {len(test_df)}")

train_df.sample(10)

Total videos for training: 9624
Total videos for testing: 3696


Unnamed: 0,video_name,tag
2768,v_FieldHockeyPenalty_g05_c07.avi,FieldHockeyPenalty
6062,v_PlayingGuitar_g05_c04.avi,PlayingGuitar
231,v_Archery_g08_c02.avi,Archery
5111,v_MilitaryParade_g22_c03.avi,MilitaryParade
8448,v_Swing_g07_c01.avi,Swing
53,v_ApplyEyeMakeup_g09_c05.avi,ApplyEyeMakeup
990,v_Biking_g01_c02.avi,Biking
7083,v_RockClimbingIndoor_g13_c03.avi,RockClimbingIndoor
586,v_BaseballPitch_g02_c03.avi,BaseballPitch
5620,v_PlayingCello_g08_c01.avi,PlayingCello


In [5]:
# The following two methods are taken from this tutorial:
# https://www.tensorflow.org/hub/tutorials/action_recognition_with_tf_hub


def crop_center_square(frame):
    y, x = frame.shape[0:2]
    min_dim = min(y, x)
    start_x = (x // 2) - (min_dim // 2)
    start_y = (y // 2) - (min_dim // 2)
    return frame[start_y : start_y + min_dim, start_x : start_x + min_dim]


def load_video(path, max_frames=0, resize=(IMG_SIZE, IMG_SIZE)):
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = crop_center_square(frame)
            frame = cv2.resize(frame, resize)
            frame = frame[:, :, [2, 1, 0]]
            frames.append(frame)

            if len(frames) == max_frames:
                break
    finally:
        cap.release()
    return np.array(frames)


In [6]:

def build_feature_extractor():
    feature_extractor = keras.applications.ResNet152(
        weights="imagenet",
        include_top=False,
        pooling="avg",
        input_shape=(IMG_SIZE, IMG_SIZE, 3),
    )
    preprocess_input = keras.applications.resnet.preprocess_input

    inputs = keras.Input((IMG_SIZE, IMG_SIZE, 3))
    preprocessed = preprocess_input(inputs)

    outputs = feature_extractor(preprocessed)
    return keras.Model(inputs, outputs, name="feature_extractor")


feature_extractor = build_feature_extractor()

2023-02-01 16:05:50.511151: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-01 16:05:51.104225: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10415 MB memory:  -> device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:02:00.0, compute capability: 6.1


In [5]:
label_processor = keras.layers.StringLookup(
    num_oov_indices=0, vocabulary=np.unique(train_df["tag"])
)
print(label_processor.get_vocabulary())

['ApplyEyeMakeup', 'ApplyLipstick', 'Archery', 'BabyCrawling', 'BalanceBeam', 'BandMarching', 'BaseballPitch', 'Basketball', 'BasketballDunk', 'BenchPress', 'Biking', 'Billiards', 'BlowDryHair', 'BlowingCandles', 'BodyWeightSquats', 'Bowling', 'BoxingPunchingBag', 'BoxingSpeedBag', 'BreastStroke', 'BrushingTeeth', 'CleanAndJerk', 'CliffDiving', 'CricketBowling', 'CricketShot', 'CuttingInKitchen', 'Diving', 'Drumming', 'Fencing', 'FieldHockeyPenalty', 'FloorGymnastics', 'FrisbeeCatch', 'FrontCrawl', 'GolfSwing', 'Haircut', 'HammerThrow', 'Hammering', 'HandstandPushups', 'HandstandWalking', 'HeadMassage', 'HighJump', 'HorseRace', 'HorseRiding', 'HulaHoop', 'IceDancing', 'JavelinThrow', 'JugglingBalls', 'JumpRope', 'JumpingJack', 'Kayaking', 'Knitting', 'LongJump', 'Lunges', 'MilitaryParade', 'Mixing', 'MoppingFloor', 'Nunchucks', 'ParallelBars', 'PizzaTossing', 'PlayingCello', 'PlayingDaf', 'PlayingDhol', 'PlayingFlute', 'PlayingGuitar', 'PlayingPiano', 'PlayingSitar', 'PlayingTabla', 'P

2023-02-02 16:03:25.337968: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-02 16:03:26.548518: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10415 MB memory:  -> device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:02:00.0, compute capability: 6.1


In [8]:
from tqdm import tqdm

def prepare_all_videos(df, root_dir):
    num_samples = len(df)
    video_paths = df["video_name"].values.tolist()
    labels = df["tag"].values
    labels = label_processor(labels[..., None]).numpy()

    # `frame_masks` and `frame_features` are what we will feed to our sequence model.
    # `frame_masks` will contain a bunch of booleans denoting if a timestep is
    # masked with padding or not.
    frame_masks = np.zeros(shape=(num_samples, MAX_SEQ_LENGTH), dtype="bool")
    frame_features = np.zeros(
        shape=(num_samples, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
    )

    # For each video.
    for idx, path in tqdm(enumerate(video_paths)):
        # Gather all its frames and add a batch dimension.
        frames = load_video(os.path.join(root_dir, path))
        frames = frames[None, ...]

        # Initialize placeholders to store the masks and features of the current video.
        temp_frame_mask = np.zeros(shape=(1, MAX_SEQ_LENGTH,), dtype="bool")
        temp_frame_features = np.zeros(
            shape=(1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
        )

        # Extract features from the frames of the current video.
        for i, batch in enumerate(frames):
            video_length = batch.shape[0]
            length = min(MAX_SEQ_LENGTH, video_length)
            for j in range(length):
                temp_frame_features[i, j, :] = feature_extractor.predict(
                    batch[None, j, :]
                )
            temp_frame_mask[i, :length] = 1  # 1 = not masked, 0 = masked

        frame_features[idx,] = temp_frame_features.squeeze()
        frame_masks[idx,] = temp_frame_mask.squeeze()

    return (frame_features, frame_masks), labels


# train_data, train_labels = prepare_all_videos(train_df, "train")
test_data, test_labels = prepare_all_videos(test_df, "test")

# print(f"Frame features in train set: {train_data[0].shape}")
# print(f"Frame masks in train set: {train_data[1].shape}")

0it [00:00, ?it/s]2023-02-01 16:06:05.431316: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8100
2023-02-01 16:06:05.682521: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-02-01 16:06:05.682989: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-02-01 16:06:05.683008: W tensorflow/stream_executor/gpu/asm_compiler.cc:80] Couldn't get ptxas version string: INTERNAL: Couldn't invoke ptxas --version
2023-02-01 16:06:05.684958: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-02-01 16:06:05.685015: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] INTERNAL: Failed to launch ptxas
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.
3696it [2:53:16,  2.81s/it]


In [9]:
train_data, train_labels = prepare_all_videos(train_df, "train")

9624it [7:44:25,  2.90s/it]


In [None]:
# # Utility for our sequence model.
# # Bidirectional

# def get_sequence_model():
#     class_vocab = label_processor.get_vocabulary()

#     frame_features_input = keras.Input((MAX_SEQ_LENGTH, NUM_FEATURES))
#     mask_input = keras.Input((MAX_SEQ_LENGTH,), dtype="bool")

#     # Refer to the following tutorial to understand the significance of using `mask`:
#     # https://keras.io/api/layers/recurrent_layers/gru/
#     x = keras.layers.Bidirectional(keras.layers.LSTM(2048, return_sequences=False,dropout=0.5))(#16, 512
#         frame_features_input, mask=mask_input
#     )
# #     x = keras.layers.LSTM(2048)(x)#8, 256
# #     x = keras.layers.Dropout(0.1)(x)
#     x = keras.layers.Dense(1024)(x)#8, 256
#     x = keras.layers.LeakyReLU()(x)
#     output = keras.layers.Dense(len(class_vocab), activation="softmax")(x)

#     rnn_model = keras.Model([frame_features_input, mask_input], output)

#     rnn_model.compile(
#         loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"]
#     )
#     return rnn_model


# # Utility for running experiments.
# def run_experiment():
#     filepath = "/tmp/video_classifier"
#     checkpoint = keras.callbacks.ModelCheckpoint(
#         filepath, save_weights_only=True, save_best_only=True, verbose=1
#     )

#     seq_model = get_sequence_model()
#     print(seq_model.summary())
    
#     history = seq_model.fit(
#         [train_data[0], train_data[1]],
#         train_labels,
#         validation_split=0.2,
#         epochs=EPOCHS,
#         callbacks=[checkpoint],
#     )

#     seq_model.load_weights(filepath)
#     _, accuracy = seq_model.evaluate([test_data[0], test_data[1]], test_labels)
#     print(f"Test accuracy: {round(accuracy * 100, 2)}%")

#     return history, seq_model


# _, sequence_model = run_experiment()

In [None]:
#40.13% LSTM(2048,dropout=0.1)
#46.47% LSTM(2048,dropout=0.5)
#43.96% LSTM(4096,dropout=0.5)

In [None]:
# class_vocab = label_processor.get_vocabulary()

# frame_features_input = keras.Input((MAX_SEQ_LENGTH, NUM_FEATURES))
# mask_input = keras.Input((MAX_SEQ_LENGTH,), dtype="bool")

# # Refer to the following tutorial to understand the significance of using `mask`:
# # https://keras.io/api/layers/recurrent_layers/gru/
# x = keras.layers.LSTM(2048, return_sequences=True,dropout=0.1)(#16, 512
#     frame_features_input, mask=mask_input
# )
# #     x = keras.layers.LSTM(2048)(x)#8, 256
# #     x = keras.layers.Dropout(0.1)(x)
# x = keras.layers.Dense(1024, activation="relu")(x)#8, 256
# output = keras.layers.Dense(len(class_vocab), activation="softmax")(x)

# rnn_model = keras.Model([frame_features_input, mask_input], output)

# rnn_model.compile(
#     loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"]
# )

# print(rnn_model.summary())

In [None]:
# class_vocab = label_processor.get_vocabulary()

# frame_features_input = keras.Input((MAX_SEQ_LENGTH, NUM_FEATURES))
# mask_input = keras.Input((MAX_SEQ_LENGTH,), dtype="bool")

# # Refer to the following tutorial to understand the significance of using `mask`:
# # https://keras.io/api/layers/recurrent_layers/gru/
# x = keras.layers.LSTM(2048, return_sequences=False,dropout=0.1)(#16, 512
#     frame_features_input, mask=mask_input
# )
# # x = keras.layers.LSTM(2048)(x)#8, 256
# #     x = keras.layers.Dropout(0.1)(x)
# x = keras.layers.Dense(1024, activation="relu")(x)#8, 256
# output = keras.layers.Dense(len(class_vocab), activation="softmax")(x)

# rnn_model = keras.Model([frame_features_input, mask_input], output)

# rnn_model.compile(
#     loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"]
# )

# print(rnn_model.summary())

In [None]:

# def prepare_single_video(frames):
#     frames = frames[None, ...]
#     frame_mask = np.zeros(shape=(1, MAX_SEQ_LENGTH,), dtype="bool")
#     frame_features = np.zeros(shape=(1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32")

#     for i, batch in enumerate(frames):
#         video_length = batch.shape[0]
#         length = min(MAX_SEQ_LENGTH, video_length)
#         for j in range(length):
#             frame_features[i, j, :] = feature_extractor.predict(batch[None, j, :])
#         frame_mask[i, :length] = 1  # 1 = not masked, 0 = masked

#     return frame_features, frame_mask


# def sequence_prediction(path):
#     class_vocab = label_processor.get_vocabulary()

#     frames = load_video(os.path.join("test", path))
#     frame_features, frame_mask = prepare_single_video(frames)
#     probabilities = sequence_model.predict([frame_features, frame_mask])[0]

#     for i in np.argsort(probabilities)[::-1]:
#         print(f"  {class_vocab[i]}: {probabilities[i] * 100:5.2f}%")
#     return frames


# # This utility is for visualization.
# # Referenced from:
# # https://www.tensorflow.org/hub/tutorials/action_recognition_with_tf_hub
# def to_gif(images):
#     converted_images = images.astype(np.uint8)
#     imageio.mimsave("animation.gif", converted_images, fps=10)
#     return embed.embed_file("animation.gif")


# test_video = np.random.choice(test_df["video_name"].values.tolist())
# print(f"Test video path: {test_video}")
# test_frames = sequence_prediction(test_video)
# to_gif(test_frames[:MAX_SEQ_LENGTH])

In [14]:
import pickle

with open('ResNet152_CNNRNN_train_40_set3.pickle', 'wb') as f:
    pickle.dump(train_data, f)

In [15]:
import pickle

with open('ResNet152_CNNRNN_test_40_set3.pickle', 'wb') as f:
    pickle.dump(test_data, f)

In [16]:
import numpy as np
import tensorflow as tf 

# np.save('densenet_train_data.npy', train_data)    # .npy extension is added if not given
# np.save('densenet_test_data.npy', test_data)    # .npy extension is added if not given
np.save('ResNet152_CNNRNN_train_40_labels_set3.npy', train_labels)    # .npy extension is added if not given
np.save('ResNet152_CNNRNN_test_40_labels_set3.npy', test_labels)    # .npy extension is added if not given

In [None]:
# print(train_data[0].shape)
# print(train_data[1].shape)

In [None]:
# print(train_data.type)

In [10]:
# import pickle

# with open('Resnet152_CNNRNN_train_100_set3.pickle', 'wb') as f:
#     pickle.dump(train_data, f)

In [11]:
# import pickle

# with open('Resnet152_CNNRNN_test_100_set3.pickle', 'wb') as f:
#     pickle.dump(test_data, f)

In [12]:
# import numpy as np
# import tensorflow as tf 

# # np.save('densenet_train_data.npy', train_data)    # .npy extension is added if not given
# # np.save('densenet_test_data.npy', test_data)    # .npy extension is added if not given
# np.save('Resnet152_CNNRNN_train_labels_set3.npy', train_labels)    # .npy extension is added if not given
# np.save('Resnet152_CNNRNN_test_labels_set3.npy', test_labels)    # .npy extension is added if not given

In [None]:
# ls -l

In [None]:
# mv Inception_CNNRNN_20.pickle Inception_CNNRNN_train_20.pickle

In [6]:
import numpy as np
import tensorflow as tf 
import pickle

with open('ResNet152_CNNRNN_train_40_set3.pickle', 'rb') as f:
     d_train_data = pickle.load(f)
with open('ResNet152_CNNRNN_test_40_set3.pickle', 'rb') as f:
     d_test_data = pickle.load(f)
        
# d_train_data = np.load('densenet_train_data.npy')
# d_test_data = np.load('densenet_test_data.npy')
d_train_labels = np.load('ResNet152_CNNRNN_train_40_labels_set3.npy')
d_test_labels = np.load('ResNet152_CNNRNN_test_40_labels_set3.npy')

In [8]:
label_processor = keras.layers.StringLookup(
    num_oov_indices=0, vocabulary=np.unique(train_df["tag"])
)
# print(label_processor.get_vocabulary())

def get_sequence_model():
    class_vocab = label_processor.get_vocabulary()

    frame_features_input = keras.Input((MAX_SEQ_LENGTH, NUM_FEATURES))
    mask_input = keras.Input((MAX_SEQ_LENGTH,), dtype="bool")

    # Refer to the following tutorial to understand the significance of using `mask`:
    # https://keras.io/api/layers/recurrent_layers/gru/
    x = keras.layers.LSTM(2048, return_sequences=False,dropout=0.5)(#16, 512
        frame_features_input, mask=mask_input
    )    
#     x = keras.layers.Bidirectional(keras.layers.LSTM(2048, return_sequences=False,dropout=0.5),merge_mode='concat')(#16, 512
#         frame_features_input, mask=mask_input
#     )
#     x = keras.layers.LSTM(2048)(x)#8, 256
#     x = keras.layers.Dropout(0.1)(x)
    x = keras.layers.Dense(1024)(x)#8, 256
    x = keras.layers.GaussianNoise(0.4)(x)
    x = keras.layers.LeakyReLU(0.1)(x)    
    output = keras.layers.Dense(len(class_vocab), activation="softmax")(x)

    rnn_model = keras.Model([frame_features_input, mask_input], output)

#     rnn_model.compile(
#         loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"]
#     )
    return rnn_model

# Utility for running experiments.
def d_run_experiment():
    filepath = "ResNet152_CNNRNN2-set3-seq40"
    checkpoint = keras.callbacks.ModelCheckpoint(
        filepath, save_weights_only=True, save_best_only=True, verbose=1
    )

    seq_model = get_sequence_model()
    print(seq_model.summary())
    
    optimizer = keras.optimizers.SGD(lr=1e-1)
#     optimizer = keras.optimizers.Adadelta()
    
    seq_model.compile(
        loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"]
    )    
    
    history = seq_model.fit(
        [d_train_data[0], d_train_data[1]],
        d_train_labels,
#         validation_split=0.2,
        validation_data=([d_test_data[0], d_test_data[1]],
        d_test_labels),
        epochs=100,
#         epochs=EPOCHS,
        callbacks=[checkpoint],
    )

    seq_model.load_weights(filepath)
    _, accuracy = seq_model.evaluate([d_test_data[0], d_test_data[1]], d_test_labels)
    print(f"Test accuracy: {round(accuracy * 100, 2)}%")

    return history, seq_model


d_run_experiment()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 40, 2048)]   0           []                               
                                                                                                  
 input_4 (InputLayer)           [(None, 40)]         0           []                               
                                                                                                  
 lstm_1 (LSTM)                  (None, 2048)         33562624    ['input_3[0][0]',                
                                                                  'input_4[0][0]']                
                                                                                                  
 dense_2 (Dense)                (None, 1024)         2098176     ['lstm_1[0][0]']           

2023-02-02 17:12:25.850980: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 3153592320 exceeds 10% of free system memory.
2023-02-02 17:12:27.826072: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 3153592320 exceeds 10% of free system memory.


Epoch 1/100
Epoch 1: val_loss improved from inf to 1.76351, saving model to ResNet152_CNNRNN2-set3-seq40
Epoch 2/100
Epoch 2: val_loss improved from 1.76351 to 1.46764, saving model to ResNet152_CNNRNN2-set3-seq40
Epoch 3/100
Epoch 3: val_loss improved from 1.46764 to 1.36142, saving model to ResNet152_CNNRNN2-set3-seq40
Epoch 4/100
Epoch 4: val_loss did not improve from 1.36142
Epoch 5/100
Epoch 5: val_loss improved from 1.36142 to 1.34996, saving model to ResNet152_CNNRNN2-set3-seq40
Epoch 6/100
Epoch 6: val_loss improved from 1.34996 to 1.22006, saving model to ResNet152_CNNRNN2-set3-seq40
Epoch 7/100
Epoch 7: val_loss improved from 1.22006 to 1.21166, saving model to ResNet152_CNNRNN2-set3-seq40
Epoch 8/100
Epoch 8: val_loss did not improve from 1.21166
Epoch 9/100
Epoch 9: val_loss did not improve from 1.21166
Epoch 10/100
Epoch 10: val_loss did not improve from 1.21166
Epoch 11/100
Epoch 11: val_loss did not improve from 1.21166
Epoch 12/100
Epoch 12: val_loss did not improve fro

Epoch 29: val_loss did not improve from 1.21166
Epoch 30/100
Epoch 30: val_loss did not improve from 1.21166
Epoch 31/100
Epoch 31: val_loss did not improve from 1.21166
Epoch 32/100
Epoch 32: val_loss did not improve from 1.21166
Epoch 33/100
Epoch 33: val_loss did not improve from 1.21166
Epoch 34/100
Epoch 34: val_loss did not improve from 1.21166
Epoch 35/100
Epoch 35: val_loss did not improve from 1.21166
Epoch 36/100
Epoch 36: val_loss did not improve from 1.21166
Epoch 37/100
Epoch 37: val_loss did not improve from 1.21166
Epoch 38/100
Epoch 38: val_loss did not improve from 1.21166
Epoch 39/100
Epoch 39: val_loss did not improve from 1.21166
Epoch 40/100
Epoch 40: val_loss did not improve from 1.21166
Epoch 41/100
Epoch 41: val_loss did not improve from 1.21166
Epoch 42/100
Epoch 42: val_loss did not improve from 1.21166
Epoch 43/100
Epoch 43: val_loss did not improve from 1.21166
Epoch 44/100
Epoch 44: val_loss did not improve from 1.21166
Epoch 45/100
Epoch 45: val_loss did n

Epoch 58/100
Epoch 58: val_loss did not improve from 1.21166
Epoch 59/100
Epoch 59: val_loss did not improve from 1.21166
Epoch 60/100
Epoch 60: val_loss did not improve from 1.21166
Epoch 61/100
Epoch 61: val_loss did not improve from 1.21166
Epoch 62/100
Epoch 62: val_loss did not improve from 1.21166
Epoch 63/100
Epoch 63: val_loss did not improve from 1.21166
Epoch 64/100
Epoch 64: val_loss did not improve from 1.21166
Epoch 65/100
Epoch 65: val_loss did not improve from 1.21166
Epoch 66/100
Epoch 66: val_loss did not improve from 1.21166
Epoch 67/100
Epoch 67: val_loss did not improve from 1.21166
Epoch 68/100
Epoch 68: val_loss did not improve from 1.21166
Epoch 69/100
Epoch 69: val_loss did not improve from 1.21166
Epoch 70/100
Epoch 70: val_loss did not improve from 1.21166
Epoch 71/100
Epoch 71: val_loss did not improve from 1.21166
Epoch 72/100
Epoch 72: val_loss did not improve from 1.21166
Epoch 73/100
Epoch 73: val_loss did not improve from 1.21166
Epoch 74/100
Epoch 74: v

Epoch 87/100
Epoch 87: val_loss did not improve from 1.21166
Epoch 88/100
Epoch 88: val_loss did not improve from 1.21166
Epoch 89/100
Epoch 89: val_loss did not improve from 1.21166
Epoch 90/100
Epoch 90: val_loss did not improve from 1.21166
Epoch 91/100
Epoch 91: val_loss did not improve from 1.21166
Epoch 92/100
Epoch 92: val_loss did not improve from 1.21166
Epoch 93/100
Epoch 93: val_loss did not improve from 1.21166
Epoch 94/100
Epoch 94: val_loss did not improve from 1.21166
Epoch 95/100
Epoch 95: val_loss did not improve from 1.21166
Epoch 96/100
Epoch 96: val_loss did not improve from 1.21166
Epoch 97/100
Epoch 97: val_loss did not improve from 1.21166
Epoch 98/100
Epoch 98: val_loss did not improve from 1.21166
Epoch 99/100
Epoch 99: val_loss did not improve from 1.21166
Epoch 100/100
Epoch 100: val_loss did not improve from 1.21166
Test accuracy: 70.81%


(<keras.callbacks.History at 0x7ff9e8153cd0>,
 <keras.engine.functional.Functional at 0x7ff9d06894f0>)

In [None]:
#lr=0.1 = 0.7577
#lr=0.01 = 0.7746
#lr=0.001 = 0.7635

In [None]:
# label_processor = keras.layers.StringLookup(
#     num_oov_indices=0, vocabulary=np.unique(train_df["tag"])
# )
# # print(label_processor.get_vocabulary())

# def get_sequence_model():
#     class_vocab = label_processor.get_vocabulary()

#     frame_features_input = keras.Input((MAX_SEQ_LENGTH, NUM_FEATURES))
#     mask_input = keras.Input((MAX_SEQ_LENGTH,), dtype="bool")

#     # Refer to the following tutorial to understand the significance of using `mask`:
#     # https://keras.io/api/layers/recurrent_layers/gru/
#     x = keras.layers.LSTM(2048, return_sequences=False,dropout=0.5)(#16, 512
#         frame_features_input, mask=mask_input
#     )    
# #     x = keras.layers.Bidirectional(keras.layers.LSTM(2048, return_sequences=False,dropout=0.5),merge_mode='concat')(#16, 512
# #         frame_features_input, mask=mask_input
# #     )
# #     x = keras.layers.LSTM(2048)(x)#8, 256
# #     x = keras.layers.Dropout(0.1)(x)
#     x = keras.layers.Dense(1024)(x)#8, 256
#     x = keras.layers.GaussianNoise(0.4)(x)
#     x = keras.layers.LeakyReLU(0.1)(x)    
#     output = keras.layers.Dense(len(class_vocab), activation="softmax")(x)

#     rnn_model = keras.Model([frame_features_input, mask_input], output)

# #     rnn_model.compile(
# #         loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"]
# #     )
#     return rnn_model

# # Utility for running experiments.
# def d_run_experiment():
#     filepath = "/tmp/video_classifier"
#     checkpoint = keras.callbacks.ModelCheckpoint(
#         filepath, save_weights_only=True, save_best_only=True, verbose=1
#     )

#     seq_model = get_sequence_model()
#     print(seq_model.summary())
    

#     lr_schedule = keras.callbacks.LearningRateScheduler(
#                   lambda epoch: 1e-6 * 10**(4*epoch / 10))
    
#     optimizer = keras.optimizers.SGD(lr=1e-3)
    
#     seq_model.compile(
#         loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"]
#     )    
# #     seq_model.compile(optimizer=optimizer,
# #                       loss='categorical_crossentropy',
# #                      metrics=['accuracy'])     
    
#     history = seq_model.fit(
#         [d_train_data[0], d_train_data[1]],
#         d_train_labels,
# #         validation_split=0.2,
#         validation_data=([d_test_data[0], d_test_data[1]],
#         d_test_labels),
#         epochs=20,
# #         epochs=EPOCHS,
#         callbacks=[lr_schedule,checkpoint],
#     )

#     seq_model.load_weights(filepath)
#     _, accuracy = seq_model.evaluate([d_test_data[0], d_test_data[1]], d_test_labels)
#     print(f"Test accuracy: {round(accuracy * 100, 2)}%")

#     return history, seq_model


# his, seq = d_run_experiment()


In [None]:
# from matplotlib import pyplot as plt

# plt.semilogx(his.history['lr'], his.history['accuracy'])
# plt.axis([1e-6, 1, 0, 1])
# plt.xlabel('lr')
# plt.ylabel('accuracy')
# plt.show()

In [None]:
import numpy as np

MAX_SEQ_LENGTH = 50#override max sequence length
#max_seq=40
# copy_indices=[0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38]
# copy_indices=[0,4,8,12,16,20,24,28,32,36]
# copy_indices=[0,5,10,15,20,25,30,35]
# copy_indices=[0,8,16,24,32]
# copy_indices=[0,19]
# copy_indices=[0]
# max_seq=100
# # copy_indices=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,
#               26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,
#               51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,
#               76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99]
copy_indices=[0,2,4,6,8,10,12,14,16,18,
              20,22,24,26,28,30,32,34,36,38,
              40,42,44,46,48,50,52,54,56,58,
              60,62,64,66,68,70,72,74,76,78,
              80,82,84,86,88,90,92,94,96,98]
# copy_indices=[0,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95]
# copy_indices=[0,10,20,30,40,50,60,70,80,90]
# copy_indices=[0,20,40,60,80]
# copy_indices=[0,25,50,75]
# copy_indices=[0,50]
d_train_data2_1 = np.copy(d_train_data[0][:,copy_indices, ])
d_train_data2_2 = np.copy(d_train_data[1][:,copy_indices, ])
d_train_data2 = (d_train_data2_1,d_train_data2_2)

d_test_data2_1 = np.copy(d_test_data[0][:,copy_indices, ])
d_test_data2_2 = np.copy(d_test_data[1][:,copy_indices, ])
d_test_data2 = (d_test_data2_1,d_test_data2_2)
# print(d_train_data2.shape)

label_processor = keras.layers.StringLookup(
    num_oov_indices=0, vocabulary=np.unique(train_df["tag"])
)
# print(label_processor.get_vocabulary())

def get_sequence_model():
    class_vocab = label_processor.get_vocabulary()

    frame_features_input = keras.Input((MAX_SEQ_LENGTH, NUM_FEATURES))
    mask_input = keras.Input((MAX_SEQ_LENGTH,), dtype="bool")

    # Refer to the following tutorial to understand the significance of using `mask`:
    # https://keras.io/api/layers/recurrent_layers/gru/
    x = keras.layers.LSTM(2048, return_sequences=False,dropout=0.5)(#16, 512
        frame_features_input, mask=mask_input
    )    
#     x = keras.layers.Bidirectional(keras.layers.LSTM(2048, return_sequences=False,dropout=0.5),merge_mode='concat')(#16, 512
#         frame_features_input, mask=mask_input
#     )
#     x = keras.layers.LSTM(2048)(x)#8, 256
#     x = keras.layers.Dropout(0.1)(x)
    x = keras.layers.Dense(1024)(x)#8, 256
    x = keras.layers.GaussianNoise(0.4)(x)
    x = keras.layers.LeakyReLU(0.1)(x)    
    output = keras.layers.Dense(len(class_vocab), activation="softmax")(x)

    rnn_model = keras.Model([frame_features_input, mask_input], output)

#     rnn_model.compile(
#         loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"]
#     )
    return rnn_model

# Utility for running experiments.
def d_run_experiment():
    filepath = "ResNet152_CNNRNN2-set3"
    checkpoint = keras.callbacks.ModelCheckpoint(
        filepath, save_weights_only=True, save_best_only=True, verbose=1
    )

    seq_model = get_sequence_model()
    print(seq_model.summary())
    
    optimizer = keras.optimizers.SGD(lr=1e-2)
#     optimizer = keras.optimizers.Adadelta()
    
    seq_model.compile(
        loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"]
    )    
    
    history = seq_model.fit(
        [d_train_data2[0], d_train_data2[1]],
        d_train_labels,
#         validation_split=0.2,
        validation_data=([d_test_data2[0], d_test_data2[1]],
        d_test_labels),
        epochs=100,
#         epochs=EPOCHS,
        callbacks=[checkpoint],
    )

    seq_model.load_weights(filepath)
    _, accuracy = seq_model.evaluate([d_test_data2[0], d_test_data2[1]], d_test_labels)
    print(f"Test accuracy: {round(accuracy * 100, 2)}%")

    return history, seq_model


d_run_experiment()

Model: "model_11"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_25 (InputLayer)          [(None, 50, 2048)]   0           []                               
                                                                                                  
 input_26 (InputLayer)          [(None, 50)]         0           []                               
                                                                                                  
 lstm_11 (LSTM)                 (None, 2048)         33562624    ['input_25[0][0]',               
                                                                  'input_26[0][0]']               
                                                                                                  
 dense_22 (Dense)               (None, 1024)         2098176     ['lstm_11[0][0]']         

2023-01-21 23:45:13.957826: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 3941990400 exceeds 10% of free system memory.
2023-01-21 23:45:54.080650: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 3941990400 exceeds 10% of free system memory.


Epoch 1/100
Epoch 1: val_loss improved from inf to 1.47586, saving model to ResNet152_CNNRNN2-set3
Epoch 2/100
Epoch 2: val_loss improved from 1.47586 to 1.02317, saving model to ResNet152_CNNRNN2-set3
Epoch 3/100
Epoch 3: val_loss improved from 1.02317 to 0.90648, saving model to ResNet152_CNNRNN2-set3
Epoch 4/100
Epoch 4: val_loss improved from 0.90648 to 0.87458, saving model to ResNet152_CNNRNN2-set3
Epoch 5/100
Epoch 5: val_loss did not improve from 0.87458
Epoch 6/100
Epoch 6: val_loss improved from 0.87458 to 0.84093, saving model to ResNet152_CNNRNN2-set3
Epoch 7/100
Epoch 7: val_loss improved from 0.84093 to 0.82932, saving model to ResNet152_CNNRNN2-set3
Epoch 8/100
Epoch 8: val_loss did not improve from 0.82932
Epoch 9/100
Epoch 9: val_loss improved from 0.82932 to 0.82305, saving model to ResNet152_CNNRNN2-set3
Epoch 10/100
Epoch 10: val_loss did not improve from 0.82305
Epoch 11/100
Epoch 11: val_loss improved from 0.82305 to 0.82089, saving model to ResNet152_CNNRNN2-set3

Epoch 58: val_loss did not improve from 0.82089
Epoch 59/100
Epoch 59: val_loss did not improve from 0.82089
Epoch 60/100
Epoch 60: val_loss did not improve from 0.82089
Epoch 61/100
Epoch 61: val_loss did not improve from 0.82089
Epoch 62/100
Epoch 62: val_loss did not improve from 0.82089
Epoch 63/100
Epoch 63: val_loss did not improve from 0.82089
Epoch 64/100
Epoch 64: val_loss did not improve from 0.82089
Epoch 65/100
Epoch 65: val_loss did not improve from 0.82089
Epoch 66/100
Epoch 66: val_loss did not improve from 0.82089
Epoch 67/100
Epoch 67: val_loss did not improve from 0.82089
Epoch 68/100
Epoch 68: val_loss did not improve from 0.82089
Epoch 69/100
Epoch 69: val_loss did not improve from 0.82089
Epoch 70/100
Epoch 70: val_loss did not improve from 0.82089
Epoch 71/100
Epoch 71: val_loss did not improve from 0.82089
Epoch 72/100
Epoch 72: val_loss did not improve from 0.82089
Epoch 73/100
Epoch 73: val_loss did not improve from 0.82089
Epoch 74/100
Epoch 74: val_loss did n

Epoch 87/100
Epoch 87: val_loss did not improve from 0.82089
Epoch 88/100
Epoch 88: val_loss did not improve from 0.82089
Epoch 89/100
Epoch 89: val_loss did not improve from 0.82089
Epoch 90/100
Epoch 90: val_loss did not improve from 0.82089
Epoch 91/100
Epoch 91: val_loss did not improve from 0.82089
Epoch 92/100
Epoch 92: val_loss did not improve from 0.82089
Epoch 93/100
Epoch 93: val_loss did not improve from 0.82089
Epoch 94/100
Epoch 94: val_loss did not improve from 0.82089
Epoch 95/100
Epoch 95: val_loss did not improve from 0.82089
Epoch 96/100
Epoch 96: val_loss did not improve from 0.82089
Epoch 97/100
Epoch 97: val_loss did not improve from 0.82089
Epoch 98/100
Epoch 98: val_loss did not improve from 0.82089
Epoch 99/100
Epoch 99: val_loss did not improve from 0.82089
Epoch 100/100
Epoch 100: val_loss did not improve from 0.82089


In [None]:
#max_seq=40
#seq=1 = 75.16
#seq=2 = 77.87
#seq=5 = 78.49
#seq=8 = 78.60
#seq=10 = 78.90
#seq=20 = 77.46

#max_seq=100
#seq=2 = 78.54
#seq=4 = 80.36
#seq=5 = 80.68
#seq=10 = 80.74
#seq=20 = 80.68
#seq=50 = 80.01

In [15]:
for i in range(0,100):
    print(str(i)+',', end='')
#     print(',')

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,