# Right Left up down 1200 spatial 30

## Setup

In [2]:
import os
from pathlib import Path
import pickle

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
import pandas as pd
from sklearn.metrics import precision_recall_fscore_support

from frame_generator import FrameGenerator
from model import create_model, visualise_model

In [3]:
DATA_PATH = "data_30_right_left_up_down_1200"
NOTEBOOK_NAME = "2_2d_plus_1_rlud_sp30_1200_n_step_2"
RESULTS_PATH = DATA_PATH + "/" + NOTEBOOK_NAME

# PARAMS
# number of frames taken from each video
n_frames = 36
# number of frames skipped from each video
frame_step = 2
batch_size = 8
# Define the dimensions of one frame in the set of frames created
HEIGHT = 224
WIDTH = 224

assert(os.path.isdir(DATA_PATH ))

if not os.path.isdir(RESULTS_PATH):
    os.mkdir(RESULTS_PATH)

index_df = pd.read_csv(f'{DATA_PATH}/indx_df.csv')

print(f"classes being compared {index_df['category'].unique()}")

classes being compared ['Pushing something from left to right'
 'Pushing something from right to left' 'Moving something up'
 'Moving something down']


In [4]:
from validate_files import get_video_lengths

lengths_array = get_video_lengths(f"./{DATA_PATH}/test/")
np.append(lengths_array, get_video_lengths(f"./{DATA_PATH}/train/"))
np.append(lengths_array, get_video_lengths(f"./{DATA_PATH}/validation/"))

# Calculate average
average = np.mean(lengths_array)

# Calculate maximum
maximum = np.max(lengths_array)

# Calculate minimum
minimum = np.min(lengths_array)

print("Average:", average)
print("Maximum:", maximum)
print("Minimum:", minimum)

Average: 46.21875
Maximum: 71
Minimum: 30


## Preprocess video data

Load something something data tf.data.Dataset

In [4]:
subset_paths = {
    "test": Path(f'{DATA_PATH}/test'),
    "train": Path(f'{DATA_PATH}/train'),
    "val": Path(f'{DATA_PATH}/validation'),
}

output_signature = (
    tf.TensorSpec(shape = (None, None, None, 3), dtype = tf.float32),
    tf.TensorSpec(shape = (), dtype = tf.int16)
)

train_ds = tf.data.Dataset.from_generator(
    FrameGenerator(subset_paths['train'],
        n_frames=n_frames,
        index_df=index_df,
        frame_step=frame_step,
        training=True
    ),
    output_signature = output_signature
)
# Batch the data
train_ds = train_ds.batch(batch_size)

val_ds = tf.data.Dataset.from_generator(
    FrameGenerator(
        subset_paths['val'],
        n_frames=n_frames,
        index_df=index_df,
        frame_step=frame_step,
        training=False
    ),
    output_signature = output_signature
)
# Batch the data
val_ds = val_ds.batch(batch_size)

test_ds = tf.data.Dataset.from_generator(
    FrameGenerator(
        subset_paths['test'],
        n_frames=n_frames,
        index_df=index_df,
        frame_step=frame_step,
        training=False
    ),
    output_signature = output_signature
)

print(type(test_ds))

# Batch the data
test_ds = test_ds.batch(batch_size)

print(type(test_ds))


<class 'tensorflow.python.data.ops.dataset_ops.FlatMapDataset'>
<class 'tensorflow.python.data.ops.dataset_ops.BatchDataset'>


## Model Creation

In [5]:
model = create_model(n_frames=n_frames, height=HEIGHT, width=WIDTH)

## Validate Model and Data

In [6]:
# testing tensor is setup correct
iter(train_ds)

<tensorflow.python.data.ops.iterator_ops.OwnedIterator at 0x7f2bc8588ee0>

## Build Model

In [7]:
frames, label = next(iter(train_ds))

In [8]:
model.build(frames)

## Visualise Model

In [9]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 36, 224, 22  0           []                               
                                4, 3)]                                                            
                                                                                                  
 conv2_plus1d (Conv2Plus1D)     (None, 36, 224, 224  3152        ['input_1[0][0]']                
                                , 16)                                                             
                                                                                                  
 batch_normalization (BatchNorm  (None, 36, 224, 224  64         ['conv2_plus1d[0][0]']           
 alization)                     , 16)                                                         

In [10]:
visualise_model(
    model=model,
    file_path=DATA_PATH + "/model_summary.dot"
)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.


## Load the Model

Using BinaryCrossentropy as it is more effective for binary data

from_logits is false because final layer includes a sigmoid activation,

In [11]:
previous_runs = 5

In [12]:
previous_runs += 1

model.compile(loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              optimizer = keras.optimizers.Adam(learning_rate = 0.0001),
              metrics=[
                    'accuracy',
                    # tf.keras.metrics.Precision(),
                    # tf.keras.metrics.Recall()
                    # tf.keras.metrics.Precision(class_id=0, name='precision_neg'),
                    # tf.keras.metrics.Precision(class_id=1, name='precision_pos'),
                    # tf.keras.metrics.Recall(class_id=0, name='recall_neg'),
                    # tf.keras.metrics.Recall(class_id=1, name='recall_pos')
                ]
            )

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    RESULTS_PATH + '/model-runs-' + str(previous_runs) + '-cp-{epoch:02d}-{val_loss:.2f}.ckpt',
    save_best_only=True,  # Save only the best model based on a monitored metric (e.g., val_loss), will only replace saved value if it is better
    monitor='val_loss',
    mode='min',  # 'min' for loss, 'max' for accuracy
    save_weights_only=True
)


early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,  # Number of epochs with no improvement after which training will be stopped
    restore_best_weights=True
)

In [13]:
latest = tf.train.latest_checkpoint(RESULTS_PATH)

if latest is not None:
    print(f"loading model from weights: {latest}")
    model.load_weights(latest)

loading model from weights: data_30_right_left_up_down_1200/2_2d_plus_1_rlud_sp30_1200_n_step_2/model-runs-6-cp-03-0.27.ckpt


## Train the Model

In [None]:
previously_run_epochs = 0 + 6 + 12 + 12 + 5 + 11
history = model.fit(
        x=train_ds,
        epochs = 50 - previously_run_epochs,
        validation_data=val_ds,
        callbacks=[checkpoint_callback, early_stopping],
    )

Epoch 1/4


2023-12-27 10:20:56.744254: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 173408256 exceeds 10% of free system memory.


      1/Unknown - 17s 17s/step - loss: 0.1519 - accuracy: 0.8750

2023-12-27 10:21:06.976382: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 173408256 exceeds 10% of free system memory.


      2/Unknown - 20s 3s/step - loss: 0.1382 - accuracy: 0.9375 

2023-12-27 10:21:09.940478: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 173408256 exceeds 10% of free system memory.


      3/Unknown - 23s 3s/step - loss: 0.1165 - accuracy: 0.9583

2023-12-27 10:21:13.147074: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 173408256 exceeds 10% of free system memory.


    223/Unknown - 678s 3s/step - loss: 0.2325 - accuracy: 0.9148

## Analyse results

In [14]:
import numpy as np
from scipy.special import softmax
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

true_labels = []
predictions = []

# Iterate over the batched test dataset
for batch in test_ds:
    x, y = batch  # x is the batch of features, y is the batch of labels
    true_labels.extend(y.numpy())  # Store true labels
    preds = model.predict(x)  # Generate predictions for the batch
    preds = softmax(preds, axis=1)  # Apply softmax to convert logits to probabilities
    preds = np.argmax(preds, axis=1)  # Get the class with the highest probability
    predictions.extend(preds)

# Convert lists to numpy arrays
true_labels = np.array(true_labels)
predictions = np.array(predictions)



In [15]:
fg = FrameGenerator(
    subset_paths['test'],
    n_frames=n_frames,
    index_df=index_df,
    frame_step=frame_step,
    training=False
)
class_id_value = {
    fg.class_ids_for_name[x]: x for x in fg.class_ids_for_name.keys()
 }

# Convert lists to numpy arrays if they aren't already
true_labels = np.array(true_labels)
predictions = np.array(predictions)

# Calculate accuracy
accuracy = accuracy_score(true_labels, predictions)

# Calculate precision, recall, and F1-score for each class
precision, recall, f1_score, _ = precision_recall_fscore_support(true_labels, predictions, average=None)

# Print accuracy and F1-scores for each class
print(f"Overall Accuracy: {accuracy}")
for i, (prec, rec, f1) in enumerate(zip(precision, recall, f1_score)):
    print(f"Class {class_id_value[i]}: Precision: {prec}, Recall: {rec}, F1 Score: {f1}")

Overall Accuracy: 0.8729166666666667
Class Pushing something from left to right: Precision: 0.7905405405405406, Recall: 0.975, F1 Score: 0.8731343283582089
Class Pushing something from right to left: Precision: 0.9401709401709402, Recall: 0.9166666666666666, F1 Score: 0.9282700421940928
Class Moving something up: Precision: 0.8818181818181818, Recall: 0.8083333333333333, F1 Score: 0.8434782608695653
Class Moving something down: Precision: 0.9047619047619048, Recall: 0.7916666666666666, F1 Score: 0.8444444444444444
