<a href="https://colab.research.google.com/github/net39/ML-anomaly-detection/blob/main/P2_06_CNN2_vid.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

CNN-LSTM Human Action Recognition Pipeline

# Install Libraries and Google Access

In [2]:
from google.colab import auth
auth.authenticate_user()

from google.colab import drive
drive.mount('/content/drive')

from google.colab import files
files.upload()

from google.cloud import storage
import os
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/content/p2-anomaly-c4545180e308.json'

storage_client = storage.Client()
bucket_name = 'p2-anomaly'
bucket = storage_client.bucket(bucket_name)

Mounted at /content/drive


Saving p2-anomaly-c4545180e308.json to p2-anomaly-c4545180e308.json


In [3]:
# Install GCS and other dependencies
!pip install google-cloud-storage --quiet
!pip install opencv-python-headless --quiet
!pip install joblib --quiet
!pip install tensorflow --quiet
!pip install scikit-learn
!pip install tqdm
!pip install pandas

# Imports
import os
import cv2
import numpy as np
import pandas as pd
import json
import time
import shutil
from datetime import datetime
from tqdm import tqdm
import joblib

from collections import defaultdict
from tensorflow.keras.applications import VGG19
from tensorflow.keras.applications.vgg19 import preprocess_input
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM, TimeDistributed, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import classification_report, confusion_matrix





In [4]:
# Hyperparameters
IMG_SIZE = (224, 224)
IMG_CHANNELS = 3
SEQUENCE_LENGTH = 5
BATCH_SIZE = 16
EPOCHS = 20

In [5]:
if not os.path.exists('/content/frame_labels.csv'):
    blob = bucket.blob('processed/frame_labels.csv')
    blob.download_to_filename('/content/frame_labels.csv')
    print("Downloaded frame_labels.csv from GCS.")
else:
    print("frame_labels.csv already exists locally.")

# Load Labels CSV
label_df = pd.read_csv('/content/frame_labels.csv')
unique_actions = label_df['action'].unique()

# Encode Labels
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(label_df['action'])
onehot_encoder = OneHotEncoder(sparse_output=False)
onehot_encoded = onehot_encoder.fit_transform(integer_encoded.reshape(-1, 1))


Downloaded frame_labels.csv from GCS.


In [6]:
image_dir = '/content/preprocessed-mcfd/'

# Check and download images if folder doesn't exist
if not os.path.exists(image_dir):
    os.makedirs(image_dir, exist_ok=True)
    blobs = bucket.list_blobs(prefix='processed/preprocessed-mcfd/')
    for blob in blobs:
        if blob.name.endswith(('.jpg', '.png')):
            local_path = os.path.join('/content', blob.name.split('processed/')[1])
            os.makedirs(os.path.dirname(local_path), exist_ok=True)
            blob.download_to_filename(local_path)
    print("Downloaded all preprocessed frames from GCS.")
else:
    print("Preprocessed-mcfd folder already exists locally.")


Downloaded all preprocessed frames from GCS.


In [7]:
# Load Images and Labels
image_dir = '/content/preprocessed-mcfd/'  # local path after downloading from GCS
X = []
Y = []

missing_count = 0
for idx, row in tqdm(label_df.iterrows(), total=label_df.shape[0]):
    img_path = os.path.join('/content', row['filename'].replace('processed/', ''))
    if os.path.exists(img_path):
        img = cv2.imread(img_path)
        img = cv2.resize(img, IMG_SIZE)
        X.append(img)
        Y.append(onehot_encoded[idx])
    else:
        missing_count += 1

print(f"Total missing images: {missing_count}")

X = np.array(X)
Y = np.array(Y)


100%|██████████| 1000/1000 [00:00<00:00, 1221.39it/s]

Total missing images: 0





In [8]:
print(f"Total images loaded: {len(X)}")
print(f"Total labels loaded: {len(Y)}")
print(f"Unique actions: {unique_actions}")

Total images loaded: 1000
Total labels loaded: 1000
Unique actions: ['walking' 'empty_room' 'bending' 'lying_down' 'fallen' 'sitting'
 'standing' 'crouch_down' 'kneeling' 'unidentified' 'carry_item'
 'housekeeping' 'gathering' 'occlusion' 'sleeping' 'using_laptop'
 'crawling']


In [9]:
import glob

image_files = glob.glob('/content/preprocessed-mcfd/**/*.jpg', recursive=True)
print(f"Total images in local preprocessed-mcfd/: {len(image_files)}")

Total images in local preprocessed-mcfd/: 8795


In [10]:
label_counts = np.sum(Y, axis=0)
for label, count in zip(label_encoder.classes_, label_counts):
    print(f"{label}: {count}")

bending: 30.0
carry_item: 15.0
crawling: 1.0
crouch_down: 38.0
empty_room: 245.0
fallen: 202.0
gathering: 15.0
housekeeping: 36.0
kneeling: 13.0
lying_down: 17.0
occlusion: 10.0
sitting: 70.0
sleeping: 4.0
standing: 132.0
unidentified: 14.0
using_laptop: 1.0
walking: 157.0


In [15]:
min_count = 2
valid_class_indices = [i for i, c in enumerate(label_counts) if c >= min_count]
print("Valid classes:", [label_encoder.classes_[i] for i in valid_class_indices])


Valid classes: ['bending', 'carry_item', 'crouch_down', 'empty_room', 'fallen', 'gathering', 'housekeeping', 'kneeling', 'lying_down', 'occlusion', 'sitting', 'sleeping', 'standing', 'unidentified', 'walking']


In [16]:
X_filtered = []
Y_filtered = []

for i in range(len(Y)):
    if any(Y[i][j] == 1 and j in valid_class_indices for j in range(len(Y[i]))):
        filtered_label = Y[i][valid_class_indices]
        X_filtered.append(X[i])
        Y_filtered.append(filtered_label)

X = np.array(X_filtered)
Y = np.array(Y_filtered)

In [18]:
min_count = 2
mask = np.sum(Y, axis=1).astype(bool)
counts = np.sum(Y, axis=0)
valid_classes = [i for i, c in enumerate(counts) if c >= min_count]

label_counts_filtered = np.sum(Y, axis=0)
for label, count in zip(valid_classes, label_counts_filtered):
    print(f"{label}: {count:.1f}")

print(f"\n New total images: {len(X)}")
print(f" New total labels: {len(Y)}")

0: 30.0
1: 15.0
2: 38.0
3: 245.0
4: 202.0
5: 15.0
6: 36.0
7: 13.0
8: 17.0
9: 10.0
10: 70.0
11: 4.0
12: 132.0
13: 14.0
14: 157.0

 New total images: 998
 New total labels: 998


In GCS CLI - gsutil ls gs://p2-anomaly/processed/preprocessed-mcfd/** | wc -l
there are **8795 images in total across all chutes and cams**.

Only 1000 of them are annotated in frame_labels.csv and the rest are unlabelled



In [41]:
# Train-Validation-Test Split

# First split
X_train, X_temp, Y_train, Y_temp = train_test_split(X, Y, test_size=0.3, random_state=42, stratify=Y)

# Check class counts in Y_temp
temp_label_counts = np.sum(Y_temp, axis=0)
print("Class counts in Y_temp before second split:")
for i, count in enumerate(temp_label_counts):
    print(f"Class {i}: {count}")


# Filter out classes with only one sample in Y_temp for the second split
valid_temp_classes_indices = [i for i, count in enumerate(temp_label_counts) if count >= 2]

X_temp_filtered = []
Y_temp_filtered = []

# This filtering assumes Y_temp is already one-hot encoded
for i in range(len(Y_temp)):
    # Check if the sample belongs to any of the valid temp classes
    if any(Y_temp[i][j] == 1 for j in valid_temp_classes_indices):
        # Create a new one-hot encoded vector with only the valid classes
        # We need to map the original class indices to the new filtered indices
        original_class_index = np.argmax(Y_temp[i])
        if original_class_index in valid_temp_classes_indices:
            X_temp_filtered.append(X_temp[i])
            # Create a new one-hot vector for the filtered classes
            filtered_onehot = np.zeros(len(valid_temp_classes_indices))
            new_index = valid_temp_classes_indices.index(original_class_index)
            filtered_onehot[new_index] = 1
            Y_temp_filtered.append(filtered_onehot)

X_temp_filtered = np.array(X_temp_filtered)
Y_temp_filtered = np.array(Y_temp_filtered)


# Second split with filtered data
X_val, X_test, Y_val, Y_test = train_test_split(X_temp_filtered, Y_temp_filtered, test_size=0.5, random_state=42, stratify=Y_temp_filtered)


print(f"\n Train set: {len(X_train)}, Val set: {len(X_val)}, Test set: {len(X_test)}")

Class counts in Y_temp before second split:
Class 0: 9.0
Class 1: 4.0
Class 2: 11.0
Class 3: 74.0
Class 4: 61.0
Class 5: 5.0
Class 6: 11.0
Class 7: 4.0
Class 8: 5.0
Class 9: 3.0
Class 10: 21.0
Class 11: 1.0
Class 12: 40.0
Class 13: 4.0
Class 14: 47.0

 Train set: 698, Val set: 149, Test set: 150


In [42]:
print("Train classes:", Y_train.shape[1])
print("Val classes:", Y_val.shape[1])

Train classes: 15
Val classes: 14


In [43]:
def pad_labels(Y, target_size):
    padded_Y = np.zeros((Y.shape[0], target_size))
    padded_Y[:, :Y.shape[1]] = Y
    return padded_Y

Y_val = pad_labels(Y_val, Y_train.shape[1])
Y_test = pad_labels(Y_test, Y_train.shape[1])

In [44]:
# Build CNN-LSTM Model
base_cnn = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
for layer in base_cnn.layers:
    layer.trainable = False

cnn_model = Sequential([
    base_cnn,
    GlobalAveragePooling2D()
])

# Get the number of valid classes after filtering
num_valid_classes = Y_train_seq.shape[-1]

model = Sequential([
    TimeDistributed(cnn_model, input_shape=(SEQUENCE_LENGTH, 224, 224, 3)),
    LSTM(128, return_sequences=False),
    Dropout(0.5),
    Dense(len(valid_class_indices), activation='softmax')
    #Dense(Y_train_seq.shape[-1], activation='softmax'
])

model.compile(optimizer=Adam(1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()


  super().__init__(**kwargs)


In [45]:

# Prepare Sequences for LSTM - group consecutive frames into sequences
def create_sequences(X, Y, seq_length):
    X_seq, Y_seq = [], []
    for i in range(len(X) - seq_length):
        X_seq.append(X[i:i+seq_length])
        Y_seq.append(Y[i+seq_length-1])
    return np.array(X_seq), np.array(Y_seq)

X_train_seq, Y_train_seq = create_sequences(X_train, Y_train, SEQUENCE_LENGTH)
X_val_seq, Y_val_seq = create_sequences(X_val, Y_val, SEQUENCE_LENGTH)



In [60]:
train_counts = np.sum(Y_train_seq, axis=0)
for i, c in enumerate(train_counts):
    print(f"{new_classes[i]}: {c}")

bending: 21.0
carry_item: 11.0
crouch_down: 27.0
empty_room: 169.0
fallen: 140.0
gathering: 10.0
housekeeping: 25.0
kneeling: 9.0
lying_down: 12.0
occlusion: 7.0
sitting: 49.0
sleeping: 3.0
standing: 90.0
unidentified: 10.0
walking: 110.0


In [69]:
def evaluate_model(model, X_test_seq, Y_test_seq, new_classes):
    Y_pred = model.predict(X_test_seq)
    Y_pred_labels = np.argmax(Y_pred, axis=1)
    Y_true_labels = np.argmax(Y_test_seq, axis=1)

    print("\n Classification Report:")
    print(classification_report(Y_true_labels, Y_pred_labels, target_names=new_classes, zero_division=0))

    cm = confusion_matrix(Y_true_labels, Y_pred_labels)
    cm_df = pd.DataFrame(cm, index=new_classes, columns=new_classes)
    print("\n Confusion Matrix:")
    print(cm_df)

In [72]:
Y_train_labels = np.argmax(Y_train_seq, axis=1)

from sklearn.utils import class_weight

class_weights = class_weight.compute_class_weight(
    'balanced',
    classes=np.unique(Y_train_labels),
    y=Y_train_labels
)

print(dict(enumerate(new_classes)))
print(class_weights)

{0: 'bending', 1: 'carry_item', 2: 'crouch_down', 3: 'empty_room', 4: 'fallen', 5: 'gathering', 6: 'housekeeping', 7: 'kneeling', 8: 'lying_down', 9: 'occlusion', 10: 'sitting', 11: 'sleeping', 12: 'standing', 13: 'unidentified', 14: 'walking'}
[ 2.2         4.2         1.71111111  0.27337278  0.33        4.62
  1.848       5.13333333  3.85        6.6         0.94285714 15.4
  0.51333333  4.62        0.42      ]


In [73]:
Y_train_labels = np.argmax(Y_train_seq, axis=1)
Y_val_labels = np.argmax(Y_val_seq, axis=1)

In [74]:
model.compile(optimizer=Adam(1e-4), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [78]:
# Train Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

callbacks = [
    EarlyStopping(patience=5, restore_best_weights=True),
    ModelCheckpoint('/content/best_model.h5', save_best_only=True)
]

class_weights_dict = dict(enumerate(class_weights))

model.fit(
    X_train_seq, Y_train_labels,
    validation_data=(X_val_seq, Y_val_labels),
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    callbacks=callbacks,
    class_weight=class_weights_dict
)

Epoch 1/20
[1m43/44[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 63ms/step - accuracy: 0.2382 - loss: 3.4190



[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 115ms/step - accuracy: 0.2383 - loss: 3.3993 - val_accuracy: 0.2292 - val_loss: 2.6087
Epoch 2/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 76ms/step - accuracy: 0.2104 - loss: 2.6967 - val_accuracy: 0.2569 - val_loss: 2.6225
Epoch 3/20
[1m43/44[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 66ms/step - accuracy: 0.2120 - loss: 2.5578



[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 87ms/step - accuracy: 0.2116 - loss: 2.5601 - val_accuracy: 0.2222 - val_loss: 2.6081
Epoch 4/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.1955 - loss: 2.4315



[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 84ms/step - accuracy: 0.1955 - loss: 2.4334 - val_accuracy: 0.2292 - val_loss: 2.5757
Epoch 5/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 78ms/step - accuracy: 0.2011 - loss: 2.7232 - val_accuracy: 0.2083 - val_loss: 2.5861
Epoch 6/20
[1m43/44[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 63ms/step - accuracy: 0.2125 - loss: 2.1757



[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 82ms/step - accuracy: 0.2134 - loss: 2.1818 - val_accuracy: 0.2222 - val_loss: 2.5631
Epoch 7/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 76ms/step - accuracy: 0.2340 - loss: 2.2053 - val_accuracy: 0.2083 - val_loss: 2.6064
Epoch 8/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 77ms/step - accuracy: 0.2902 - loss: 2.1325 - val_accuracy: 0.1528 - val_loss: 2.6229
Epoch 9/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 77ms/step - accuracy: 0.2671 - loss: 1.9813 - val_accuracy: 0.1806 - val_loss: 2.5669
Epoch 10/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 78ms/step - accuracy: 0.3067 - loss: 2.1021 - val_accuracy: 0.2361 - val_loss: 2.6003
Epoch 11/20
[1m43/44[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 63ms/step - accuracy: 0.3022 - loss: 1.9746



[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 83ms/step - accuracy: 0.3019 - loss: 1.9742 - val_accuracy: 0.2361 - val_loss: 2.5232
Epoch 12/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 78ms/step - accuracy: 0.3203 - loss: 1.8500 - val_accuracy: 0.1875 - val_loss: 2.5849
Epoch 13/20
[1m43/44[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 64ms/step - accuracy: 0.3094 - loss: 1.8400



[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 83ms/step - accuracy: 0.3095 - loss: 1.8416 - val_accuracy: 0.2222 - val_loss: 2.5159
Epoch 14/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 76ms/step - accuracy: 0.3439 - loss: 1.6604 - val_accuracy: 0.1875 - val_loss: 2.6054
Epoch 15/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 77ms/step - accuracy: 0.3464 - loss: 1.7359 - val_accuracy: 0.2500 - val_loss: 2.5428
Epoch 16/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 76ms/step - accuracy: 0.3637 - loss: 1.6265 - val_accuracy: 0.2431 - val_loss: 2.5737
Epoch 17/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 78ms/step - accuracy: 0.3727 - loss: 1.6710 - val_accuracy: 0.2639 - val_loss: 2.5286
Epoch 18/20
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 75ms/step - accuracy: 0.3978 - loss: 1.5109 - val_ac

<keras.src.callbacks.history.History at 0x7ec4ac5a4a90>

In [79]:
# Evaluate on Test Set
X_test_seq, Y_test_seq = create_sequences(X_test, Y_test, SEQUENCE_LENGTH)
Y_pred = model.predict(X_test_seq)
Y_pred_labels = np.argmax(Y_pred, axis=1)
Y_true_labels = np.argmax(Y_test_seq, axis=1)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 329ms/step


In [80]:
new_classes = [label_encoder.classes_[i] for i in valid_class_indices]
print(classification_report(Y_true_labels, Y_pred_labels, target_names=new_classes))

              precision    recall  f1-score   support

     bending       0.12      0.25      0.17         4
  carry_item       0.00      0.00      0.00         1
 crouch_down       0.00      0.00      0.00         6
  empty_room       0.47      0.58      0.52        36
      fallen       0.47      0.24      0.32        29
   gathering       0.00      0.00      0.00         2
housekeeping       0.33      0.17      0.22         6
    kneeling       0.00      0.00      0.00         2
  lying_down       0.00      0.00      0.00         2
   occlusion       0.00      0.00      0.00         1
     sitting       0.15      0.18      0.17        11
    sleeping       0.00      0.00      0.00        19
    standing       0.00      0.00      0.00         2
unidentified       0.00      0.00      0.00        24
     walking       0.00      0.00      0.00         0

    accuracy                           0.22       145
   macro avg       0.10      0.09      0.09       145
weighted avg       0.24   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [81]:
cm = confusion_matrix(Y_true_labels, Y_pred_labels)
cm_df = pd.DataFrame(cm, index=new_classes, columns=new_classes)
print(cm_df)

              bending  carry_item  crouch_down  empty_room  fallen  gathering  \
bending             1           0            0           0       1          0   
carry_item          0           0            0           1       0          0   
crouch_down         0           0            0           0       0          1   
empty_room          1           1            0          21       1          1   
fallen              2           0            0           6       7          0   
gathering           0           0            0           0       0          0   
housekeeping        1           1            0           0       0          1   
kneeling            0           0            0           0       0          1   
lying_down          0           0            1           0       0          0   
occlusion           0           0            0           0       0          0   
sitting             0           1            1           3       3          0   
sleeping            1       

In [82]:
# Save Model & Label Encoder
model.save('/content/cnn_lstm_action_classifier.h5')
joblib.dump(label_encoder, '/content/label_encoder.joblib')




['/content/label_encoder.joblib']

In [83]:
# Upload back to GCS
bucket.blob('models/cnn_lstm_action_classifier.h5').upload_from_filename('/content/cnn_lstm_action_classifier.h5')
bucket.blob('models/label_encoder.joblib').upload_from_filename('/content/label_encoder.joblib')

# Apply detection logic: extract frames → preprocess → form sequences → predict → log to CSV / stream to Power BI/Tableau


# Attempt 2

In [86]:
# Labels to remove
remove_labels = ['empty_room', 'occlusion', 'unidentified']

remove_indices = [i for i, label in enumerate(label_encoder.classes_) if label in remove_labels]
print("Removing class indices:", remove_indices)

keep_indices = [i for i in range(len(label_encoder.classes_)) if i not in remove_indices]


X_clean = []
Y_clean = []

for i in range(len(Y)):
    label_index = np.argmax(Y[i])
    if label_index in keep_indices:
        X_clean.append(X[i])
        # One-hot encode to new index range
        new_label_index = keep_indices.index(label_index)
        new_Y = np.zeros(len(keep_indices))
        new_Y[new_label_index] = 1
        Y_clean.append(new_Y)

X_clean = np.array(X_clean)
Y_clean = np.array(Y_clean)

print("Cleaned dataset shape:", X_clean.shape, Y_clean.shape)

Removing class indices: [4, 10, 14]
Cleaned dataset shape: (569, 224, 224, 3) (569, 14)


In [87]:
# New class names
new_class_names = [label_encoder.classes_[i] for i in keep_indices]
print("Remaining classes:", new_class_names)

Remaining classes: ['bending', 'carry_item', 'crawling', 'crouch_down', 'fallen', 'gathering', 'housekeeping', 'kneeling', 'lying_down', 'sitting', 'sleeping', 'standing', 'using_laptop', 'walking']


In [89]:
# Get label indices from one-hot
Y_clean_labels = np.argmax(Y_clean, axis=1)

# Count per class
unique, counts = np.unique(Y_clean_labels, return_counts=True)

for i, c in zip(unique, counts):
    print(f"{new_class_names[i]}: {c}")

bending: 30
carry_item: 15
crawling: 38
crouch_down: 245
fallen: 15
gathering: 36
housekeeping: 13
kneeling: 17
lying_down: 10
sitting: 4
sleeping: 132
standing: 14


In [98]:
from collections import Counter
min_samples_per_class = 100

# Count current samples
counts = Counter(Y_clean_labels)

X_aug, Y_aug = [], []

for i in range(len(X_clean)):
    label = Y_clean_labels[i]
    X_aug.append(X_clean[i])
    Y_aug.append(Y_clean[i])

    # If this class is below threshold, add augmented copies
    while counts[label] < min_samples_per_class:
        aug_img = augment_image(X_clean[i])
        X_aug.append(aug_img)
        Y_aug.append(Y_clean[i])
        counts[label] += 1

# Convert to arrays
X_aug = np.array(X_aug)
Y_aug = np.array(Y_aug)

print("After augmentation:", X_aug.shape, Y_aug.shape)

After augmentation: (1377, 224, 224, 3) (1377, 14)


In [104]:
print("Unique labels in Y_true_labels:", np.unique(Y_true_labels))
print("Unique labels in Y_pred_labels:", np.unique(Y_pred_labels))
print("Length of new_class_names:", len(new_class_names))

Unique labels in Y_true_labels: [ 0  1  2  3  4  5  6  7  8  9 10 11]
Unique labels in Y_pred_labels: [ 0  1  2  3  4  5  6  7  8  9 10 11]
Length of new_class_names: 14


Value	Meaning:
1. 1377 - Number of images after augmentation (total images including both original + augmented)
2. (224, 224, 3) - 	Shape of each image (224x224 pixels, with 3 color channels — RGB)
3. (1377, 14)	- Number of labels after augmentation, one-hot encoded vectors of size 14 per image

In summary, 1377 images and their corresponding one-hot encoded labels of 14 action classes (after removing empty_room, occlusion, unidentified).

In [99]:
# Train-validation-test split
X_train, X_temp, Y_train, Y_temp = train_test_split(X_aug, Y_aug, test_size=0.3, random_state=42, stratify=Y_aug)
X_val, X_test, Y_val, Y_test = train_test_split(X_temp, Y_temp, test_size=0.5, random_state=42, stratify=Y_temp)

# Create sequences for LSTM
def create_sequences(X, Y, seq_length):
    X_seq, Y_seq = [], []
    for i in range(len(X) - seq_length):
        X_seq.append(X[i:i+seq_length])
        Y_seq.append(Y[i+seq_length-1])
    return np.array(X_seq), np.array(Y_seq)

X_train_seq, Y_train_seq = create_sequences(X_train, Y_train, SEQUENCE_LENGTH)
X_val_seq, Y_val_seq = create_sequences(X_val, Y_val, SEQUENCE_LENGTH)
X_test_seq, Y_test_seq = create_sequences(X_test, Y_test, SEQUENCE_LENGTH)

In [92]:
!pip install albumentations




In [96]:
import albumentations as A
import cv2

# Define augmentation pipeline
augment = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    A.Rotate(limit=20, p=0.3),
    A.GaussNoise(p=0.2),
    A.MotionBlur(p=0.2)
])

def augment_image(image):
    # Albumentations expects images in uint8, HWC format
    image = image.astype('uint8')
    augmented = augment(image=image)
    return augmented['image']

In [100]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, GlobalAveragePooling2D, LSTM, Dropout, Dense
from tensorflow.keras.optimizers import Adam

In [101]:
# Load pretrained ResNet50 model (exclude top classification layers)
base_cnn = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
for layer in base_cnn.layers:
    layer.trainable = False  # freeze CNN layers for transfer learning

# Wrap CNN in a Sequential model
cnn_model = Sequential([
    base_cnn,
    GlobalAveragePooling2D()
])

# Build the CNN-LSTM model
model = Sequential([
    TimeDistributed(cnn_model, input_shape=(SEQUENCE_LENGTH, 224, 224, 3)),
    LSTM(128, return_sequences=False),
    Dropout(0.5),
    Dense(Y_train_seq.shape[-1], activation='softmax')  # Number of classes = 14
])

# Compile the model
model.compile(optimizer=Adam(1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

  super().__init__(**kwargs)


In [102]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

callbacks = [
    EarlyStopping(patience=5, restore_best_weights=True),
    ModelCheckpoint('/content/best_resnet_model.h5', save_best_only=True)
]

# Train the model
model.fit(
    X_train_seq, Y_train_seq,
    validation_data=(X_val_seq, Y_val_seq),
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    callbacks=callbacks
)

Epoch 1/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step - accuracy: 0.1907 - loss: 2.4324



[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 304ms/step - accuracy: 0.1914 - loss: 2.4306 - val_accuracy: 0.3713 - val_loss: 2.0820
Epoch 2/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step - accuracy: 0.3857 - loss: 1.9620



[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 105ms/step - accuracy: 0.3863 - loss: 1.9608 - val_accuracy: 0.5347 - val_loss: 1.7333
Epoch 3/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - accuracy: 0.5612 - loss: 1.6093



[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 104ms/step - accuracy: 0.5617 - loss: 1.6080 - val_accuracy: 0.6139 - val_loss: 1.4937
Epoch 4/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - accuracy: 0.6407 - loss: 1.3504



[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 101ms/step - accuracy: 0.6411 - loss: 1.3496 - val_accuracy: 0.6287 - val_loss: 1.3504
Epoch 5/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - accuracy: 0.6729 - loss: 1.2446



[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 101ms/step - accuracy: 0.6734 - loss: 1.2432 - val_accuracy: 0.6584 - val_loss: 1.2414
Epoch 6/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - accuracy: 0.7233 - loss: 1.0737



[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 100ms/step - accuracy: 0.7235 - loss: 1.0733 - val_accuracy: 0.6832 - val_loss: 1.1735
Epoch 7/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - accuracy: 0.7325 - loss: 0.9983



[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 101ms/step - accuracy: 0.7327 - loss: 0.9980 - val_accuracy: 0.7178 - val_loss: 1.1174
Epoch 8/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - accuracy: 0.7994 - loss: 0.8677



[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 102ms/step - accuracy: 0.7991 - loss: 0.8682 - val_accuracy: 0.6931 - val_loss: 1.0941
Epoch 9/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - accuracy: 0.8000 - loss: 0.8897



[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 101ms/step - accuracy: 0.7999 - loss: 0.8892 - val_accuracy: 0.6980 - val_loss: 1.0512
Epoch 10/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - accuracy: 0.8258 - loss: 0.7742



[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 99ms/step - accuracy: 0.8256 - loss: 0.7745 - val_accuracy: 0.7327 - val_loss: 1.0260
Epoch 11/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - accuracy: 0.7856 - loss: 0.8062



[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 101ms/step - accuracy: 0.7860 - loss: 0.8055 - val_accuracy: 0.7178 - val_loss: 0.9946
Epoch 12/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step - accuracy: 0.8604 - loss: 0.6735



[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 104ms/step - accuracy: 0.8600 - loss: 0.6740 - val_accuracy: 0.7178 - val_loss: 0.9853
Epoch 13/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - accuracy: 0.8416 - loss: 0.6420



[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 100ms/step - accuracy: 0.8414 - loss: 0.6424 - val_accuracy: 0.7327 - val_loss: 0.9634
Epoch 14/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - accuracy: 0.8431 - loss: 0.6509



[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 100ms/step - accuracy: 0.8430 - loss: 0.6507 - val_accuracy: 0.7376 - val_loss: 0.9579
Epoch 15/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - accuracy: 0.8454 - loss: 0.6461



[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 100ms/step - accuracy: 0.8456 - loss: 0.6457 - val_accuracy: 0.7376 - val_loss: 0.9315
Epoch 16/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - accuracy: 0.8722 - loss: 0.5751



[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 99ms/step - accuracy: 0.8720 - loss: 0.5754 - val_accuracy: 0.7178 - val_loss: 0.9276
Epoch 17/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - accuracy: 0.8824 - loss: 0.5269



[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 101ms/step - accuracy: 0.8823 - loss: 0.5272 - val_accuracy: 0.7376 - val_loss: 0.9058
Epoch 18/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - accuracy: 0.8671 - loss: 0.5508



[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 101ms/step - accuracy: 0.8671 - loss: 0.5504 - val_accuracy: 0.7475 - val_loss: 0.8976
Epoch 19/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - accuracy: 0.8946 - loss: 0.4753



[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 97ms/step - accuracy: 0.8944 - loss: 0.4756 - val_accuracy: 0.7525 - val_loss: 0.8826
Epoch 20/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 89ms/step - accuracy: 0.8870 - loss: 0.4725 - val_accuracy: 0.7525 - val_loss: 0.8955


<keras.src.callbacks.history.History at 0x7ec3d43ee690>

In [106]:
Y_pred = model.predict(X_test_seq)
Y_pred_labels = np.argmax(Y_pred, axis=1)
Y_true_labels = np.argmax(Y_test_seq, axis=1)

from sklearn.metrics import classification_report, confusion_matrix

existing_classes = np.unique(Y_true_labels)
existing_class_names = [new_class_names[i] for i in existing_classes]

print(classification_report(Y_true_labels, Y_pred_labels, target_names=existing_class_names))

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 100ms/step
              precision    recall  f1-score   support

     bending       0.89      0.57      0.70        14
  carry_item       0.93      1.00      0.97        14
    crawling       0.93      0.87      0.90        15
 crouch_down       0.68      0.92      0.78        37
      fallen       0.85      0.79      0.81        14
   gathering       0.81      0.87      0.84        15
housekeeping       1.00      0.80      0.89        15
    kneeling       0.92      0.73      0.81        15
  lying_down       1.00      0.87      0.93        15
     sitting       1.00      1.00      1.00        14
    sleeping       0.45      0.50      0.48        20
    standing       0.83      0.71      0.77        14

    accuracy                           0.81       202
   macro avg       0.86      0.80      0.82       202
weighted avg       0.83      0.81      0.81       202



*   No noisy labels (empty_room, occlusion, unidentified)
*   Strong ResNet50 transfer learning backbone.
*   Balanced minority classes through augmentation





Learning Points in Data Preparation and Model Training (including using VGG19 vs Transfer Learning Resnet50, model performance before and after applying data augmentation, observation e.g. imbalance data, skewed data, between hot encoded and numerical data)

# Upload to GCS

In [107]:
!cp "/content/drive/MyDrive/Colab Notebooks/P2-06_CNN2-vid.ipynb" "/content/P2-06_CNN2-vid.ipynb"

client = storage.Client(project='p2-anomaly')
bucket = client.bucket('p2-anomaly')
notebook_blob = bucket.blob('notebooks/P2-06_CNN2-vid.ipynb')
notebook_blob.upload_from_filename('/content/P2-06_CNN2-vid.ipynb')
print("Notebook pushed to GCS.")

Notebook pushed to GCS.


In [108]:
!jupyter nbconvert --to script "/content/P2-06_CNN2-vid.ipynb" --output "/content/P2-06_CNN2-vid"
!mv /content/P2-06_CNN2-vid.txt /content/P2-06_CNN2-vid.py
local_script_path = '/content/P2-06_CNN2-vid.py'
gcs_script_path = 'scripts/P2-06_CNN2-vid.py'

# Upload to GCS
blob = bucket.blob(gcs_script_path)
blob.upload_from_filename(local_script_path)

print("Script uploaded to GCS.")

[NbConvertApp] Converting notebook /content/P2-06_CNN2-vid.ipynb to script
[NbConvertApp] Writing 16562 bytes to /content/P2-06_CNN2-vid.txt
Script uploaded to GCS.
