# Dataset preprocessing

In [3]:
import json
import random
import urllib
import urllib.request
from pathlib import Path

import cv2
import h5py
import numpy as np
import pandas as pd
from tqdm.auto import tqdm, trange

## Useful functions

In [4]:
def split_videos_list_train_val_test(videos_list, train_ratio=0.7, val_ratio=0.2, seed=None):
    """Randomly partition ``videos_list`` into train / validation / test lists.

    The input is copied before shuffling, so the caller's list keeps its order.

    Args:
        videos_list: Iterable of items (e.g. video paths) to split.
        train_ratio: Fraction of the items assigned to the training split.
        val_ratio: Fraction of the items assigned to the validation split.
            Whatever remains after train and validation goes to the test split.
        seed: Optional seed. When given, a dedicated ``random.Random(seed)`` is
            used so the split is reproducible and the global RNG is untouched.
            When ``None`` the global ``random`` module state is used.

    Returns:
        A ``(train, val, test)`` tuple of lists.

    Raises:
        ValueError: If a ratio is negative or the two ratios sum to more than 1.
    """
    # Small tolerance so that float sums such as 0.9 + 0.1 are not rejected.
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError(
            f"Invalid split ratios: train_ratio={train_ratio}, val_ratio={val_ratio}"
        )

    shuffled = list(videos_list)  # copy: never reorder the caller's list
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(shuffled)

    total = len(shuffled)
    train_end = int(np.floor(train_ratio * total))
    val_end = train_end + int(np.floor(val_ratio * total))
    return shuffled[:train_end], shuffled[train_end:val_end], shuffled[val_end:]

## Parameters

In [5]:
# Sequence length: the extraction loop only starts storing samples once
# frame_number exceeds 1 + SEQ_LENGTH.
SEQ_LENGTH = 5
# Names of the entries of one spatial feature vector. Only its length is used, and
# only by the (currently commented-out) spatial-feature pipeline.
SPATIAL_FEATURES = ['CTR_LOC_X', 'CTR_LOC_Y', 'CTR_SPD_X', 'CTR_SPD_Y', 'ANGLE', 'ANGLE_SPEED', 'WIDTH', 'HEIGHT', 
                    'ASPECT_RATIO', 'C_MOTION', 'PROJ_RATIO']

# Constants for the image-processing steps. Apart from GAUSSIAN_KERNEL they are
# only referenced from the commented-out spatial-feature pipeline.
CLOSING_KERNEL = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (4, 4))  # kernel for the MORPH_CLOSE step
EROTION_KERNEL = np.ones((2, 2), np.uint8)  # kernel for the erode step (name is a typo of "erosion")
MIN_HUMAN_CONTOUR_AREA = 1000  # minimum contour area required before an ellipse is fitted
MHI_DURATION = 500 # milliseconds of motion history
MHI_THRESHOLD = 32  # threshold applied to the grayscale frame difference
GAUSSIAN_KERNEL = (3, 3)  # kernel size of the Gaussian blur applied to frames

# Kalman filter: the filter is deactivated once the count of frames without a
# usable contour reaches this value.
NO_CONTOUR_TOLERANCE = 10

## URFD

In [6]:
# Root of the URFD dataset and the sub-folder that holds its videos.
dataset_folder = Path('data/URFD/')
videos_folder = dataset_folder.joinpath('videos')

In [None]:
# Download the URFD camera-0 videos (30 falls, 40 ADLs) and the fall annotations CSV.
URFD_BASE_URL = "http://fenix.univ.rzeszow.pl/~mkepski/ds/data/"


def _download_if_missing(file_name, target_folder):
    """Fetch ``file_name`` from ``URFD_BASE_URL`` into ``target_folder``.

    Files that already exist are left alone so the cell can be re-run cheaply.
    The download goes to a ``.part`` file first and is renamed afterwards, so an
    interrupted transfer is never mistaken for a complete file.

    Returns:
        The path of the local file.
    """
    target = target_folder / file_name
    if target.exists():
        return target
    partial = target.with_name(target.name + ".part")
    urllib.request.urlretrieve(URFD_BASE_URL + file_name, str(partial))
    partial.replace(target)
    return target


# urlretrieve does not create missing directories.
dataset_folder.mkdir(parents=True, exist_ok=True)
videos_folder.mkdir(parents=True, exist_ok=True)

urfd_video_names = [f"fall-{fall:02}-cam0.mp4" for fall in range(1, 31)]
urfd_video_names += [f"adl-{adl:02}-cam0.mp4" for adl in range(1, 41)]
for file_name in urfd_video_names:
    _download_if_missing(file_name, videos_folder)

_download_if_missing('urfall-cam0-falls.csv', dataset_folder)

# Build the annotations dictionary from the first three CSV columns:
# for each recording, the smallest and largest frame number among rows labelled 0.
df = pd.read_csv(dataset_folder / 'urfall-cam0-falls.csv', header=None, usecols=range(3))
df.columns = ['file_name', 'frame', 'label']
label_is_zero = df.label == 0
falls = df[label_is_zero].groupby('file_name', as_index=False).agg({'frame': ['min', 'max']})
# Flatten the (column, aggregation) MultiIndex produced by ``agg``.
falls.columns = ['file_name', 'frame_min', 'frame_max']

# Key: video file name, value: [first frame, last frame].
annotations = {
    f'{name}-cam0.mp4': [first, last]
    for name, first, last in zip(falls.file_name, falls.frame_min, falls.frame_max)
}

# Save to file
with open(dataset_folder / 'annotations.json', 'w') as f:
    json.dump(annotations, f, indent=4)

In [7]:
# Reload the saved annotations (video file name -> [first frame, last frame]).
annotations = json.loads((dataset_folder / 'annotations.json').read_text())

In [8]:
# Create a reproducible train / val / test split.
# ``glob`` yields files in a filesystem-dependent order, so the lists are sorted
# first; seeding the global RNG (which the split helper shuffles with) then pins
# the shuffle itself. Falls and ADLs are split separately so every split keeps
# the same fall / ADL proportion.
SPLIT_SEED = 42
random.seed(SPLIT_SEED)

fall_videos = sorted(videos_folder.glob('fall*'))
fall_train, fall_val, fall_test = split_videos_list_train_val_test(fall_videos)
adl_videos = sorted(videos_folder.glob('adl*'))
adl_train, adl_val, adl_test = split_videos_list_train_val_test(adl_videos)

all_videos = {
    "train": fall_train + adl_train,
    "val": fall_val + adl_val,
    "test": fall_test + adl_test
}

In [63]:
# Empty per-split containers: one list per feature type, plus one label list per split.
data = {
    split_name: {"spatial": [], "rgb": [], "flow": []}
    for split_name in ("train", "val", "test")
}
labels = {split_name: [] for split_name in ("train", "val", "test")}

In [64]:
# Walk over every video of every split and collect, per saved frame:
#   * the RGB frame,
#   * the sequence of optical-flow fields leading up to it,
#   * the label (1 inside the annotated [fall_start, fall_end] range, else 0).
# The spatial-feature pipeline (tMHI + Kalman-filtered ellipse) is currently
# commented out, so data[...]["spatial"] stays empty; its preparation code still runs.
for group in all_videos:
    for video in tqdm(all_videos[group], desc=f'{group} videos', total=len(all_videos[group])):

        cap = cv2.VideoCapture(str(video))
        try:  # make sure the capture is released even on errors / interrupts
            length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            # > Labels
            fall_start, fall_end = annotations[video.name] if video.name.startswith('fall') else (-1, -1)  # Negative values for adl videos

            # > Spatial features
            spatial_features_vectors_sequence = []
            background_subtractor = cv2.createBackgroundSubtractorMOG2()

            # ---> tMHI prep
            fps = cap.get(cv2.CAP_PROP_FPS)
            ms_per_frame = 1000 / fps   # milliseconds
            ret, frame = cap.read()
            if not ret:
                raise RuntimeError(f'Could not read the first frame of {video}')
            frame = frame[:, 320:]  # Remove the depth data
            tmhi_last_frame = cv2.GaussianBlur(frame, GAUSSIAN_KERNEL, 0)
            h, w = tmhi_last_frame.shape[:2]
            mhi = np.zeros((h, w), np.float32)

            # ---> Kalman Filter Prep
            state_size = 8          # [x, y, v_x, v_y, alpha, v_alpha, a, b]
            measurement_size = 5    # [x, y, alpha, a, b]
            kalman = cv2.KalmanFilter(state_size, measurement_size, 0)
            kalman.transitionMatrix = np.eye(state_size, dtype=np.float32)
            kalman.transitionMatrix[(0,1,4), (2,3,5)] = ms_per_frame
            kalman.measurementMatrix = np.zeros((measurement_size, state_size), dtype=np.float32)
            pos = [(0,0), (1,1), (2,4), (3,6), (4,7)]
            rows, cols = zip(*pos)
            kalman.measurementMatrix[rows, cols] = 1.
            kalman.processNoiseCov = 1e-5 * np.eye(state_size, dtype=np.float32)  # Values can't change sharply
            kalman.processNoiseCov[4,4] = 0        # Angle shouldn't change very fast (unless there is a fall!)
            kalman.processNoiseCov[(6,7),(6,7)] = 5e-4        # Height and width can change faster
            kalman.measurementNoiseCov = np.eye(measurement_size, dtype=np.float32)
            kalman.measurementNoiseCov[2,2] = 1e-1  # Angle is very noisy (especially around the limits)
            kalman.measurementNoiseCov[(0,1),(0,1)] = 1  # Center is super noisy
            kalman.measurementNoiseCov[(3,4),(3,4)] = 1e-1  # Height and width are not noisy
            kalman_filter_active = False  # This will change according to our contour search
            contour_unfound_count = 0
            last_angle = None  # Continuous angle tracking (to avoid drastic changes on direction change)

            # > Optical flow
            optical_flow_sequence = []
            optical_flow = cv2.optflow.DualTVL1OpticalFlow_create()
            opt_last_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # The first frame was consumed above, so numbering continues at 2.
            for frame_number in trange(2, length + 1, desc='frame', leave=False):
                ret, frame = cap.read()
                if not ret:
                    # The container may report more frames than can actually be
                    # decoded; stop here instead of crashing on a None frame.
                    break
                frame = frame[:, 320:]  # Remove the depth data

                label = 1 if fall_start <= frame_number <= fall_end else 0

                ### OPTICAL FLOW

                opt_current_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                flow = optical_flow.calc(opt_last_frame, opt_current_frame, None)
                optical_flow_sequence.append(flow)
                # Advance the reference frame so the next flow is computed between
                # consecutive frames rather than always against the first frame.
                opt_last_frame = opt_current_frame

#                 ### SPATIAL FEATURES

#                 # Set parameters to 0 unless found otherwise
#                 c_motion = 0
#                 projection_ratio = 0

#                 # Transformations to improve the background subtraction
#                 # tMHI
#                 img = cv2.GaussianBlur(frame, GAUSSIAN_KERNEL, 0)
#                 frame_diff = cv2.absdiff(img, tmhi_last_frame)
#                 gray_diff = cv2.cvtColor(frame_diff, cv2.COLOR_BGR2GRAY)
#                 _, motion_mask = cv2.threshold(gray_diff, MHI_THRESHOLD, 1, cv2.THRESH_BINARY)
#                 cv2.motempl.updateMotionHistory(motion_mask, mhi, frame_number * ms_per_frame, MHI_DURATION)
#                 tmhi_last_frame = img

#                 # Preparation for contour finding
#                 img = cv2.GaussianBlur(frame, (5,5), 0)
#                 img = background_subtractor.apply(img)
#                 img = cv2.erode(img, EROTION_KERNEL, iterations=1)
#                 img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, CLOSING_KERNEL, iterations=5)
#                 img[img < 255] = 0

#                 # Find contours
#                 (contours, _) = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
#                 img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

#                 if contours:
#                     biggest_contour = max(contours, key=lambda c: cv2.contourArea(c))

#                     # Check minimal requirements to fit new ellipse
#                     if cv2.contourArea(biggest_contour) > MIN_HUMAN_CONTOUR_AREA and len(biggest_contour) >= 5:
#                         # calculate motion coefficient
#                         mask = np.zeros(img.shape[:2], dtype="uint8")
#                         cv2.drawContours(mask, biggest_contour, -1, 1, -1)
#                         tmhi = np.uint8(np.clip((mhi - (frame_number * ms_per_frame - MHI_DURATION)) / MHI_DURATION, 0, 1))
#                         relevant_tmhi = np.multiply(mask, tmhi)
#                         c_motion = np.sum(relevant_tmhi) / np.sum(mask)

#                         # calculate projections
#                         horizontal_proj = mask.any(axis=0).sum()
#                         vertical_proj = mask.any(axis=1).sum()
#                         projection_ratio = vertical_proj / horizontal_proj

#                         contour_unfound_count = 0
#                         measured_ellipse = cv2.fitEllipse(biggest_contour)
#                         (x, y), (MA, ma), alpha = measured_ellipse
#                         if not kalman_filter_active:
#                             last_angle = alpha
#                             kalman.errorCovPre = np.eye(state_size)
#                             state = np.array([x, y, 0, 0, alpha, 0, MA, ma], dtype=np.float32)[:, np.newaxis]
#                             kalman.statePost = state
#                             kalman_filter_active = True
#                         else:
#                             corrected_alpha = min(alpha, alpha - 180, alpha + 180, key=lambda x: abs(x - last_angle))
#                             kalman.correct(np.array([x, y, corrected_alpha, MA, ma], dtype=np.float32)[:, np.newaxis])
#                             last_angle = corrected_alpha

#                     else:
#                         contour_unfound_count += 1
#                         if contour_unfound_count >= NO_CONTOUR_TOLERANCE:
#                             kalman_filter_active = False
#                             last_angle = None

#                 else:
#                     contour_unfound_count += 1
#                     if contour_unfound_count >= NO_CONTOUR_TOLERANCE:
#                         kalman_filter_active = False
#                         last_angle = None

#                 if kalman_filter_active:
#                     state = kalman.predict()
#                     ctr_x, ctr_y, ctr_vx, ctr_vy, alpha, v_alpha, a, b = state.T[0]
#                     spatial_features_vectors_sequence.append([ctr_x, ctr_y, ctr_vx, ctr_vy, alpha, v_alpha, a, b,
#                                                               a / b, c_motion, projection_ratio])
#                 else:
#                     spatial_features_vectors_sequence.append(np.zeros(len(SPATIAL_FEATURES)))

                # when we have SEQ_LENGTH frame, we can start saving data
                # NOTE(review): with this condition every saved flow sequence holds
                # SEQ_LENGTH + 1 entries (the first save happens at frame SEQ_LENGTH + 2,
                # after SEQ_LENGTH + 1 flows were appended). Left unchanged because it
                # defines the stored data shape - confirm whether that is intended.
                if frame_number > 1 + SEQ_LENGTH:
                    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # convert to RGB
#                     data[group]["spatial"].append(np.array(spatial_features_vectors_sequence))
                    data[group]["rgb"].append(rgb)
                    data[group]["flow"].append(np.array(optical_flow_sequence))
                    labels[group].append(label)
#                     del spatial_features_vectors_sequence[0]
                    del optical_flow_sequence[0]  # slide the window forward by one frame
        finally:
            cap.release()

HBox(children=(FloatProgress(value=0.0, description='train videos', max=49.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=0.0, description='frame', max=99.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='frame', max=84.0, style=ProgressStyle(description_width='…




KeyboardInterrupt: 

In [65]:
# Normalize the flow data to uint8.
# The value range is measured on the training split only and the same linear
# mapping [min_value, max_value] -> [0, 255] is then applied to every split, so
# train / val / test share one scale before they are stored as uint8.
def _flow_to_uint8(flow, low, high):
    """Linearly map ``flow`` from ``[low, high]`` onto ``[0, 255]`` as uint8.

    ``low`` is subtracted first so negative flow components do not wrap around
    when cast to uint8; values outside ``[low, high]`` are clipped.
    """
    span = high - low
    if span == 0:
        # Degenerate range: avoid a division by zero.
        return np.zeros(np.shape(flow), dtype=np.uint8)
    scaled = 255.0 * (np.asarray(flow, dtype=np.float32) - low) / span
    return np.clip(scaled, 0, 255).astype(np.uint8)


train_flows = data['train']['flow']
if not train_flows:
    raise ValueError("No training flow sequences to normalize; run the extraction cell first.")

# Reduce sequence by sequence instead of stacking everything into one huge array.
min_value = min(float(np.min(seq)) for seq in train_flows)
max_value = max(float(np.max(seq)) for seq in train_flows)

for split_name in data:
    split_flows = data[split_name]['flow']
    for i in range(len(split_flows)):
        split_flows[i] = _flow_to_uint8(split_flows[i], min_value, max_value)

In [66]:
# Write everything to one HDF5 file laid out as
#   /data/<split>/<feature>   and   /labels/<split>
# Every array is cast to uint8 on the way out.
with h5py.File(dataset_folder / "data.hdf5", "w") as hdf:
    features_root = hdf.create_group("data")
    for split_name, split_features in data.items():
        split_node = features_root.create_group(split_name)
        for feature_name, samples in split_features.items():
            stacked = np.array(samples, dtype=np.uint8)
            split_node.create_dataset(feature_name, data=stacked)

    labels_root = hdf.create_group("labels")
    for split_name, split_labels in labels.items():
        labels_root.create_dataset(split_name, data=np.array(split_labels, dtype=np.uint8))

## SisFall ?

In [4]:
# Root of the SisFall dataset and the sub-folder for its videos.
dataset = Path('data/SisFall/')
videos = dataset.joinpath('videos')