In [None]:
# Setup imports
import os
import io
import numpy as np
import pandas as pd
import zipfile
import matplotlib.pyplot as plt

from gaitsetpy.dataset import UrFallLoader
from gaitsetpy.dataset.utils import download_urfall_data
from gaitsetpy.features import GaitFeatureExtractor, UrFallMediaFeatureExtractor
from gaitsetpy.preprocessing import create_preprocessing_pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay

# Working directory for the downloaded UrFall files, relative to the
# notebook's current directory; created here if it does not exist yet.
DATA_DIR = os.path.join(os.curdir, 'urfall_data')
os.makedirs(name=DATA_DIR, exist_ok=True)
print(DATA_DIR)


In [None]:
# 1) Loader and metadata
loader = UrFallLoader()
print(loader.name, loader.description)
# Print two entries of the loader's metadata mapping, each with its label.
for label, key in (
    ('Supported types:', 'data_types'),
    ('Sampling frequency (camera):', 'sampling_frequency'),
):
    print(label, loader.metadata[key])


In [None]:
# 2) Ensure features CSVs are present (falls + ADLs)
# Both calls below select the same subset: falls and ADLs.
sequence_flags = {'use_falls': True, 'use_adls': True}
download_urfall_data(DATA_DIR, data_types=['features'], **sequence_flags)

# 3) Load features
data, names = loader.load_data(DATA_DIR, data_types=['features'], **sequence_flags)
n_frames = len(data)
print(f"Loaded {n_frames} feature DataFrames:", names)
if data:
    # Preview only the first loaded frame.
    first_frame = data[0]
    display(first_frame.head())


In [None]:
# 4) Sliding windows on features
# 30-sample windows advanced by 15 samples (50% overlap).
windows = loader.create_sliding_windows(
    data,
    names,
    window_size=30,
    step_size=15,
)
print(f"Window sets: {len(windows)}")
if windows:
    preview_names = [window_set['name'] for window_set in windows[:3]]
    print('Example window names:', preview_names)


In [None]:
# 5) Download a tiny subset of raw accelerometer and extract gait features
subset = ['fall-01', 'adl-01']
download_urfall_data(
    DATA_DIR,
    sequences=subset,
    data_types=['accelerometer', 'synchronization'],
    use_falls=True,
    use_adls=True,
)
acc_data, acc_names = loader.load_data(
    DATA_DIR,
    data_types=['accelerometer'],
    sequences=subset,
    use_falls=True,
    use_adls=True,
)
print(acc_names)

# 100-sample windows advanced by 50 samples (50% overlap).
acc_windows = loader.create_sliding_windows(acc_data, acc_names, window_size=100, step_size=50)
# Fall back to 100 when the metadata has no 'accelerometer_frequency' entry.
fs_acc = loader.metadata.get('accelerometer_frequency', 100)
extractor = GaitFeatureExtractor(verbose=False)

# Flatten every window set into one list, leaving out the label channels so
# only sensor channels reach the feature extractor.
flat_sensor_windows = []
for window_set in acc_windows:
    flat_sensor_windows.extend(
        entry
        for entry in window_set['windows']
        if entry['name'] not in ('labels', 'activity_id')
    )
acc_feats = extractor.extract_features(flat_sensor_windows, fs=fs_acc)
print(f"Extracted {len(acc_feats)} gait sensor feature dicts")


In [None]:
# 6) Pull a few frames from depth/RGB archives and extract media features
# Fetch the media files for the sequences in `subset` only.
download_urfall_data(
    DATA_DIR,
    sequences=subset,
    data_types=['depth', 'rgb', 'video'],
    use_falls=True,
    use_adls=True,
    max_workers=6,
)

def analyze_zip(paths, grayscale, max_frames=12):
    """Extract media features from the first PNG frames of each ZIP archive.

    Parameters
    ----------
    paths : dict
        Maps a sequence name to the path of its ZIP archive. The name is
        copied into each window's ``'name'`` field.
    grayscale : bool
        Forwarded unchanged to ``UrFallMediaFeatureExtractor.extract_features``.
    max_frames : int, optional
        Maximum number of PNG members read from each archive (default 12).

    Returns
    -------
    dict
        ``{name: features}`` built from the extractor's results. Empty when
        no archive yielded any frame.

    Notes
    -----
    Archives that cannot be opened or decoded are reported with a ``warn``
    line and skipped (best effort); they never abort the whole run.
    """
    windows = []
    for seq, path in paths.items():
        try:
            with zipfile.ZipFile(path, 'r') as zf:
                # Sort member names so the selected frames do not depend on the
                # order in which the archive happened to be written.
                # NOTE(review): assumes frame file names sort chronologically
                # (e.g. zero-padded indices) - confirm against the archives.
                pngs = sorted(
                    n for n in zf.namelist() if n.lower().endswith('.png')
                )[:max_frames]
                frames = [plt.imread(io.BytesIO(zf.read(name))) for name in pngs]
        except Exception as e:
            # Deliberately broad: one unreadable archive must not stop the
            # others. Include the sequence name so the failure can be traced.
            print('warn', seq, e)
            continue
        if frames:
            windows.append({'name': seq, 'data': frames})

    if not windows:
        # Nothing to analyse; skip building the extractor altogether.
        return {}

    media_extractor = UrFallMediaFeatureExtractor(verbose=False)
    feats = media_extractor.extract_features(
        windows,
        fs=loader.metadata['sampling_frequency'],
        grayscale=grayscale,
    )
    return {f['name']: f['features'] for f in feats}

# Look up the files of each media type for the sequences in `subset`,
# queried in the order depth, rgb, video.
depth_paths, rgb_paths, video_paths = (
    loader.get_file_paths(DATA_DIR, media_type, sequences=subset)
    for media_type in ('depth', 'rgb', 'video')
)

# Depth archives are analysed with grayscale=True, RGB archives with grayscale=False.
depth_feats = analyze_zip(paths=depth_paths, grayscale=True)
rgb_feats = analyze_zip(paths=rgb_paths, grayscale=False)

# Video optional (requires cv2); skip if not available
def _read_video_frames(cv2_module, path, max_frames):
    """Return up to ``max_frames`` frames read from the video at ``path``.

    Returns an empty list when the capture cannot be opened. The capture is
    always released, even if reading a frame raises.
    """
    cap = cv2_module.VideoCapture(path)
    try:
        if not cap.isOpened():
            return []
        frames = []
        for _ in range(max_frames):
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(frame)
        return frames
    finally:
        cap.release()


def _extract_video_features(cv2_module, paths, fs, max_frames=120):
    """Build one window per readable video in ``paths`` and extract its features.

    ``paths`` maps a sequence name to a video file path. Returns
    ``{name: features}``, or an empty dict when no video yielded any frame.
    Everything stays local to this function, so the notebook-level ``windows``
    variable from the sliding-window step is not overwritten.
    """
    clips = []
    for seq, path in paths.items():
        frames = _read_video_frames(cv2_module, path, max_frames)
        if frames:
            clips.append({'name': seq, 'data': frames})
    if not clips:
        return {}
    media_extractor = UrFallMediaFeatureExtractor(verbose=False)
    feats = media_extractor.extract_features(clips, fs=fs, grayscale=True)
    return {f['name']: f['features'] for f in feats}


try:
    # Imported here on purpose: cv2 is an optional dependency of this notebook.
    import cv2
    video_feats = _extract_video_features(
        cv2,
        video_paths,
        fs=loader.metadata['sampling_frequency'],
    )
except Exception as e:
    # Best effort: a missing cv2 or any extraction failure leaves the video
    # features empty instead of stopping the notebook.
    print('Video feature extraction skipped:', e)
    video_feats = {}


In [None]:
# 7) Build a simple classification dataset and train RF
def _majority_label(label_window):
    """Return the most frequent value of one label window as an int."""
    values, counts = np.unique(label_window, return_counts=True)
    return int(values[np.argmax(counts)])


def _window_stats(sensor_window):
    """Return [mean, std, max - min] of one window, flattened as float32."""
    flat = np.array(sensor_window, dtype=np.float32).ravel()
    return [float(flat.mean()), float(flat.std()), float(flat.max() - flat.min())]


def pad(v, d):
    """Return ``v`` cut or zero-padded on the right to exactly ``d`` items."""
    if len(v) > d:
        return v[:d]
    return v + [0.0] * (d - len(v))


X_list, y_list = [], []

# From accelerometer windows: simple stats per window
for item in acc_windows:
    per_name = {entry['name']: entry['data'] for entry in item['windows']}
    if 'activity_id' not in per_name:
        # No labels for this window set, so it cannot contribute samples.
        continue
    window_labels = [_majority_label(w) for w in per_name['activity_id']]
    n_labels = len(window_labels)
    for name_key, windows_arr in per_name.items():
        if name_key in ('labels', 'activity_id'):
            continue
        for idx, w in enumerate(windows_arr):
            X_list.append(_window_stats(w))
            # Windows beyond the available labels fall back to label 0.
            y_list.append(window_labels[idx] if idx < n_labels else 0)

# From media features: aggregate
# One row per sequence; the label is 1 when the name starts with 'fall-', else 0.
intensity_keys = ('mean_intensity', 'std_intensity')
video_keys = ('motion_mean', 'motion_std', 'brightness_mean')
media_sources = (
    (depth_feats, intensity_keys),
    (rgb_feats, intensity_keys),
    (video_feats or {}, video_keys),
)
for feats_by_seq, feature_keys in media_sources:
    for seq, feat in feats_by_seq.items():
        X_list.append([feat.get(k, 0.0) for k in feature_keys])
        y_list.append(int(seq.startswith('fall-')))

# Rows have different lengths (2 or 3 values); zero-pad them to the widest row.
max_dim = max(map(len, X_list), default=0)
if X_list:
    X = np.array(
        [pad([float(x) for x in v], max_dim) for v in X_list],
        dtype=np.float32,
    )
else:
    X = np.empty((0, 0), dtype=np.float32)
y = np.array(y_list, dtype=np.int64)
print('X, y shapes:', X.shape, y.shape)

def _stratified_split_feasible(labels, test_size):
    """Return True when a stratified split of ``labels`` can succeed.

    ``test_size`` is the test fraction (a float in (0, 1)). A stratified split
    needs at least two classes, at least two samples in every class, and at
    least one sample per class on both the train and the test side. Checking
    this up front avoids the ``ValueError`` that ``train_test_split`` raises
    with ``stratify=`` otherwise.
    """
    labels = np.asarray(labels)
    if labels.size == 0:
        return False
    _, counts = np.unique(labels, return_counts=True)
    n_classes = counts.size
    # Same rounding as scikit-learn: the test side gets the ceiling.
    n_test = int(np.ceil(test_size * labels.size))
    n_train = labels.size - n_test
    return (
        n_classes >= 2
        and int(counts.min()) >= 2
        and n_test >= n_classes
        and n_train >= n_classes
    )


test_fraction = 0.3
if X.size and len(y) >= 4 and _stratified_split_feasible(y, test_fraction):
    # Clip to the data's own min/max.
    # NOTE(review): presumably leaves the values unchanged and is here to
    # demonstrate the preprocessing pipeline API - confirm.
    pipeline = create_preprocessing_pipeline(
        ['clipping'],
        clipping={'min_val': float(np.min(X)), 'max_val': float(np.max(X))},
    )
    Xp = pipeline(X)
    X_train, X_test, y_train, y_test = train_test_split(
        Xp, y, test_size=test_fraction, random_state=42, stratify=y
    )
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f'RandomForest accuracy: {acc:.3f}')
    # Pass the class ids explicitly so the matrix rows/columns and the tick
    # labels show the real label values rather than positional indices.
    class_labels = np.unique(y)
    cm = confusion_matrix(y_test, y_pred, labels=class_labels)
    ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels).plot(values_format='d')
    plt.title('Confusion Matrix - UrFall RF')
    plt.tight_layout()
    plt.show()
else:
    print('Not enough labeled samples for training.')
