In [49]:
import os

def check_upfall_completeness(root_dir="dataset"):
    """Report trial folders that lack their sensor CSV or camera archives.

    The tree is walked as ``root_dir/<subject>/<activity>/<trial>/``. For every
    trial directory found, three files named after the concatenated folder
    names are required: ``<subject><activity><trial>.csv``,
    ``<subject><activity><trial>Camera1.zip`` and
    ``<subject><activity><trial>Camera2.zip``.

    Only directories that actually exist are inspected, so a trial whose
    folder is absent altogether is NOT reported here. Entries that are not
    directories are ignored at every level.

    Args:
        root_dir: Path of the dataset root folder.

    Returns:
        A list with one dict per incomplete trial, holding the keys
        ``subject``, ``activity``, ``trial`` and ``missing_files`` (the
        expected file names that were not found). Entries follow the
        lexicographic order of the folder names, so the report is
        reproducible across runs and file systems.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. ``FileNotFoundError``
            when ``root_dir`` does not exist).
    """

    def _subdirs(parent):
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # report stable. Note that this is a plain string sort, so
        # "Subject10" comes before "Subject2".
        for entry_name in sorted(os.listdir(parent)):
            entry_path = os.path.join(parent, entry_name)
            if os.path.isdir(entry_path):
                yield entry_name, entry_path

    missing_entries = []

    for subject, subject_path in _subdirs(root_dir):
        for activity, activity_path in _subdirs(subject_path):
            for trial, trial_path in _subdirs(activity_path):
                base_name = f"{subject}{activity}{trial}"
                expected_files = [
                    f"{base_name}.csv",
                    f"{base_name}Camera1.zip",
                    f"{base_name}Camera2.zip",
                ]

                # A set gives constant-time membership tests for the folder content.
                present = set(os.listdir(trial_path))
                missing = [f for f in expected_files if f not in present]

                if missing:
                    missing_entries.append({
                        "subject": subject,
                        "activity": activity,
                        "trial": trial,
                        "missing_files": missing,
                    })

    return missing_entries

# Run the completeness check on the local "dataset" folder and print a report.
missing_data = check_upfall_completeness("dataset")

if missing_data:
    # At least one trial folder is incomplete: list it with the files it lacks.
    print("❌ Missing files detected in the following trials:\n")
    for entry in missing_data:
        trial_label = "/".join([entry["subject"], entry["activity"], entry["trial"]])
        print(f"{trial_label}:")
        for f in entry["missing_files"]:
            print(f"  - {f}")
else:
    print("✅ All trials have required .csv, Camera1.zip, and Camera2.zip files.")


✅ All trials have required .csv, Camera1.zip, and Camera2.zip files.


In [31]:
import pandas as pd
import os
import re 
import numpy as np
import cv2
from matplotlib.pyplot import imread
import matplotlib.pyplot as plt
from zipfile import ZipFile
import shutil

## Load Sensor Data

In [32]:
def read_data(data):
    """Regroup one raw trial table into a frame with (device, channel) columns.

    The caller's frame is left untouched: the index reset and the column
    renames are applied to a new frame, so calling this function again on the
    same input yields the same result.

    Columns are selected by position once the index has been moved into the
    frame with ``reset_index()``:

    * 0      -> timestamp
    * 1-35   -> five wearable blocks of seven channels each
                (ankle, pocket, waist, neck, wrist, in that order)
    * 36     -> EEG signal
    * 37-42  -> six infrared channels
    * 46     -> label
    * 43-45  -> not copied to the result

    NOTE(review): these positions presumably match the UP-Fall CSV layout as
    loaded with ``pd.read_csv(path, skiprows=1)`` -- confirm against the files.

    Args:
        data: pandas DataFrame for one trial. After ``reset_index()`` it must
            expose at least 47 columns.

    Returns:
        A DataFrame with the same rows whose columns form a two-level
        MultiIndex (level names ``'Deviece Name'`` and ``'Channel Name'``);
        the label is appended as the last column ``('Tag', 'Label')``.

    Raises:
        IndexError: If position 46 does not exist after the index reset.
        ValueError: Expected from pandas when a sliced block does not have the
            number of columns being assigned to it.
    """
    # Runtime column labels are kept exactly as before (including their
    # spelling) because downstream code and saved files may rely on them.
    wearable_channels = ['X-axis Accelerometer (g)', 'Y-axis Accelerometer (g)' , 'Z-axis Accelerometer (g)',
                         'Roll Gyroscrope (deg/s)', 'Pitch Gyroscope (deg/s)', 'Yaw Gyroscope (deg/s)' ,'Luminosity (lux)']
    infrared_channels = ['Infrared 1', 'Infrared 2', 'Infrared 3', 'Infrared 4', 'Infrared 5', 'Infrared 6']

    # reset_index()/rename() return new objects, so `data` itself is not modified.
    frame = data.reset_index().rename(columns={
        'level_0': 'Time',
        'x-axis (deg/s).4': 'Raw Brainwave Signal ',
        'Unnamed: 42': 'Tag',
    })

    def _block(start, stop, channel_names):
        # Copy the positional slice before relabelling it so the relabelling
        # never touches `frame`.
        block = frame.iloc[:, start:stop].copy()
        block.columns = list(channel_names)
        return block

    TimeStamp = frame.iloc[:, 0]
    ankle = _block(1, 8, wearable_channels)
    pocket = _block(8, 15, wearable_channels)
    waist = _block(15, 22, wearable_channels)
    neck = _block(22, 29, wearable_channels)
    wrist = _block(29, 36, wearable_channels)
    EEG = frame.iloc[:, 36]
    Infraded = _block(37, 43, infrared_channels)
    label = frame.iloc[:, 46]

    handled_data = pd.concat([TimeStamp, ankle, pocket, waist, neck, wrist, EEG, Infraded],
                             axis = 1,
                             keys = ['TimeStamp','Wearable Ankle', 'Wearable Pocket','Wearable Waist',
                                     'Wearable Neck', 'Wearable Wrist','EEG Headset'  ,'Infrared'],
                             names = ['Deviece Name', 'Channel Name'])
    handled_data[('Tag' , 'Label')]= label

    return handled_data


def concat_data(root_dir='./dataset', n_subjects=17, n_activities=11, n_trials=3):
    """Load every available trial CSV and stack them into one DataFrame.

    For each subject/activity/trial combination the file
    ``{root_dir}/SubjectS/ActivityA/TrialT/SubjectSActivityATrialT.csv`` is
    read with its first line skipped, passed through ``read_data`` and
    collected. Trials are concatenated per activity, activities per subject
    and subjects into the final frame, each time with the folder names as
    ``keys``, so the rows are indexed by subject, activity, trial and the
    per-file row index.

    Missing files are reported with a ``[MISSING FILE]`` line and skipped.
    Subject 8 / Activity 11 / Trials 2 and 3 are always skipped, with a
    ``NULL`` banner printed for each.
    NOTE(review): presumably those two recordings do not exist in the
    dataset -- confirm.

    Args:
        root_dir: Dataset root folder. Defaults to ``'./dataset'``.
        n_subjects: Subjects ``1..n_subjects`` are scanned (default 17).
        n_activities: Activities ``1..n_activities`` are scanned (default 11).
        n_trials: Trials ``1..n_trials`` are scanned (default 3).

    Returns:
        The combined DataFrame, or ``None`` when no CSV could be loaded at all.
    """
    # (subject, activity, trial) combinations that are never loaded.
    excluded = {(8, 11, 2), (8, 11, 3)}

    concat_Sub = []
    list_Sub = []

    for sub_ in range(1, n_subjects + 1):
        Sub = 'Subject' + str(sub_)
        concat_Act = []
        list_Act = []

        for act_ in range(1, n_activities + 1):
            Act = 'Activity' + str(act_)
            concat_Trial = []
            list_Trial = []

            for trial_ in range(1, n_trials + 1):
                Trial = 'Trial' + str(trial_)

                if (sub_, act_, trial_) in excluded:
                    print('----------------------------NULL---------------------------------')
                    continue

                path = f'{root_dir}/{Sub}/{Act}/{Trial}/{Sub}{Act}{Trial}.csv'

                # Only the read itself is guarded, so a FileNotFoundError can
                # only mean "this CSV is absent".
                try:
                    data = pd.read_csv(path, skiprows=1)
                except FileNotFoundError:
                    print(f'[MISSING FILE] Skipping: {path}')
                    continue

                print(f'path : {path} . Shape : ({data.shape[0]},{data.shape[1]})')
                concat_Trial.append(read_data(data))
                list_Trial.append(Trial)

            # Levels with no loaded data are left out; pd.concat rejects an empty list.
            if concat_Trial:
                concat_Act.append(pd.concat(concat_Trial, keys=list_Trial))
                list_Act.append(Act)

        if concat_Act:
            concat_Sub.append(pd.concat(concat_Act, keys=list_Act))
            list_Sub.append(Sub)

    if not concat_Sub:
        # Nothing was loaded: callers must be ready for None.
        return None

    return pd.concat(concat_Sub, keys=list_Sub)


In [33]:
# Build the combined sensor table from every trial CSV found on disk.
# concat_data() returns None when no CSV could be loaded at all.
SUB = concat_data()

path : ./dataset/Subject1/Activity1/Trial1/Subject1Activity1Trial1.csv . Shape : (195,43)
path : ./dataset/Subject1/Activity1/Trial2/Subject1Activity1Trial2.csv . Shape : (193,43)
[MISSING FILE] Skipping: ./dataset/Subject1/Activity1/Trial3/Subject1Activity1Trial3.csv
path : ./dataset/Subject1/Activity2/Trial1/Subject1Activity2Trial1.csv . Shape : (157,43)
path : ./dataset/Subject1/Activity2/Trial2/Subject1Activity2Trial2.csv . Shape : (195,43)
[MISSING FILE] Skipping: ./dataset/Subject1/Activity2/Trial3/Subject1Activity2Trial3.csv
path : ./dataset/Subject1/Activity3/Trial1/Subject1Activity3Trial1.csv . Shape : (192,43)
path : ./dataset/Subject1/Activity3/Trial2/Subject1Activity3Trial2.csv . Shape : (191,43)
[MISSING FILE] Skipping: ./dataset/Subject1/Activity3/Trial3/Subject1Activity3Trial3.csv
path : ./dataset/Subject1/Activity4/Trial1/Subject1Activity4Trial1.csv . Shape : (189,43)
path : ./dataset/Subject1/Activity4/Trial2/Subject1Activity4Trial2.csv . Shape : (191,43)
[MISSING FILE

In [34]:
# to_csv() does not create missing parent folders, so make sure the output
# directory exists first (keeps a fresh checkout runnable end to end).
os.makedirs('Sensor + Image', exist_ok=True)

# index=False: the row index is not written to the file.
SUB.to_csv(path_or_buf='Sensor + Image/sensor.csv', index = False)

In [35]:
# Lookup table from timestamp (first column of SUB) to label (last column of SUB).
times, labels = (SUB.iloc[:, position].values for position in (0, -1))
Time_Label = pd.DataFrame(labels, index=times)
Time_Label

Unnamed: 0,0
2018-07-04T12:04:17.738369,7
2018-07-04T12:04:17.790509,7
2018-07-04T12:04:17.836632,7
2018-07-04T12:04:17.885262,7
2018-07-04T12:04:17.945423,7
...,...
2018-07-06T11:51:21.297629,6
2018-07-06T11:51:21.353778,6
2018-07-06T11:51:21.433490,6
2018-07-06T11:51:21.502675,6


## Load image

In [36]:
from zipfile import ZipFile, BadZipFile

def load_img(start_sub, end_sub, start_act, end_act, start_cam, end_cam, DesiredWidth=64, DesiredHeight=64,
             dataset_dir='./dataset'):
    """Extract camera archives, read each frame in grayscale and resize it.

    For every subject, activity, trial (1..3) and camera in the given inclusive
    ranges, the archive
    ``{dataset_dir}/SubjectS/ActivityA/TrialT/SubjectSActivityATrialTCameraC.zip``
    is extracted to ``CAMERA/SubjectSActivityATrialTCameraC`` (relative to the
    working directory), its image files are loaded, and the extracted folder
    is deleted again -- also when loading fails part-way.

    Skipped, each with a printed message:
      * Subject 8 / Activity 11 / Trials 2 and 3 (always);
      * archives that are missing or not valid zip files;
      * one hard-coded frame of Subject6/Activity10/Trial2/Camera2;
      * any extracted path longer than 70 characters
        (NOTE(review): the reason for this limit is not visible here);
      * any file OpenCV cannot decode (``cv2.imread`` returns ``None``).

    Files are visited in sorted name order so that the result does not depend
    on the file system's directory order.

    Args:
        start_sub, end_sub: Inclusive subject number range.
        start_act, end_act: Inclusive activity number range.
        start_cam, end_cam: Inclusive camera number range.
        DesiredWidth, DesiredHeight: Target size handed to ``ResizeImage``.
        dataset_dir: Dataset root folder. Defaults to ``'./dataset'``.

    Returns:
        ``(IMG, name_img)``: two parallel lists -- the resized images and the
        extracted file path each one was read from.
    """
    IMG = []
    count = 0
    name_img = []
    image_suffix = re.compile(r"\.(jpg|jpeg|png|bmp|tiff)$")

    for sub_ in range(start_sub, end_sub + 1):
        sub = 'Subject' + str(sub_)

        for act_ in range(start_act, end_act + 1):
            act = 'Activity' + str(act_)

            for trial_ in range(1, 3 + 1):
                trial = 'Trial' + str(trial_)

                if (sub_ == 8 and act_ == 11) and (trial_ == 2 or trial_ == 3):
                    print('----------------------------NULL---------------------------------')
                    continue

                for cam_ in range(start_cam, end_cam + 1):
                    cam = 'Camera' + str(cam_)
                    zip_path = f'{dataset_dir}/{sub}/{act}/{trial}/{sub}{act}{trial}{cam}.zip'
                    extract_path = f'CAMERA/{sub}{act}{trial}{cam}'

                    try:
                        with ZipFile(zip_path, 'r') as zipObj:
                            zipObj.extractall(extract_path)
                    except FileNotFoundError:
                        print(f'[MISSING ZIP] Skipping: {zip_path}')
                        continue
                    except BadZipFile:
                        print(f'[BAD ZIP] Cannot open: {zip_path}')
                        # Remove whatever may have been extracted before the failure.
                        shutil.rmtree(extract_path, ignore_errors=True)
                        continue

                    try:
                        for root, dirnames, filenames in os.walk(extract_path):
                            # Sorting in place also fixes the order in which
                            # os.walk descends into sub-folders.
                            dirnames.sort()
                            for filename in sorted(filenames):
                                if not image_suffix.search(filename):
                                    continue
                                filepath = os.path.join(root, filename)
                                count += 1
                                if count % 5000 == 0:
                                    print(f'{filepath} : {count}')
                                if filepath == 'CAMERA/Subject6Activity10Trial2Camera2/2018-07-06T12_03_04.483526.png':
                                    print('----------------------------NO SHAPE---------------------------------')
                                    continue
                                elif len(filepath) > 70:
                                    print(f'{filepath} : Invalid image')
                                    continue

                                # Flag 0 loads the file as a single-channel grayscale image.
                                img = cv2.imread(filepath, 0)
                                if img is None:
                                    # Unreadable file: skip it before recording its
                                    # name so both result lists stay aligned.
                                    print(f'{filepath} : Invalid image')
                                    continue

                                name_img.append(filepath)
                                IMG.append(ResizeImage(img, DesiredWidth, DesiredHeight))
                    finally:
                        # Always drop the temporary extraction folder;
                        # ignore_errors covers an archive that produced no folder.
                        shutil.rmtree(extract_path, ignore_errors=True)

    return IMG, name_img


def handle_name(path_name):
    """Convert extracted frame paths into timestamp strings.

    Each frame file is named after a timestamp in which ``:`` is written as
    ``_`` (for example ``.../2018-07-06T12_03_04.483526.png``). The timestamp
    is recovered from the file name itself -- directory part and extension
    are stripped -- rather than from fixed character offsets, so it no longer
    depends on the overall length of the path or of the file name.

    Args:
        path_name: Iterable of file paths.

    Returns:
        A list with one string per path, in input order, e.g.
        ``'2018-07-06T12:03:04.483526'``.
    """
    handle = []
    for path in path_name:
        # File name without folders and without the final extension.
        stem = os.path.splitext(os.path.basename(path))[0]
        handle.append(stem.replace('_', ':'))
    return handle


def ShowImage(ImageList, nRows = 1, nCols = 2, WidthSpace = 0.00, HeightSpace = 0.00):
    """Show a list of images on an ``nRows`` x ``nCols`` grid.

    Every image is drawn on the axes created from the GridSpec cell with the
    same index, so ``WidthSpace`` / ``HeightSpace`` apply to the axes that are
    actually displayed. The drawing does not rely on a second
    ``plt.subplot(nRows, nCols, i + 1)`` call resolving to that same axes.

    Args:
        ImageList: Sequence of array-like images exposing ``.shape``. Arrays
            with fewer than three dimensions are shown with the gray colormap.
        nRows: Number of grid rows.
        nCols: Number of grid columns.
        WidthSpace: Horizontal spacing between axes (GridSpec ``wspace``).
        HeightSpace: Vertical spacing between axes (GridSpec ``hspace``).

    Returns:
        None. The figure is displayed with ``plt.show()``.

    Note:
        ``ImageList`` must not hold more images than the grid has cells;
        indexing the GridSpec past its last cell is expected to raise.
    """
    from matplotlib import pyplot as plt
    import matplotlib.gridspec as gridspec

    gs = gridspec.GridSpec(nRows, nCols)
    gs.update(wspace=WidthSpace, hspace=HeightSpace) # set the spacing between axes.
    plt.figure(figsize=(20,20))
    for i in range(len(ImageList)):
        ax1 = plt.subplot(gs[i])
        ax1.set_aspect('equal')
        image = ImageList[i]
        if len(image.shape) < 3:
            # 2-D array: single-channel image, render in gray levels.
            ax1.imshow(image, cmap=plt.cm.gray)
        else:
            ax1.imshow(image)
        ax1.set_title("Image " + str(i))
        # Hides ticks, tick labels and the frame of this axes.
        ax1.axis('off')
    plt.show()
    
    
def ResizeImage(IM, DesiredWidth, DesiredHeight):
    """Resize an image with nearest-neighbour interpolation.

    A dimension given as 0 is derived from the other one so that the source
    aspect ratio is kept (the derived value is truncated to an int). When both
    are 0 the input image is returned as is.

    Args:
        IM: Image array; ``IM.shape[0]`` is read as height and ``IM.shape[1]``
            as width.
        DesiredWidth: Target width in pixels, or 0 to derive it from the height.
        DesiredHeight: Target height in pixels, or 0 to derive it from the width.

    Returns:
        The resized image from ``cv2.resize``, or ``IM`` itself when both
        target dimensions are 0.
    """
    source_width = float(IM.shape[1])
    source_height = float(IM.shape[0])

    # Nothing requested: hand the input back untouched.
    if DesiredWidth == 0 and DesiredHeight == 0:
        return IM

    target_width = DesiredWidth
    if target_width == 0:
        target_width = int((source_width * DesiredHeight) / source_height)

    target_height = DesiredHeight
    if target_height == 0:
        target_height = int((source_height * target_width) / source_width)

    # cv2.resize expects the size as (width, height).
    return cv2.resize(IM, (target_width, target_height), interpolation = cv2.INTER_NEAREST)

In [37]:
# Camera 1: subjects 1-17, activities 1-11, frames resized to 32x32.
start_sub, end_sub = 1, 17
start_act, end_act = 1, 11
start_cam = end_cam = 1
DesiredWidth = DesiredHeight = 32

img_1, path_1 = load_img(start_sub, end_sub, start_act, end_act,
                         start_cam, end_cam, DesiredWidth, DesiredHeight)

# Timestamp strings derived from the extracted file paths.
name_1 = handle_name(path_1)

[MISSING ZIP] Skipping: ./dataset/Subject1/Activity1/Trial3/Subject1Activity1Trial3Camera1.zip
[MISSING ZIP] Skipping: ./dataset/Subject1/Activity2/Trial3/Subject1Activity2Trial3Camera1.zip
[MISSING ZIP] Skipping: ./dataset/Subject1/Activity3/Trial3/Subject1Activity3Trial3Camera1.zip
[MISSING ZIP] Skipping: ./dataset/Subject1/Activity4/Trial3/Subject1Activity4Trial3Camera1.zip
[MISSING ZIP] Skipping: ./dataset/Subject1/Activity5/Trial3/Subject1Activity5Trial3Camera1.zip
[MISSING ZIP] Skipping: ./dataset/Subject1/Activity6/Trial3/Subject1Activity6Trial3Camera1.zip
[MISSING ZIP] Skipping: ./dataset/Subject1/Activity7/Trial1/Subject1Activity7Trial1Camera1.zip
[MISSING ZIP] Skipping: ./dataset/Subject1/Activity7/Trial2/Subject1Activity7Trial2Camera1.zip
[MISSING ZIP] Skipping: ./dataset/Subject1/Activity7/Trial3/Subject1Activity7Trial3Camera1.zip
[MISSING ZIP] Skipping: ./dataset/Subject1/Activity8/Trial1/Subject1Activity8Trial1Camera1.zip
[MISSING ZIP] Skipping: ./dataset/Subject1/Activit

In [38]:
size = '32x32'
cam = '1'

# Output files for this camera inside the "Sensor + Image" folder.
image = f'Sensor + Image/image_{cam}.npy'
name = f'Sensor + Image/name_{cam}.npy'

name_1 = handle_name(path_1)

# Store the camera-1 frames and their timestamp names.
np.save(image, img_1)
np.save(name, name_1)

In [39]:
# Camera 2: subjects 1-17, activities 1-11, frames resized to 32x32.
start_sub, end_sub = 1, 17
start_act, end_act = 1, 11
start_cam = end_cam = 2
DesiredWidth = DesiredHeight = 32

img_2, path_2 = load_img(start_sub, end_sub, start_act, end_act,
                         start_cam, end_cam, DesiredWidth, DesiredHeight)

[MISSING ZIP] Skipping: ./dataset/Subject1/Activity1/Trial3/Subject1Activity1Trial3Camera2.zip
[MISSING ZIP] Skipping: ./dataset/Subject1/Activity2/Trial3/Subject1Activity2Trial3Camera2.zip
[MISSING ZIP] Skipping: ./dataset/Subject1/Activity3/Trial3/Subject1Activity3Trial3Camera2.zip
[MISSING ZIP] Skipping: ./dataset/Subject1/Activity4/Trial3/Subject1Activity4Trial3Camera2.zip
[MISSING ZIP] Skipping: ./dataset/Subject1/Activity5/Trial3/Subject1Activity5Trial3Camera2.zip
[MISSING ZIP] Skipping: ./dataset/Subject1/Activity6/Trial3/Subject1Activity6Trial3Camera2.zip
[MISSING ZIP] Skipping: ./dataset/Subject1/Activity7/Trial1/Subject1Activity7Trial1Camera2.zip
[MISSING ZIP] Skipping: ./dataset/Subject1/Activity7/Trial2/Subject1Activity7Trial2Camera2.zip
[MISSING ZIP] Skipping: ./dataset/Subject1/Activity7/Trial3/Subject1Activity7Trial3Camera2.zip
[MISSING ZIP] Skipping: ./dataset/Subject1/Activity8/Trial1/Subject1Activity8Trial1Camera2.zip
[MISSING ZIP] Skipping: ./dataset/Subject1/Activit

In [40]:
size = '32x32'
cam = '2'
image = 'Sensor + Image' + '/' + 'image_' + cam +  '.npy'
name = 'Sensor + Image' + '/' + 'name_' + cam +  '.npy'

# Names must come from the camera-2 paths. Deriving them from path_1 made
# name_2 a copy of name_1, detached from img_2, so any later comparison of
# the two cameras' timestamps passed trivially.
name_2 = handle_name(path_2)

# Store the camera-2 frames and their timestamp names.
np.save(image, img_2)
np.save(name, name_2)

In [41]:
# Frame and name counts per camera, one value per line.
print(len(img_1), len(name_1), len(img_2), len(name_2), sep='\n')

23442
23442
23442
23442


In [42]:
# Flag camera-1 names that do not occur among the camera-2 names.
mask = np.isin(name_1, name_2, invert=True)
red_in1 = np.flatnonzero(mask)

# Remove the flagged entries from both the names and the images.
name_1d = np.delete(name_1, red_in1)
img_1d = np.delete(img_1, red_in1, axis=0)


In [43]:
# Flag camera-2 names that do not occur among the camera-1 names.
mask = np.isin(name_2, name_1, invert=True)
red_in2 = np.flatnonzero(mask)

# Remove the flagged entries from both the names and the images.
name_2d = np.delete(name_2, red_in2)
img_2d = np.delete(img_2, red_in2, axis=0)


In [44]:
# True only when both cameras hold the same timestamps in the same order.
# array_equal also handles arrays of different lengths (it returns False),
# whereas an elementwise `==` requires matching shapes.
np.array_equal(name_1d, name_2d)

True

In [45]:
# Fetch the label of every kept frame through its timestamp string.
# NOTE(review): .loc raises KeyError for a timestamp that is absent from
# Time_Label and returns several rows for a repeated index value -- the length
# printout that follows is the sanity check for this.
label_1, label_2 = (Time_Label.loc[names].values for names in (name_1d, name_2d))

In [46]:
# Images, names and labels should have equal counts for each camera.
print(len(img_1d), len(name_1d), len(label_1),
      len(img_2d), len(name_2d), len(label_2), sep='\n')

23442
23442
23442
23442
23442
23442


In [48]:

# Camera 1: write the aligned images, timestamp names and labels.
cam = '1'
image = f'Sensor + Image/image_{cam}.npy'
name = f'Sensor + Image/name_{cam}.npy'
label = f'Sensor + Image/label_{cam}.npy'

np.save(image, img_1d)
np.save(name, name_1d)
np.save(label, label_1)

# Camera 2: same three files with the camera-2 suffix.
cam = '2'
image = f'Sensor + Image/image_{cam}.npy'
name = f'Sensor + Image/name_{cam}.npy'
label = f'Sensor + Image/label_{cam}.npy'

np.save(image, img_2d)
np.save(name, name_2d)
np.save(label, label_2)