In [40]:
import pandas as pd

In [41]:
df = pd.read_csv("CompleteDataSet.csv")

# Drop the first row (extra header)
df.drop(index=0, inplace=True)

# Select the useful columns
df = df[["TimeStamps", "Subject", "Activity", "Trial", "Tag"]]

# Convert the columns to the correct data types
df["TimeStamps"] = pd.to_datetime(df["TimeStamps"])
df["Subject"] = df["Subject"].astype(int)
df["Activity"] = df["Activity"].astype(int)
df["Trial"] = df["Trial"].astype(int)
df["Tag"] = df["Tag"].astype(int)

# Create Names column and remove the old columns
df["Names"] = df.apply(lambda row: f"Subject{row['Subject']}Activity{row['Activity']}Trial{row['Trial']}", axis=1)
df.drop(columns=["Subject", "Activity", "Trial"], inplace=True)

df

  df = pd.read_csv("CompleteDataSet.csv")


Unnamed: 0,TimeStamps,Tag,Names
1,2018-07-04 12:04:17.738369,7,Subject1Activity1Trial1
2,2018-07-04 12:04:17.790509,7,Subject1Activity1Trial1
3,2018-07-04 12:04:17.836632,7,Subject1Activity1Trial1
4,2018-07-04 12:04:17.885262,7,Subject1Activity1Trial1
5,2018-07-04 12:04:17.945423,7,Subject1Activity1Trial1
...,...,...,...
294674,2018-07-12 12:18:28.783680,11,Subject17Activity11Trial3
294675,2018-07-12 12:18:28.832811,11,Subject17Activity11Trial3
294676,2018-07-12 12:18:28.892470,11,Subject17Activity11Trial3
294677,2018-07-12 12:18:29.025324,11,Subject17Activity11Trial3


In [42]:
# Group by 'Names' and calculate the duration
df['Duration'] = df.groupby('Names')['TimeStamps'].transform(lambda x: x - x.iloc[0])

# Filter out rows where Duration is greater than 15 seconds
df = df[df['Duration'] <= pd.Timedelta(seconds=15)]

df

Unnamed: 0,TimeStamps,Tag,Names,Duration
1,2018-07-04 12:04:17.738369,7,Subject1Activity1Trial1,0 days 00:00:00
2,2018-07-04 12:04:17.790509,7,Subject1Activity1Trial1,0 days 00:00:00.052140
3,2018-07-04 12:04:17.836632,7,Subject1Activity1Trial1,0 days 00:00:00.098263
4,2018-07-04 12:04:17.885262,7,Subject1Activity1Trial1,0 days 00:00:00.146893
5,2018-07-04 12:04:17.945423,7,Subject1Activity1Trial1,0 days 00:00:00.207054
...,...,...,...,...
293903,2018-07-12 12:17:43.782516,11,Subject17Activity11Trial3,0 days 00:00:14.687090
293904,2018-07-12 12:17:43.836660,11,Subject17Activity11Trial3,0 days 00:00:14.741234
293905,2018-07-12 12:17:43.883786,11,Subject17Activity11Trial3,0 days 00:00:14.788360
293906,2018-07-12 12:17:43.945450,11,Subject17Activity11Trial3,0 days 00:00:14.850024


In [43]:
# Group by 'Names' and aggregate 'Tag' values into a set
df = df.groupby('Names').agg({'Tag': lambda x: set(x)})

df


Unnamed: 0_level_0,Tag
Names,Unnamed: 1_level_1
Subject10Activity10Trial1,{10}
Subject10Activity10Trial2,{10}
Subject10Activity10Trial3,{10}
Subject10Activity11Trial1,{11}
Subject10Activity11Trial2,{11}
...,...
Subject9Activity8Trial2,{8}
Subject9Activity8Trial3,{8}
Subject9Activity9Trial1,"{9, 7}"
Subject9Activity9Trial2,"{9, 7}"


In [44]:
# 1 Falling forward using hands 10
# 2 Falling forward using knees 10
# 3 Falling backwards 10
# 4 Falling sideward 10
# 5 Falling sitting in empty chair 10
# 6 Walking 60
# 7 Standing 60
# 8 Sitting 60
# 9 Picking up an object 10
# 10 Jumping 30
# 11 Laying 60

# Create a new column 'Fall' and set it to True if the set contains any of the falling tags
fall_tags = {1, 2, 3, 4, 5}
df['Fall'] = df['Tag'].apply(lambda x: 1 if (len(x.intersection(fall_tags)) > 0) else 0)

df

Unnamed: 0_level_0,Tag,Fall
Names,Unnamed: 1_level_1,Unnamed: 2_level_1
Subject10Activity10Trial1,{10},0
Subject10Activity10Trial2,{10},0
Subject10Activity10Trial3,{10},0
Subject10Activity11Trial1,{11},0
Subject10Activity11Trial2,{11},0
...,...,...
Subject9Activity8Trial2,{8},0
Subject9Activity8Trial3,{8},0
Subject9Activity9Trial1,"{9, 7}",0
Subject9Activity9Trial2,"{9, 7}",0


In [45]:
import os

names_falls_dict = df['Fall'].to_dict()

dataset_folder = "./"
folder_names = [
    folder
    for folder in os.listdir(dataset_folder)
    if os.path.isdir(os.path.join(dataset_folder, folder))
]
labels = [names_falls_dict[folder[:-7]] for folder in folder_names]

labels_df = pd.DataFrame({"folder_name": folder_names, "label": labels})
df.to_csv(f"{dataset_folder}/labels.csv", index=False)
labels_df

Unnamed: 0,folder_name,label
0,Subject10Activity1Trial1Camera1,1
1,Subject10Activity1Trial1Camera2,1
2,Subject10Activity1Trial2Camera1,1
3,Subject10Activity1Trial2Camera2,1
4,Subject10Activity1Trial3Camera1,1
5,Subject10Activity1Trial3Camera2,1
6,Subject10Activity2Trial1Camera1,1
7,Subject10Activity2Trial1Camera2,1
8,Subject10Activity2Trial2Camera1,1
9,Subject10Activity2Trial2Camera2,1
