In [10]:
import pandas as pd
from google.colab import drive

# Mount Google Drive at /content/drive; the dataset paths used in the later cells live under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
dataset_folder = "/content/drive/MyDrive/Colab Notebooks/fall-detection/data/har-up-spiking-dataset-240/"

# Columns needed to label each recording; the remaining CSV columns are never used,
# so they are not parsed at all (usecols) to save time and memory.
label_columns = ["TimeStamps", "Subject", "Activity", "Trial", "Tag"]
# Columns that together identify one recording.
recording_id_columns = ["Subject", "Activity", "Trial"]

df = (
    pd.read_csv(dataset_folder + "CompleteDataSet.csv", usecols=label_columns)
    # The first data row is an extra header row, not a sample.
    .drop(index=0)
    # Fix the column order explicitly (usecols keeps the file's order).
    .loc[:, label_columns]
    # Convert the columns to the correct data types.
    .astype({"Subject": int, "Activity": int, "Trial": int, "Tag": int})
    .assign(
        TimeStamps=lambda frame: pd.to_datetime(frame["TimeStamps"]),
        # Recording name, e.g. "Subject1Activity2Trial3". Built with vectorised string
        # concatenation instead of a row-wise apply, which calls Python once per row.
        Names=lambda frame: (
            "Subject" + frame["Subject"].astype(str)
            + "Activity" + frame["Activity"].astype(str)
            + "Trial" + frame["Trial"].astype(str)
        ),
    )
    # The identifying columns are now encoded in Names.
    .drop(columns=recording_id_columns)
)

df

In [None]:
# Time elapsed since the first sample of the same recording (recordings are identified by 'Names')
def _elapsed_since_first(stamps):
    """Return each timestamp minus the first timestamp of its group."""
    return stamps - stamps.iloc[0]


stamps_per_recording = df.groupby('Names')['TimeStamps']
df['Duration'] = stamps_per_recording.transform(_elapsed_since_first)

# Keep only the samples that fall within the first 15 seconds of each recording
max_duration = pd.Timedelta(seconds=15)
within_limit = df['Duration'] <= max_duration
df = df[within_limit]

df

In [None]:
# Collapse every recording ('Names') into one row holding the set of distinct tags it contains
def _distinct_tags(tags):
    """Return the distinct values of one recording's 'Tag' column as a set."""
    return set(tags)


tags_per_recording = df.groupby('Names')['Tag']
df = tags_per_recording.agg(_distinct_tags).to_frame()

df


In [None]:
# Activity ids used in the 'Tag' column. The trailing number is copied from the
# dataset notes -- presumably the duration of the activity in seconds; TODO confirm.
#  1  Falling forward using hands       10
#  2  Falling forward using knees       10
#  3  Falling backwards                 10
#  4  Falling sideward                  10
#  5  Falling sitting in empty chair    10
#  6  Walking                           60
#  7  Standing                          60
#  8  Sitting                           60
#  9  Picking up an object              10
# 10  Jumping                           30
# 11  Laying                            60

# Ids 1-5 are the falling activities.
fall_tags = {1, 2, 3, 4, 5}


def _has_fall(tags):
    """Return 1 when the tag set shares at least one id with fall_tags, otherwise 0."""
    return 0 if fall_tags.isdisjoint(tags) else 1


# New column 'Fall': 1 for recordings containing any falling tag, 0 for the rest
df['Fall'] = df['Tag'].map(_has_fall)

df

In [None]:
import os

# Recording name -> fall flag (0/1), taken from the per-recording frame built above.
names_falls_dict = df['Fall'].to_dict()

# One sub-folder per events recording. os.listdir returns entries in arbitrary order,
# so sort them to give labels.csv the same row order on every run.
folder_names = sorted(
    entry
    for entry in os.listdir(dataset_folder)
    if os.path.isdir(os.path.join(dataset_folder, entry))
)

# Each folder name is a recording name followed by a 7-character suffix
# (presumably the camera id, e.g. "Camera1" -- TODO confirm); strip it to get the lookup key.
folder_suffix_length = 7
recording_names = [folder[:-folder_suffix_length] for folder in folder_names]

# Report every folder without a label at once instead of failing on the first one.
unlabelled_folders = [
    folder
    for folder, recording in zip(folder_names, recording_names)
    if recording not in names_falls_dict
]
if unlabelled_folders:
    raise KeyError(f"No fall label found for folders: {unlabelled_folders}")

labels = [names_falls_dict[recording] for recording in recording_names]

labels_df = pd.DataFrame({"folder_name": folder_names, "label": labels})
# os.path.join works whether or not dataset_folder ends with a slash; plain string
# concatenation would write next to the dataset folder if the slash were missing.
labels_df.to_csv(os.path.join(dataset_folder, "labels.csv"), index=False)
labels_df

In [None]:
from matplotlib import pyplot as plt

# Histogram of the 0/1 fall labels; the top and right borders of the axes are hidden
label_axes = labels_df['label'].plot(kind='hist', bins=20, title='Labels')
for side in ('top', 'right'):
    label_axes.spines[side].set_visible(False)

## Zip events folders

In [None]:
import shutil

# Folder to archive. It is deliberately NOT stored in `dataset_folder`: the earlier cells
# rely on that variable ending with a slash, and rebinding it here would silently change
# where they write their files if they are re-run afterwards.
events_folder = "/content/drive/MyDrive/Colab Notebooks/fall-detection/data/har-up-spiking-dataset-240"

# Split off the last path component only (str.replace would strip every occurrence of the name).
parent_folder, file_name = events_folder.rsplit("/", 1)

# Zip the folder; the archive is created in the current working directory as "<file_name>.zip"
zip_path = shutil.make_archive(file_name, 'zip', events_folder)

# Move the archive next to the dataset folder. Giving the full destination file path
# (rather than the directory) lets a re-run replace an existing archive instead of
# raising "Destination path already exists".
shutil.move(zip_path, f"{parent_folder}/{file_name}.zip")