In [85]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from imblearn.over_sampling import SMOTE

In [86]:
dataset = Path('WEDA-FALL/dataset/50Hz')
WINDOW_SIZE = 20

In [87]:
walking_dataset = dataset / 'D01'
walking_accel = (f for f in walking_dataset.glob('*_accel.csv') if 'vertical' not in f.name)
walking_gyro = walking_dataset.glob('*_gyro.csv')

In [88]:
jogging_dataset = dataset / 'D02'
jogging_accel = (f for f in jogging_dataset.glob('*_accel.csv') if 'vertical' not in f.name)
jogging_gyro = jogging_dataset.glob('*_gyro.csv')

In [89]:
def preprocess_df(df):
	df['acceleration'] = np.sqrt(
		df['accel_x_list'] ** 2 + df['accel_y_list'] ** 2 + df['accel_z_list'] ** 2
	)
	df['rotationrate'] = np.sqrt(
		df['gyro_x_list'] ** 2 + df['gyro_y_list'] ** 2 + df['gyro_z_list'] ** 2
	)

	df['acceleration'] = df['acceleration'].rolling(window=WINDOW_SIZE).mean()
	df['rotationrate'] = df['rotationrate'].rolling(window=WINDOW_SIZE).mean()

	pitch = np.atan2(
		-df['accel_x_list'], np.sqrt(df['accel_y_list'] ** 2 + df['accel_z_list'] ** 2)
	) * (180 / np.pi)
	gyro_integration = df['gyro_y_list'] * (1 / 50)

	# Calculate the recursive component using cumsum
	# This handles: 0.98 * (previous_angle + gyro_term)
	recursive_term = (0.98 * gyro_integration).cumsum()

	# Add the accelerometer correction term
	accel_term = 0.02 * pitch

	# Combine terms (note: first value needs special handling)
	df['angle_pitch'] = recursive_term + accel_term.cumsum()

	df = df.filter(
		items=['accel_time_list', 'gyro_time_list', 'acceleration', 'rotationrate', 'angle_pitch']
	)
	return df

In [90]:
df_list = []
for acc, gyro in zip(walking_accel, walking_gyro):
	df1 = pd.read_csv(acc)
	df2 = pd.read_csv(gyro)
	df = pd.concat([df1, df2], axis=1)
	df = preprocess_df(df).dropna()
	df['label'] = np.full(len(df), 'walking')
	df_list.append(df)
df_walking = pd.concat(df_list)

In [91]:
df_list = []
for acc, gyro in zip(jogging_accel, jogging_gyro):
	df1 = pd.read_csv(acc)
	df2 = pd.read_csv(gyro)
	df = pd.concat([df1, df2], axis=1)
	df = preprocess_df(df).dropna()
	df['label'] = np.full(len(df), 'jogging')
	df_list.append(df)
df_jogging = pd.concat(df_list)

In [92]:
df_fall = pd.read_csv('fall_data.csv')

df = pd.concat([df_walking, df_jogging, df_fall]).reset_index(drop=True)
# df = pd.concat([df_walking, df_jogging]).reset_index(drop=True)

In [93]:
X = df[['acceleration', 'rotationrate', 'angle_pitch']]
y = df['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)

In [94]:
clf = DecisionTreeClassifier(random_state=42, max_depth=3)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

        fall       0.58      0.88      0.70      9772
     jogging       0.80      0.69      0.74      7363
     walking       0.97      0.66      0.78     12224

    accuracy                           0.74     29359
   macro avg       0.79      0.74      0.74     29359
weighted avg       0.80      0.74      0.74     29359



In [95]:
from collections import defaultdict
from sklearn.tree import _tree

c_features = defaultdict(list)


def tree_to_rules(clf, feature_names):
	tree_ = clf.tree_
	feature_name = [
		feature_names[i] if i != _tree.TREE_UNDEFINED else 'undefined!' for i in tree_.feature
	]

	paths = []
	# the tuple is min, max
	d = defaultdict(lambda: (float('inf'), float('-inf')))

	def recurse(node, path):
		if tree_.feature[node] != _tree.TREE_UNDEFINED:
			name = feature_name[node]
			threshold = tree_.threshold[node]
			threshold = float(threshold)

			d_copy = d.copy()
			# left child
			d[name] = (d_copy[name][0], max(d_copy[name][1], threshold))
			recurse(tree_.children_left[node], path + [(name, '<=', threshold)])
			# right child
			d[name] = (min(d_copy[name][0], threshold), d_copy[name][1])
			recurse(tree_.children_right[node], path + [(name, '>', threshold)])
		else:
			# leaf node
			value = tree_.value[node]
			class_id = value.argmax()
			c_features[class_id].append(d.copy())
			paths.append((path, class_id))

	recurse(0, [])
	return paths


# Extract rules
rules = tree_to_rules(clf, X.columns)

# Convert rules to readable intervals
for conditions, class_id in rules:
	cond_str = ', '.join([f'{a} {b} {c:.2f}' for a, b, c in conditions])
	# print(f'Class {class_id} :', cond_str)

In [None]:
import math


def format_rules(rules_list):
	"""
	Converts a list of decision tree rule dictionaries into a single
	human-readable expression.
	"""
	final_expression = []

	# Process each rule dictionary in the list
	for rule in rules_list:
		conditions = []
		# Process each feature's condition within the rule
		for feature, (min_val, max_val) in rule.items():
			# Clean up the representation of infinity for display
			min_disp = round(min_val, 2) if min_val != math.inf else -math.inf
			max_disp = round(max_val, 2) if max_val != -math.inf else math.inf

			if min_disp == -math.inf and max_disp == math.inf:
				continue  # This condition is always true, so we can ignore it
			elif min_disp == -math.inf:
				conditions.append(f'{feature} < {max_disp}')
			elif max_disp == math.inf:
				conditions.append(f'{feature} > {min_disp}')
			else:
				conditions.append(f'{min_disp} < {feature} < {max_disp}')

		# Join the conditions for a single rule with "AND"
		if conditions:
			final_expression.append(f'( {" AND ".join(conditions)} )')

	# Join all the individual rule expressions with "OR"
	return ' OR\n'.join(final_expression)


# Generate and print the single expression
for i, rule in c_features.items():
	print(i)
	expression = format_rules(rule)
	print(expression)

0
( angle_pitch < 356.68 AND acceleration < 11.55 ) OR
( 69.68 < angle_pitch < 356.68 AND acceleration < 11.55 ) OR
( 69.68 < angle_pitch < 356.68 AND acceleration > 11.55 ) OR
( angle_pitch > 356.68 AND 11.55 < acceleration < 16.44 )
1
( 69.68 < angle_pitch < 356.68 AND acceleration > 11.55 ) OR
( angle_pitch > 356.68 AND acceleration > 11.55 )
2
( 356.68 < angle_pitch < 494.96 AND 11.55 < acceleration < 13.58 ) OR
( angle_pitch > 356.68 AND 11.55 < acceleration < 13.58 )
