In [3]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Load the labelled sensor recordings and pick the model inputs.
data = pd.read_csv("./aos_data/merged_phuc_data.csv")
features = ['AccelX', 'AccelY', 'AccelZ', 'GyroX', 'GyroY', 'GyroZ', 'IR']
X = data[features]
y = data['Label'].map({'sleep': 0, 'awake': 1})

# Series.map turns every label that is not a key of the mapping into NaN.
# np.unique would count that NaN as an additional value and let the
# single-class check below pass, so reject unexpected labels explicitly.
unmapped_rows = y.isna()
if unmapped_rows.any():
    unexpected = sorted(data.loc[unmapped_rows, 'Label'].astype(str).unique())
    raise ValueError(
        f"'Label' column contains values other than 'sleep'/'awake': {unexpected}"
    )

# Check for single-class issue
if len(np.unique(y)) < 2:
    raise ValueError("Data contains only one class! Need both 'sleep' and 'awake' samples.")

# Split data first so the held-out rows cannot influence the preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Normalize data: learn min/max from the training rows only, then apply the
# same transform to the test rows. Fitting on the full dataset would leak
# test-set statistics into the reported accuracy.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Fully scaled matrix, kept under its original name for any later code that
# still refers to it.
X_scaled = scaler.transform(X)

# Train Random Forest (small and shallow; the trees are exported below as
# C arrays).
model = RandomForestClassifier(n_estimators=5, max_depth=4, random_state=42)
model.fit(X_train, y_train)

# Evaluate
accuracy = model.score(X_test, y_test)
print(f"Accuracy: {accuracy:.2f}")

# Export min/max values so the consumer of the header can reproduce the
# scaling the model was trained with. The include guard mirrors the one
# written to random_forest_model.h.
with open("scaler_values.h", "w") as f:
    f.write("#ifndef SCALER_VALUES_H\n")
    f.write("#define SCALER_VALUES_H\n\n")
    f.write("// Scaler values\n")
    for feature, feature_min, feature_max in zip(
        features, scaler.data_min_, scaler.data_max_
    ):
        f.write(f"const float {feature}_min = {feature_min}f;\n")
        f.write(f"const float {feature}_max = {feature_max}f;\n")
    f.write("\n#endif\n")

# Function to export a single tree as C array
def tree_to_c_array(tree, tree_id):
    """Render one fitted decision tree as C array definitions.

    Args:
        tree: Low-level tree object (an estimator's ``tree_`` attribute)
            exposing ``node_count``, ``children_left``, ``children_right``,
            ``feature``, ``threshold`` and ``value``.
        tree_id: Identifier embedded in every generated C symbol name
            (``tree_<id>_...``).

    Returns:
        A string of C source defining the node count, the child index
        arrays, the split feature/threshold arrays and the majority class
        of every node.

    Raises:
        ValueError: If ``tree.value`` is not a single-output array of shape
            ``(n_nodes, 1, n_classes)``.
    """
    n_nodes = tree.node_count
    # tree.value is laid out as (n_nodes, n_outputs, n_classes); axis 1 is the
    # output index, not the class index.
    value = np.asarray(tree.value)

    # Debugging: Print value shape to understand the layout
    print(f"Tree {tree_id} value shape: {value.shape}")

    if value.ndim != 3 or value.shape[1] != 1:
        raise ValueError(
            f"Tree {tree_id}: expected a single-output value array of shape "
            f"(n_nodes, 1, n_classes), got {value.shape}"
        )

    if value.shape[2] < 2:
        # The tree only ever saw one class, so every node is class 0.
        print(f"Warning: Tree {tree_id} has only one class in value array.")

    # Majority class per node: argmax over the class axis. Ties resolve to the
    # lowest class index.
    classes = np.argmax(value[:, 0, :], axis=1).tolist()

    def c_array(c_type, suffix, items):
        # One `const <type> tree_<id>_<suffix>[] = { a, b, ... };` line.
        initializer = ', '.join(map(str, items))
        return f"const {c_type} tree_{tree_id}_{suffix}[] = {{ {initializer} }};\n"

    return "".join([
        f"// Tree {tree_id}\n",
        f"const int tree_{tree_id}_n_nodes = {n_nodes};\n",
        c_array("int", "children_left", tree.children_left),
        c_array("int", "children_right", tree.children_right),
        c_array("int", "feature", tree.feature),
        c_array("float", "threshold", tree.threshold),
        c_array("int", "class", classes),
    ])

# Write the whole forest into one include-guarded C header: the tree count
# first, then the arrays of each tree separated by a blank line.
with open("random_forest_model.h", "w") as header:
    header.write(
        "#ifndef RANDOM_FOREST_MODEL_H\n"
        "#define RANDOM_FOREST_MODEL_H\n\n"
        f"const int n_trees = {model.n_estimators};\n\n"
    )
    for tree_index, estimator in enumerate(model.estimators_):
        # estimator.tree_ is the low-level structure tree_to_c_array expects.
        header.write(tree_to_c_array(estimator.tree_, tree_index) + "\n")
    header.write("#endif\n")

print("Model exported to random_forest_model.h and scaler_values.h")

Accuracy: 0.71
Tree 0 value shape: (27, 1, 2)
Tree 1 value shape: (27, 1, 2)
Tree 2 value shape: (31, 1, 2)
Tree 3 value shape: (23, 1, 2)
Tree 4 value shape: (23, 1, 2)
Model exported to random_forest_model.h and scaler_values.h
