In [None]:
# Shell command (IPython `!` syntax): clone the course repository into ./digag2022.
# A later cell changes the working directory into its LabAccelerometer folder.
!git clone https://github.com/referreira-wisc/digag2022.git

In [None]:
import os

# Enter the lab folder inside the cloned repository.
# The guard keeps this cell safe to re-run: once the working directory is
# already the lab folder, the relative path below no longer resolves and an
# unconditional chdir would raise FileNotFoundError.
lab_dir = os.path.join('digag2022', 'LabAccelerometer')
if not os.path.normpath(os.getcwd()).endswith(os.path.normpath(lab_dir)):
  # Still raises FileNotFoundError if the repository was never cloned.
  os.chdir(lab_dir)

### Imports

In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt

### Load Excel file

In [None]:
# Read the accelerometer spreadsheet into a DataFrame. Later cells use its
# 'AccX', 'AccY', 'AccZ', 'Label' and 'Person' columns.
data = pd.read_excel('Accelerometer_HW03.xlsx')
# A bare name on the last line of a cell makes the notebook display it.
data

### Create feature (X) and label (y) NumPy arrays

In [None]:
# Feature matrix: one row per accelerometer sample, one column per axis.
feature_columns = ['AccX', 'AccY', 'AccZ']
X = np.array(data[feature_columns])
# Target vector: the label recorded for each sample.
y = np.array(data['Label'])
# Show the shapes first, then the array contents.
for arr in (X, y):
  print(arr.shape)
for arr in (X, y):
  print(arr)

### Train Random Forest and perform leave-one-person-out cross-validation (AccX, AccY, and AccZ as features)

In [None]:
# Leave-one-person-out cross-validation on the raw per-sample features:
# each person in turn is held out as the test set and the model is trained
# on everybody else.
person_list = data['Person'].unique()
accuracies = []
for person in person_list:
  # Boolean masks over the rows of X / y; training rows are simply "not test".
  test_ids = np.array(data['Person'] == person)
  train_ids = ~test_ids
  # Random Forest with default hyperparameters. The seed is fixed so the
  # printed accuracies are the same on every run of the notebook.
  rf = RandomForestClassifier(random_state=0)
  # Train on everyone except the held-out person
  rf.fit(X[train_ids], y[train_ids])
  # Predict the held-out person's samples and score them
  ypred = rf.predict(X[test_ids])
  accuracy = np.mean(ypred == y[test_ids])
  accuracies.append(accuracy)
  print(f'Accuracy on {person}: {accuracy:.3f}')
print(f'Mean accuracy: {np.mean(accuracies):.3f}')

### Group AccX, AccY, and AccZ values into overlapping sliding windows (window size 10 in this case)

In [None]:
def prepare_data(window_size, df=None):
  """Turn per-sample accelerometer rows into overlapping sliding windows.

  Windows are built separately for each person (so no window mixes two
  people), advance one row at a time, and are flattened to a single feature
  vector of `window_size * 3` values ordered
  [AccX_0, AccY_0, AccZ_0, AccX_1, ...]. The label of a window is the most
  frequent label among its rows; ties go to the label that sorts first.

  Args:
    window_size: number of consecutive rows per window (must be >= 1).
    df: DataFrame with 'Person', 'AccX', 'AccY', 'AccZ' and 'Label' columns.
      Defaults to the notebook-level `data` frame.

  Returns:
    (X_all, y_all, person_all): NumPy arrays with one entry per window
    holding the flattened features, the majority label, and the person the
    window came from. A person with fewer than `window_size` rows
    contributes no windows.

  Raises:
    ValueError: if `window_size` is smaller than 1.
  """
  if window_size < 1:
    raise ValueError(f'window_size must be a positive integer, got {window_size}')
  if df is None:
    df = data
  X_all = []
  y_all = []
  person_all = []
  # sort=False keeps persons in order of first appearance, and grouping
  # filters the frame once per person instead of once per column selection.
  for person, rows in df.groupby('Person', sort=False):
    X_person = rows[['AccX', 'AccY', 'AccZ']].to_numpy()
    y_person = np.array(rows['Label'].tolist())
    for start in range(len(X_person) - window_size + 1):
      stop = start + window_size
      X_all.append(X_person[start:stop].flatten())
      # Majority vote; argmax returns the first maximum, i.e. the label that
      # sorts first when several labels are equally frequent.
      labels, counts = np.unique(y_person[start:stop], return_counts=True)
      y_all.append(labels[np.argmax(counts)])
      person_all.append(person)
  X_all = np.array(X_all)
  y_all = np.array(y_all)
  person_all = np.array(person_all)
  print(f'X_all shape: {X_all.shape}')
  print(f'y_all shape: {y_all.shape}')
  print(f'person_all shape: {person_all.shape}')
  return X_all, y_all, person_all

# Build the windowed dataset for a single window size; prepare_data prints
# the shapes of the three arrays it returns.
window_size = 10
X_all, y_all, person_all = prepare_data(window_size=window_size)

### Train Random Forest and perform leave-one-person-out cross-validation (windows of AccX, AccY, and AccZ as features)

In [None]:
def train_random_forest(X_all, y_all, person_all, random_state=0):
  """Leave-one-person-out cross-validation of a Random Forest.

  Each distinct person in `person_all` is held out once as the test set
  while the model is trained on all remaining rows. Persons are taken from
  `person_all` itself (in order of first appearance), so a person who has no
  rows in the dataset is never used as an empty test fold.

  Args:
    X_all: feature matrix, one row per example.
    y_all: label for each row of `X_all`.
    person_all: person identifier for each row of `X_all`.
    random_state: seed passed to RandomForestClassifier so repeated runs
      give the same accuracies; pass None for an unseeded forest.

  Returns:
    Mean of the per-person accuracies (each person weighted equally).

  Raises:
    ValueError: if fewer than two distinct persons are present, because
      there would be nothing left to train on.
  """
  X_all = np.asarray(X_all)
  y_all = np.asarray(y_all)
  person_all = np.asarray(person_all)
  # np.unique sorts its result; re-order by first occurrence to keep the
  # order in which persons appear in the data.
  _, first_seen = np.unique(person_all, return_index=True)
  person_list = person_all[np.sort(first_seen)]
  if len(person_list) < 2:
    raise ValueError('leave-one-person-out needs at least two persons with data')
  accuracies = []
  for person in person_list:
    test_ids = person_all == person
    train_ids = ~test_ids
    # Random Forest with default hyperparameters apart from the seed
    rf = RandomForestClassifier(random_state=random_state)
    # Train on everyone except the held-out person
    rf.fit(X_all[train_ids], y_all[train_ids])
    # Predict the held-out person's rows and score them
    ypred = rf.predict(X_all[test_ids])
    accuracy = np.mean(ypred == y_all[test_ids])
    accuracies.append(accuracy)
    print(f'Accuracy on {person}: {accuracy:.3f}')
  mean_accuracy = np.mean(accuracies)
  print(f'Mean accuracy: {mean_accuracy:.3f}')
  return mean_accuracy

print(f'Window size: {window_size}')
# The function prints each person's accuracy and the mean itself; assigning
# the returned mean to `_` keeps the notebook from echoing it again.
_ = train_random_forest(X_all, y_all, person_all)

### Try a few different window sizes

In [None]:
# Repeat windowing + cross-validation for several window sizes and collect the
# mean accuracy of each; `window_sizes` and `accuracies_per_window_size` are
# read by the plotting cell.
# NOTE: the loop rebinds the notebook-level names `window_size`, `X_all`,
# `y_all` and `person_all`; after it finishes they hold the values for the
# last window size in the list.
accuracies_per_window_size = []
window_sizes = [1, 3, 5, 10, 20, 50, 100, 200]
for window_size in window_sizes:
  print(f'Window size: {window_size}')
  X_all, y_all, person_all = prepare_data(window_size=window_size)
  mean_acc = train_random_forest(X_all, y_all, person_all)
  accuracies_per_window_size.append(mean_acc)
  # Blank lines to separate the printed output of consecutive window sizes
  print('\n')

### Plot mean accuracy by window size

In [None]:
# Bar chart of the mean leave-one-person-out accuracy for each window size.
fig, ax = plt.subplots()
# Window sizes are converted to strings so the bars are evenly spaced
# categories rather than being positioned on a numeric axis.
ax.bar(np.array(window_sizes, dtype=str), accuracies_per_window_size)
# Zoom the y-axis onto the observed range (with a 0.01 margin) so the
# differences between window sizes are visible.
ax.set_ylim([min(accuracies_per_window_size) - 0.01, max(accuracies_per_window_size) + 0.01])
ax.set_xlabel('Window size (samples)')
ax.set_ylabel('Mean leave-one-person-out accuracy')
ax.set_title('Random Forest accuracy by window size')
plt.show()