In [None]:
import sys
import os
import numpy as np
from typing import List
import matplotlib.pyplot as plt

import parse
import features
import plot
import classification

In [None]:
# Path to the raw data file, located under the current user's home directory.
# os.path.expanduser("~") is used instead of os.environ["HOME"] so the lookup
# does not raise KeyError where HOME is unset (e.g. on Windows); when HOME is
# set on POSIX the resulting path is the same as before.
RAW_DATA_FILE = os.path.join(
    os.path.expanduser("~"),
    "data",
    "accelerometer",
    "WISDM_ar_v1.1",
    "WISDM_ar_v1.1_raw.txt",
)

# Plotting options
FLAG_PLOT = True

# Random number generator seeds
TRAIN_TEST_SHUFFLE_SEED = 5


In [None]:
# Read the raw file into a string, split it into timepoint strings, and
# convert those into timepoint tuples.
raw_data = parse.file_to_string(RAW_DATA_FILE)
timepoint_strings = parse.raw_data_string_to_timepoint_strings(raw_data)
raw_measurements = parse.timepoint_strings_to_timepoint_tuples(timepoint_strings)

# Pull the user and activity sets out of the parsed measurements and show them.
user_ids = parse.extract_user_set(raw_measurements)
activities = parse.extract_activity_set(raw_measurements)
print(activities)
print(user_ids)

In [None]:
# Group the raw measurements into 10-second intervals, passing a maximum
# gap of 2 seconds to the parser. All durations are expressed in nanoseconds.
nanoseconds_in_one_second = 10 ** 9
nanoseconds_in_10_seconds = 10 * nanoseconds_in_one_second
intervals: dict = parse.intervals_by_user_and_activity(
    raw_measurements,
    interval_duration_in_nanoseconds=nanoseconds_in_10_seconds,
    maximum_gap_in_nanoseconds=2 * nanoseconds_in_one_second,
)

In [None]:
# Create train and test sets.
def _shuffled_sequence(length):
    seq = list(range(length))
    np.random.shuffle(seq)  # Numpy does this in place.
    return tuple(seq)

# Create train-test folds
# Seed NumPy's global random state so the shuffled index sequence is repeatable.
np.random.seed(TRAIN_TEST_SHUFFLE_SEED)
sequence = _shuffled_sequence(len(user_ids))
# Pass the user ids in sorted order rather than list(user_ids): user_ids
# appears to be a set (it comes from parse.extract_user_set -- confirm), and
# set iteration order is not controlled by the seed above, so it could make
# the folds differ between runs even with a fixed shuffle seed.
# NOTE(review): the trailing 3 is presumably the number of folds -- confirm
# against classification.train_test_folds.
folds = classification.train_test_folds(sorted(user_ids), sequence, 3)


Classify a period of activity as either active or inactive

Pick a decision boundary based on observed data

Evaluate accuracy on test data