In [None]:
import sys
import os
import numpy as np
from typing import List
import matplotlib.pyplot as plt

import parse
import features
import plot
import classification
from classification import GaussianNaiveBayesClassifier

### Data preprocessing

In [None]:
# Path to the raw WISDM v1.1 accelerometer data file, expected under
# <home>/data/accelerometer/WISDM_ar_v1.1/.
# The home directory is resolved with os.path.expanduser("~") instead of
# os.environ["HOME"], so the lookup does not raise KeyError on systems where
# HOME is not set (e.g. Windows); when HOME is set the result is the same.
RAW_DATA_FILE = os.path.join(
    os.path.expanduser("~"),
    "data",
    "accelerometer",
    "WISDM_ar_v1.1",
    "WISDM_ar_v1.1_raw.txt",
)

# Seed passed to np.random.seed before the user ids are shuffled into folds.
TRAIN_TEST_SHUFFLE_SEED = 6


In [None]:
# Read the raw data file and turn it into timepoint tuples
# (file contents -> timepoint strings -> timepoint tuples).
raw_data = parse.file_to_string(RAW_DATA_FILE)
raw_measurements = parse.timepoint_strings_to_timepoint_tuples(
    parse.raw_data_string_to_timepoint_strings(raw_data)
)

# Collect the user ids and activity labels found in the measurements.
# The tuple is evaluated left to right, so users are extracted before activities.
user_ids, activities = (
    parse.extract_user_set(raw_measurements),
    parse.extract_activity_set(raw_measurements),
)

# Show both collections, activities first, one per line.
print(activities, user_ids, sep="\n")

In [None]:
# Cut the raw measurements into 10-second intervals.
# Durations are given in nanoseconds, as the keyword names of the parser require.
nanoseconds_in_one_second = 1_000_000_000
nanoseconds_in_10_seconds = 10 * nanoseconds_in_one_second
# Largest gap passed to the parser: two seconds.
maximum_gap = 2 * nanoseconds_in_one_second
intervals: dict = parse.intervals_by_user_and_activity(
    raw_measurements,
    interval_duration_in_nanoseconds=nanoseconds_in_10_seconds,
    maximum_gap_in_nanoseconds=maximum_gap,
)

In [None]:
# Calculate features for all data intervals.
#
# The result is stored under the name ``features`` because later cells read it
# from there, but that name is also the imported ``features`` module. Calling
# ``features.vectors_for_intervals`` directly works only on the first run of
# this cell; afterwards ``features`` is the result, not the module, and the
# attribute lookup fails. Importing the module under its own alias here (a
# cheap lookup of the already-loaded module) keeps the cell re-runnable.
import features as features_module

features = features_module.vectors_for_intervals(
    intervals,
    (
        features_module.mean_absolute_magnitude_change_per_second,
        features_module.mean_angle_change_per_second,
    ),
)


In [None]:
# Define the index-shuffling helper and split the user ids into train/test folds.
def _shuffled_sequence(length):
    seq = list(range(length))
    np.random.shuffle(seq)  # Numpy does this in place.
    return tuple(seq)

# Create train-test folds of user ids.
# Seed NumPy's global generator right before shuffling so the index
# permutation is the same on every run.
np.random.seed(TRAIN_TEST_SHUFFLE_SEED)
sequence = _shuffled_sequence(len(user_ids))
# Put the ids in sorted order before applying the shuffled indices. The
# seeded permutation only gives repeatable folds if the ids themselves arrive
# in a fixed order; list(user_ids) would inherit the container's iteration
# order, which for a set of strings can differ between interpreter sessions.
# NOTE(review): assumes user_ids is an unordered collection of mutually
# comparable ids, and that the last argument (3) is the number of folds --
# confirm against parse.extract_user_set and classification.train_test_folds.
folds_user_ids = classification.train_test_folds(sorted(user_ids), sequence, 3)


In [None]:
# Build the train and test feature sets from the first fold only.
# Element 0 of the fold holds the ids used for training, element 1 the ids
# used for testing.
first_fold = folds_user_ids[0]
train_features = parse.collect_dict_values_by_listed_key_contents(
    features, first_fold[0]
)
test_features = parse.collect_dict_values_by_listed_key_contents(
    features, first_fold[1]
)


### Gaussian Naive Bayes

In [None]:
# Build the Gaussian naive Bayes classifier from the training features and
# the activity labels (the class is imported directly at the top of the notebook).
nb_classifier = GaussianNaiveBayesClassifier(train_features, activities)

In [None]:
# Example: predict the class for one hand-picked feature vector.
# NOTE(review): the two values presumably follow the feature order used when
# the vectors were computed -- confirm.
example_feature_vector = (125, 20)
nb_classifier.predict_from_feature_vector(example_feature_vector)

In [None]:
# Predict every test-set example, then plot the resulting confusion matrix.
pairs = nb_classifier.predicted_and_labeled_pairs(test_features)
# confusion_matrix_from_pairs returns the positional arguments that
# plot.confusion_matrix expects, so they are unpacked into the call.
confusion_matrix_arguments = classification.confusion_matrix_from_pairs(pairs)
plot.confusion_matrix(*confusion_matrix_arguments)

### k-Nearest Neighbors

In [None]:
# TODO: instantiate kNN classifier with training data (not implemented yet).


In [None]:
# TODO: example prediction of a class given a feature vector (not implemented yet).


In [None]:
# TODO: make predictions for the test set and plot confusion matrix (not implemented yet).
