In [None]:
import sys
import os
import numpy as np
from typing import List
import matplotlib.pyplot as plt

import parse
import features
import plot
import classification


In [None]:
# Specify path to raw data file.
# The home directory is resolved with os.path.expanduser rather than
# os.environ["HOME"], so the lookup does not raise KeyError on systems where
# the HOME variable is not defined (e.g. Windows).
RAW_DATA_FILE = os.path.join(
    os.path.expanduser("~"),
    "data",
    "accelerometer",
    "WISDM_ar_v1.1",
    "WISDM_ar_v1.1_raw.txt",
)

# Plotting options
# Plotting cells are guarded by this flag; leave it False to skip them.
SHOW_PLOTS = False


In [None]:
# Read and parse data from file.
raw_data = parse.file_to_string(RAW_DATA_FILE)
timepoint_strings = parse.raw_data_string_to_timepoint_strings(raw_data)
raw_measurements = parse.timepoint_strings_to_timepoint_tuples(timepoint_strings)

# Extract the user and activity collections from the parsed measurements and
# print them (activities first, then users).
user_ids = parse.extract_user_set(raw_measurements)
activities = parse.extract_activity_set(raw_measurements)
print(activities, user_ids, sep="\n")

In [None]:
# Element 2 of every measurement tuple, collected into one array, followed by
# the difference between each value and its predecessor.
raw_times = np.array([measurement[2] for measurement in raw_measurements])
raw_time_differences = np.diff(raw_times)

In [None]:
# Plot measurement times (in nanoseconds) for whole data set.
if SHOW_PLOTS:
    row_numbers = np.arange(0, len(raw_times))
    figure, axis = plot.overlay_series(row_numbers, [raw_times])

In [None]:
# One array per acceleration series, built from elements 3, 4 and 5 of every
# measurement tuple (in that order).
raw_accelerations = [
    np.array([measurement[index] for measurement in raw_measurements])
    for index in (3, 4, 5)
]

In [None]:
# Plot segment of raw acceleration data against row number.
if SHOW_PLOTS:
    start, finish = 196400, 196800
    segment = slice(start, finish)
    figure, axis = plot.overlay_series(
        np.arange(0, len(raw_times))[segment],
        [series[segment] for series in raw_accelerations],
    )

In [None]:
# Parse raw data into 10 second intervals.
# The maximum gap passed to the parser is two seconds.
nanoseconds_in_one_second = 1_000_000_000
nanoseconds_in_10_seconds = 10 * nanoseconds_in_one_second
intervals: dict = parse.intervals_by_user_and_activity(
    raw_measurements,
    interval_duration_in_nanoseconds=nanoseconds_in_10_seconds,
    maximum_gap_in_nanoseconds=2 * nanoseconds_in_one_second,
)

In [None]:
# Count number of intervals for each user and activity
all_counts = parse.count_intervals(intervals)
activity_counts = parse.count_intervals_per_activity(intervals)
user_counts = parse.count_intervals_per_user(intervals)

# Show the per-activity counts first, then the per-user counts.
print(activity_counts, user_counts, sep="\n")

In [None]:
# Select random examples of each activity.

sampled_users = 33
samples_per_user = 1

np.random.seed(42)
examples = dict()

# Sort before drawing: if user_ids / activities are sets (as the extract_*_set
# helper names suggest), their iteration order is not guaranteed to be stable
# between interpreter sessions, and the seed alone would then not make the
# selection reproducible.
candidate_users = sorted(user_ids)

# Draw distinct users. Sampling with replacement would repeat some users, and
# a repeated user only overwrites the example already stored for it.
random_users = np.random.choice(
    candidate_users,
    min(sampled_users, len(candidate_users)),
    replace=False,
)

for activity in sorted(activities):
    for user in random_users:
        candidates = intervals[(user, activity)]
        # Only sample when enough intervals exist to pick distinct ones.
        if len(candidates) >= samples_per_user:
            examples[(user, activity)] = np.random.choice(
                candidates, samples_per_user, replace=False
            )


In [None]:
# Show the sampled example intervals for the two stair activities.
if SHOW_PLOTS:
    plot.intervals_by_activity(examples, ["Upstairs", "Downstairs"])

In [None]:
# Show the sampled example intervals for walking and jogging.
if SHOW_PLOTS:
    plot.intervals_by_activity(examples, ["Walking", "Jogging"])

In [None]:
# Show the sampled example intervals for sitting and standing.
if SHOW_PLOTS:
    plot.intervals_by_activity(examples, ["Sitting", "Standing"])

In [None]:
# Calculate features for all data intervals
# The two feature functions are passed in this order: magnitude change first,
# angle change second.
feature_functions = (
    features.mean_absolute_magnitude_change_per_second,
    features.mean_angle_change_per_second,
)
interval_features = features.vectors_for_intervals(intervals, feature_functions)


In [None]:
# Functions used to plot histograms of features.

def collect_dict_values_by_activity(dictionary, activity):
    """Return the entries of ``dictionary`` whose key contains ``activity``.

    Membership is tested with ``activity in key``, so for tuple keys any
    element equal to ``activity`` matches. The result is a new dict; the
    input is not modified and the original entry order is kept.
    """
    return {
        key: value
        for key, value in dictionary.items()
        if activity in key
    }


def plot_feature_histograms_for_activities(interval_features, activities, bins):
    """Draw one normalised histogram per activity and feature vector.

    For every activity, the matching entries of ``interval_features`` are
    selected with ``collect_dict_values_by_activity`` and passed to
    ``features.extract_vectors_from_dict``; each vector it returns is plotted
    in its own figure, titled ``"<activity> x_<index>"``.

    Args:
        interval_features: dict of feature data; keys are tested with
            ``activity in key`` (presumably (user, activity) tuples - confirm
            against ``features.vectors_for_intervals``).
        activities: iterable of activity names to plot.
        bins: forwarded unchanged to ``plt.hist`` as its ``bins`` argument.
    """
    for activity in activities:
        activity_intervals = collect_dict_values_by_activity(interval_features, activity)
        feature_vectors = features.extract_vectors_from_dict(activity_intervals)
        for i, vector in enumerate(feature_vectors):
            plt.figure(figsize=(15, 5))
            # `normed` was removed from matplotlib's hist; `density=True` is
            # the replacement that normalises the histogram to unit area.
            plt.hist(
                vector,
                bins=bins,
                density=True, facecolor='blue', alpha=0.5)
            plt.title(activity + " x_" + str(i))
            plt.show()


In [None]:
# Histogram every feature for every activity, using 40 bins.
if SHOW_PLOTS:
    plot_feature_histograms_for_activities(interval_features, activities, 40)

In [None]:
# Functions used for scatter plots of features    

def feature_scatter_for_activities(interval_features, activities, colours,
                                   xlim=(0, 200), ylim=(0, 40)):
    """Scatter the first two feature vectors of each activity on one square plot.

    For every activity, the matching entries of ``interval_features`` are
    selected with ``collect_dict_values_by_activity`` and passed to
    ``features.extract_vectors_from_dict``; element 0 of the result is plotted
    on the x axis and element 1 on the y axis.

    Args:
        interval_features: dict of feature data; keys are tested with
            ``activity in key``.
        activities: iterable of activity names; each gets its own legend entry.
        colours: non-empty sequence of matplotlib colours. Colours are reused
            cyclically when there are more activities than colours.
        xlim: (min, max) limits of the x axis. Defaults to (0, 200).
        ylim: (min, max) limits of the y axis. Defaults to (0, 40).
    """
    fig, ax = plt.subplots(figsize=(10, 10))
    for i, activity in enumerate(activities):
        activity_intervals = collect_dict_values_by_activity(interval_features, activity)
        x = features.extract_vectors_from_dict(activity_intervals)
        # Wrap around the colour list instead of raising IndexError when the
        # number of activities exceeds the number of colours supplied.
        colour = colours[i % len(colours)]
        ax.scatter(x[0], x[1], c=colour, alpha=0.4, marker='.', label=activity)
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    # Scale the axes so the plotted data region is square regardless of the
    # differing x and y ranges.
    x0, x1 = ax.get_xlim()
    y0, y1 = ax.get_ylim()
    ax.set_aspect(abs(x1 - x0) / abs(y1 - y0))
    ax.set_xlabel("mean absolute magnitude change per second")
    ax.set_ylabel("mean angle change per second")
    ax.legend()
    plt.show()
    

In [None]:
if SHOW_PLOTS:
    # Colours are assigned by position, so pass the activities in sorted
    # order: if `activities` is a set, its iteration order is not guaranteed
    # to be stable between sessions, and the colour used for each activity
    # would then change from run to run.
    feature_scatter_for_activities(
        interval_features,
        sorted(activities),
        ['blue', 'red', 'purple', 'gold', 'green', 'cyan']
    )