In [8]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import seaborn as sn

In [9]:
# Dataset metadata and notebook-wide constants

# The three acceleration columns used as features
CORE_FEATURES = ["x_accel", "y_accel", "z_accel"]
# Column names assigned to the raw CSV files: a leading `_idx` column,
# the feature columns, and finally the `target` label column
COLS = ["_idx", *CORE_FEATURES, "target"]

# Human-readable description for each code in the `target` column
TARGET_LABELS = {
    1: "Working at Computer",
    2: "Standing Up, Walking and Going updown stairs",
    3: "Standing",
    4: "Walking",
    5: "Going UpDown Stairs",
    6: "Walking and Talking with Someone",
    7: "Talking while Standing",
}
# Target codes that have a label, i.e. 1-7
VALID_TARGETS = list(TARGET_LABELS)

# Sampling rate: data recorded at 52 Hz (52 rows per second)
FREQ = 52

# Where the data lives: every *.csv file directly inside ./data, resolved
# against the notebook's current working directory.
DATA_DIR = os.path.join(os.getcwd(), 'data')
# os.listdir returns entries in arbitrary order; sort the names so the files
# are always read (and later concatenated) in the same, reproducible order.
CSV_FILES = [os.path.join(DATA_DIR, f) for f in sorted(os.listdir(DATA_DIR)) if f.endswith('.csv')]

In [10]:
def standardize(df, cols):
    """
    Z-score the selected columns of `df`.

    Every column in `cols` is shifted by its own mean and divided by its own
    standard deviation (as computed by pandas' `.std()`), giving data with
    mean 0 and standard deviation 1. Only the selected columns are returned;
    `df` itself is left untouched.
    """
    selected = df[cols]
    centered = selected - selected.mean()
    return centered / selected.std()


def root_mean_square(df, cols):
    """
    Row-wise root mean square over the columns in `cols`.

    For each row: sqrt((c1^2 + c2^2 + ... + cn^2) / n), where c1..cn are the
    row's values in the selected columns (e.g. x, y and z). Returns one value
    per row, indexed like `df`.
    """
    squared = np.square(df[cols])
    mean_of_squares = squared.mean(axis=1)
    return np.sqrt(mean_of_squares)

In [11]:
# Load every CSV, labelling the columns with COLS but keeping only the
# feature columns plus the target (the `_idx` column is not read).
dfs = [
    pd.read_csv(csv_path, names=COLS, usecols=[*CORE_FEATURES, 'target'])
    for csv_path in CSV_FILES
]
# Stack all files into one frame. reset_index() is called without drop=True,
# so each row's previous per-file index is kept as an `index` column.
master = pd.concat(dfs).reset_index()

In [12]:
# Keep only the rows whose target code is one of VALID_TARGETS (1-7)
master_valid = master.loc[
    master['target'].isin(VALID_TARGETS)
]