# Final Assignment, Part I

* Name: _WISDM Smartphone and Smartwatch Activity and Biometrics Dataset Data Set_
* Source: https://archive.ics.uci.edu/ml/datasets/WISDM+Smartphone+and+Smartwatch+Activity+and+Biometrics+Dataset+
* License: Must cite:
> Smartphone and Smartwatch-Based Biometrics Using Activities of Daily Living. IEEE Access, 7:133190-133202, Sept. 2019.

* Filesize:
 * Total Zip: 302MB
 * Data: 1.1GB (582MB + 205MB + 196MB + 109MB)
* Data File Count: 204 (51 * 4)
* Number of Instances: 15630426 

In [1]:
%matplotlib widget

from pathlib import Path

import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

import numpy as np
import pandas as pd

# Thanks to `ImportanceOfBeingErnest` from https://stackoverflow.com/questions/47404653/pandas-0-21-0-timestamp-compatibility-issue-with-matplotlib
pd.plotting.register_matplotlib_converters()

input_dirpath = Path('../data/raw/wisdm/raw')
processed_dirpath = Path('../data/processed/wisdm/')
ref_dirpath = Path('../references')

activity_key_path = ref_dirpath / 'wisdm_activity_key.txt'

DEBUG=True
def release_plt_mem():
    if not DEBUG:
        return
    try:
        # release memory if needed
        plt.close('all')
    except:
        pass


In [2]:
activity_key = {v: k for k, v in [line.strip().split(' = ') for line in activity_key_path.read_text().strip().split('\n')]}
# '| Keys | Activity |\n|:-:|:-:|' + '\n'.join(['|{}|{}|'.format(k, v) for k, v in activity_key.items()])

# Data Information
> from Documentation

| Keys | Activity |
|:-:|:-:|
|A|walking|
|B|jogging|
|C|stairs|
|D|sitting|
|E|standing|
|F|typing|
|G|teeth|
|H|soup|
|I|chips|
|J|pasta|
|K|drinking|
|L|sandwich|
|M|kicking|
|O|catch|
|P|dribbling|
|Q|writing|
|R|clapping|
|S|folding|

| Field name | Type | Description | Range |
|:--|:--|:--|:--|
| Subject-id |  Symbolic numeric identififier. | Uniquely identifyies the subject. | 1600-1650. |
| Activity code | Symbolic single letter. | Identifies a specific activity as listed in Table 2. | A-S (no “N” value) |
| Timestamp  | Integer. | Linux time | >= 1-1-1970 00\:00\:00.0+00\:00 |
| x | Numeric: real.  | Sensor value for x axis. | May be positive or negative. |
| y | Numeric: real.  | Sensor value for y axis. | May be positive or negative. |
| z | Numeric: real.  | Sensor value for z axis. | May be positive or negative. |

In [3]:
# process data for ease of access - only run once
            
def concat_subject_data_by_device_type(reimport=False):
    """ Construct files containing all subjects and activities for each device and sensor type. """
    
    try:
        # This will build a dictionary for the files
        # This can take awhile as the files are large
        data_filepaths = list(processed_dirpath.glob('wisdm_*.csv'))
        
        if not reimport and len(list(data_filepaths)) == 4:
            return {f.stem.replace('wisdm_', ''): pd.read_csv(f, parse_dates=['timestamp']) for f in data_filepaths}
    except FileNotFoundError:
        pass
    
    import zipfile

    df_dict = {}
    # from data documentation
    headers = ['subject_id', 'activity_code', 'timestamp', 'x','y','z']

    for f in input_dirpath.glob('**/*.zip'):
        unzip_dirpath = f.parent / f.stem
        if not unzip_dirpath.exists():
            with zipfile.ZipFile(str(f),'r') as fd:
                fd.extractall(str(f.parent))

    for f in input_dirpath.glob('raw/**/*.txt'):
        if not any([name in str(f) for name in ['watch', 'phone']]):
            continue

        split_path = f.parts
        device_type, sensor_type  = split_path[-3:-1]
        data_src = '{}_{}'.format(device_type, sensor_type)

        filename = output_dirpath / 'wisdm_{}.csv'.format(data_src)
        if filename.exists():
            continue

        new_df = pd.read_csv(f, names=headers, parse_dates=['timestamp'], infer_datetime_format=False)
        new_df['timestamp'] = [pd.to_datetime(int(ts), utc=True) for ts in new_df['timestamp']]
        
        # remove idiosyncratic endline symbol
        new_df.iloc[:,-1] = new_df.iloc[:,-1].str.replace(';', '')
        
        if dfs.get(data_src) is None:
            df_dict[data_src] = new_df
        else:
            df_dict[data_src] = dfs[data_src].append(new_df)

    for data_src, df in df_dict.items():
        df.to_csv(output_dirpath / 'wisdm_{}.csv'.format(data_src), index=False)

    return df_dict
    
dfs = concat_subject_data_by_device_type(reimport=False)

In [4]:
display(dfs.get('phone_accel').head())
display(dfs.get('phone_accel').tail())

Unnamed: 0,subject_id,activity_code,timestamp,x,y,z
0,1600,A,1970-01-03 22:03:27.666810782+00:00,-0.364761,8.793503,1.055084
1,1600,A,1970-01-03 22:03:27.717164786+00:00,-0.87973,9.768784,1.016998
2,1600,A,1970-01-03 22:03:27.767518790+00:00,2.001495,11.10907,2.619156
3,1600,A,1970-01-03 22:03:27.817872794+00:00,0.450623,12.651642,0.184555
4,1600,A,1970-01-03 22:03:27.868226798+00:00,-2.164352,13.928436,-4.422485


Unnamed: 0,subject_id,activity_code,timestamp,x,y,z
4804398,1650,S,1970-01-05 03:22:14.078421+00:00,-8.234077,-4.27511,-1.439407
4804399,1650,S,1970-01-05 03:22:14.098619+00:00,-8.080796,-4.418811,-1.415457
4804400,1650,S,1970-01-05 03:22:14.118715+00:00,-7.994576,-4.457131,-1.441802
4804401,1650,S,1970-01-05 03:22:14.138868+00:00,-8.040081,-4.366121,-1.496887
4804402,1650,S,1970-01-05 03:22:14.158964+00:00,-8.164622,-4.179309,-1.530418


In [5]:
display(dfs.get('phone_accel').dtypes)
display(dfs.get('watch_accel').dtypes)

subject_id                     int64
activity_code                 object
timestamp        datetime64[ns, UTC]
x                            float64
y                            float64
z                            float64
dtype: object

subject_id                     int64
activity_code                 object
timestamp        datetime64[ns, UTC]
x                            float64
y                            float64
z                            float64
dtype: object

In [6]:
df = dfs.get('phone_accel').groupby('activity_code')
np.median(df.get_group('A')[['x', 'y', 'z']], axis=0)

array([ 0.5843848 , -3.444957  , -0.18293762])

From page 2 of the documentation:
>For the accelerometer sensor, the units are
$m/s^2$, ... Note that the force of gravity on Earth,
which affects the accelerometer readings, is $9.8m/s^2$.

Note: *y* axis is Vertical, not *z*. Why is this using an image processing system?

| Axis | Direction |
|:-:|:-:|
|x|Forward/Backward|
|y|Up/Down|
|z|Left/Right|

In [7]:
df = dfs.get('phone_accel').groupby(['subject_id', 'activity_code'])
df_sub = df.get_group((1600, 'A'))
display(df_sub['y'].head())
display(df_sub['y'].mean())

0     8.793503
1     9.768784
2    11.109070
3    12.651642
4    13.928436
Name: y, dtype: float64

9.551098193802462

## Shift To Origin / Mean Functions
Used to set the first index or the mean as the origin, substracting it from subsequent values.
This helps remove artifacts which bias the data when comparing sets

In [8]:
def shift_to_origin(df_):
    """  Adjust values using initial index as `origin`  (otherwise inital bias takes over during cumulative summation). """
    return df_.assign(**{col: df_[col] - df_[col].iloc[0] for col in ['x', 'y', 'z']})

def shift_to_mean(df_):
    """ Adjust values using mean as `origin` (otherwise inital bias takes over during cumulative summation). """
    return df_.assign(**{col: df_[col] - df_[col].mean() for col in ['x', 'y', 'z']})

In [9]:
df = dfs.get('phone_accel').groupby(['subject_id', 'activity_code'])
df_sub = df.get_group((1600, 'A'))

for name, fn in [('shift_to_origin', shift_to_origin), ('shift_to_mean', shift_to_mean)]:
    df_sub_adj = fn(df_sub)
    # display(df_sub_adj.nlargest(5, columns='y').append(df_sub_adj.nsmallest(5, columns='y')))
    print('\n>>>\n>>> ' + name.replace('_', ' ').capitalize() + ' <<<\n>>>')
    display(df_sub_adj.sort_values(by='y'))
    print('\n>   Mean Values after Tare')
    display(pd.DataFrame(np.round(df_sub_adj.mean(), decimals=6)).T)

# df_sub_adj = tare_to_mean(df_sub)
# # display(df_sub_adj.nlargest(5, columns='y').append(df_sub_adj.nsmallest(5, columns='y')))
# display(df_sub_adj.sort_values(by='y'))
# display(pd.DataFrame(np.round(df_sub_adj.mean(), decimals=5)))


>>>
>>> Shift to origin <<<
>>>


Unnamed: 0,subject_id,activity_code,timestamp,x,y,z
689,1600,A,1970-01-03 22:04:02.360736882+00:00,-2.073883,-7.831177,-0.571075
1207,1600,A,1970-01-03 22:04:28.444107868+00:00,-0.152618,-7.586121,-1.677689
1553,1600,A,1970-01-03 22:04:45.866598547+00:00,-3.946213,-7.571045,-1.735626
287,1600,A,1970-01-03 22:03:42.118420721+00:00,-1.213669,-7.570648,-0.835968
408,1600,A,1970-01-03 22:03:48.211246563+00:00,-0.835831,-7.429840,-1.566391
...,...,...,...,...,...,...
2890,1600,A,1970-01-03 22:05:53.189886795+00:00,-3.262238,10.316971,1.221466
971,1600,A,1970-01-03 22:04:16.560547824+00:00,-5.989212,10.449936,6.581345
1638,1600,A,1970-01-03 22:04:50.146667980+00:00,-1.816086,10.619064,3.497986
2912,1600,A,1970-01-03 22:05:54.297674881+00:00,0.173492,10.664429,-1.734695



>   Mean Values after Tare


Unnamed: 0,subject_id,x,y,z
0,1600.0,-1.493528,0.757595,-0.63885



>>>
>>> Shift to mean <<<
>>>


Unnamed: 0,subject_id,activity_code,timestamp,x,y,z
689,1600,A,1970-01-03 22:04:02.360736882+00:00,-0.580355,-8.588772,0.067775
1207,1600,A,1970-01-03 22:04:28.444107868+00:00,1.340909,-8.343716,-1.038838
1553,1600,A,1970-01-03 22:04:45.866598547+00:00,-2.452685,-8.328640,-1.096776
287,1600,A,1970-01-03 22:03:42.118420721+00:00,0.279859,-8.328244,-0.197118
408,1600,A,1970-01-03 22:03:48.211246563+00:00,0.657697,-8.187435,-0.927541
...,...,...,...,...,...,...
2890,1600,A,1970-01-03 22:05:53.189886795+00:00,-1.768710,9.559376,1.860317
971,1600,A,1970-01-03 22:04:16.560547824+00:00,-4.495684,9.692341,7.220195
1638,1600,A,1970-01-03 22:04:50.146667980+00:00,-0.322558,9.861469,4.136836
2912,1600,A,1970-01-03 22:05:54.297674881+00:00,1.667020,9.906834,-1.095845



>   Mean Values after Tare


Unnamed: 0,subject_id,x,y,z
0,1600.0,-0.0,0.0,0.0


### Note
$y$ should be nearly $0$ with gravity taken into account. `Tare To Mean` works well.

## Coordinate Summation
If acceleration data is treaded as *instantaneous* acceleration ($\bf{a}$), taking the cumulative sum as a *knockoff* version of an integral over $\bf{a}$ gives us a velocity ($\bf{v}$).

This is probably better stated, informally speaking, as a *pseudo*-velocity. 

**NOTE**: Technically, this should be divided by the frame rate (~25fps). For exploratory purposes, this probably isn't necessary to get the gist of the space.

In [10]:
def coord_summation(df_, combine=False):
    """ For cumulatively sum coordinate vectors across time"""
    return df_.assign(**{'{}{}'.format(col, '_sum' if combine else ''): vals.cumsum(axis=0) for col, vals in df_[['x', 'y', 'z']].items()})

In [11]:
df_sub_adj = shift_to_mean(df_sub)
df_sub_sum = coord_summation(df_sub_adj)
df_sub_samp = df_sub_adj.iloc[:1000]
df_sub_sum = df_sub_sum.iloc[:1000]

# display({col: df_sub_sum[['x', 'y', 'z']].cumsum(axis=0) for col, vals in df_sub_sum[['x', 'y', 'z']].items()})

In [12]:
mins = df_sub_sum[['x','y','z']].aggregate('min')
maxs = df_sub_sum[['x','y','z']].aggregate('max')

mins.name = 'minimums'
maxs.name = 'maximums'

display(mins)
display(maxs)

total_min = mins.min()
total_max = maxs.max()

abs_minmax = np.max(np.abs([total_min, total_max]))

print()
print('Total Minimum: {:.04f}'.format(total_min))
print('Total Maximum: {:.04f}'.format(total_max))
print('Absolute MinMax: {:.04f}'.format(abs_minmax))

x     1.493528
y   -32.880882
z   -20.887485
Name: minimums, dtype: float64

x    420.993507
y     28.071034
z     76.090718
Name: maximums, dtype: float64


Total Minimum: -32.8809
Total Maximum: 420.9935
Absolute MinMax: 420.9935


In [13]:
def get_abs_minmax(df_):
    total_min = df_[['x','y','z']].aggregate('min').min()
    total_max = df_[['x','y','z']].aggregate('max').max()
    return np.max(np.abs([total_min, total_max]))

# Acceleration Analysis

> in $m/s^2$


In [14]:
release_plt_mem()

df_sub_sum = coord_summation(df_sub_adj)
df_sub_samp = df_sub_adj.iloc[:1000]
df_sub_sum = df_sub_sum.iloc[:1000]

abs_minmax = get_abs_minmax(df_sub_sum) + 100

""" Start Plot """
fig, axs = plt.subplots(3,1,sharex=True)
fig.suptitle('Phone Accelerometer for Subject #1600 - Walking')
ax_vert, ax_lng, ax_lat = axs

ax_vert.plot(df_sub_samp['timestamp'], df_sub_sum['y'])
ax_lng.plot(df_sub_samp['timestamp'], df_sub_sum['x'])
ax_lat.plot(df_sub_samp['timestamp'], df_sub_sum['z'])

for ax in axs:
    ax.set_ylim(-(abs_minmax), abs_minmax)

ax_vert.set_ylabel('Y (Vertical Axis)')
ax_lng.set_ylabel('X (Longitudnal Axis)')
ax_lat.set_ylabel('Z (lateral Axis)')

ax_vert.grid()
ax_lng.grid()
ax_lat.grid()

ax_lat.set_xlabel('Time')

fig.canvas.layout.width = '1600px'
fig.canvas.layout.height = '1200px'

plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [15]:
df = dfs.get('phone_accel')
df = df[(df['subject_id'] == 1600)]

def get_abs_subject_max(df_):
    return np.max([get_abs_minmax(coord_summation(shift_to_mean(df_[(df_['activity_code'] == key)]))[['x', 'y', 'z']]) for key, activity in activity_key.items()])

get_abs_subject_max(df)

1037.0013180442156

In [16]:
from mpl_toolkits.mplot3d import Axes3D

def plot_activity_accel_over_time(df_, subject_id=1600, release_mem=False):
    if release_mem:
        release_plt_mem()
    df_subject = df_[(df_['subject_id']==subject_id)]

    fig_ = plt.figure()
    ax = fig_.add_subplot(111, projection='3d')

    subject_abs_max = get_abs_subject_max(df_subject)

    for key, activity in activity_key.items():
        df_subject_activity = shift_to_mean(df_subject[(df_subject['activity_code']==key)][['x', 'y', 'z']])
        df_subject_sum = coord_summation(df_subject_activity)

        ax.plot(df_subject_sum['x'], df_subject_sum['z'], df_subject_sum['y'], label=activity)

        ax.set_xlabel('Longitudnal')
        ax.set_ylabel('Lateral')
        ax.set_zlabel('Vertical')

        ax.set_xlim(-subject_abs_max, subject_abs_max)
        ax.set_ylim(-subject_abs_max, subject_abs_max)
        ax.set_zlim(-subject_abs_max, subject_abs_max)

    fig_.legend(title='Activity')
    fig_.suptitle('Subject #1600\'s Summed Acceleration per Activity in 3D-Space Over Time.')
    
    return fig_

In [17]:
fig = plot_activity_accel_over_time(dfs.get('phone_accel'), 1600)
fig.canvas.layout.width = '1200px'
fig.canvas.layout.height = '1200px'

fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [18]:
fig = plot_activity_accel_over_time(dfs.get('watch_accel'), subject_id=1620)

fig.canvas.layout.width = '1200px'
fig.canvas.layout.height = '1200px'

fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Gyroscopic Analysis

>(for) the gyroscope sensor, the units are $\theta/s$

Not sure about this wrt this data:

| Axis | Envelope |
|:-:|:-:|
|x|roll|
|y|pitch|
|z|yaw|

**NOTE**: It is unclear if these data run into gimble-lock situations. Given the activity domain, it is doubtful this would be an issue.

In [19]:
df_watch_gyro = dfs.get('watch_gyro')
df_phone_gyro = dfs.get('phone_gyro')

subject = 1650

df_watch_gyro = df_watch_gyro[df_watch_gyro['subject_id'] == subject]
df_phone_gyro = df_phone_gyro[df_phone_gyro['subject_id'] == subject]

max_frames = 200

# display(df_watch_gyro.head())
release_plt_mem()
# https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.subplots.html?highlight=subplots#matplotlib.pyplot.subplots
fig, axs = plt.subplots(1,2, subplot_kw=dict(polar=True))

ax_watch, ax_phone = axs

#
# Watch
#

# Activity D: Sitting
df = shift_to_mean(df_watch_gyro[df_watch_gyro['activity_code']=='D'].iloc[:max_frames])
ax_watch.plot(df.index, df['x'], label=activity_key.get('D'))

# Activity Q: Writing
df = shift_to_mean(df_watch_gyro[df_watch_gyro['activity_code']=='Q'].iloc[:max_frames])
ax_watch.plot(df.index, df['x'], label=activity_key.get('Q'))

#
# Phone
#

# Activity D: Sitting
df = shift_to_mean(df_phone_gyro[df_phone_gyro['activity_code']=='D'].iloc[:max_frames])
ax_phone.plot(df.index, df['x'], label=activity_key.get('D'))

# Activity Q: Writing
df = shift_to_mean(df_phone_gyro[df_phone_gyro['activity_code']=='Q'].iloc[:max_frames])
ax_phone.plot(df.index, df['x'], label=activity_key.get('Q'))


for ax in axs:
    ax.set_rlim(ax.get_rmin(), 8)
    ax.set_xlabel('Gyroscope Rotation (in radians)')
    ax.text(np.radians(ax.get_rlabel_position()+10), ax.get_rmax() - 2, 'Time', rotation=ax.get_rlabel_position() + 5, ha='center',va='center')

fig.legend(title='Activity')    
fig.canvas.layout.width = '1200px'
fig.canvas.layout.height = '1200px'

fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [20]:
df_watch_gyro = dfs.get('watch_gyro')
df_phone_gyro = dfs.get('phone_gyro')

subject = 1650

df_watch_gyro = df_watch_gyro[df_watch_gyro['subject_id'] == subject]
df_phone_gyro = df_phone_gyro[df_phone_gyro['subject_id'] == subject]


# display(df_watch_gyro.head())
release_plt_mem()
# https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.subplots.html?highlight=subplots#matplotlib.pyplot.subplots
fig, axs = plt.subplots(2,1)

ax_watch, ax_phone = axs

#
# Watch
#

# Watch Activity A: Walking
df = coord_summation(shift_to_mean(df_watch_gyro[df_watch_gyro['activity_code']=='A'].reset_index(drop=True)))
ax_watch.plot(df['x'].index, df['x'], label='{}'.format(activity_key.get('A')))

# Watch Activity D: Sitting
df = coord_summation(shift_to_mean(df_watch_gyro[df_watch_gyro['activity_code']=='D'].reset_index(drop=True)))
ax_watch.plot(df['x'].index, df['x'], label='{}'.format(activity_key.get('D')))

# watch Activity Q: Writing
df = coord_summation(shift_to_mean(df_watch_gyro[df_watch_gyro['activity_code']=='Q'].reset_index(drop=True)))
ax_watch.plot(df['x'].index, df['x'], label='{}'.format(activity_key.get('Q')))
ax_watch.set_title('Watch Gyroscope')

#
# Phone
#

# Phone Activity A: Walking
df = coord_summation(shift_to_mean(df_phone_gyro[df_phone_gyro['activity_code']=='A'].reset_index(drop=True)))
ax_phone.plot(df['x'].index, df['x'], label='{}'.format(activity_key.get('A')))

# Phone Activity D: Sitting
df = coord_summation(shift_to_mean(df_phone_gyro[df_phone_gyro['activity_code']=='D'].reset_index(drop=True)))
ax_phone.plot(df['x'].index, df['x'], label='{}'.format(activity_key.get('D')))

# watch Activity Q: Writing
df = coord_summation(shift_to_mean(df_phone_gyro[df_phone_gyro['activity_code']=='Q'].reset_index(drop=True)))
ax_phone.plot(df['x'].index, df['x'], label='{}'.format(activity_key.get('Q')))
ax_phone.set_title('Phone Gyroscope')

for ax in axs:
    ax.grid()
    ax.set_xlabel('Time')
    ax.set_ylabel('Cumulative Radians per Second')
    ax.legend()

   
fig.canvas.layout.width = '1200px'
fig.canvas.layout.height = '1200px'

fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [21]:
df_watch_accel = dfs.get('watch_accel')
df_phone_accel = dfs.get('phone_accel')

# display(len(set(df_watch_accel['activity_code'])))
# display((set(df_phone_accel['subject_id'])))

# subject = 1650
# activity_code = 'A'

# df_watch_accel = coord_summation(shift_to_mean(df_watch_accel[(df_watch_accel['subject_id'] == subject) & (df_watch_accel['activity_code'] == activity_code)]), True).reset_index(drop=True)
# df_phone_accel = coord_summation(shift_to_mean(df_phone_accel[(df_phone_accel['subject_id'] == subject) & (df_phone_accel['activity_code'] == activity_code)]), True).reset_index(drop=True)

# display(df_watch_accel)
# display(df_phone_accel)

# fig, axs = plt.subplots(1,1)
# ax1 = axs

# ax1.plot(df_watch_accel['y_sum'])
# ax1.plot(df_phone_accel['y_sum'])

# fig.show()

### Gyroscope + Accelerometer
To get the actual heading of the device, the coordinates need to be rotated. A rotation matrix might be of use (taken from [Wolfram-Alpha - Rotation Matrix](http://mathworld.wolfram.com/RotationMatrix.html))
<br/><br/>
Rotation around the x-axis, where $\alpha = \text{x_gyro}$:
$$
R_x(\alpha) = 
\begin{bmatrix}
    1 & 0 & 0 \\
    0 & cos\;\alpha & -sin\;\alpha \\
    0 & sin\;\alpha & cos\;\alpha \\
\end{bmatrix}
\\[2em]
$$
Rotation around the y-axis, where $\beta = \text{y_gyro}$:
$$
R_y(\beta) = \begin{bmatrix}
cos\;\beta & 0 & sin\;\beta \\
0 & 1 & 0 \\
-sin\;\beta & 0 & cos\;\beta \\
\end{bmatrix}
\\[2em]
$$
Rotation around the z-axis, where $\gamma = \text{z_gyro}$
$$
R_z(\gamma) = \begin{bmatrix}
cos\;\gamma & -sin\;\gamma & 0 \\
sin\;\gamma & cos\;\gamma & 0 \\
0 & 0 & 1 \\
\end{bmatrix}
$$

Note: Rotations are done in $R_{zyx}$ order

$$
\begin{bmatrix}
cos\;\gamma & -sin\;\gamma & 0 \\
sin\;\gamma & cos\;\gamma & 0 \\
0 & 0 & 1 \\
\end{bmatrix}
\begin{bmatrix}
cos\;\beta & 0 & sin\;\beta \\
0 & 1 & 0 \\
-sin\;\beta & 0 & cos\;\beta \\
\end{bmatrix}
\begin{bmatrix}
    1 & 0 & 0 \\
    0 & cos\;\alpha & -sin\;\alpha \\
    0 & sin\;\alpha & cos\;\alpha \\
\end{bmatrix}
$$


In [22]:
def series_3d_rotation(_ser, rot_labels=['x_accel', 'y_accel', 'z_accel']):
    coords = _ser[['x_accel', 'y_accel', 'z_accel']].astype(np.float64)
    rots = _ser[['x_gyro', 'y_gyro', 'z_gyro']].astype(np.float64)
    rots.index = ['x', 'y', 'z']
    cos = rots.apply(np.cos)
    cos.name = 'cosine'
    sin = rots.apply(np.sin)
    sin.name = 'sine'
    ones = 1.
    zeros = 0.

    x_rot = np.matrix([
        [ones , zeros,  zeros], 
        [zeros, cos.x, -sin.x],
        [zeros, sin.x,  cos.x]
    ])
    
    y_rot = np.matrix([
        [ cos.y, zeros, sin.y], 
        [ zeros, ones , zeros],
        [-sin.y, zeros, cos.y]
    ])
    
    z_rot = np.matrix([
        [ cos.z, -sin.z, zeros], 
        [ sin.z,  cos.z, zeros],
        [ zeros,  zeros,  ones]
    ])
    
    rot_coords = np.round(np.dot(np.dot(z_rot, np.dot(y_rot, x_rot)), coords), decimals=10)[0]
    new_coords = pd.Series(rot_coords, index=rot_labels)
    
    return new_coords


This is a $90^\circ$ rotation clockwise around the y-axis.
$$
R_{zyx}(0 \; 90^\circ \; 0)
    \begin{bmatrix}1 \\ 0 \\ 0\end{bmatrix} = 
    \begin{bmatrix}0 \\ 0 \\ -1\end{bmatrix}
$$

$$
R_{zyx}(0 \; 90^\circ \; 0)\begin{bmatrix}1 \\ 0 \\ 0\end{bmatrix} =
\begin{bmatrix}
0 & -1 & 0 \\
1 & 0 & 0 \\
0 & 0 & 1
\end{bmatrix} 
\begin{bmatrix}
0 & 0 & 1 \\
0 & 1 & 0 \\
-1 & 0 & 0
\end{bmatrix}
\begin{bmatrix}
1 & 0 & 0 \\
0 & 0 & -1 \\
0 & 1 & 0
\end{bmatrix}
\begin{bmatrix}1 \\ 0 \\ 0\end{bmatrix}
= \begin{bmatrix}0 \\ 0 \\ -1\end{bmatrix}
$$

In [23]:
test_ser = pd.Series({'x_accel': 1, 'y_accel': 0, 'z_accel': 0, 'x_gyro': 0, 'y_gyro': np.pi/2, 'z_gyro': 0})

print('Original')
display(test_ser.to_frame().loc[['x_accel', 'y_accel', 'z_accel']])

test_rot = series_3d_rotation(test_ser).to_frame()
print('Rotated clockwise by pi/2 around the y-axis.')
display(test_rot)

Original


Unnamed: 0,0
x_accel,1.0
y_accel,0.0
z_accel,0.0


Rotated clockwise by pi/2 around the y-axis.


Unnamed: 0,0
x_accel,0.0
y_accel,0.0
z_accel,-1.0


In [24]:
test_df = pd.DataFrame({'x_accel': 1, 'y_accel': 0, 'z_accel': 0, 'x_gyro': 0, 'y_gyro': np.pi/2, 'z_gyro': 0}, index=[0])
rot_test_df = test_df.apply(series_3d_rotation, axis=1)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot([0, test_df.x_accel.iloc[0]], [0, test_df.y_accel.iloc[0]], [0, test_df.z_accel.iloc[0]], label='Original Vector')
ax.plot([0, rot_test_df.x_accel.iloc[0]], [0, rot_test_df.y_accel.iloc[0]], [0, rot_test_df.z_accel.iloc[0]], label='Rotated Vector')
fig.legend()
fig.canvas.layout.width = '600px'
fig.canvas.layout.height = '600px'

fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [25]:
watch_df = pd.read_csv('../data/processed/wisdm/merged_sensors/watch/1600_watch.csv').drop(columns='timestamp')
watch_df.head()

Unnamed: 0,subject_id,activity_code,x_accel,y_accel,z_accel,x_gyro,y_gyro,z_gyro
0,1600,A,4.972757,-0.158317,6.696732,0.314944,-1.022277,-0.309962
1,1600,A,3.25372,-0.191835,6.107758,0.387382,-0.618541,-0.048972
2,1600,A,2.801216,-0.155922,5.997625,0.070999,-0.20948,-0.195978
3,1600,A,3.770868,-1.051354,7.731027,0.037975,0.254976,-0.156563
4,1600,A,4.661511,0.169689,9.684695,0.073129,0.719431,-0.001035


In [26]:
def shift_to_mean(df_, columns=['x_accel', 'y_accel', 'z_accel']):
    """ Adjust values using mean as `origin` (otherwise inital bias takes over during cumulative summation). """
    return df_.assign(**{col: df_[col] - df_[col].mean() for col in columns})

### Original Activity Data

In [27]:
act_grp_a = watch_df.groupby('activity_code').get_group('A')
act_grp_a.head()[['x_accel', 'y_accel', 'z_accel']]

Unnamed: 0,x_accel,y_accel,z_accel
0,4.972757,-0.158317,6.696732
1,3.25372,-0.191835,6.107758
2,2.801216,-0.155922,5.997625
3,3.770868,-1.051354,7.731027
4,4.661511,0.169689,9.684695


### Aggregate Sum of Original Activity Data

In [28]:
sum_act_grp_a = act_grp_a[['x_accel', 'y_accel', 'z_accel']].cumsum(axis=0)
sum_act_grp_a.head()

Unnamed: 0,x_accel,y_accel,z_accel
0,4.972757,-0.158317,6.696732
1,8.226477,-0.350152,12.80449
2,11.027693,-0.506074,18.802115
3,14.798561,-1.557428,26.533142
4,19.460072,-1.387739,36.217837


### Aggregate Sum of Rotated Original Activity Data

In [29]:
sum_rot_act_grp_a = act_grp_a.apply(series_3d_rotation, axis=1).cumsum(axis=0)
sum_act_grp_a.head()

Unnamed: 0,x_accel,y_accel,z_accel
0,4.972757,-0.158317,6.696732
1,8.226477,-0.350152,12.80449
2,11.027693,-0.506074,18.802115
3,14.798561,-1.557428,26.533142
4,19.460072,-1.387739,36.217837


### Aggregate Sum of Standardized Original Activity Data

In [30]:
sum_std_act_grp_a = shift_to_mean(act_grp_a)[['x_accel', 'y_accel', 'z_accel']].cumsum(axis=0)
sum_std_act_grp_a.head()

Unnamed: 0,x_accel,y_accel,z_accel
0,-6.631561,1.072198,6.820006
1,-14.982159,2.110877,13.051038
2,-23.785261,3.185468,19.171938
3,-31.618711,3.364629,27.026239
4,-38.561518,4.764832,36.834208


## Standardize, Rotate, Sum
1. Standardize
2. Rotate
3. Aggregate Sums

### Standardized Original Activity Data

In [31]:
std_act_grp_a = shift_to_mean(act_grp_a)
std_act_grp_a.head()[['x_accel', 'y_accel', 'z_accel']]

Unnamed: 0,x_accel,y_accel,z_accel
0,-6.631561,1.072198,6.820006
1,-8.350598,1.038679,6.231032
2,-8.803102,1.074592,6.120899
3,-7.83345,0.17916,7.854301
4,-6.942807,1.400203,9.807969


### Standardized, then Rotated, Activity Data

In [32]:
rot_std_act_grp_a = std_act_grp_a.apply(series_3d_rotation, axis=1)
rot_std_act_grp_a.head()

Unnamed: 0,x_accel,y_accel,z_accel
0,-9.16597,1.787897,-2.104306
1,-10.431995,-0.882546,0.178019
2,-9.582544,2.552508,4.215958
3,-5.548978,0.755294,9.577237
4,1.291623,0.678516,12.00961


### Aggregate Sum of Rotated Standardized Data

In [33]:
sum_rot_std_act_grp_a = rot_std_act_grp_a.cumsum(axis=0)
sum_rot_std_act_grp_a.head()

Unnamed: 0,x_accel,y_accel,z_accel
0,-9.16597,1.787897,-2.104306
1,-19.597965,0.905352,-1.926287
2,-29.18051,3.45786,2.289671
3,-34.729488,4.213154,11.866908
4,-33.437865,4.89167,23.876518


## Rotate, Standardize, Sum
1. Rotate
2. Standardize
3. Aggregate Sums

### Rotated Original Activity Data

In [34]:
rot_act_grp_a = act_grp_a.apply(series_3d_rotation, axis=1)
rot_act_grp_a.head()

Unnamed: 0,x_accel,y_accel,z_accel
0,-3.343765,-1.265307,7.53776
1,-0.707172,-2.453246,6.435036
2,1.356395,-0.861614,6.423429
3,5.309493,-2.198889,6.485965
4,9.878644,-0.54859,4.202764


### Rotated, then Standardized, Activity Data

In [35]:
std_rot_act_grp_a = shift_to_mean(rot_act_grp_a)
std_rot_act_grp_a.head()

Unnamed: 0,x_accel,y_accel,z_accel
0,-3.231211,-0.767956,6.553893
1,-0.594619,-1.955895,5.451169
2,1.468949,-0.364263,5.439562
3,5.422046,-1.701538,5.502097
4,9.991198,-0.051239,3.218897


### Aggregate Sum of Standardized Rotated Data

In [36]:
sum_std_rot_act_grp_a = std_rot_act_grp_a.cumsum(axis=0)
sum_std_rot_act_grp_a.head()

Unnamed: 0,x_accel,y_accel,z_accel
0,-3.231211,-0.767956,6.553893
1,-3.82583,-2.723852,12.005062
2,-2.356881,-3.088114,17.444624
3,3.065165,-4.789653,22.946721
4,13.056363,-4.840892,26.165618


In [37]:
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot(sum_act_grp_a.x_accel, sum_act_grp_a.y_accel, sum_act_grp_a.z_accel, label='Original')
ax.plot(sum_std_act_grp_a.x_accel, sum_std_act_grp_a.y_accel, sum_std_act_grp_a.z_accel, label='Standardized Original')
ax.plot(sum_rot_act_grp_a.x_accel, sum_rot_act_grp_a.y_accel, sum_rot_act_grp_a.z_accel, label='Rotated Original')
ax.plot(sum_std_rot_act_grp_a.x_accel, sum_std_rot_act_grp_a.y_accel, sum_std_rot_act_grp_a.z_accel, label='Standardized Rotated Data')
ax.plot(sum_rot_std_act_grp_a.x_accel, sum_rot_std_act_grp_a.y_accel, sum_rot_std_act_grp_a.z_accel, label='Rotated Standardized Data')

ax.set_xlim(-20000, 20000)
ax.set_ylim(-20000, 20000)
ax.set_zlim(-20000, 20000)

fig.legend()
fig.canvas.layout.width = '1200px'
fig.canvas.layout.height = '1200px'

fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [38]:
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot([], [], [])
ax.plot(sum_std_act_grp_a.x_accel, sum_std_act_grp_a.y_accel, sum_std_act_grp_a.z_accel, label='Standardized Original')
ax.plot([], [], [])
ax.plot(sum_std_rot_act_grp_a.x_accel, sum_std_rot_act_grp_a.y_accel, sum_std_rot_act_grp_a.z_accel, label='Standardized Rotated Data')
ax.plot(sum_rot_std_act_grp_a.x_accel, sum_rot_std_act_grp_a.y_accel, sum_rot_std_act_grp_a.z_accel, label='Rotated Standardized Data')

fig.legend()
fig.canvas.layout.width = '1200px'
fig.canvas.layout.height = '1200px'

fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [39]:

fig, axs = plt.subplots(3,1)

axs[0].plot(sum_act_grp_a.x_accel, label='Original Data')
axs[0].plot(sum_std_act_grp_a.x_accel, label='Standardized Data')
axs[0].plot(sum_rot_act_grp_a.x_accel, label='Rotated Data')
axs[0].plot(sum_std_rot_act_grp_a.x_accel, label='Standardized Rotated Data')
axs[0].plot(sum_rot_std_act_grp_a.x_accel, label='Rotated Standardized Data')
axs[0].set_ylabel('X Axis')
axs[0].set_ylim(-1000, 1000)

axs[1].plot(sum_act_grp_a.y_accel)
axs[1].plot(sum_std_act_grp_a.y_accel)
axs[1].plot(sum_rot_act_grp_a.y_accel)
axs[1].plot(sum_std_rot_act_grp_a.y_accel)
axs[1].plot(sum_rot_std_act_grp_a.y_accel)
axs[1].set_ylabel('Y Axis')
axs[1].set_ylim(-1000, 1000)

axs[2].plot(sum_act_grp_a.z_accel)
axs[2].plot(sum_std_act_grp_a.z_accel)
axs[2].plot(sum_rot_act_grp_a.y_accel)
axs[2].plot(sum_std_rot_act_grp_a.z_accel)
axs[2].plot(sum_rot_std_act_grp_a.z_accel)
axs[2].set_ylabel('Z Axis')
axs[2].set_xlabel('Timestep')
axs[2].set_ylim(-1000, 1000)



fig.canvas.layout.width = '1200px'
fig.canvas.layout.height = '1200px'

fig.legend()
fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

... and here it abruptly ends.