In [None]:
# packages

# standard
import numpy as np
import pandas as pd
import time

# plots
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns

In [None]:
# read the activity time-series CSV and report how long the load took
load_started = time.time()
df = pd.read_csv('../input/human-activity-recognition/time_series_data_human_activities.csv')
load_seconds = time.time() - load_started
print('Elapsed time [s]:', np.round(load_seconds, 4))

In [None]:
# structure of data: print the column names, dtypes, non-null counts and
# memory usage of the loaded frame
df.info()

# Basic Explorations

In [None]:
# count the rows per activity; value_counts() orders them most frequent first,
# and that order is kept in `activities` for the per-activity loops
activity_stats = df['activity'].value_counts()
activities = activity_stats.index.to_list()

# bar chart of the counts, followed by the raw numbers
activity_stats.plot.bar()
plt.grid()
plt.show()
print(activity_stats)

In [None]:
# contingency table: one row per user, one column per activity,
# each cell holding the number of rows for that combination
pd.crosstab(index=df['user'], columns=df['activity'])

# Look at specific user

In [None]:
# choose the user to inspect and keep only that user's rows
sel_user = 3
is_selected_user = df['user'] == sel_user
df_user = df.loc[is_selected_user]

# > Time Series Plots

In [None]:
# time series plots: one figure per activity with stacked x / y / z panels
axis_colors = (('x', 'red'), ('y', 'green'), ('z', 'blue'))

for act in activities:
    # Build the mask from df_user itself so it lines up with the rows being
    # filtered and the full data set is not rescanned for every activity.
    # reset_index(drop=True) returns a fresh frame, so adding a column below
    # does not touch df_user and no leftover 'index' column is created.
    df_temp = df_user.loc[df_user['activity'] == act].reset_index(drop=True)
    if df_temp.empty:
        # the selected user has no rows for this activity -> nothing to draw
        print('user=' + str(sel_user) + ' / ' + act + ': no data')
        continue

    # convert time to seconds and start with 0
    # (the 1e9 divisor assumes nanosecond timestamps -- TODO confirm)
    df_temp['time_sec'] = (df_temp['timestamp'] - df_temp['timestamp'].min()) / 1e9

    fig, axes = plt.subplots(3, 1, figsize=(14, 6), sharex=True)
    for ax, (axis_name, color) in zip(axes, axis_colors):
        ax.plot(df_temp['time_sec'], df_temp[axis_name + '-axis'], color=color, alpha=0.5)
        ax.set_ylim(-20, 20)  # identical fixed range keeps the panels comparable
        ax.set_title('user=' + str(sel_user) + ' / ' + act + ' - ' + axis_name)
        ax.grid()
    axes[-1].set_xlabel('time [s]')  # x axis is shared, label the bottom panel only

    plt.show()
    plt.close(fig)  # one figure is created per activity; release it once shown

# > Distribution Plots

In [None]:
# distribution plots: one figure per activity with stacked x / y / z histograms
n_bins = 100
axis_colors = (('x', 'red'), ('y', 'green'), ('z', 'blue'))

for act in activities:
    # Build the mask from df_user itself so it lines up with the rows being
    # filtered and the full data set is not rescanned for every activity.
    # The subset is only read below, so no copy / index reset is needed.
    df_temp = df_user.loc[df_user['activity'] == act]
    if df_temp.empty:
        # the selected user has no rows for this activity -> nothing to draw
        print('user=' + str(sel_user) + ' / ' + act + ': no data')
        continue

    fig, axes = plt.subplots(3, 1, figsize=(14, 6), sharex=True)
    for ax, (axis_name, color) in zip(axes, axis_colors):
        ax.hist(df_temp[axis_name + '-axis'], bins=n_bins, color=color, alpha=0.5)
        ax.set_xlim(-20, 20)  # identical fixed range keeps the panels comparable
        ax.set_title('user=' + str(sel_user) + ' / ' + act + ' - ' + axis_name)
        ax.grid()

    plt.show()
    plt.close(fig)  # one figure is created per activity; release it once shown

# > Scatter Plots

In [None]:
# pairwise scatter plots of the three acceleration axes, one grid per activity
for act in activities:
    # Build the mask from df_user itself so it lines up with the rows being
    # filtered, and select only the three columns the pair plot uses
    # (the time column is not needed here).
    df_temp = df_user.loc[df_user['activity'] == act,
                          ['x-axis', 'y-axis', 'z-axis']].reset_index(drop=True)
    if df_temp.empty:
        # the selected user has no rows for this activity -> nothing to draw
        print(act, ': no data')
        continue

    print(act,':')
    sns.pairplot(data = df_temp,
                 plot_kws = dict(s=2))  # small markers; there are many points
    plt.show()

# > Standard deviation of accelerations by activity

#### Note: We are still focussing on one selected user!

In [None]:
# per-activity standard deviation of each acceleration axis (selected user only)
accel_cols = ['x-axis', 'y-axis', 'z-axis']
stats_by_act = df_user.groupby('activity')[accel_cols].std()
stats_by_act

In [None]:
# the same table as an annotated heatmap with black cell borders
heatmap_style = dict(annot=True, linewidths=1, linecolor='black')
sns.heatmap(stats_by_act, **heatmap_style)
plt.show()

# Back to all users

#### Evaluate standard deviation now by user and activity:

In [None]:
# standard deviation of each acceleration axis per (activity, user) pair;
# as_index=False keeps the two group keys as ordinary columns
group_keys = ['activity', 'user']
accel_cols = ['x-axis', 'y-axis', 'z-axis']
stats_full = df.groupby(group_keys, as_index=False)[accel_cols].std()

In [None]:
# raise the row display limit to 200 before showing the table
# (this is a global pandas option and stays in effect for later cells)
pd.options.display.max_rows = 200
stats_full

#### We can now visualize each (user,activity)-pair in a 3D scatter plot:

In [None]:
# interactive 3d scatter plot: one marker per row of stats_full,
# positioned by the three per-axis values and colored by activity
fig = px.scatter_3d(
    stats_full,
    x='x-axis',
    y='y-axis',
    z='z-axis',
    color='activity',
    opacity=0.25,
)
fig.update_layout(title='')
fig.show()

#### Finally, let's evaluate the mean (across users) of the standard deviations by activity:

In [None]:
# average the per-user standard deviations within each activity
accel_cols = ['x-axis', 'y-axis', 'z-axis']
stats_full_mean_std = stats_full.groupby('activity')[accel_cols].mean()
stats_full_mean_std

In [None]:
# the same table as an annotated heatmap with black cell borders
heatmap_style = dict(annot=True, linewidths=1, linecolor='black')
sns.heatmap(stats_full_mean_std, **heatmap_style)
plt.show()