Human Activity Monitoring Analysis using Permutation Entropy
Complete analysis code

In [None]:
# --- 1. Import Libraries ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import os

In [None]:
# For 3D plots
from mpl_toolkits.mplot3d import Axes3D

In [None]:
# Set plotting style: white-grid seaborn look for matplotlib plus seaborn's larger
# "talk" scaling. Both are global and affect every figure created afterwards.
# NOTE: the 'seaborn-v0_8-*' style names require Matplotlib 3.6 or newer.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_context("talk")

In [None]:
# Handle warnings
# NOTE(review): this silences every warning category for the rest of the session,
# including numerical/degenerate-input warnings that the ANOVA calls further down
# may emit. Consider narrowing the filter to specific categories.
import warnings
warnings.filterwarnings('ignore')

In [None]:
# --- 2. Load the Dataset ---
# Path to the permutation-entropy / complexity CSV.
# The PE_DATA_PATH environment variable overrides the location so the notebook can
# run on machines other than the author's; when it is unset the original path is
# used unchanged.
data_path = os.environ.get(
    "PE_DATA_PATH",
    "/Users/rosalinatorres/corrected_permutation_entropy_complexity.csv",
)

In [None]:
# Load the dataset from `data_path` into the DataFrame `df` used by the cells below.
df = pd.read_csv(data_path)

In [None]:
# Display basic information: (rows, columns) shape, the column names as a list,
# and the first rows returned by df.head().
print(f"Dataset shape: {df.shape}")
print(f"\nColumns: {df.columns.tolist()}")
print(f"\nSample data:")
print(df.head())

In [None]:
# --- 3. Explore Dataset Properties ---
# Display dataset summary: number of distinct subjects, then the sorted distinct
# values of each column that later sections filter or group by.
print(f"Number of subjects: {df['Subject'].nunique()}")
print(f"Activities: {sorted(df['Activity'].unique())}")
print(f"Axes: {sorted(df['Axis'].unique())}")
print(f"Dimensions: {sorted(df['Dimension'].unique())}")
print(f"Delays: {sorted(df['Delay'].unique())}")
print(f"Signal lengths: {sorted(df['Signal length'].unique())}")

In [None]:
# Count samples per activity: number of rows for each distinct 'Activity' value
# (value_counts lists the most frequent activity first).
print("\nSamples per activity:")
print(df['Activity'].value_counts())

In [None]:
# Check for missing values: per-column count of null/NaN entries.
print("\nMissing values per column:")
print(df.isnull().sum())

In [None]:
# --- 4. Data Distribution Analysis ---
# Histogram with KDE overlay of every permutation-entropy value in the dataset,
# written to pe_distribution.png; the figure is closed once saved.
pe_fig, pe_ax = plt.subplots(figsize=(12, 6))
sns.histplot(df['Permutation entropy'], kde=True, ax=pe_ax)
pe_ax.set_title('Distribution of Permutation Entropy Values')
pe_ax.set_xlabel('Permutation Entropy')
pe_ax.set_ylabel('Frequency')
pe_ax.grid(True, alpha=0.3)
pe_fig.savefig('pe_distribution.png')
plt.close(pe_fig)

In [None]:
# Histogram with KDE overlay of every complexity value in the dataset,
# written to complexity_distribution.png; the figure is closed once saved.
cx_fig, cx_ax = plt.subplots(figsize=(12, 6))
sns.histplot(df['Complexity'], kde=True, ax=cx_ax)
cx_ax.set_title('Distribution of Complexity Values')
cx_ax.set_xlabel('Complexity')
cx_ax.set_ylabel('Frequency')
cx_ax.grid(True, alpha=0.3)
cx_fig.savefig('complexity_distribution.png')
plt.close(cx_fig)

In [None]:
# --- 5. Analysis by Parameters (Dimension and Delay) ---
# Mean of both metrics for every (Dimension, Delay) pair, then one
# Dimension x Delay grid per metric for the heatmaps below.
metric_columns = ['Permutation entropy', 'Complexity']
heatmap_data = (
    df.groupby(['Dimension', 'Delay'])[metric_columns]
    .mean()
    .reset_index()
)
heatmap_pe, heatmap_complexity = (
    heatmap_data.pivot(index='Dimension', columns='Delay', values=metric)
    for metric in metric_columns
)

In [None]:
# Annotated heatmap of mean permutation entropy (rows: Dimension, columns: Delay),
# saved to pe_heatmap.png.
pe_heat_fig, pe_heat_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(heatmap_pe, annot=True, cmap='viridis', fmt='.4f', ax=pe_heat_ax)
pe_heat_ax.set_title('Mean Permutation Entropy by Dimension and Delay')
pe_heat_fig.savefig('pe_heatmap.png')
plt.close(pe_heat_fig)

In [None]:
# Annotated heatmap of mean complexity (rows: Dimension, columns: Delay),
# saved to complexity_heatmap.png.
cx_heat_fig, cx_heat_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(heatmap_complexity, annot=True, cmap='viridis', fmt='.4f', ax=cx_heat_ax)
cx_heat_ax.set_title('Mean Complexity by Dimension and Delay')
cx_heat_fig.savefig('complexity_heatmap.png')
plt.close(cx_heat_fig)

In [None]:
# --- 6. Analysis by Activity (Basic Parameters: Dimension=3, Delay=1) ---
# Keep only the rows whose Dimension is 3 and whose Delay is 1.
is_basic_dimension = df['Dimension'] == 3
is_basic_delay = df['Delay'] == 1
filtered_df = df[is_basic_dimension & is_basic_delay]

In [None]:
# Per-activity mean and standard deviation of both metrics on the
# Dimension=3 / Delay=1 subset; 'Activity' is returned as a regular column.
basic_aggregations = {
    metric: ['mean', 'std']
    for metric in ('Permutation entropy', 'Complexity')
}
activity_stats = (
    filtered_df
    .groupby('Activity')
    .agg(basic_aggregations)
    .reset_index()
)

In [None]:
# Show the per-activity mean/std table computed above.
print("\nActivity statistics with basic parameters (dimension=3, delay=1):")
print(activity_stats)

In [None]:
# Create activity comparison plot.
# The figure is built, saved and closed inside ONE cell: Jupyter's inline backend
# shows and closes all open figures when a cell finishes, so creating the figure in
# one cell and drawing/saving in later cells would write an image without the bars.
activities = filtered_df['Activity'].unique()
x = np.arange(len(activities))
width = 0.35  # bar width; the two series are drawn at x - width/2 and x + width/2

# One groupby pass instead of re-filtering the frame four times per activity.
# reindex keeps the bars in first-appearance order, matching `activities`.
activity_summary = (
    filtered_df
    .groupby('Activity')[['Permutation entropy', 'Complexity']]
    .agg(['mean', 'std'])
    .reindex(activities)
)
pe_means = activity_summary[('Permutation entropy', 'mean')].tolist()
pe_std = activity_summary[('Permutation entropy', 'std')].tolist()
complexity_means = activity_summary[('Complexity', 'mean')].tolist()
complexity_std = activity_summary[('Complexity', 'std')].tolist()

# Explicit figure/axes handles so nothing depends on pyplot's "current figure".
cmp_fig, cmp_ax = plt.subplots(figsize=(14, 7))
cmp_ax.bar(x - width/2, pe_means, width, label='Permutation Entropy', yerr=pe_std, capsize=5, color='skyblue')
cmp_ax.bar(x + width/2, complexity_means, width, label='Complexity', yerr=complexity_std, capsize=5, color='lightcoral')

cmp_ax.set_xlabel('Activity')
cmp_ax.set_ylabel('Value')
cmp_ax.set_title('Permutation Entropy and Complexity by Activity (Dimension=3, Delay=1)')
cmp_ax.set_xticks(x)
cmp_ax.set_xticklabels(activities)
cmp_ax.legend()
cmp_ax.grid(True, alpha=0.3)
cmp_fig.tight_layout()
cmp_fig.savefig('activity_comparison_basic.png')
plt.close(cmp_fig)

In [None]:
# --- 7. Activity Analysis by Axis ---
# Mean and standard deviation of both metrics for every (Activity, Axis) pair
# of the Dimension=3 / Delay=1 subset; the group keys come back as columns.
axis_aggregations = {
    metric: ['mean', 'std']
    for metric in ('Permutation entropy', 'Complexity')
}
axis_stats = (
    filtered_df
    .groupby(['Activity', 'Axis'])
    .agg(axis_aggregations)
    .reset_index()
)

In [None]:
# Show the per-(activity, axis) mean/std table computed above.
print("\nActivity and axis statistics:")
print(axis_stats)

In [None]:
# Create one subplot per activity showing mean permutation entropy per axis.
# Built, saved and closed in a single cell: with Jupyter's inline backend open
# figures are closed at the end of each cell, so a later-cell plt.savefig() would
# write a new, empty figure instead of this one.
activities = filtered_df['Activity'].unique()
axes_labels = filtered_df['Axis'].unique()

# squeeze=False always yields an array of Axes, so axs[i] also works when the data
# contains a single activity (plt.subplots would otherwise return a bare Axes).
fig, axs = plt.subplots(1, len(activities), figsize=(20, 6), sharey=True, squeeze=False)
axs = axs.ravel()
fig.suptitle('Permutation Entropy by Axis for Each Activity (Dimension=3, Delay=1)', fontsize=16)

for i, activity in enumerate(activities):
    activity_data = filtered_df[filtered_df['Activity'] == activity]

    # Mean / std of permutation entropy for each axis, in `axes_labels` order
    axis_means = [activity_data[activity_data['Axis'] == axis]['Permutation entropy'].mean() for axis in axes_labels]
    axis_std = [activity_data[activity_data['Axis'] == axis]['Permutation entropy'].std() for axis in axes_labels]

    axs[i].bar(axes_labels, axis_means, yerr=axis_std, capsize=5, color='lightgreen')
    axs[i].set_title(activity)
    axs[i].set_xlabel('Axis')
    if i == 0:
        # The y axis is shared, so only the left-most panel carries the label
        axs[i].set_ylabel('Permutation Entropy')
    axs[i].grid(True, alpha=0.3)

# rect leaves the top 5% of the canvas free for the suptitle
fig.tight_layout(rect=[0, 0, 1, 0.95])
fig.savefig('axis_comparison.png')
plt.close(fig)

In [None]:
# --- 8. Statistical Tests ---
# One-way ANOVA on permutation entropy, one sample group per activity,
# using the Dimension=3 / Delay=1 subset.
activities = filtered_df['Activity'].unique()
pe_groups = [
    filtered_df[filtered_df['Activity'] == act]['Permutation entropy'].values
    for act in activities
]
f_stat_pe, p_value_pe = stats.f_oneway(*pe_groups)
print(f"\nANOVA for Permutation Entropy: F={f_stat_pe:.4f}, p={p_value_pe:.4f}")

In [None]:
# One-way ANOVA on complexity, one sample group per activity,
# using the same subset and activity list as the permutation-entropy test.
complexity_groups = [
    filtered_df[filtered_df['Activity'] == act]['Complexity'].values
    for act in activities
]
f_stat_complexity, p_value_complexity = stats.f_oneway(*complexity_groups)
print(f"ANOVA for Complexity: F={f_stat_complexity:.4f}, p={p_value_complexity:.4f}")

In [None]:
# Test with different parameters: collect the sorted distinct Dimension and Delay
# values present in the data; the ANOVA scans below iterate over their product.
print("\nFinding optimal parameters for discrimination:")
dimensions = sorted(df['Dimension'].unique())
delays = sorted(df['Delay'].unique())

In [None]:
# One-way ANOVA of permutation entropy across activities for every
# (Dimension, Delay) combination present in the data. Each successful test adds
# one record (parameters, F-statistic, p-value) to `results`.
results = []
for dim in dimensions:
    for delay in delays:
        param_df = df[(df['Dimension'] == dim) & (df['Delay'] == delay)]
        if param_df.empty:
            continue  # combination not present in the dataset

        groups = [
            param_df[param_df['Activity'] == act]['Permutation entropy'].values
            for act in activities
        ]
        try:
            f_stat, p_value = stats.f_oneway(*groups)
        except Exception as exc:
            # Best-effort scan: one failing combination must not abort the loop,
            # but it is reported instead of being dropped silently. Catching
            # Exception (not a bare except) lets KeyboardInterrupt through.
            print(f"Skipping Dimension={dim}, Delay={delay}: ANOVA failed ({exc})")
            continue
        results.append({'Dimension': dim, 'Delay': delay, 'F_statistic': f_stat, 'p_value': p_value})

In [None]:
# Tabulate the scan records and show the five combinations with the largest
# F-statistic (sorted descending).
results_df = pd.DataFrame(results)
print("\nTop 5 parameter combinations by F-statistic:")
print(results_df.sort_values('F_statistic', ascending=False).head(5))

In [None]:
# --- 9. Activity Comparison with Optimal Parameters ---
# "Optimal" = the (Dimension, Delay) row of the scan with the largest F-statistic.
# Fail with an explicit message when the scan produced nothing usable, instead of
# an opaque KeyError / NaN lookup further down.
if results_df.empty or results_df['F_statistic'].isna().all():
    raise ValueError("Parameter scan produced no valid F-statistic; cannot select optimal parameters.")

# Look the winning row up once; idxmax ignores NaN F-statistics.
best_idx = results_df['F_statistic'].idxmax()
best_dim = results_df.loc[best_idx, 'Dimension']
best_delay = results_df.loc[best_idx, 'Delay']

In [None]:
# Report the parameter combination selected by the largest F-statistic.
print(f"\nOptimal parameters: Dimension={best_dim}, Delay={best_delay}")

In [None]:
# Rows computed with the selected (best_dim, best_delay) combination.
has_best_dim = df['Dimension'] == best_dim
has_best_delay = df['Delay'] == best_delay
optimal_df = df[has_best_dim & has_best_delay]

In [None]:
# Per-activity mean and standard deviation of both metrics on the
# optimal-parameter subset; 'Activity' is returned as a regular column.
optimal_aggregations = {
    metric: ['mean', 'std']
    for metric in ('Permutation entropy', 'Complexity')
}
optimal_stats = (
    optimal_df
    .groupby('Activity')
    .agg(optimal_aggregations)
    .reset_index()
)

In [None]:
# Show the per-activity mean/std table for the optimal-parameter subset.
print("\nActivity statistics with optimal parameters:")
print(optimal_stats)

In [None]:
# Create PE vs Complexity scatter plot.
# Built, saved and closed in a single cell: with Jupyter's inline backend open
# figures are closed at the end of each cell, so spreading these steps over several
# cells would save a figure that contains only the axis labels.

# Colors and markers for the known activities
colors = {'walking': 'blue', 'running': 'red', 'climbingup': 'green', 'climbingdown': 'purple'}
markers = {'walking': 'o', 'running': 's', 'climbingup': '^', 'climbingdown': 'd'}

scatter_fig, scatter_ax = plt.subplots(figsize=(12, 10))

for activity in optimal_df['Activity'].unique():
    activity_data = optimal_df[optimal_df['Activity'] == activity]

    # Activity labels missing from the dicts get a neutral style instead of
    # aborting the plot with a KeyError.
    activity_color = colors.get(activity, 'gray')
    activity_marker = markers.get(activity, 'o')

    scatter_ax.scatter(
        activity_data['Permutation entropy'],
        activity_data['Complexity'],
        label=activity,
        alpha=0.7,
        s=70,
        color=activity_color,
        marker=activity_marker
    )

    # Add centroid: mean (PE, Complexity) of the activity, drawn as a large star
    centroid = (activity_data['Permutation entropy'].mean(), activity_data['Complexity'].mean())
    scatter_ax.scatter(
        centroid[0],
        centroid[1],
        s=200,
        color=activity_color,
        marker='*',
        edgecolor='black',
        linewidth=1.5,
        alpha=1.0
    )

scatter_ax.set_xlabel('Permutation Entropy')
scatter_ax.set_ylabel('Statistical Complexity')
scatter_ax.set_title(f'PE vs Complexity with Optimal Parameters (Dimension={best_dim}, Delay={best_delay})')
scatter_ax.legend()
scatter_ax.grid(True, alpha=0.3)
scatter_fig.savefig('optimal_parameters_comparison.png')
plt.close(scatter_fig)

In [None]:
# --- 10. Activity Separation across Parameter Space ---
# Calculate the ANOVA F-statistic of permutation entropy across activities for each
# (Dimension, Delay) combination and flag whether it is significant.
# The accumulator is reset in the same cell as the loop, so re-running the cell
# cannot append duplicate records.
significance_level = 0.05  # p-value threshold used for the Is_significant flag
param_results = []

for dim in dimensions:
    for delay in delays:
        # Separate name for the per-combination subset; `param_df` is reserved for
        # the final result table built below.
        combo_df = df[(df['Dimension'] == dim) & (df['Delay'] == delay)]
        if combo_df.empty:
            continue  # combination not present in the dataset

        groups = [
            combo_df[combo_df['Activity'] == act]['Permutation entropy'].values
            for act in activities
        ]
        try:
            f_stat, p_value = stats.f_oneway(*groups)
        except Exception as exc:
            # Best-effort scan: report the failing combination and move on.
            # Catching Exception (not a bare except) lets KeyboardInterrupt through.
            print(f"Skipping Dimension={dim}, Delay={delay}: ANOVA failed ({exc})")
            continue
        param_results.append({
            'Dimension': dim,
            'Delay': delay,
            'F_statistic': f_stat,
            'p_value': p_value,
            'Is_significant': p_value < significance_level
        })

param_df = pd.DataFrame(param_results)

In [None]:
# Visualization of parameter space: one marker per (Dimension, Delay) combination,
# sized by its F-statistic and colored green when significant, red otherwise.
# Built, saved and closed in a single cell: with Jupyter's inline backend open
# figures are closed at the end of each cell, so labelling/saving in a later cell
# would write a figure without the markers.
space_fig, space_ax = plt.subplots(figsize=(10, 8))
for _, row in param_df.iterrows():
    color = 'green' if row['Is_significant'] else 'red'
    # Marker area scales linearly with the F-statistic (factor 20 chosen for visibility)
    space_ax.scatter(row['Dimension'], row['Delay'], s=row['F_statistic']*20, color=color, alpha=0.7)
    space_ax.text(row['Dimension'], row['Delay'], f"{row['F_statistic']:.2f}", ha='center', va='center')

space_ax.set_xlabel('Dimension')
space_ax.set_ylabel('Delay')
space_ax.set_title('F-Statistic for Activity Discrimination by Parameter Combination')
space_ax.set_xticks(dimensions)
space_ax.set_yticks(delays)
space_ax.grid(True)
space_fig.savefig('parameter_space_analysis.png')
plt.close(space_fig)

In [None]:
# --- 11. Analyzing Activity Patterns by Subject ---
# Rows computed with the selected (best_dim, best_delay) combination.
subject_dim_mask = df['Dimension'] == best_dim
subject_delay_mask = df['Delay'] == best_delay
subject_df = df[subject_dim_mask & subject_delay_mask]

In [None]:
# Box plot of permutation entropy per activity with one box per subject (hue),
# saved to subject_analysis.png. The legend is anchored outside the upper-right
# corner of the axes.
subj_fig, subj_ax = plt.subplots(figsize=(16, 10))
sns.boxplot(x='Activity', y='Permutation entropy', hue='Subject', data=subject_df, ax=subj_ax)
subj_ax.set_title(f'Permutation Entropy by Activity and Subject (Dimension={best_dim}, Delay={best_delay})')
subj_ax.legend(title='Subject', loc='upper right', bbox_to_anchor=(1.15, 1))
subj_ax.grid(True, alpha=0.3)
subj_fig.tight_layout()
subj_fig.savefig('subject_analysis.png')
plt.close(subj_fig)

In [None]:
# --- 12. 3D Visualization of Parameter Space ---
# Mean permutation entropy of each activity over the (Dimension, Delay) grid.
# Built, saved and closed in a single cell: with Jupyter's inline backend open
# figures are closed at the end of each cell, so a later-cell plt.savefig() would
# write a new, blank figure instead of this one.

# Get mean values for each combination
mean_data = df.groupby(['Dimension', 'Delay', 'Activity']).agg({
    'Permutation entropy': 'mean',
    'Complexity': 'mean'
}).reset_index()

# Create color map for activities.
# Reuses the activity -> color dict `colors` defined for the section 9 scatter plot
# so every activity has the same color in both figures, and leaves that dict intact
# so the earlier cell can be re-run. Activities missing from the dict fall back to
# the active matplotlib color cycle instead of raising a KeyError.
activities = mean_data['Activity'].unique()
fallback_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
activity_colors = {
    activity: colors.get(activity, fallback_cycle[position % len(fallback_cycle)])
    for position, activity in enumerate(activities)
}

fig = plt.figure(figsize=(14, 12))
ax = fig.add_subplot(111, projection='3d')

# Plot each activity
for activity in activities:
    activity_data = mean_data[mean_data['Activity'] == activity]

    ax.scatter(
        activity_data['Dimension'],
        activity_data['Delay'],
        activity_data['Permutation entropy'],
        label=activity,
        alpha=0.7,
        s=100,
        c=activity_colors[activity]
    )

ax.set_xlabel('Dimension')
ax.set_ylabel('Delay')
ax.set_zlabel('Permutation Entropy')
ax.set_title('Permutation Entropy in Parameter Space by Activity')
ax.legend()

# Save through the figure handle rather than pyplot's "current figure"
fig.savefig('3d_parameter_space.png')
plt.close(fig)

In [None]:
# --- 13. Create Summary Report ---
# Create a directory for results if it doesn't exist (relative to the current
# working directory; exist_ok=True makes the cell safe to re-run).
os.makedirs('results', exist_ok=True)

In [None]:
# Write a summary report to results/activity_analysis_summary.txt.
# All statistics are computed BEFORE the file is opened, so a failure while
# computing them cannot leave a truncated report on disk.

# Best row of the parameter scan (largest F-statistic), looked up once.
best_row_idx = results_df['F_statistic'].idxmax()
best_f = results_df.loc[best_row_idx, 'F_statistic']
best_p = results_df.loc[best_row_idx, 'p_value']

# Best axis: one-way ANOVA of permutation entropy across activities, per axis,
# on the Dimension=3 / Delay=1 subset.
axis_f_stats = {}
for axis in df['Axis'].unique():
    axis_data = filtered_df[filtered_df['Axis'] == axis]
    try:
        f_stat, _ = stats.f_oneway(
            *[axis_data[axis_data['Activity'] == act]['Permutation entropy'].values for act in activities]
        )
    except Exception:
        # A failed test scores 0 so the axis can never be picked as best.
        # Catching Exception (not a bare except) lets KeyboardInterrupt through.
        f_stat = 0
    # NaN compares False against everything, which would make max() depend on
    # iteration order; score it 0 like a failed test.
    axis_f_stats[axis] = 0 if np.isnan(f_stat) else f_stat

best_axis = max(axis_f_stats, key=axis_f_stats.get)

with open('results/activity_analysis_summary.txt', 'w', encoding='utf-8') as report:
    report.write("=== Human Activity Analysis using Permutation Entropy ===\n\n")

    report.write("Dataset Summary:\n")
    report.write(f"Total data points: {len(df)}\n")
    report.write(f"Subjects: {df['Subject'].nunique()}\n")
    report.write(f"Activities: {', '.join(sorted(df['Activity'].unique()))}\n")
    report.write(f"Axes: {', '.join(sorted(df['Axis'].unique()))}\n")
    report.write(f"Dimensions: {', '.join(map(str, sorted(df['Dimension'].unique())))}\n")
    report.write(f"Delays: {', '.join(map(str, sorted(df['Delay'].unique())))}\n")
    report.write(f"Signal lengths: {', '.join(map(str, sorted(df['Signal length'].unique())))}\n\n")

    report.write("Key Findings:\n")

    # Optimal parameters
    report.write(f"1. Optimal parameters for distinguishing activities:\n")
    report.write(f"   Dimension: {best_dim}, Delay: {best_delay}\n")
    report.write(f"   (F-statistic: {best_f:.4f}, p-value: {best_p:.4f})\n\n")

    # Activity differences
    report.write(f"2. Activity discrimination with basic parameters (dim=3, delay=1):\n")
    report.write(f"   ANOVA for PE: F={f_stat_pe:.4f}, p={p_value_pe:.4f}\n")
    report.write(f"   ANOVA for Complexity: F={f_stat_complexity:.4f}, p={p_value_complexity:.4f}\n\n")

    # Best axis
    report.write(f"3. Best axis for activity discrimination: {best_axis}\n")
    report.write(f"   (F-statistic: {axis_f_stats[best_axis]:.4f})\n\n")

    # Activity characteristics
    report.write("4. Activity characteristics with optimal parameters:\n")
    for activity in activities:
        activity_rows = optimal_df[optimal_df['Activity'] == activity]
        pe_mean = activity_rows['Permutation entropy'].mean()
        complexity_mean = activity_rows['Complexity'].mean()
        report.write(f"   {activity}: PE={pe_mean:.4f}, Complexity={complexity_mean:.4f}\n")

    # NOTE(review): the conclusions below are fixed text and are written regardless
    # of the statistics computed above. Confirm they still match the data.
    report.write("\n5. Conclusions:\n")
    report.write("   - Standard parameters (dimension=3, delay=1) show minimal differentiation between activities\n")
    report.write("   - Higher dimensions provide better discrimination capability\n")
    report.write("   - Different activities show distinctive patterns along specific axes\n")
    report.write("   - The permutation entropy approach can effectively characterize human activities\n")
    report.write("     when optimized parameters are used\n")

In [None]:
# Final status messages.
# NOTE(review): the figures above are saved with bare file names (current working
# directory), while the text report is written under results/.
print("\nAnalysis complete! Results saved to disk.")
print("Key visualizations and summary report have been generated.")