# 📊 EMG2Pose Dataset - Exploratory Data Analysis (EDA)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import h5py
import os
import numpy as np

## 📁 Load Metadata

In [None]:
# Load the metadata table from a CSV path that is relative to the notebook's
# working directory.
metadata = pd.read_csv(filepath_or_buffer='../data/metadata.csv')
# Show the first rows as the cell output (5 is the default for head()).
metadata.head(n=5)

## 🔍 Data Split Distribution

In [None]:
# Bar chart counting metadata rows for each value of the 'split' column.
# countplot returns the Axes it drew on, so labels are set on it directly.
ax = sns.countplot(data=metadata, x='split')
ax.set_title("Data Split Distribution")
ax.set_xlabel("Split")
ax.set_ylabel("Number of Samples")
plt.show()

## 👥 Distribution of Samples per User

In [None]:
# Count metadata rows per distinct 'user' value, then histogram those counts
# (each histogram entry is one user; the x-axis is that user's row count).
rows_per_user = metadata['user'].value_counts()
ax = rows_per_user.plot(kind='hist', bins=20, title='Samples per User')
ax.set_xlabel("Sample count")
plt.show()

## ⚙️ Generalization Strategy Distribution

In [None]:
# Bar chart counting metadata rows for each value of the 'generalization' column.
ax = sns.countplot(data=metadata, x='generalization')
ax.set_title("Generalization Strategy")
# Tilt the category labels so longer names do not overlap.
plt.xticks(rotation=45)
plt.show()

## 🖐 Moving Hand by Hand Side

In [None]:
# Grouped bar chart: one group per 'moving_hand' value, with one bar per
# 'side' value inside each group.
ax = sns.countplot(data=metadata, x='moving_hand', hue='side')
ax.set_title("Moving Hand by Side")
plt.show()

## 🔬 Load Sample EMG and Pose Data

In [None]:
# Pick the first metadata row as the example recording to inspect.
sample_row = metadata.iloc[0]
user_id = sample_row['user']
session_id = sample_row['session']
stage_name = sample_row['stage']

# NOTE(review): assumes recordings are stored as
# ../data/<user>_<session>_<stage>.h5 with top-level 'emg' and 'pose'
# datasets — confirm against the actual files on disk.
sample_file = f"../data/{user_id}_{session_id}_{stage_name}.h5"

# Read both datasets fully into memory so they stay usable after the file
# is closed.
with h5py.File(sample_file, 'r') as h5_file:
    emg = h5_file['emg'][:]
    pose = h5_file['pose'][:]

print("EMG shape:", emg.shape)
print("Pose shape:", pose.shape)

## 📈 Visualize EMG Channels

In [None]:
# Overlay the first 200 rows of up to 8 EMG columns on a single set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
n_emg_traces = min(emg.shape[1], 8)  # cap the number of lines drawn
for channel in range(n_emg_traces):
    ax.plot(emg[:200, channel], label=f'EMG channel {channel}')
ax.set_title("First 200 EMG Samples")
ax.set_xlabel("Timestep")
ax.set_ylabel("Signal")
ax.legend()
plt.show()

## 🤖 Visualize Pose Channels

In [None]:
# Overlay the first 200 rows of up to 5 pose columns on a single set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
n_pose_traces = min(pose.shape[1], 5)  # cap the number of lines drawn
for joint in range(n_pose_traces):
    ax.plot(pose[:200, joint], label=f'Pose joint {joint}')
ax.set_title("First 200 Pose Samples")
ax.set_xlabel("Timestep")
ax.set_ylabel("Angle")
ax.legend()
plt.show()