# MVC project: Get the Data
- [GitHub](https://github.com/romainmartinez/mvc)

## 0. Setup

In [1]:
# Common imports
import pandas as pd
import numpy as np
import os

# the 'mvc' package holds helper functions used below; their internals are not needed to follow the story
import mvc

# Figures
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
sns.set(style='ticks')
%matplotlib inline

# to make this notebook's output stable across runs
np.random.seed(42)

## 1. Get the Data

In [2]:
# Root folder of the exported data. Set the MVC_DATA_PATH environment variable
# to run the notebook on another machine; the original location is the default.
DATA_PATH = os.environ.get('MVC_DATA_PATH', '/media/romain/E/Projet_MVC/data/Final_output')
DATA_FORMAT = 'only_max'  # one of 'only_max', '3000_points' or '100_points'

In [3]:
# Import the data with the project's ImportMat helper, asking for a
# dict export with normalization switched on.
mat = mvc.fileio.ImportMat(
    DATA_PATH,
    DATA_FORMAT,
    export='dict',
    normalize=True,
)

data format: only_max
project 'Landry2012_only_max' loaded (18 participants)
project 'Landry2013_only_max' loaded (21 participants)
project 'Patrick_2013_only_max' loaded (16 participants)
project 'Violon_only_max' loaded (10 participants)
project 'Sylvain_2015_only_max' loaded (10 participants)
project 'Landry2015_2_only_max' loaded (11 participants)
project 'Tennis_only_max' loaded (16 participants)
project 'Landry2016_only_max' loaded (15 participants)
project 'Landry2015_1_only_max' loaded (14 participants)
project 'Yoann_2015_only_max' loaded (22 participants)
	sample shape: (18, 12, 16)
	total participants: 152


  max_mvc = np.nanmax(mat[dataset_name][iparticipant, imuscle, :])


In [4]:
# Names of the variables
DATASETS_NAMES = mat.datasets
# One muscle per line; the trailing comment gives its position in the list
MUSCLES_NAMES = [
    'upper trapezius',    # 0
    'middle trapezius',   # 1
    'lower trapezius',    # 2
    'anterior deltoid',   # 3
    'middle deltoid',     # 4
    'posterior deltoid',  # 5
    'pectoralis major',   # 6
    'serratus anterior',  # 7
    'latissimus dorsi',   # 8
    'supraspinatus',      # 9
    'infraspinatus',      # 10
    'subscapularis',      # 11
]
COLUMNS_NAMES = [
    'datasets',
    'participants',
    'muscles',
    'tests',
    'mvc',
]

In [25]:
# Gather the fields of `mat.data` into a single tidy dataframe
# (singular column names), discarding any row that holds a missing value.
df_tidy = pd.DataFrame(
    {
        column: mat.data[key]
        for column, key in [
            ('participant', 'participants'),
            ('dataset', 'datasets'),
            ('muscle', 'muscles'),
            ('test', 'tests'),
            ('mvc', 'mvc'),
        ]
    }
).dropna()

df_tidy.head()

Unnamed: 0,dataset,muscle,mvc,participant,test
0,0,0,43.698789,0,0
1,0,0,62.605141,0,1
2,0,0,57.291694,0,2
3,0,0,78.379715,0,3
4,0,0,100.0,0,4


## 2. Prepare the Data for Machine Learning

In [26]:
# Reshape tidy -> wide: one row per (dataset, participant, muscle) and one
# 'test_<n>' column per test, with absent combinations filled with 0.
# 'max' is the row-wise maximum over the test columns; it is computed before
# reset_index(), while the test columns are the only columns in the frame.
df_wide = (
    df_tidy
    .pivot_table(
        index=['dataset', 'participant', 'muscle'],
        columns='test',
        values='mvc',
        fill_value=0,
    )
    .add_prefix('test_')
    .assign(max=lambda wide: wide.max(axis=1))
    .reset_index()
)

In [36]:
# Print the (rows, columns) shape of the wide table, then show its first rows
print(f'DataFrame shape: {df_wide.shape}')
df_wide.head()

DataFrame shape: (1468, 20)


test,dataset,participant,muscle,test_0,test_1,test_2,test_3,test_4,test_5,test_6,test_7,test_8,test_9,test_10,test_11,test_12,test_13,test_14,test_15,max
0,0,0,0,43.698789,62.605141,57.291694,78.379715,100.0,23.488809,13.719727,11.694191,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0
1,0,0,1,8.171433,100.0,96.507701,31.44656,46.167044,34.284982,30.203942,12.087327,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0
2,0,0,2,8.342758,68.267012,100.0,56.621549,63.595539,23.340463,46.716347,5.808028,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0
3,0,0,3,5.142787,23.013018,39.005871,100.0,82.660796,2.900488,3.763153,11.644537,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0
4,0,0,4,16.853917,85.16183,100.0,50.367926,71.086938,20.973939,5.791126,6.68506,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0


### Stratified Train Test Split

In [30]:
from sklearn.model_selection import train_test_split

In [39]:
# Remove the identifier columns so they do not end up in the train/test sets
df_clean = df_wide.drop(['dataset', 'participant'], axis=1)

# Split the cleaned frame (not df_wide), stratifying on 'muscle' with a fixed
# random_state so the 80/20 split is reproducible.
# NOTE(review): rows are split individually, so the same participant may
# appear in both sets — confirm whether a group-aware split is wanted.
train_set, test_set = train_test_split(df_clean, test_size=0.2, random_state=42,
                                       stratify=df_clean['muscle'])