<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#1.-MVC-project-description" data-toc-modified-id="1.-MVC-project-description-1">1. MVC project description</a></span></li><li><span><a href="#2.-Setup" data-toc-modified-id="2.-Setup-2">2. Setup</a></span></li><li><span><a href="#3.-Get-the-data" data-toc-modified-id="3.-Get-the-data-3">3. Get the data</a></span><ul class="toc-item"><li><span><a href="#3.1.-From-matlab-to-dict" data-toc-modified-id="3.1.-From-matlab-to-dict-3.1">3.1. From matlab to dict</a></span></li><li><span><a href="#3.2.-From-dict-to-pandas" data-toc-modified-id="3.2.-From-dict-to-pandas-3.2">3.2. From dict to pandas</a></span></li></ul></li><li><span><a href="#4.-Save-the-data" data-toc-modified-id="4.-Save-the-data-4">4. Save the data</a></span></li></ul></div>

# 1. MVC project description

**Links**
- [github repo](https://github.com/romainmartinez/mvc)
- [plotly figures](https://plot.ly/organize/romainmartinez:114)

**Author**: _Romain Martinez._

# 2. Setup

In [1]:
# Common imports
import scipy.io as sio
import pandas as pd
import numpy as np
import json

# Path
from pathlib import Path
PROJECT_PATH = Path('./')
DATA_PATH = PROJECT_PATH / 'data'

# 3. Get the data

## 3.1. From matlab to dict

In [2]:
def load_data(data_path, data_format, normalize=False, verbose=False):
    mat = {}
    data = {key: [] for key in ('datasets', 'participants', 'muscles', 'tests', 'mvc')}
    count = -1
    dataset_names = []
    
    for idataset, ifile in enumerate(data_path.iterdir()):
        if ifile.parts[-1].endswith(f'{data_format}.mat'):
            dataset = ifile.parts[-1].replace('_only_max.mat', '').replace('MVE_Data_', '')
            
            if dataset not in dataset_names:
                dataset_names.append(dataset)
            
            mat[dataset] = sio.loadmat(ifile)['MVE']
            n_participants = mat[dataset].shape[0]
            if verbose: print(f"project '{dataset}' ({n_participants} participants)")
            
            for iparticipant in range(mat[dataset].shape[0]):
                count += 1
                for imuscle in range(mat[dataset].shape[1]):
                    max_mvc = np.nanmax(mat[dataset][iparticipant, imuscle, :])
                    for itest in range(mat[dataset].shape[2]):
                        data['participants'].append(count)
                        data['datasets'].append(idataset)
                        data['muscles'].append(imuscle)
                        data['tests'].append(itest)
                        if normalize:
                            data['mvc'].append(mat[dataset][iparticipant, imuscle, itest] * 100 / max_mvc)
                        else:
                            data['mvc'].append(mat[dataset][iparticipant, imuscle, itest])
                            
    if verbose: print(f'\n\ttotal participants: {count}')
    return data, dataset_names

In [3]:
DATA_FORMAT = 'only_max'
data, DATASET_NAMES = load_data(
    data_path=DATA_PATH,
    data_format=DATA_FORMAT,
    normalize=False,
    verbose=True)
normalized, _ = load_data(
    data_path=DATA_PATH, data_format=DATA_FORMAT, normalize=True)

project 'Landry2016' (15 participants)
project 'Landry2015_2' (11 participants)
project 'Landry2015_1' (14 participants)
project 'Violon' (10 participants)
project 'Yoann_2015' (22 participants)
project 'Landry2013' (21 participants)
project 'Landry2012' (18 participants)
project 'Romain2017' (32 participants)
project 'Tennis' (16 participants)
project 'Patrick_2013' (16 participants)
project 'Sylvain_2015' (10 participants)

	total participants: 184




## 3.2. From dict to pandas

In [4]:
df_tidy = pd.DataFrame({
    'participant': data['participants'],
    'dataset': data['datasets'],
    'muscle': data['muscles'],
    'test': data['tests'],
    'mvc': data['mvc']
}).dropna()

print(f'dataset shape = {df_tidy.shape}')
df_tidy.head()

dataset shape = (18465, 5)


Unnamed: 0,dataset,muscle,mvc,participant,test
2,0,0,0.127825,0,2
3,0,0,0.124255,0,3
4,0,0,0.146927,0,4
5,0,0,0.041583,0,5
8,0,0,0.162206,0,8


In [5]:
df_tidy_normalized = pd.DataFrame({
    'participant': normalized['participants'],
    'dataset': normalized['datasets'],
    'muscle': normalized['muscles'],
    'test': normalized['tests'],
    'mvc': normalized['mvc']
}).dropna()

print(f'dataset shape = {df_tidy.shape}')
df_tidy_normalized.head()

dataset shape = (18465, 5)


Unnamed: 0,dataset,muscle,mvc,participant,test
2,0,0,78.80394,0,2
3,0,0,76.603025,0,3
4,0,0,90.580569,0,4
5,0,0,25.635654,0,5
8,0,0,100.0,0,8


In [6]:
df_wide = df_tidy.pivot_table(
    index=['dataset', 'participant', 'muscle'],
    columns='test',
    values='mvc',
    fill_value=np.nan).reset_index()

df_wide = df_wide.drop(['dataset', 'participant'], axis=1)
df_wide.columns = df_wide.columns.astype(str)

print(f'dataset shape = {df_wide.shape}')
df_wide.head()

dataset shape = (1721, 17)


test,muscle,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,0,,,0.127825,0.124255,0.146927,0.041583,,,0.162206,0.017711,0.014369,,,,,0.036916
1,2,,,0.179864,0.294909,0.295846,0.107769,,,0.199097,0.20215,0.022668,,,,,0.07146
2,3,,,0.078753,0.244578,0.272709,0.010146,,,0.10649,0.0106,0.007517,,,,,0.238814
3,4,,,0.150353,0.104654,0.115272,0.057845,,,0.065429,0.05293,0.009894,,,,,0.079885
4,5,,,0.172669,0.124655,0.133114,0.196436,,,0.101393,0.187997,0.051396,,,,,0.050041


# 4. Save the data

In [7]:
df_tidy.reset_index(drop=True).to_feather(DATA_PATH / 'df_tidy')
df_tidy_normalized.reset_index(drop=True).to_feather(DATA_PATH / 'df_tidy_normalized')
df_wide.reset_index(drop=True).to_feather(DATA_PATH / 'df_wide')

In [8]:
conf = {
    'DATASETS': DATASET_NAMES,
    'MUSCLES': [
        'upper trapezius', 'middle trapezius', 'lower trapezius',
        'anterior deltoid', 'middle deltoid', 'posterior deltoid',
        'pectoralis major', 'serratus anterior', 'latissimus dorsi',
        'supraspinatus', 'infraspinatus', 'subscapularis'
    ],
    'TESTS': np.arange(16).tolist()
}

with open(DATA_PATH / 'conf.json', 'w') as w:
    json.dump(conf, w, indent=4)