<a href="https://colab.research.google.com/github/regifukuchi/UFABC_UofC_datasets/blob/main/notebooks/BMC_RIC_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Three-dimensional kinematics dataset of running - interlaboratory comparison
## File conversion

> Reginaldo K Fukuchi, Marcos Duarte and Reed Ferber

## Setup

In [1]:
#@title ### import libraries {display-mode: "form"}

import sys, os, platform, glob, datetime
from pathlib import Path

# import/install libraries
def import_or_install(mds):
    """Import modules into this namespace, installing them with pip if missing.

    Parameters
    ----------
    mds : list
        Each item is either a module name (``'ipympl'``), a one-element list
        (``['ipympl']``) or a ``[module, alias]`` pair, e.g.
        ``[['pandas', 'pd'], 'ipympl']``; ``['pandas', 'pd']`` will import
        pandas as pd.

    Raises
    ------
    subprocess.CalledProcessError
        If the pip installation of a missing module fails.
    ImportError
        If the module still cannot be imported after installation.

    Notes
    -----
    The package handed to pip is the top-level part of the module name
    (``'tqdm.notebook'`` -> ``'tqdm'``). Distributions whose pip name differs
    from their import name are not handled.
    """
    import sys, importlib, subprocess
    for m in mds:
        if isinstance(m, str):
            name = alias = m
        elif len(m) == 1:
            # one-element list: the alias is the module name itself
            name = alias = m[0]
        else:
            name, alias = m[0], m[1]
        try:
            module = importlib.import_module(name)
        except ImportError:
            # pip installs distributions, not submodules
            package = name.split('.')[0]
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--upgrade', package])  # silent/reliable
            # make the freshly installed package visible to the import system
            importlib.invalidate_caches()
            module = importlib.import_module(name)
        globals()[alias] = module  # need globals inside function

# Bind numpy, pandas and tqdm.notebook to the aliases np, pd and tqdmn
# (each one is pip-installed first if it cannot be imported).
import_or_install([['numpy', 'np'], ['pandas', 'pd'], ['tqdm.notebook', 'tqdmn']])

# IPython magic: interactive matplotlib figures.
# NOTE(review): this backend is provided by ipympl, which is not in the list passed to
# import_or_install above — confirm it is installed in the target environment.
%matplotlib widget
# COLAB_RELEASE_TAG is only expected to be set when running on Google Colab,
# where third-party widgets must be enabled explicitly.
if os.getenv("COLAB_RELEASE_TAG"):  # or "google.colab" in sys.modules
    from google.colab import output
    output.enable_custom_widget_manager()

# Environment report: interpreter and platform, current date/time, and the version of
# every object in the global namespace that exposes a non-empty __version__.
print(f'Python {sys.version} on {platform.platform()}')
# sep has no effect here because print receives a single argument
print(datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"), sep='\n')
print('\n'.join(f' {m.__name__}=={m.__version__}' for m in globals().values() if getattr(m, '__version__', None)))

Python 3.11.4 | packaged by conda-forge | (main, Jun 10 2023, 18:08:17) [GCC 12.2.0] on Linux-6.2.0-24-generic-x86_64-with-glibc2.37
11/07/2023 14:47:35
 platform==1.0.8
 numpy==1.25.1
 pandas==2.0.3
 seaborn==0.12.2
 pingouin==0.5.3
 spm1d==0.4.19 (2023-06-22)
 ipympl==0.9.3


### Dataset location

In [3]:
#@title #### Path {display-mode: "form"}
# LOCAL: probe the candidate folders in order; the first one holding the metadata file wins.
metadata_fname = 'metadata.csv'
os_sep = os.sep
for path2 in (Path('../data'), Path('data'), Path('.')):
    if os.path.isfile(path2 / metadata_fname):
        break
else:
    # REMOTE: no local copy found, fall back to the raw files on GitHub (URLs always use '/')
    print('Dataset not found locally. It will be loaded from the GitHub repo.')
    path2 = 'https://raw.githubusercontent.com/BMClab/datasets/master/BMC_RIC/data'
    os_sep = '/'

# downstream cells build file names by string concatenation, so keep a plain string
path2 = str(path2)
print(f'Dataset location: {path2}')

Dataset location: ../data


## Load metadata

In [5]:
#@title #### Metadata {display-mode: "form"}
# Read the metadata table from the dataset location (first column is used as the index)
# and show it with the notebook's rich display.
metadata = pd.read_csv(os_sep.join([path2, metadata_fname]), index_col=0)
display(metadata)

Unnamed: 0,ID,Filename,Speed,Age,Height,Mass,Group
0,BMC01,BMC01angs.txt,2.5,22,181.0,62.0,BMC
1,BMC02,BMC02angs.txt,2.5,38,183.0,80.0,BMC
2,BMC03,BMC03angs.txt,2.5,34,177.0,65.4,BMC
3,BMC04,BMC04angs.txt,2.5,33,168.0,57.05,BMC
4,BMC05,BMC05angs.txt,2.5,32,169.0,60.0,BMC
5,BMC06,BMC06angs.txt,2.5,39,177.8,78.3,BMC
6,BMC07,BMC07angs.txt,2.5,31,182.9,66.7,BMC
7,BMC08,BMC08angs.txt,2.5,36,180.0,69.0,BMC
8,BMC09,BMC09angs.txt,2.5,51,173.2,63.15,BMC
9,BMC10,BMC10angs.txt,2.5,30,179.5,75.0,BMC


In [18]:
#@title #### Coordinate system conventions {display-mode: "form"}
# Unique three-character prefixes of the subject IDs (e.g. 'BMC' from 'BMC01').
labs = metadata['ID'].str.slice(stop=3).unique()
# Joints and rotation axes; planes[i] is printed below as the plane paired with axes[i].
joints = ['Hip', 'Knee', 'Ankle']
axes = ['Z', 'X', 'Y']
planes = ['Sagittal', 'Frontal', 'Transverse']
# Labels are joint-major: all axes (or planes) of one joint come before the next joint.
angles = [
    f'{joint}_{axis}'
    for joint in joints
    for axis in axes
]
angles2 = [
    f'{joint} {plane}'
    for joint in joints
    for plane in planes
]
print(f'Joints: {joints}')
print('Planes (axes):', [f'{plane} ({axis})' for axis, plane in zip(axes, planes)])
print(f'Columns in data: {angles}')
# angle convention: labels for positive and negative angle values.
# NOTE(review): rows presumably follow `joints` and columns follow `planes` — confirm.
pos_angs = [
    ['EXT', 'ABD', 'ER'],
    ['FLX', 'ABD', 'ER'],
    ['PF', 'EVE', 'ABD'],
]
neg_angs = [
    ['FLX', 'ADD', 'IR'],
    ['EXT', 'ADD', 'IR'],
    ['DF', 'INV', 'ADD'],
]

Joints: ['Hip', 'Knee', 'Ankle']
Planes (axes): ['Sagittal (Z)', 'Frontal (X)', 'Transverse (Y)']
Columns in data: ['Hip_Z', 'Hip_X', 'Hip_Y', 'Knee_Z', 'Knee_X', 'Knee_Y', 'Ankle_Z', 'Ankle_X', 'Ankle_Y']


In [67]:
# Split each multi-trial angle file into one file per trial and build the per-trial
# metadata table `metadata2`.
#
# Input : one tab-separated file per subject (metadata['Filename']) whose data columns are
#         consecutive blocks of len(angles) columns, one block per trial.
# Output: one tab-separated file per trial named gGGsSStTT.txt (group, subject and trial,
#         two digits each) with the columns renamed to `angles`, plus metadata2.txt.
groups = ['BMC', 'RIC']
n_angles = len(angles)  # number of data columns per trial
subject_cols = ['Speed', 'Age', 'Height', 'Mass']
rows = []
for fname in metadata['Filename']:
    print(fname[:5], end=' ')
    df = pd.read_csv(f'{path2}{os_sep}{fname}', header=0, sep='\t', index_col=0)
    # trailing columns that do not fill a complete trial block are ignored
    ntrials = df.shape[1] // n_angles
    if ntrials > 99:
        # the trial number will no longer fit the two-digit field of the file name
        print(f'WARNING {ntrials}')
    g = groups.index(fname[:3]) + 1  # 1-based group number from the file-name prefix
    s = int(fname[3:5])              # subject number from the file name
    # subject-level values are the same for every trial of this file
    subject_info = metadata.loc[metadata['Filename'] == fname, subject_cols].values.tolist()[0]
    for t in range(1, ntrials + 1):
        fname2 = f'g{g:02d}s{s:02d}t{t:02d}.txt'
        # Group, Subject and Trial are kept as integers so that they can be compared with
        # numbers and formatted with ':02d' when the per-trial files are read back.
        rows.append([g, s, t, fname2, *subject_info])
        dft = df.iloc[:, n_angles * (t - 1):n_angles * t].copy()
        dft.columns = angles
        try:
            dft.to_csv(f'{path2}{os_sep}{fname2}', sep='\t', float_format=None, index=True)
        except Exception as err:
            # best effort: writing fails, e.g., when the dataset location is read-only
            print(f'File {fname2} not saved: {err}')

# Build the table once instead of growing it row by row.
metadata2 = pd.DataFrame(rows, columns=['Group', 'Subject', 'Trial', 'Filename', *subject_cols])
metadata2.to_csv(f'{path2}{os_sep}metadata2.txt', sep='\t', float_format=None, index=True)

BMC01 BMC02 BMC03 BMC04 BMC05 BMC06 BMC07 BMC08 BMC09 BMC10 BMC11 BMC12 BMC13 BMC14 BMC15 BMC16 BMC17 BMC18 BMC19 BMC20 BMC21 BMC22 BMC23 RIC01 RIC02 RIC03 RIC04 RIC05 RIC06 RIC07 RIC08 RIC09 RIC10 RIC11 RIC12 RIC13 RIC14 RIC15 RIC16 RIC17 RIC18 RIC19 RIC20 RIC21 RIC22 RIC23 

In [100]:
#@title #### Load and process all trials {display-mode: "form"}
# Load every per-trial file, take the median across trials for each subject, then the
# mean and standard deviation across subjects for each group.
#
# Results:
#   ang_all : (n_points, n_angles, nsubjects, ngroups) per-subject median curves
#   ang_m   : (n_points, n_angles, ngroups) mean across subjects
#   ang_sd  : (n_points, n_angles, ngroups) sample standard deviation (ddof=1) across subjects
n_points = 101  # rows per trial file — presumably 0-100% of the normalized cycle; TODO confirm
n_angles = len(joints) * len(axes)

# Group/Subject/Trial must be integers for the comparisons and the ':02d' file-name
# formatting below; cast a copy so this also works when the table holds them as strings.
meta_ids = metadata2.astype({'Group': int, 'Subject': int, 'Trial': int})

groups = meta_ids['Group'].unique()
# size the subject axis for the largest group; slots of smaller groups stay NaN
nsubjects = max(
    (meta_ids.loc[meta_ids['Group'] == group, 'Subject'].nunique() for group in groups),
    default=0,
)
ang_all = np.full([n_points, n_angles, nsubjects, len(groups)], np.nan)
for g, group in enumerate(tqdmn.tqdm(groups, desc='Group')):
    in_group = meta_ids['Group'] == group
    subjects = meta_ids.loc[in_group, 'Subject'].unique()
    for s, subject in enumerate(tqdmn.tqdm(subjects, desc='Subject')):
        trials = meta_ids.loc[in_group & (meta_ids['Subject'] == subject), 'Trial']
        yts = np.full([n_points, n_angles, len(trials)], np.nan)
        for t, trial in enumerate(trials):
            fname2 = f'g{group:02d}s{subject:02d}t{trial:02d}.txt'
            try:
                # skip the header row and the index column
                yts[:, :, t] = np.loadtxt(f'{path2}{os_sep}{fname2}', skiprows=1,
                                          usecols=tuple(range(1, n_angles + 1)))
            except Exception as err:
                # best effort: a trial that cannot be read stays NaN and is ignored by nanmedian
                print(f'File {fname2} not loaded: {err}')
        ang_all[:, :, s, g] = np.nanmedian(yts, axis=2)
# Mean and std across subjects
ang_m  = np.nanmean(ang_all, axis=2)
ang_sd = np.nanstd(ang_all, axis=2, ddof=1)

Group:   0%|          | 0/2 [00:00<?, ?it/s]

Subject:   0%|          | 0/23 [00:00<?, ?it/s]

Subject:   0%|          | 0/23 [00:00<?, ?it/s]

In [8]:
#@title #### Reorder planes of angles {display-mode: "form"}

# for fname in metadata['Filename']:
#     print(fname[:5], end=' ')
#     df = pd.read_csv(f'{path2}{os_sep}{fname}', header=0, sep='\t', index_col=0)
#     if df.columns[0][4] == 'Z':
#         print('Planes already reordered.')
#         break
#     order = [np.array([2, 0, 1, 5, 3, 4, 8, 6, 7])+9*i for i in range(int(df.shape[1]/9))]
#     order = np.array(order).flatten()
#     df = df.iloc[:, order]
#     try:
#         df.to_csv(f'{path2}{os_sep}{fname}', sep='\t', float_format=None, index=True)
#     except:
#         print('File not saved in the cloud.')
# # rename columns
# for fname in metadata['Filename']:
#     print(fname[:5], end=' ')
#     df = pd.read_csv(f'{path2}{os_sep}{fname}', header=0, sep='\t', index_col=0)
#     if df.columns[0] == 'Hip_Z_1':
#         print('Columns already renamed.')
#         break
#     cols = [f'{angle}_{trial}' for trial in range(1, int(df.shape[1]/9)+1) for angle in angles]
#     df.columns = cols
#     try:
#         df.to_csv(f'{path2}{os_sep}{fname}', sep='\t', float_format=None, index=True)
#     except:
#         print('File not saved in the cloud.')

BMC01 Planes already reordered.
BMC01 Columns already renamed.
