In [28]:
import glob
from datetime import datetime
import os

import pandas as pd
from dataprep.eda import create_report
from tqdm import tqdm

from load_mvnx import load_mvnx
from utility_functions import read_single_mvnx_to_df, read_mvnx_metadata

# Root folder that is searched recursively for .mvnx recordings.
# The default is the original hard-coded local checkout; set the
# DRAG_FLICK_DATA_DIR environment variable to point the notebook at another
# location without editing this cell. An unset or empty variable falls back
# to the default.
DATA_DIRECTORY = (
    os.environ.get('DRAG_FLICK_DATA_DIR')
    or 'D:\\Projects\\Drag-flik-RAIS-Hackathon\\data'
)

In [29]:
# Recursively collect every .mvnx recording below DATA_DIRECTORY.
# The pattern is assembled from separate components so it does not rely on a
# hard-coded backslash separator, and the matches are sorted because glob
# leaves the ordering of its results up to the file system.
mvnx_pattern = os.path.join(DATA_DIRECTORY, '**', '*.mvnx')
file_names = sorted(glob.glob(mvnx_pattern, recursive=True))
print("Identified {} .mvnx files...".format(len(file_names)))

Identified 1722 .mvnx files...


In [30]:
# Parse every discovered .mvnx recording into its own frame, tag each frame
# with the recording's metadata, and stack everything into one table `df`.
#
# The per-file frames are collected in a list and concatenated once at the
# end: concatenating onto `df` inside the loop re-copies all rows accumulated
# so far on every iteration.
file_frames = []
for file_name in tqdm(file_names):
    # `file_names` already holds the full paths produced by glob, so the path
    # is used as-is rather than being joined onto DATA_DIRECTORY a second time
    # (which would duplicate the prefix for a relative data directory).
    mvnx_file = load_mvnx(file_name)
    if mvnx_file is None:
        # Unparseable recordings are reported and skipped, not fatal.
        print("Not possible to parse file {}".format(file_name))
        continue
    file_df = read_single_mvnx_to_df(mvnx_file, disable_print=True)

    # `subject_id` instead of `id` so the builtin is not shadowed.
    year, subject_id, sample, gender = read_mvnx_metadata(mvnx_file, file_name)
    # Year labels carrying an 'A' or 'B' marker are collapsed to plain '2021'.
    if 'A' in year or 'B' in year:
        year = '2021'

    # Broadcast the metadata onto every row of this recording.
    # NOTE(review): assumes each metadata value is a scalar (the previous
    # one-element Series + ffill approach implied the same) -- confirm against
    # read_mvnx_metadata.
    file_frames.append(
        file_df.assign(year=year, id=subject_id, sample=sample, gender=gender)
    )

# Row labels restart for each recording, exactly as before (no ignore_index).
# Fall back to an empty frame when nothing could be parsed, since pd.concat
# raises on an empty list.
df = pd.concat(file_frames, axis=0) if file_frames else pd.DataFrame()

 41%|████      | 701/1722 [20:30<35:40,  2.10s/it]  

Not possible to parse file D:\Projects\Drag-flik-RAIS-Hackathon\data\2019\Mvnx\Body_32\Body_32_2019 (16).mvnx


 42%|████▏     | 730/1722 [21:26<33:57,  2.05s/it]

Not possible to parse file D:\Projects\Drag-flik-RAIS-Hackathon\data\2019\Mvnx\Body_32\Body_32\Body_32_071019-017.mvnx


 46%|████▌     | 792/1722 [23:34<34:23,  2.22s/it]

Not possible to parse file D:\Projects\Drag-flik-RAIS-Hackathon\data\2019\Mvnx\Body_35\Body_35_2019 (7).mvnx


 74%|███████▍  | 1272/1722 [42:57<23:15,  3.10s/it]

Not possible to parse file D:\Projects\Drag-flik-RAIS-Hackathon\data\2021_A\Mvnx\Body_65\Body_65_290621-005.mvnx


 89%|████████▊ | 1526/1722 [56:40<10:30,  3.22s/it]

Not possible to parse file D:\Projects\Drag-flik-RAIS-Hackathon\data\2021_B\Mvnx\Body_75\Body_75_2021B (1).mvnx


 89%|████████▉ | 1535/1722 [57:05<09:21,  3.01s/it]

Not possible to parse file D:\Projects\Drag-flik-RAIS-Hackathon\data\2021_B\Mvnx\Body_75\Body_75_2021B (3).mvnx


 95%|█████████▍| 1632/1722 [1:02:16<05:54,  3.94s/it]

Not possible to parse file D:\Projects\Drag-flik-RAIS-Hackathon\data\2021_B\Mvnx\Body_83\Body_83_2021B (1).mvnx


 95%|█████████▌| 1642/1722 [1:02:43<04:01,  3.02s/it]

Not possible to parse file D:\Projects\Drag-flik-RAIS-Hackathon\data\2021_B\Mvnx\Body_83\Body_83_2021B (5).mvnx


100%|██████████| 1722/1722 [1:07:23<00:00,  2.35s/it]


In [31]:
# Preview the first five rows of the stacked feature table.
df.head(n=5)

Unnamed: 0,Pelvis_acc_0,Pelvis_acc_1,Pelvis_acc_2,Pelvis_angular_acc_0,Pelvis_angular_acc_1,Pelvis_angular_acc_2,Pelvis_vel_0,Pelvis_vel_1,Pelvis_vel_2,Pelvis_angular_vel_0,...,LeftToe_angular_vel_0,LeftToe_angular_vel_1,LeftToe_angular_vel_2,LeftToe_ori_0,LeftToe_ori_1,LeftToe_ori_2,LeftToe_ori_3,LeftToe_pos_0,LeftToe_pos_1,LeftToe_pos_2
0,0.065029,0.06093,-0.063273,0.848845,-0.554506,-1.050589,-0.069628,0.014435,0.004669,0.060686,...,,,,,,,,,,
1,0.124831,-0.110555,-0.033831,-0.365366,-0.883694,-0.479441,-0.068602,0.014635,0.004517,0.059164,...,,,,,,,,,,
2,0.149079,-0.071584,-0.092055,0.063539,-0.969149,-0.945796,-0.067396,0.014898,0.004128,0.059429,...,,,,,,,,,,
3,0.071367,-0.329402,-0.074564,-1.390745,-0.642128,-0.478393,-0.066487,0.014024,0.003817,0.053634,...,,,,,,,,,,
4,0.150359,-0.121912,-0.028015,-0.380801,-1.111007,-0.887547,-0.065267,0.013861,0.0037,0.052047,...,,,,,,,,,,


In [33]:
# Cast the three metadata key columns to integers (they are the join keys used
# against the label table further down).
# The conversion is done one column at a time on the existing frame:
# DataFrame.astype with a dtype mapping builds a whole new frame, which is an
# avoidable full copy of a very wide table just to change three columns.
for key_column in ('year', 'id', 'sample'):
    df[key_column] = df[key_column].astype(int)

In [34]:
# Persist the stacked feature table next to the notebook so the slow parsing
# loop does not have to be repeated.
df.to_pickle(path='mvnx_data.pkl')

In [35]:
# Load the ball-speed label table. The path is relative, so it is resolved
# against the notebook's current working directory.
label_csv_path = os.path.join("data", "ballspeed.csv")
label = pd.read_csv(label_csv_path)

In [36]:
# Preview the first five rows of the label table.
label.head(n=5)

Unnamed: 0,id,year,sample,speed
0,1,2017,1,65.0
1,1,2017,2,69.0
2,1,2017,3,70.0
3,1,2017,4,69.0
4,1,2017,5,69.0


In [37]:
# Attach the label columns to every feature row sharing the same
# (year, id, sample) key; the inner join keeps only rows present on both sides.
# NOTE(review): the recorded run of this cell ended in a MemoryError. The join
# produces a second frame alongside `df`, so the feature table probably needs
# to be made smaller (or `df` released) before this can complete -- confirm.
join_keys = ['year', 'id', 'sample']
merged = df.merge(label, how='inner', on=join_keys)
shape_summary = "Labels shape: {} - Features Shape: {} - Inner Join Merged df Shape: {}"
print(shape_summary.format(label.shape, df.shape, merged.shape))

MemoryError: Unable to allocate 14.7 MiB for an array with shape (1, 1922335) and data type float64

In [None]:
# Persist the labelled (merged) table next to the notebook.
merged.to_pickle(path='mvnx_merged_data.pkl')

In [None]:
# Build the EDA report for the feature table and open it in the browser.
# The report object is kept in `report`; chaining `.show_browser()` onto the
# constructor call stored that method's return value (nothing useful) instead
# of the report itself, so the report could not be reused afterwards.
report = create_report(df, title='Kinetic Analysis')
report.show_browser()