In [30]:
import os
import numpy as np
import pandas as pd
from sktime.datasets import load_UCR_UEA_dataset
from sktime.datasets import tsc_dataset_names

In [31]:
# Peek at the first five multivariate dataset names known to sktime.
tsc_dataset_names.multivariate[:5]

['ArticularyWordRecognition',
 'AsphaltObstaclesCoordinates',
 'AsphaltPavementTypeCoordinates',
 'AsphaltRegularityCoordinates',
 'AtrialFibrillation']

In [32]:
# Dataset identifiers: `dataset_handle` is the name handed to the sktime
# loader, `dataset_name` is the snake_case name used in output paths.
dataset_handle = 'BasicMotions'
dataset_name = 'basic_motions'

# Root folder for processed datasets. Every path below is derived from it so
# the location only has to be changed in one place.
processed_dir = './../../processed/'
# The trailing '' keeps the final path separator on the directory.
output_dir = os.path.join(processed_dir, dataset_name, '')
os.makedirs(output_dir, exist_ok=True)

# Destination CSV files: combined data, test labels (key), train split and
# unlabelled test split.
full_outp_fname = os.path.join(output_dir, f'{dataset_name}.csv')
test_key_outp_fname = os.path.join(output_dir, f'{dataset_name}_test_key.csv')
train_outp_fname = os.path.join(output_dir, f'{dataset_name}_train.csv')
test_outp_fname = os.path.join(output_dir, f'{dataset_name}_test.csv')



## Load training and test data

In [33]:
# Fetch the TRAIN split first, then the TEST split, of the selected dataset.
(X_train, y_train), (X_test, y_test) = (
    load_UCR_UEA_dataset(name=dataset_handle, split=split_name)
    for split_name in ('TRAIN', 'TEST')
)

In [34]:
def prepare_dataset(X, y):
    """Flatten a nested panel of series into one long, labelled DataFrame.

    Each cell of ``X`` must hold a sequence exposing ``.tolist()``. Rows are
    visited in order and their values are appended end to end, column by
    column, so the result is a single concatenated series: ``series_id`` is
    the constant 0 for every row and ``timestep`` counts rows from 0 across
    the whole frame.

    Rows of ``X`` are paired with entries of ``y`` by position, not by index
    label, so ``X`` may carry any index.

    Parameters
    ----------
    X : pandas.DataFrame
        One row per instance, one column per dimension; each cell is a
        sequence of values (assumed to have equal length within a row).
    y : sequence
        One label per row of ``X``, in the same order.

    Returns
    -------
    pandas.DataFrame
        Columns ``series_id``, ``timestep``, the columns of ``X``, ``label``.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not have the same number of entries.
    """
    if len(X) != len(y):
        raise ValueError(
            f'X has {len(X)} rows but y has {len(y)} labels; they must match.'
        )

    # Pre-seed one list per column so empty inputs still yield all columns.
    flattened = {column: [] for column in X.columns}
    label_column = []

    # zip pairs rows and labels positionally; the index label is ignored.
    for (_, row), label in zip(X.iterrows(), y):
        n_steps = 0
        for column in X.columns:
            series_values = row[column].tolist()
            flattened[column].extend(series_values)
            # The label is repeated once per value of the last column read.
            n_steps = len(series_values)
        label_column.extend([label] * n_steps)

    flattened['label'] = label_column
    df = pd.DataFrame(flattened)
    df.insert(0, 'series_id', 0)
    df.insert(1, 'timestep', list(range(len(df))))

    return df

            


In [35]:
# Flatten both splits into long-format frames.
train_df = prepare_dataset(X_train, y_train)
test_labelled_df = prepare_dataset(X_test, y_test)

# ignore_index gives the combined frame a fresh 0..n-1 index instead of
# repeating each split's own row labels.
# NOTE(review): both splits carry series_id 0 and a timestep starting at 0,
# so (series_id, timestep) is not unique in `full` - confirm that downstream
# consumers expect this.
full = pd.concat([train_df, test_labelled_df], ignore_index=True)

# Split the test data into its answer key and an unlabelled feature frame.
# The labelled frame is left untouched (no in-place drop), so `full` and the
# key are built from the same unmodified data.
test_key_df = test_labelled_df[['series_id', 'timestep', 'label']]
test_df = test_labelled_df.drop(columns=['label'])

In [36]:
# Display the prepared training frame as this cell's output.
train_df

Unnamed: 0,series_id,timestep,dim_0,dim_1,dim_2,dim_3,dim_4,dim_5,label
0,0,0,0.079106,0.394032,0.551444,0.351565,0.023970,0.633883,standing
1,0,1,0.079106,0.394032,0.551444,0.351565,0.023970,0.633883,standing
2,0,2,-0.903497,-3.666397,-0.282844,-0.095881,-0.319605,0.972131,standing
3,0,3,1.116125,-0.656101,0.333118,1.624657,-0.569962,1.209171,standing
4,0,4,1.638200,1.405135,0.393875,1.187864,-0.271664,1.739182,standing
...,...,...,...,...,...,...,...,...,...
3995,0,3995,1.239144,-6.142442,0.028264,-2.309144,1.472845,-0.998765,badminton
3996,0,3996,0.261434,0.205915,-0.224944,-0.524684,0.769715,0.157139,badminton
3997,0,3997,2.490353,-0.878765,-0.597296,0.111862,-0.117188,-0.050604,badminton
3998,0,3998,4.122120,0.911620,-0.465409,0.535338,0.197090,0.442120,badminton


In [37]:
# Persist every prepared frame to its CSV destination, omitting the index.
for frame, destination in (
    (full, full_outp_fname),
    (test_key_df, test_key_outp_fname),
    (train_df, train_outp_fname),
    (test_df, test_outp_fname),
):
    frame.to_csv(destination, index=False)

In [39]:
# Display the training labels returned by the loader.
y_train

array(['standing', 'standing', 'standing', 'standing', 'standing',
       'standing', 'standing', 'standing', 'standing', 'standing',
       'running', 'running', 'running', 'running', 'running', 'running',
       'running', 'running', 'running', 'running', 'walking', 'walking',
       'walking', 'walking', 'walking', 'walking', 'walking', 'walking',
       'walking', 'walking', 'badminton', 'badminton', 'badminton',
       'badminton', 'badminton', 'badminton', 'badminton', 'badminton',
       'badminton', 'badminton'], dtype='<U9')