In [9]:
import os
import numpy as np
import pandas as pd
from sktime.datasets import load_UCR_UEA_dataset
from sktime.datasets import tsc_dataset_names

In [10]:
# Peek at the first five multivariate dataset names shipped with sktime.
tsc_dataset_names.multivariate[:5]

['ArticularyWordRecognition',
 'AsphaltObstaclesCoordinates',
 'AsphaltPavementTypeCoordinates',
 'AsphaltRegularityCoordinates',
 'AtrialFibrillation']

In [11]:
# Dataset identifiers: `dataset_handle` is the name handed to
# load_UCR_UEA_dataset, `dataset_name` is the lowercase name used for the
# output directory and file names.
dataset_handle = 'Cricket'
dataset_name = 'cricket'

# Root of the processed-data tree; the dataset-specific directory is derived
# from it so the two paths cannot drift apart when the root is changed.
processed_dir = './../../processed/'
output_dir = f'{processed_dir}{dataset_name}/'
os.makedirs(output_dir, exist_ok=True)

# Output CSV paths: full data, test labels (key), train split, unlabeled test split.
full_outp_fname = os.path.join(output_dir, f'{dataset_name}.csv')
test_key_outp_fname = os.path.join(output_dir, f'{dataset_name}_test_key.csv')
train_outp_fname = os.path.join(output_dir, f'{dataset_name}_train.csv')
test_outp_fname = os.path.join(output_dir, f'{dataset_name}_test.csv')



## Load training and test data

In [12]:
# Fetch the TRAIN split first, then the TEST split, of the chosen dataset.
(X_train, y_train), (X_test, y_test) = (
    load_UCR_UEA_dataset(name=dataset_handle, split=split_name)
    for split_name in ('TRAIN', 'TEST')
)

In [13]:
def prepare_dataset(X, y):
    """Flatten a nested panel of series into one long, labelled DataFrame.

    Every cell of ``X`` is expected to hold one series (an object exposing
    ``.tolist()``, e.g. a ``pd.Series``). The series of each row are appended
    end to end, column by column, so the result has one row per time step and
    one column per column of ``X``.

    Parameters
    ----------
    X : pd.DataFrame
        Nested frame: one row per instance, one column per dimension.
    y : sequence
        One label per row of ``X``, in the same order as the rows.

    Returns
    -------
    pd.DataFrame
        Columns ``series_id`` (constant 0), ``timestep`` (running counter over
        the whole frame, starting at 0), the columns of ``X``, and ``label``
        (the instance label repeated for each of its time steps).

    Raises
    ------
    ValueError
        If ``y`` does not contain exactly one label per row of ``X``.
    """
    labels = list(y)
    if len(labels) != len(X):
        raise ValueError(
            f'X has {len(X)} rows but y has {len(labels)} labels'
        )

    flattened = {column: [] for column in X.columns}
    label_column = []
    # Pair rows and labels by position rather than by index label, so the
    # result is correct even when X does not carry a 0..n-1 index.
    for (_, row), label in zip(X.iterrows(), labels):
        n_steps = 0
        for column in X.columns:
            series_values = row[column].tolist()
            flattened[column].extend(series_values)
            # As before, the label run length follows the last column's series.
            n_steps = len(series_values)
        label_column.extend([label] * n_steps)

    flattened['label'] = label_column
    df = pd.DataFrame(flattened)
    df.insert(0, 'series_id', 0)
    df.insert(1, 'timestep', list(range(len(df))))

    return df

            


In [14]:
# Flatten each split into a long frame; the test frame still carries labels here.
train_df = prepare_dataset(X_train, y_train)
test_labelled_df = prepare_dataset(X_test, y_test)

# Combined train + test data, labels included.
full = pd.concat([train_df, test_labelled_df])

# Split the labelled test frame into an answer key and an unlabeled frame.
# Both are derived without mutating `test_labelled_df`, so re-running this
# cell (or later cells) never sees a half-modified frame.
key_columns = ['series_id', 'timestep', 'label']
test_key_df = test_labelled_df[key_columns]
test_df = test_labelled_df.drop(columns=['label'])

In [15]:
# Display the flattened training frame as the cell output.
train_df

Unnamed: 0,series_id,timestep,dim_0,dim_1,dim_2,dim_3,dim_4,dim_5,label
0,0,0,-0.97211,-0.23077,-0.46863,-0.57729,0.29993,-0.44158,1.0
1,0,1,-0.95768,-0.23077,-0.43983,-0.75975,0.31719,-0.40136,1.0
2,0,2,-0.95901,-0.23077,-0.45135,-0.80110,0.33099,-0.39331,1.0
3,0,3,-0.95901,-0.23077,-0.45135,-0.85827,0.34135,-0.39331,1.0
4,0,4,-0.95035,-0.23077,-0.48591,-0.86314,0.35170,-0.40538,1.0
...,...,...,...,...,...,...,...,...,...
129271,0,129271,-0.69274,-0.62841,-0.65053,-0.68377,0.79428,-0.77377,12.0
129272,0,129272,-0.68128,-0.62841,-0.67173,-0.68377,0.81896,-0.77377,12.0
129273,0,129273,-0.68128,-0.62841,-0.67173,-0.68377,0.84363,-0.77377,12.0
129274,0,129274,-0.66982,-0.62841,-0.69293,-0.68377,0.84363,-0.77377,12.0


In [16]:
# Persist every frame to its CSV path, without the pandas index column.
for frame, csv_path in (
    (full, full_outp_fname),
    (test_key_df, test_key_outp_fname),
    (train_df, train_outp_fname),
    (test_df, test_outp_fname),
):
    frame.to_csv(csv_path, index=False)

In [17]:
# Inspect the raw nested series stored in the first training row, column 'dim_0'.
X_train.iloc[0]['dim_0']

0      -0.97211
1      -0.95768
2      -0.95901
3      -0.95901
4      -0.95035
         ...   
1192   -0.95480
1193   -0.96345
1194   -0.96345
1195   -0.97211
1196   -0.97211
Length: 1197, dtype: float64