In [2]:
import pickle
import glob
import os
import numpy as np
import json

from scipy.io import loadmat

In [4]:
# Gather every .mat file under ./datasets and order the paths
# case-insensitively so the processing order ignores letter case.
mat_files = glob.glob('./datasets/*.mat')
mat_files.sort(key=str.lower)

# Maps dataset name -> dict of summary statistics; starts empty.
datasets = {}

In [5]:
# For each .mat file: load the train/test splits, check basic consistency,
# and record summary statistics in `datasets[<name>]` under the keys
# n_train, n_test, n_classes, l_min, l_max and n_features.
for i, mat_file in enumerate(mat_files):
    # Use splitext to drop the extension. str.rstrip('.mat') removes any
    # trailing run of the characters '.', 'm', 'a', 't' rather than the
    # literal suffix, so a name like "Data.mat" would be mangled to "D".
    dataset = os.path.splitext(os.path.basename(mat_file))[0]
    print('Processing dataset: {} ({}/{})'.format(dataset, i+1, len(mat_files)))
    datasets[dataset] = {}

    mat = loadmat(mat_file)
    # loadmat results are squeezed to drop singleton dimensions.
    # NOTE(review): assumes each X_* entry is a 2-D array per example
    # (rows = time steps, columns = features) -- confirm against the files.
    X_train, y_train, X_test, y_test = (
        np.squeeze(mat[key]) for key in ('X_train', 'y_train', 'X_test', 'y_test')
    )

    # Every example must have exactly one label.
    assert len(X_train) == len(y_train), 'X_train / y_train length mismatch'
    assert len(X_test) == len(y_test), 'X_test / y_test length mismatch'
    num_train, num_test = len(X_train), len(X_test)
    datasets[dataset]['n_train'] = num_train
    datasets[dataset]['n_test'] = num_test
    print('- n_train : {}'.format(num_train))
    print('- n_test : {}'.format(num_test))

    # Train and test must contain the same set of labels. np.array_equal
    # returns False when the two label sets differ in size, whereas
    # `all(a == b)` on arrays of different shape raises a confusing
    # TypeError/ValueError instead of failing the assertion.
    train_classes = np.unique(y_train)
    assert np.array_equal(train_classes, np.unique(y_test)), \
        'train and test splits contain different class labels'
    num_classes = train_classes.size
    datasets[dataset]['n_classes'] = num_classes
    print('- n_classes : {}'.format(num_classes))

    # First axis of each example is recorded as its length (l_min / l_max).
    len_examples = [x.shape[0] for x in X_train] + [x.shape[0] for x in X_test]
    len_min, len_max = min(len_examples), max(len_examples)
    datasets[dataset]['l_min'] = len_min
    datasets[dataset]['l_max'] = len_max
    print('- l_examples : {} - {}'.format(len_min, len_max))

    # Second axis is recorded as the feature count; it must be identical
    # across all train and test examples.
    num_features = [x.shape[1] for x in X_train] + [x.shape[1] for x in X_test]
    assert all(n == num_features[0] for n in num_features), \
        'examples have inconsistent feature counts'
    num_features = num_features[0]
    datasets[dataset]['n_features'] = num_features
    print('- n_features : {}'.format(num_features))

    print()

Processing dataset: ArabicDigits (1/16)
- n_train : 6600
- n_test : 2200
- n_classes : 10
- l_examples : 4 - 93
- n_features : 13

Processing dataset: AUSLAN (2/16)
- n_train : 1140
- n_test : 1425
- n_classes : 95
- l_examples : 45 - 136
- n_features : 22

Processing dataset: CharacterTrajectories (3/16)
- n_train : 300
- n_test : 2558
- n_classes : 20
- l_examples : 109 - 205
- n_features : 3

Processing dataset: CMUsubject16 (4/16)
- n_train : 29
- n_test : 29
- n_classes : 2
- l_examples : 127 - 580
- n_features : 62

Processing dataset: DigitShapes (5/16)
- n_train : 24
- n_test : 16
- n_classes : 4
- l_examples : 30 - 98
- n_features : 2

Processing dataset: ECG (6/16)
- n_train : 100
- n_test : 100
- n_classes : 2
- l_examples : 39 - 152
- n_features : 2

Processing dataset: JapaneseVowels (7/16)
- n_train : 270
- n_test : 370
- n_classes : 9
- l_examples : 7 - 29
- n_features : 12

Processing dataset: KickvsPunch (8/16)
- n_train : 16
- n_test : 10
- n_classes : 2
- l_examples 

In [6]:
# Persist the collected per-dataset statistics as JSON next to the notebook.
with open('./datasets.json', mode='w') as json_file:
    json.dump(datasets, fp=json_file)