# Creating a dataset class in MOABB

In [7]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pyriemann
from scipy.io import savemat, loadmat
import mne

To illustrate the creation of a dataset class in MOABB, we first create an example dataset saved as .mat files, one per subject. Each file contains a single fake recording of 8 channels lasting 150 seconds (sampling frequency 256 Hz). We've included the script that creates this dataset and have uploaded it online. It is available on the Zenodo website at: https://sandbox.zenodo.org/record/369543

In [8]:
def create_example_dataset(seed=None):
    """Simulate one fake multi-channel recording with a stimulus channel.

    The recording lasts 150 s at 256 Hz and holds 8 signal channels plus one
    stimulus channel (last row). Starting at 1 s, 40 trials of 1 s each are
    inserted, separated by 2 s of silence. Every trial is a 4-cycle sine wave
    whose amplitude equals the trial's event code (alternating 1, 2, 1, ...),
    with additive Gaussian noise of standard deviation 0.1 on every channel.
    The stimulus channel carries the event code at the onset sample of each
    trial and is zero elsewhere.

    Parameters
    ----------
    seed : int or None, optional
        Seed for the noise generator. With the default ``None`` the global
        NumPy random state is used, so the output differs between calls
        unless the caller seeded ``np.random`` beforehand.

    Returns
    -------
    x : ndarray, shape (9, 38400)
        Rows 0-7 are the simulated channels, row 8 is the stimulus channel.
    fsample : int
        Sampling frequency in Hz (256).
    """
    fsample = 256  # sampling frequency (Hz)
    Trecording = 150  # total duration of the recording (s)
    Ttrial = 1  # duration of a trial (s)
    intertrial = 2  # time between finishing one trial and beginning another one (s)
    Nchannels = 8
    toffset = 1.0  # offset where the trials start (s)
    Ntrials = 40

    # Fall back to the module-level generator so that an unseeded call behaves
    # as it always did (including honouring a prior np.random.seed()).
    rng = np.random if seed is None else np.random.RandomState(seed)

    trial_len = Ttrial * fsample  # number of samples in one trial
    x = np.zeros((Nchannels + 1, Trecording * fsample))  # electrodes + stimulus

    signal = np.sin(2 * np.pi / Ttrial * np.linspace(0, 4 * Ttrial, trial_len))
    for n in range(Ntrials):
        label = n % 2 + 1  # alternate between event codes 1 and 2
        tn = int(toffset * fsample + n * (Ttrial + intertrial) * fsample)
        x[-1, tn] = label  # mark the trial onset on the stimulus channel
        noise = 0.1 * rng.randn(Nchannels, trial_len)
        # signal is broadcast over channels; its amplitude encodes the class
        x[:-1, tn:tn + trial_len] = label * signal + noise

    return x, fsample

# Simulate one recording per subject and store it as 'subject_XX.mat'
# (variables 'x' and 'fs') in the current working directory.
for subject in (1, 2, 3):
    x, fs = create_example_dataset()
    filename = 'subject_{:02d}.mat'.format(subject)
    mdict = {'x': x, 'fs': fs}
    savemat(filename, mdict)

We will now create a dataset class using the fake data simulated with the code above. For this, we first need to import the right classes from MOABB.

In [9]:
from moabb.datasets.base import BaseDataset
from moabb.datasets import download as dl

- `dl` is a very useful module that automatically downloads a dataset if it is not yet available on the user's computer. It knows where to download the files from because we create a global variable holding the URL where the data can be fetched.
- `BaseDataset` is the base class that we subclass to create our dataset.

The global variable with the dataset's URL should specify an online repository where all the files are stored. 

In [10]:
# Base URL (trailing slash included) of the online repository; the per-subject
# file name is appended to it when building the download URL.
ExampleDataset_URL = 'https://sandbox.zenodo.org/record/369543/files/'

In [16]:
class ExampleDataset(BaseDataset):

    '''
    Dataset used to exemplify the creation of a dataset class in MOABB.
    The data samples have been simulated and have no physiological meaning whatsoever.
    '''

    def __init__(self):
        super().__init__(
            subjects=[1, 2, 3],
            sessions_per_subject=1,
            events={'left_hand':1, 'right_hand':2},
            code='Example dataset',
            interval=[0, 0.75],
            paradigm='imagery',
            doi='')

    def _get_single_subject_data(self, subject):
        """Return the data of a single subject.

        Loads the subject's .mat file (variables 'x' and 'fs'), wraps the
        array in an MNE RawArray and returns it as
        ``{'session_1': {'run_1': raw}}``.
        """

        file_path_list = self.data_path(subject)

        data = loadmat(file_path_list[0])
        x = data['x']
        # loadmat hands scalars back as 2-D arrays; convert explicitly to a
        # plain float instead of relying on implicit array-to-scalar
        # conversion further down.
        fs = float(data['fs'].item())
        # every row except the last one is a signal channel, the last row is
        # the stimulus channel
        n_eeg = x.shape[0] - 1
        ch_names = ['ch' + str(i) for i in range(n_eeg)] + ['stim']
        ch_types = ['eeg'] * n_eeg + ['stim']
        info = mne.create_info(ch_names, fs, ch_types)
        raw = mne.io.RawArray(x, info)

        sessions = {'session_1': {'run_1': raw}}

        return sessions

    def data_path(self, subject, path=None, force_update=False,
                  update_path=None, verbose=None):
        """Download (if needed) the file of one subject and return its path.

        Returns a one-element list holding the local path of the file.
        Raises ValueError when ``subject`` is not in ``self.subject_list``.

        NOTE: ``path``, ``force_update``, ``update_path`` and ``verbose`` are
        currently not forwarded to the downloader.
        """

        if subject not in self.subject_list:
            raise ValueError("Invalid subject number")

        # zero-pad to two digits so the name matches 'subject_XX.mat' for any
        # subject number, not only single-digit ones
        url = '{:s}subject_{:02d}.mat'.format(ExampleDataset_URL, subject)
        path = dl.data_path(url, 'ExampleDataset')

        return [path] # it has to return a list

dataset = ExampleDataset()

In [20]:
from moabb.paradigms import LeftRightImagery
# Instantiate the paradigm; it is reused by the evaluation further below.
paradigm = LeftRightImagery()
# Load the data of subject 1 from the example dataset through the paradigm.
X, labels, meta = paradigm.get_data(dataset=dataset, subjects=[1])

Creating RawArray with float64 data, n_channels=9, n_times=38400
    Range : 0 ... 38399 =      0.000 ...   149.996 secs
Ready.


In [30]:
from moabb.evaluations import WithinSessionEvaluation
from pyriemann.classification import MDM
from pyriemann.estimation import Covariances
from sklearn.pipeline import make_pipeline
# One pipeline to evaluate: covariance estimation ('oas') followed by an MDM
# classifier using the 'riemann' metric.
pipelines = {
    'MDM': make_pipeline(Covariances('oas'), MDM(metric='riemann')),
}
evaluation = WithinSessionEvaluation(
    paradigm=paradigm,
    datasets=dataset,
    overwrite=True,
)
scores = evaluation.process(pipelines)

Creating RawArray with float64 data, n_channels=9, n_times=38400
    Range : 0 ... 38399 =      0.000 ...   149.996 secs
Ready.
Creating RawArray with float64 data, n_channels=9, n_times=38400
    Range : 0 ... 38399 =      0.000 ...   149.996 secs
Ready.
Creating RawArray with float64 data, n_channels=9, n_times=38400
    Range : 0 ... 38399 =      0.000 ...   149.996 secs
Ready.


In [31]:
scores

Unnamed: 0,score,time,samples,subject,session,channels,n_sessions,dataset,pipeline
0,1.0,0.048424,40.0,1,session_1,8,1,Example dataset,MDM
1,1.0,0.053312,40.0,2,session_1,8,1,Example dataset,MDM
2,1.0,0.056087,40.0,3,session_1,8,1,Example dataset,MDM
