In [1]:
from mvpa2.suite import *
import numpy as np

  from ._conv import register_converters as _register_converters
  from pandas.core import datetools


In [11]:
# Build a synthetic dataset in which both familiarity classes contain the
# *same* random patterns: the second half of the samples is an exact copy of
# the first half, and only the labels differ.
nvoxels = 5000

# Design sizes. Every label vector below is derived from these, so changing
# the design cannot leave the sample attributes with a mismatched length.
n_identities_per_class = 4
n_orientations = 5
nsamples = n_identities_per_class * n_orientations  # samples per familiarity class (20)
n_identities = 2 * n_identities_per_class           # identities over both classes (8)

# NOTE(review): no seed is set in the visible cells, so the exact numbers in
# the recorded outputs will presumably change from run to run.
rand_data = np.random.randn(nsamples, nvoxels)

# Stack the random block twice: rows i and i + nsamples are identical.
data = np.vstack([rand_data, rand_data])

ds = Dataset(
    data,
    sa={
        # First copy of the data is labelled 0, the second copy 1.
        'familiarity': [0] * nsamples + [1] * nsamples,
        # Consecutive runs of n_orientations samples share one identity.
        'identity': np.repeat(np.arange(n_identities), n_orientations),
        # Orientation cycles 0..n_orientations-1 within every identity.
        'orientation': np.tile(np.arange(n_orientations), n_identities),
    },
)
# Classify familiarity: use it as the targets attribute.
ds.sa['targets'] = ds.sa.familiarity

In [13]:
# Tabulate how the targets (familiarity) are distributed over identities,
# using 'identity' as the chunking attribute for the summary.
summary_text = ds.summary(chunks_attr='identity')
print(summary_text)

Dataset: 40x5000@float64, <sa: familiarity,identity,orientation,targets>
stats: mean=0.00192592 std=1.00266 var=1.00532 min=-4.51238 max=4.15349

Counts of targets in each chunk:
  identity\targets  0   1
                   --- ---
         0          5   0
         1          5   0
         2          5   0
         3          5   0
         4          0   5
         5          0   5
         6          0   5
         7          0   5

Summary for targets across identity
  targets mean std min max #identity
    0      2.5 2.5  0   5      4
    1      2.5 2.5  0   5      4

Summary for identity across targets
  identity mean std min max #targets
     0      2.5 2.5  0   5      1
     1      2.5 2.5  0   5      1
     2      2.5 2.5  0   5      1
     3      2.5 2.5  0   5      1
     4      2.5 2.5  0   5      1
     5      2.5 2.5  0   5      1
     6      2.5 2.5  0   5      1
     7      2.5 2.5  0   5      1
Sequence statistics for 40 entries from set [0, 1]
Counter-balance table f

In [14]:
# Classifier used inside the cross-validation (PyMVPA's GNB).
clf = GNB()

# N-fold partitioning on 'identity', wrapped in a FactorialPartitioner over
# 'familiarity'.
# NOTE(review): presumably this leaves out one identity per familiarity level
# in every combination -- confirm against the PyMVPA documentation.
partitioner = FactorialPartitioner(
    NFoldPartitioner(attr='identity'),
    attr='familiarity',
)

# 'stats' is enabled so that cv.ca.stats can be printed after a run.
cv = CrossValidation(clf, partitioner, enable_ca=['stats'])

In [15]:
# Cross-validate on the dataset whose two classes contain identical samples,
# then print the statistics collected in the 'stats' conditional attribute.
cv(ds)
stats_duplicated = cv.ca.stats
print(stats_duplicated)

----------.
predictions\targets   0     1
            `------  ----  ----  P'  N' FP FN PPV NPV  TPR  SPC FDR  MCC   F1   AUC
         0            20    80  100  60 80 60 0.2  0  0.25   0  0.8 -0.77 0.22  0.1
         1            60    0    60 100 60 80  0  0.2   0  0.25  1  -0.77   0  0.15
Per target:          ----  ----
         P            80    80
         N            80    80
         TP           20    0
         TN           0     20
Summary \ Means:     ----  ----  80  80 70 70 0.1 0.1 0.12 0.12 0.9 -0.77 0.11 0.12
       CHI^2         100  p=1.6e-21
        ACC          0.12
        ACC%         12.5
     # of sets        16   ACC(i) = 0.12+0*i p=1 r=0 r^2=0



Compare with completely random data (no samples duplicated across the two familiarity classes):

In [16]:
# Control dataset: start from a deep copy of `ds`, then replace its samples
# with fresh standard-normal noise of the same shape.
ds_rand = ds.copy(deep=True)
ds_rand.samples = np.random.standard_normal(ds_rand.shape)

In [17]:
# Run the same cross-validation on the fully random control dataset and
# print the statistics collected in the 'stats' conditional attribute.
cv(ds_rand)
stats_random = cv.ca.stats
print(stats_random)

----------.
predictions\targets   0     1
            `------  ----  ---- P' N' FP FN  PPV  NPV  TPR  SPC  FDR  MCC  F1   AUC
         0            48    42  90 70 42 32 0.53 0.54  0.6 0.47 0.47 0.08 0.56 0.48
         1            32    38  70 90 32 42 0.54 0.53 0.47  0.6 0.46 0.08 0.51 0.59
Per target:          ----  ----
         P            80    80
         N            80    80
         TP           48    38
         TN           38    48
Summary \ Means:     ----  ---- 80 80 37 37 0.54 0.54 0.54 0.54 0.46 0.08 0.54 0.54
       CHI^2         3.4  p=0.33
        ACC          0.54
        ACC%        53.75
     # of sets        16   ACC(i) = 0.58-0.0053*i p=0.44 r=-0.21 r^2=0.04

