In [1]:
# -*- coding: ascii -*-

from numpy.random import normal # http://docs.scipy.org/doc/numpy/reference/generated/numpy.random.normal.html
import numpy
#from scipy.stats.stats import pearsonr  # will give *normalized* correlation coefficient
numpy.random.seed(1234)
pi = numpy.pi

from time import time

#import sys
#print(sys.version)

# note, 2017-11-29:
# pour compiler la librairie cython sans warnings sur les differentes versions des librairies:
# export MACOSX_DEPLOYMENT_TARGET=10.6

# note, 2020-02-22:
# the correct ordering for data is now:
# - nx:   components (dimensionality) as first dimension
# - npts: time as second dimension
# so shape = (nx, npts)

#import entropy_ann as ann # Cython version, old
import entropy.entropy as entropy       # Cython version, new 2020-02-22
import tools
#help(tools)
#print(dir(tools))

In [2]:
def print_result(message, value_from_function):
#    time1=time()
#    value_from_function=function
#    time2=time()-time1
    print(message, "\t%2.5f" %value_from_function, end="")
    tmp=entropy.get_last_info()
    print(" +/- %2.5f (%d effective points, %d errors)" %(tmp[0], tmp[3], tmp[2]), end="")

In [3]:
def print_result_l(message, value_from_function):
#    time1=time()
#    value_from_function=function
#    time2=time()-time1
    print(message, "\t%2.5f, %2.5f" %(value_from_function[0], value_from_function[1]), end="")
    tmp=entropy.get_last_info()
    print(" +/- %2.5f (%d effective points, %d errors)" %(tmp[0], tmp[3], tmp[2]), end="")

In [4]:
# prepare and test a dataset with NaN points
npoints = 1000
nNaN    = npoints//4
ndim    = 1
sigma_x = 1.

m       = 2       # embedding
stride  = 10      # stride (tau)

x = normal(loc = 0., scale=sigma_x, size=(ndim, npoints));

print("testing on data with %d points, including %d NaN\n" %(npoints, nNaN))
time1=time(); print_result("x (full, no NaN)", entropy.compute_entropy(x, m, stride))
print(" (%2.4f s)" %(time()-time1))

print("optimized mask")
time1=time(); print_result("x (mask, no NaN)", entropy.compute_entropy(x, m, stride, mask=tools.mask_finite(x)))
print(" (%2.4f s)" %(time()-time1))

print("conservative mask")
entropy.choose_algorithm(mask=2)
time1=time(); print_result("x (mask, no NaN)", entropy.compute_entropy(x, m, stride, mask=tools.mask_finite(x)))
print(" (%2.4f s)" %(time()-time1))
entropy.choose_algorithm(mask=1) # back to default

NaN_ind = numpy.random.randint(0, npoints, nNaN)
x[0,NaN_ind] = numpy.nan

# building a mask to discard NaNs when calling the library:
mask_good = tools.mask_finite(x)   # using pure Python code
mask_g2   = entropy.mask_finite(x) # using cython code (slower ???)
ma=numpy.max(mask_good-mask_g2)
mi=numpy.min(mask_good-mask_g2)
#print("%d and %d should be 0" %(ma, mi))
#print("\n", numpy.info(mask_good),"\n")
#print("\n", numpy.info(mask_g2),"\n")

if tools.no_NaN(x): # if there are NaNs, then the following will crash the notebook
    print("x (full, no mask):\t", entropy.compute_entropy(x, m, stride), end="")
    tmp=entropy.get_last_info()
    print("\t(%d effective points, %d errors)" %(tmp[3], tmp[2]))

print("")
std = numpy.nanstd(x)
print("theoretical value:\t%2.5f" %(1./2.*numpy.log(2.*numpy.exp(1)*pi*std**2)*m*ndim))
print("")

print("optimized mask")
time1=time(); print_result("x masked, tau=%d" %(stride), entropy.compute_entropy(x, m, stride, mask=mask_good))
print(" (%2.4f s)" %(time()-time1))

print("conservative mask")
entropy.choose_algorithm(mask=2)
time1=time(); print_result("x masked, tau=%d" %(stride), entropy.compute_entropy(x, m, stride, mask=tools.mask_finite(x)))
print(" (%2.4f s)" %(time()-time1))
entropy.choose_algorithm(mask=1) # back to default

#time1=time()
#print("x masked, tau=%d:\t" %stride,   entropy.compute_entropy(x, m, stride, mask=mask_g2), end="")
#time2=time()-time1
#tmp=entropy.get_last_info()
#print("\t(%f s) (%d effective points, %d errors)" %(time2, tmp[3], tmp[2]))


testing on data with 1000 points, including 250 NaN

x (full, no NaN) 	2.69183 +/- 0.18587 (990 effective points, 0 errors) (0.0040 s)
optimized mask
x (mask, no NaN) 	2.69183 +/- 0.18587 (990 effective points, 0 errors) (0.0031 s)
conservative mask
x (mask, no NaN) 	2.69183 +/- 0.18587 (990 effective points, 0 errors) (0.0017 s)

theoretical value:	2.77687

optimized mask
x masked, tau=10 	2.75512 +/- 0.16861 (598 effective points, 0 errors) (0.0015 s)
conservative mask
x masked, tau=10 	nan +/- 0.16861 (0 effective points, 0 errors) (0.0002 s)


In [10]:
help(entropy.choose_algorithm)

Help on built-in function choose_algorithm in module entropy:

choose_algorithm(...)
    choose_algorithm([algo])
    
    Select the algorithms to use for computing all mutual informations (including partial mutual informations and TEs).
    
    algo     : Kraskov-Stogbauer-Grassberger algorithm 
               possible values: {1, 2, 1|2}) for (algo 1, algo 2, both algos)
               (default=1)
    version  : counting algorithm version 
               legacy: faster for small embedding dimensions (<=2)
               mixed ANN: faster for large emmbedding dimensions (>=4)
               possible values: (1, 2) for (legacy, mixed ANN)
               (default=1)
    mask     : mask algorithm 
               Theiler optimized: use all possible vectors 
               legacy: use only large enough contiguous blocks (quite conservative)
               possible values: (1, 2) for (optimized, legacy)
               (default=1)



In [None]:
isf = numpy.asarray(numpy.isfinite(x))
y=tools.reorder(x[isf])

std = numpy.nanstd(y)
print("theoretical value for y:%2.5f" %(1./2.*numpy.log(2.*numpy.exp(1)*pi*std**2)*m*ndim))
print_result("y (full, no mask):", entropy.compute_entropy(y, m, stride))


In [3]:
def FIR_filter(x, T_integration):
    ''' function to filter in time (low pass) by local averaging :
    '''
    stride_f = T_integration*0.8 # overlap
    stride = int(stride_f)
    
    x_f = x[0,0:-1-T_integration];
    for i in range(1,T_integration):
        x_f  = x_f + x[0,i:-1-T_integration+i];
    x_f = x_f / T_integration;
           
    return tools.reorder(x_f)


In [4]:
# prepare and test a dataset with NaN points
npoints = 20000
nNaN    = npoints//2
ndim    = 1
sigma_x = 1.

m       = 5        # embedding
stride  = 20      # stride (tau)
tau_filtre = 20

x = normal(loc = 0., scale=sigma_x, size=(ndim, npoints));
x = FIR_filter(x, tau_filtre)
npoints = npoints-tau_filtre-1

for stride in numpy.arange(1,30,2):
    a=x[:,:-stride]
    b=x[:,stride:]
    print_result("x (full, no NaN), tau=%d" %stride, entropy.compute_MI(a, b, 1, 1, stride)[0])
    print("")

x (full, no NaN), tau=1 	1.18717 +/- 0.00000 (19978 effective points, 0 errors)
x (full, no NaN), tau=3 	0.64469 +/- 0.00635 (19974 effective points, 0 errors)
x (full, no NaN), tau=5 	0.40919 +/- 0.00909 (19970 effective points, 0 errors)
x (full, no NaN), tau=7 	0.27418 +/- 0.01414 (19971 effective points, 0 errors)
x (full, no NaN), tau=9 	0.17994 +/- 0.01178 (19962 effective points, 0 errors)
x (full, no NaN), tau=11 	0.12116 +/- 0.01246 (19965 effective points, 0 errors)
x (full, no NaN), tau=13 	0.06323 +/- 0.01630 (19955 effective points, 0 errors)
x (full, no NaN), tau=15 	0.02934 +/- 0.01692 (19950 effective points, 0 errors)
x (full, no NaN), tau=17 	0.00664 +/- 0.01482 (19958 effective points, 0 errors)
x (full, no NaN), tau=19 	0.00060 +/- 0.01776 (19950 effective points, 0 errors)
x (full, no NaN), tau=21 	-0.00065 +/- 0.01180 (19950 effective points, 0 errors)
x (full, no NaN), tau=23 	-0.00148 +/- 0.01442 (19941 effective points, 0 errors)
x (full, no NaN), tau=25 	-0.00

In [5]:
# now with NaNs:    
NaN_ind = numpy.random.randint(0, npoints, nNaN)
x[0,NaN_ind] = numpy.nan

for stride in numpy.arange(1,30,2):
    a=x[:,:-stride]
    b=x[:,stride:]
    mask_a = tools.mask_finite(a)
    mask_b = tools.mask_finite(b)
#    print(mask_a.shape, mask_b.shape)
    mask = numpy.append(mask_a, mask_b, axis=0).reshape(2,-1)
    mask = tools.mask_clean(mask)
    print_result("x (mask, NaN), tau=%d" %stride, entropy.compute_MI(a, b, 1, 1, stride, mask=mask)[0])
    print("")

x (mask, NaN), tau=1 	1.17476 +/- 0.00000 (7324 effective points, 0 errors)
x (mask, NaN), tau=3 	0.64694 +/- 0.02414 (7387 effective points, 0 errors)
x (mask, NaN), tau=5 	0.42477 +/- 0.01673 (7341 effective points, 0 errors)
x (mask, NaN), tau=7 	0.27354 +/- 0.02747 (7309 effective points, 0 errors)
x (mask, NaN), tau=9 	0.18466 +/- 0.01680 (7353 effective points, 0 errors)
x (mask, NaN), tau=11 	0.11507 +/- 0.01878 (7348 effective points, 0 errors)
x (mask, NaN), tau=13 	0.06312 +/- 0.01561 (7367 effective points, 0 errors)
x (mask, NaN), tau=15 	0.02651 +/- 0.02564 (7374 effective points, 0 errors)
x (mask, NaN), tau=17 	-0.00317 +/- 0.01743 (7428 effective points, 0 errors)
x (mask, NaN), tau=19 	0.00093 +/- 0.02208 (7374 effective points, 0 errors)
x (mask, NaN), tau=21 	-0.00240 +/- 0.02976 (7401 effective points, 0 errors)
x (mask, NaN), tau=23 	-0.01225 +/- 0.02655 (7379 effective points, 0 errors)
x (mask, NaN), tau=25 	0.00535 +/- 0.02396 (7377 effective points, 0 errors)
x

In [None]:
help(entropy.compute_MI)

In [None]:
mask.shape