In [3]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy

# Defining function

In [4]:
#CMI smooth takes in two data vectors, and creates bxb grid
def CMI_smooth_v1(x,y,b):
    
    if len(x)==len(y):
        sizeOfVector = len(x)
        
        x_r = ((x-min(x))/(max(x)-min(x)))*b #Let x' be the vector x rescaled on the interval [0,b]
        y_r = ((y-min(y))/(max(y)-min(y)))*b #Let y' be the vector y rescaled on the interval [0,b]
        
        x=x-numpy.mean(x)
        y=y-numpy.mean(y)
        
        #smooth data by aligning it with the bXb grid
        x_p = x_r
        y_p = y_r
        for i in range(0,sizeOfVector):
            x_p[i] = numpy.ceil(x_r[i])
            y_p[i] = numpy.ceil(y_r[i])
            
        #This will create function distribution which will count the number of points at each grid square
        distribution= [[0 for i in range(b+1)] for i in range(b+1)]
        for j in range (0,b+1):
            for i in range (0, b+1): #For every upper right corner   
                counter = 0
                for k in range (0, sizeOfVector): #For every point data given
                    if (x_p[k]== i) and (y_p[k] == j): #If the point of the data matches the upper right hand corner
                        counter = counter+1 #Then add 1 to the distribution counter
                distribution[i][j] = counter
                
        #Now we count the number of points on or below each square to create PXY
        lattice_count = numpy.asarray(distribution)    # Array of tile data counts
        numpy.cumsum(lattice_count,axis=1, out=lattice_count)
        numpy.cumsum(lattice_count, axis=0, out=lattice_count)
        PXY = lattice_count/float(sizeOfVector)
        
        #Calculate CE(X,Y)
        CEXY =0 
        for i in range (1, b+1):
            for j in range (1 ,b+1):
                if (PXY[i][j] != 0):
                    CEXY = CEXY - PXY[i][j]*numpy.log(PXY[i][j]) 
                    
        #Scale CE(X,Y) back to original data interval
        CEXY= CEXY*(((max(x)-min(x))/b))*(((max(y)-min(y))/b))

        #Find Cumulative distribution function in 1D
        P = [i/(b+1) for i in range(b +1)]

        #Calculate Cumulative Entropy of X and Y (Note CE(X) = CE(Y) by symmetry)
        CEXoY = 0
        for i in range (0,b):
            if(P[i] != 0):
                CEXoY = CEXoY - P[i]*numpy.log(P[i])

        #Scale back the data to original interval
        CEX = CEXoY*(((max(x)-min(x))/b))
        CEY = CEXoY*(((max(y)-min(y))/b))
        
        CMI = CEXY - (max(y) - numpy.mean(y))*CEX - (max(x) - numpy.mean(x))*CEY 
        CMI = CMI/pow(numpy.max(x)*numpy.max(y),2)     
        return CMI
        
        

# Some Samples

# Random Vectors

In [5]:
sizeOfVector = 100
x_rand =  numpy.random.rand(sizeOfVector)
y_rand = numpy.random.rand(sizeOfVector)

# Related Vectors

In [6]:
x_y = numpy.random.rand(sizeOfVector)
y_x = x_y

x_1 = numpy.random.rand(sizeOfVector)
y_1 = x_1 + 0.01*numpy.random.rand(sizeOfVector)

x_2 = numpy.random.rand(sizeOfVector)
y_2 = x_1 + 0.1*numpy.random.rand(sizeOfVector)


In [7]:
print(CMI_smooth_v1(x_rand,y_rand,10))
print(CMI_smooth_v1(x_rand,y_rand,100))
print(CMI_smooth_v1(x_rand,y_rand,1000))

-0.0863735857426
-0.0751321457499
-0.0737635282347


In [8]:
print(CMI_smooth_v1(x_y,y_x,10))
print(CMI_smooth_v1(x_y,y_x,100))
print(CMI_smooth_v1(x_y,y_x,1000))

1.09525843101
1.03599544338
1.04690057285


In [9]:
print(CMI_smooth_v1(x_1,y_1,10))
print(CMI_smooth_v1(x_1,y_1,100))
print(CMI_smooth_v1(x_1,y_1,1000))

0.982468689834
0.85882874695
0.857581081427


In [10]:
print(CMI_smooth_v1(x_2,y_2,10))
print(CMI_smooth_v1(x_2,y_2,100))
print(CMI_smooth_v1(x_2,y_2,1000))

0.256921858455
0.0498033037482
0.0537588553324
