# Exploration of information theory principles

Import modules

In [1]:
import numpy as np

Create some dummy variables

In [2]:
# Two binary test signals of length 100:
#   a alternates 1, 0, 1, 0, ... so exactly half of its samples are 1;
#   b is all zeros except for its very first sample.
a = np.tile([1.0, 0.0], 50)
b = np.concatenate(([1.0], np.zeros(99)))

Function that calculates the probability of each unique value in an array

In [3]:
def p(x):
    """Return the empirical probability of each distinct value in ``x``.

    Parameters
    ----------
    x : array_like
        Observations. Any shape is accepted; the values are treated as one
        flat sample, because ``np.unique`` counts over the flattened array.

    Returns
    -------
    numpy.ndarray
        1-D float array with one entry per distinct value, in the sorted
        order used by ``np.unique``. Empty input yields an empty array.
    """
    # asarray lets plain lists/tuples through, which have no ``.shape``.
    x = np.asarray(x)
    _, counts = np.unique(x, return_counts=True)
    # Normalise by the total number of elements, not the length of the first
    # axis, so the result sums to 1 for multi-dimensional input as well.
    return counts / x.size

In [4]:
# Marginal distributions of the two dummy signals, one per line.
print(p(a), p(b), sep="\n")

[0.5 0.5]
[0.99 0.01]


Function that calculates the joint probability (combinations of values) across two arrays

In [5]:
def joint_p(x, y):
    """Return the flattened joint probability table of two paired samples.

    Both inputs are first converted to ordinal codes (0 .. n_unique-1, in the
    sorted order of ``np.unique``). The table is a square ``k x k`` grid with
    ``k = max(n_unique(x), n_unique(y))``; cell ``(i, j)`` holds the fraction
    of observations whose x-code is ``i`` and y-code is ``j``. Rows/columns
    beyond a variable's own number of categories are zero padding.

    Parameters
    ----------
    x, y : array_like
        Paired observations with the same number of elements.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``k * k`` (row-major ravel of the grid)
        whose entries sum to 1.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` differ in length, or if they are empty.
    """
    # Convert vectors x and y into ordinal scale.
    x_codes = np.unique(np.asarray(x).ravel(), return_inverse=True)[1].ravel()
    y_codes = np.unique(np.asarray(y).ravel(), return_inverse=True)[1].ravel()
    if x_codes.shape != y_codes.shape:
        raise ValueError("x and y must contain the same number of observations")
    if x_codes.size == 0:
        raise ValueError("joint_p requires at least one observation")
    # Side of the square grid: the larger number of categories.
    k = int(max(x_codes.max(), y_codes.max())) + 1
    # Count each (x_code, y_code) pair exactly by encoding it as one integer.
    # Float histogram bins over each axis's own range would scatter the axis
    # with fewer categories across non-matching cells.
    joint_counts = np.bincount(x_codes * k + y_codes, minlength=k * k)
    # Convert to probability by dividing by the number of observations.
    return joint_counts / x_codes.size

In [6]:
# Flattened joint distribution over the (a, b) value combinations.
print(joint_p(a,b))

[0.5  0.   0.49 0.01]


Functions that calculate the Shannon entropy $H(X) = -\sum_x p(x)\log_2 p(x)$ and the joint entropy $H(X,Y)$, both in bits

In [7]:
def entropy(x):
    """Shannon entropy of the sample ``x`` in bits (log base 2).

    The probabilities come from ``p(x)``, i.e. the relative frequency of each
    distinct value in ``x``.
    """
    probs = p(x)
    weighted_logs = probs * np.log2(probs)
    return -weighted_logs.sum()

def joint_entropy(x, y):
    """Joint Shannon entropy of the paired samples ``x`` and ``y`` in bits.

    Cells of the joint table that were never observed are dropped before the
    logarithm is taken, so they contribute nothing to the sum.
    """
    cell_probs = joint_p(x, y)
    occupied = cell_probs[cell_probs > 0]
    return -(occupied * np.log2(occupied)).sum()

In [8]:
# Marginal entropies (bits) of the two dummy signals, one per line.
print(entropy(a), entropy(b), sep="\n")

1.0
0.08079313589591118


In [9]:
# Joint entropy (bits) of the pair (a, b).
print(joint_entropy(a,b))

1.0707202712709103


Function that calculates mutual information, $I(X;Y) = H(X) + H(Y) - H(X,Y)$

In [10]:
def information(x, y):
    """Mutual information between ``x`` and ``y`` in bits.

    Computed as the sum of the two marginal entropies minus the joint
    entropy of the pair.
    """
    marginal_sum = entropy(x) + entropy(y)
    return marginal_sum - joint_entropy(x, y)

In [11]:
# Mutual information (bits) shared by a and b.
print(information(a,b))

0.010072864625000788


In [12]:
# Two binary signals whose 1-positions come from disjoint halves of a single
# random draw (with replacement): the first 100 drawn indices mark b, the
# last 100 mark c.
# A locally seeded generator makes the cell give the same numbers on every
# Restart & Run All; change the seed to explore other realisations.
rng = np.random.default_rng(0)
b = np.zeros(200)
c = np.zeros(200)
ix = rng.choice(200, 200, replace=True)
b[ix[:100]] = 1.0
c[ix[100:]] = 1.0
print(entropy(b))
print(entropy(c))
print(joint_entropy(b,c))
print(information(b,c))

0.9679505356936393
0.954434002924965
1.9223392868424816
4.525177612291209e-05


In [13]:
# Add a third level (2.0) to the existing b and c arrays, again at positions
# taken from disjoint slices of one random draw.
# NOTE: this mutates b and c in place. The fixed seed keeps the cell
# idempotent: re-running it overwrites the same positions with the same value.
rng = np.random.default_rng(1)
ix = rng.choice(200, 200, replace=True)
b[ix[:50]] = 2.0
c[ix[50:100]] = 2.0

In [14]:
# Marginal entropies, joint entropy and mutual information of b and c,
# printed one value per line in that order.
print(
    entropy(b),
    entropy(c),
    joint_entropy(b, c),
    information(b, c),
    sep="\n",
)

1.5173758263626684
1.5273766428746693
3.0325385195409065
0.012213949696431303


In [15]:
# Two binary signals that share structure: b and c are both set to 1 at the
# same first 100 drawn positions, then c receives an extra 100 drawn
# positions of its own.
# A locally seeded generator makes the cell give the same numbers on every
# Restart & Run All; change the seed to explore other realisations.
rng = np.random.default_rng(2)
b = np.zeros(200)
c = np.zeros(200)
ix = rng.choice(200, 200, replace=True)
b[ix[:100]] = 1.0
c[ix[:100]] = 1.0
# Second, separate draw: positions that are switched on in c only.
ix = rng.choice(200, 200, replace=True)
c[ix[:100]] = 1.0
print(entropy(b))
print(entropy(c))
print(joint_entropy(b,c))
print(information(b,c))

0.9647995485050872
0.9614969508235551
1.544169559576734
0.38212693975190826


In [18]:
# a takes a different value on every sample, so knowing a pins down b exactly:
# the joint entropy equals H(a) and the mutual information reduces to H(b).
a = np.arange(10)
b = np.append(np.arange(5), [0, 1, 0, 1, 0])
information(a, b)

2.046439344671016