In [1]:
# Import packages
import numpy as np
import pandas as pd
from simpleinfotheory import entropy, entropyempirical, mutualinformationempirical, infocontent, conditionalmutualinformationempirical

In [2]:
# Import data
data = pd.read_csv('data/data.txt', sep=r'\s+', header=None, names=[f'V{i}' for i in range(1, 11)])
data

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10
0,5,2,3,5,1,5,5,0,3,6
1,2,4,3,5,3,2,4,2,5,4
2,2,3,1,2,2,4,3,2,5,5
3,2,4,3,2,2,3,6,0,3,5
4,2,5,1,2,3,2,3,2,5,4
...,...,...,...,...,...,...,...,...,...,...
996,5,7,1,5,5,5,1,2,2,7
997,5,5,1,5,4,5,3,0,0,7
998,5,3,3,5,2,2,1,0,0,4
999,5,7,1,5,5,5,6,2,2,6


In [3]:
# 6.a
current_max_entropy = 0
current_max_entropy_player = 'V1'
for ch in [f'V{i}' for i in range(1, 11)]:
    (result, symbols, probabilities) = entropyempirical(data[ch].to_numpy())
    print(result)
    if result > current_max_entropy:
        current_max_entropy = result
        current_max_entropy_player = ch
current_max_entropy_player, current_max_entropy

2.313837615081067
2.518484195329019
1.5845442969764774
2.3104663798288527
2.3209607039061257
2.3140452060934926
2.7993480717361705
1.5845442969764774
2.578212646650187
2.4889485627981576


('V7', np.float64(2.7993480717361705))

In [4]:
# 6.b
data_v6 = data['V6']
current_max_mi = 0
current_max_mi_player = 'V1'
for ch in [f'V{i}' for i in range(1, 11) if i != 6]:
# for ch in [f'V{i}' for i in range(1, 11)]:
    result = mutualinformationempirical(data_v6.to_numpy(), data[ch].to_numpy())
    print(result)
    if result > current_max_mi:
        current_max_mi = result
        current_max_mi_player = ch
current_max_mi_player, current_max_mi


0.012754737842673336
0.010295128738048831
0.0027889745945293143
0.0077846775201164675
0.0058186472615426865
0.023847932765335145
0.00400041241963045
0.012863990073530118
1.4899714750203668


('V10', np.float64(1.4899714750203668))

In [13]:
# 6.c
# First calculate the entropy of V2 and V9
data_v2 = data['V2']
data_v9 = data['V9']
data_v2_np = data_v2.to_numpy()
data_v9_np = data_v9.to_numpy()
symbols_v2 = np.unique(data_v2_np)
symbols_v9 = np.unique(data_v9_np)
total_num = data.shape[0]
counts_v2 = []
counts_v9 = []
for symbol in symbols_v2:
    count = 0
    for row in data_v2_np:
        if row == symbol:
            count += 1
    counts_v2.append(count)
counts_v2 = np.array(counts_v2)
probabilities_v2 = counts_v2 / total_num
for symbol in symbols_v9:
    count = 0
    for row in data_v9_np:
        if row == symbol:
            count += 1
    counts_v9.append(count)
counts_v9 = np.array(counts_v9)
probabilities_v9 = counts_v9 / total_num
H2 = entropy(probabilities_v2)
H9 = entropy(probabilities_v9)
# Then calculate the joint entropy of V2&V9
data_v2_np_cv = np.reshape(data_v2_np, (total_num, 1))
data_v9_np_cv = np.reshape(data_v9_np, (total_num, 1))
data_v29_np = np.concatenate((data_v2_np_cv, data_v9_np_cv), axis=1)
symbols, symbol_index = np.unique(data_v29_np, axis=0, return_inverse=True)
symbol_index_symbol = np.unique(symbol_index)
counts_v29 = []
for symbol in symbol_index_symbol:
    count = 0
    for row in symbol_index:
        if row == symbol:
            count += 1
    counts_v29.append(count)
counts_v29 = np.array(counts_v29)
probabilities_v29 = counts_v29 / total_num
H29 = entropy(probabilities_v29)
result = H2 + H9 - H29
print(result)
assert (result==mutualinformationempirical(data_v2_np, data_v9_np))

0.02137734154644111


In [14]:
# 6.d
# First calculate the information content of V5=1
total_num = data.shape[0]
data_v5 = data['V5']
data_v5_np = data_v5.to_numpy()
count_v5_1 = 0
for row in data_v5_np:
    if row == 1:
        count_v5_1 += 1
h_v5_1 = infocontent(count_v5_1/total_num)
# Then calculate the information content of each value in V2
data_v2 = data['V2']
data_v2_np = data_v2.to_numpy()
symbols_v2 = np.unique(data_v2_np)
counts_v2 = []
for symbol in symbols_v2:
    count = 0
    for row in data_v2_np:
        if row == symbol:
            count += 1
    counts_v2.append(count)
counts_v2 = np.array(counts_v2)
probabilities_v2 = counts_v2 / total_num
print(probabilities_v2, symbols_v2)
h_v2 = infocontent(probabilities_v2)
print(h_v2)
# Calculate the joint information content of V2=x&V5=1
data_v5_np_cv = np.reshape(data_v5_np, (total_num, 1))
data_v2_np_cv = np.reshape(data_v2_np_cv, (total_num, 1))
data_v25_np = np.concatenate((data_v2_np_cv, data_v5_np_cv), axis=1)
counts_v25 = []
for symbol in symbols_v2:
    count = 0
    for row in data_v25_np:
        if (row == [symbol, 1]).all():
            count += 1
    counts_v25.append(count)
counts_v25 = np.array(counts_v25)
probabilities_v25 = counts_v25 / total_num
h25 = infocontent(probabilities_v25)
# Lastly calculate the pointwise mutial information
h_v5 = np.array([h_v5_1] * h_v2.shape[0])
i25 = h_v2 + h_v5 - h25
i25
# log里面不能是0
data_v5, data_v2

[0.0979021  0.2007992  0.18281718 0.1998002  0.21778222 0.1008991 ] [2 3 4 5 6 7]
[3.35251641 2.31617457 2.45152642 2.32337007 2.19904193 3.30901478]


  return -np.log2(p)


(0       1
 1       3
 2       2
 3       2
 4       3
        ..
 996     5
 997     4
 998     2
 999     5
 1000    3
 Name: V5, Length: 1001, dtype: int64,
 0       2
 1       4
 2       3
 3       4
 4       5
        ..
 996     7
 997     5
 998     3
 999     7
 1000    4
 Name: V2, Length: 1001, dtype: int64)

In [7]:
# 6.e
# $I(X;Y)=H(X)-H(X|Y)$. Just like what we taught in class, $0\leq H(X|Y)\leq H(X)$, so we have $H(X)-H(X|Y)\leq 0$, which means $I(X;Y)\leq 0$. Mutual information is the expceted value of the pointwise mutual information. $i(x;y)<0$ meas that the occurrence of $y$ decreases the probability of $x$ occurring, but some other $i(x;y)>0$ can offset this effect, so we have the expected value greater than or equal to $0$.

In [8]:
# 6.f.i
total_num = data.shape[0]
# First get the data of V1
data_v1_n = data['V1'][0:total_num-1]
data_v1_n_np = data_v1_n.to_numpy()
# Then get the data of V4 (one sample ahead)
data_v4_np1 = data['V4'][1:total_num]
data_v4_np1_np = data_v4_np1.to_numpy()
# Calculate the mutual information between two data sets
result = mutualinformationempirical(data_v1_n_np, data_v4_np1_np)
result

np.float64(1.0195487688698357)

In [9]:
# 6.f.ii
total_num = data.shape[0]
# First get the data of V1
data_v1_n = data['V1'][0:total_num-2]
data_v1_n_np = data_v1_n.to_numpy()
# Then get the data of V4 (two samples ahead)
data_v4_np2 = data['V4'][2:total_num]
data_v4_np2_np = data_v4_np2.to_numpy()
# Then get the data of C4 (one sample ahead)
data_v4_np1 = data['V4'][1:total_num-1]
data_v4_np1_np = data_v4_np1.to_numpy()
# Calculate the conditional mutual information between two data sets conditioned on data_v4_np1_np
result = conditionalmutualinformationempirical(data_v1_n_np, data_v4_np2_np, data_v4_np1_np)
result

np.float64(0.20861439103323853)

In [10]:
# 6.g
