# 03 Information Theory metrics

In [1]:
# Notebook setup: autoreload, import path and cell formatting.
# autoreload mode 2 re-imports edited modules before each cell is executed,
# so changes made under `src/` are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import sys

# Add the parent directory to the import path; the `src` package imported by
# later cells is presumably located there (verify the notebook's working dir).
sys.path.append("../")

import jupyter_black

# Activate the jupyter_black extension for this session.
jupyter_black.load()

In [2]:
import math
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import src.cleansing as cleansing
from loguru import logger

In [3]:
# Load the raw weather CSV and hand it straight to the project's cleaning
# helper. Per the original author's notes the helper renames the columns and
# parses the datetime column.
df = cleansing.clean_weather_data(pd.read_csv("../data/Weather Data.csv"))

In [4]:
# Show the column names of the cleaned frame.
df.columns

Index(['date_time', 'temp_c', 'dew_point_temp_c', 'real_hum_pct',
       'wind_speed_kmh', 'visibility_km', 'press_kpa', 'weather'],
      dtype='object')

## Shannon's Entropy function

H(x)

In [5]:
from src.entropy import get_shannons_entropy

In [6]:
# Shannon entropy H(x) of the temperature readings.
x = df["temp_c"].values
entropy_temp_c = get_shannons_entropy(x)
entropy_temp_c

[32m2023-07-18 22:57:46.959[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m22[0m - [34m[1mNumerical data -1.8[0m


6.303542871569272

## Mutual Information

I(x, y)

In [7]:
from src.mutual_information import get_mutual_information

In [8]:
# Mutual information between air temperature and dew point temperature.
x, y = df["temp_c"].values, df["dew_point_temp_c"].values

mi = get_mutual_information(x, y)
mi

[32m2023-07-18 22:57:47.001[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m22[0m - [34m[1mNumerical data -1.8[0m
[32m2023-07-18 22:57:47.005[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m22[0m - [34m[1mNumerical data -3.9[0m


1.9931993003955788

In [9]:
# Mutual information between air temperature and visibility.
x, y = df["temp_c"].values, df["visibility_km"].values

mi = get_mutual_information(x, y)
mi

[32m2023-07-18 22:57:47.034[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m22[0m - [34m[1mNumerical data -1.8[0m
[32m2023-07-18 22:57:47.038[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m22[0m - [34m[1mNumerical data 8.0[0m


0.2660521426744083

## Conditional Entropy

H(x|y)

In [10]:
# Conditional entropy of temperature given visibility:
#   H(x|y) = H(x) - I(x; y)
x, y = df["temp_c"].values, df["visibility_km"].values

entropy = get_shannons_entropy(x)
mi = get_mutual_information(x, y)
# Despite the "joint" in its name, this variable holds the conditional
# entropy H(x|y); the name is kept because later cells may refer to it.
joint_x_if_y = entropy - mi
print(entropy, mi, joint_x_if_y)

[32m2023-07-18 22:57:47.067[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m22[0m - [34m[1mNumerical data -1.8[0m
[32m2023-07-18 22:57:47.073[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m22[0m - [34m[1mNumerical data -1.8[0m
[32m2023-07-18 22:57:47.078[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m22[0m - [34m[1mNumerical data 8.0[0m


6.303542871569272 0.2660521426744083 6.037490728894864


## Conditional Mutual Information

I(x;y|z)

In [11]:
from src.conditional_mutual_information import get_conditional_mutual_information

In [12]:
# Conditional mutual information for temperature, visibility and pressure,
# as computed by the project helper.
x, y, z = (
    df["temp_c"].values,
    df["visibility_km"].values,
    df["press_kpa"].values,
)

cmi = get_conditional_mutual_information(x, y, z)
cmi

[32m2023-07-18 22:57:47.164[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m22[0m - [34m[1mNumerical data -1.8[0m
[32m2023-07-18 22:57:47.166[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m22[0m - [34m[1mNumerical data 8.0[0m
[32m2023-07-18 22:57:47.168[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m22[0m - [34m[1mNumerical data 101.24[0m


2.3259358378227235

In [13]:
# Entropy of temperature after accounting for visibility and pressure.
x, y, z = (
    df["temp_c"].values,
    df["visibility_km"].values,
    df["press_kpa"].values,
)

entropy = get_shannons_entropy(x)
mi_x_y, mi_x_z = get_mutual_information(x, y), get_mutual_information(x, z)
cmi = get_conditional_mutual_information(x, y, z)

# Intended as H(x | y, z).
# NOTE(review): a conditional entropy can never exceed H(x), yet the saved
# output of this cell reports a value larger than `entropy`. Confirm which
# quantity get_conditional_mutual_information returns and whether this
# identity is the right one for it.
joint_x_if_y_and_z = entropy - mi_x_y - mi_x_z + cmi
print(entropy, mi_x_y, mi_x_z, cmi, joint_x_if_y_and_z)

[32m2023-07-18 22:57:47.306[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m22[0m - [34m[1mNumerical data -1.8[0m
[32m2023-07-18 22:57:47.308[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m22[0m - [34m[1mNumerical data -1.8[0m
[32m2023-07-18 22:57:47.310[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m22[0m - [34m[1mNumerical data 8.0[0m
[32m2023-07-18 22:57:47.317[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m22[0m - [34m[1mNumerical data -1.8[0m
[32m2023-07-18 22:57:47.319[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m22[0m - [34m[1mNumerical data 101.24[0m
[32m2023-07-18 22:57:47.327[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m22[0m - [34m[1mNumerical data -1.8[0m
[32m2023-07-18 22:57

6.303542871569272 0.2660521426744083 0.7384244766812119 2.3259358378227235 7.625002090036375


In [43]:
# Sanity check on toy data: three identical sequences of nine distinct values.
x = list(range(1, 10))
y = list(x)
z = list(x)

entropy = get_shannons_entropy(x)
mi_x_y, mi_x_z = get_mutual_information(x, y), get_mutual_information(x, z)
cmi = get_conditional_mutual_information(x, y, z)

# With x == y == z, x is fully determined by y and z, so H(x | y, z) is
# expected to be 0.
# NOTE(review): the saved output is non-zero and `cmi` comes out as exactly
# twice `entropy`, which suggests a problem in the helper or in this identity
# - verify before relying on these numbers.
joint_x_if_y_and_z = entropy - mi_x_y - mi_x_z + cmi
print(entropy, mi_x_y, mi_x_z, cmi, joint_x_if_y_and_z)

[32m2023-07-19 22:33:58.891[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m17[0m - [34m[1mNumerical data 1[0m
[32m2023-07-19 22:33:58.892[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m17[0m - [34m[1mNumerical data 1[0m
[32m2023-07-19 22:33:58.892[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m17[0m - [34m[1mNumerical data 1[0m
[32m2023-07-19 22:33:58.893[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m17[0m - [34m[1mNumerical data 1[0m
[32m2023-07-19 22:33:58.893[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m17[0m - [34m[1mNumerical data 1[0m
[32m2023-07-19 22:33:58.894[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m17[0m - [34m[1mNumerical data 1[0m
[32m2023-07-19 22:33:58.894[0m | [34m

3.169925001442312 3.169925001442312 3.169925001442312 6.339850002884624 3.169925001442312


In [44]:
# Re-print the values computed by the previous cell. This cell defines nothing
# itself, so it only works after that cell has been run in the same kernel.
print(entropy, mi_x_y, mi_x_z, cmi, joint_x_if_y_and_z)

3.169925001442312 3.169925001442312 3.169925001442312 6.339850002884624 3.169925001442312


In [47]:
# Cross-check against scikit-learn on the same toy data.
from sklearn.metrics import mutual_info_score

# Alternative categorical example kept from the original cell:
# a = ["A", "B", "A", "A", "B", "B", "A", "A", "B", "B"]
# x = ["X", "X", "X", "Y", "Z", "Z", "Y", "Y", "Z", "Z"]
a = list(range(1, 10))
x = list(a)

# mutual_info_score uses the natural logarithm, so its result is in nats.
# For nine distinct, perfectly matched labels that is ln(9) ~= 2.197, the same
# quantity as log2(9) ~= 3.170 bits. Divide by ln(2) before comparing with a
# base-2 implementation.
mi = mutual_info_score(a, x)
mi

2.1972245773362196

In [48]:
# Same toy data through the project's own mutual-information helper.
# The saved result equals log2(9), i.e. the helper appears to work in bits.
a = list(range(1, 10))
x = list(a)
mi = get_mutual_information(a, x)
mi

[32m2023-07-19 22:35:00.700[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m17[0m - [34m[1mNumerical data 1[0m
[32m2023-07-19 22:35:00.701[0m | [34m[1mDEBUG   [0m | [36msrc.entropy[0m:[36mget_marginal_probabilities[0m:[36m17[0m - [34m[1mNumerical data 1[0m


3.169925001442312