The following class is used to calculate the mutual information between each class and the target variable.

In [2]:
import numpy as np
import pandas as pd


class CategoricalMI:

    def __init__(self, data, target_name, feature_names):

        self.y = data[target_name]
        self.feature_names = feature_names
        self.X = data[feature_names]
        self.X = self.X.astype(str)
        self.y = self.y.astype(str)
        self.nrows = len(self.X)
        self.mis = {}


    def joint_entropy(self, x, y, p):
        sum = 0
        for x in range(x):
            for y in range(y):
                if p[x][y] == 0:

                    continue
                sum += -(p[x][y] * np.log2((p[x][y])))

        return sum

    def entropy(self, x):

        sum = 0
        for i in x:
            sum -= i * np.log2(i)
        return sum

    def creat_joint_pmt(self, fname):

        ct = pd.crosstab(self.y, self.X[fname])
        jp = np.array(ct)
        jp = jp / self.nrows

        return jp

    def run(self):

        mi_scores = []

        for fname in self.feature_names:

            jpmf = self.creat_joint_pmt(fname)
            h_x_y = self.joint_entropy(jpmf.shape[0], jpmf.shape[1], jpmf)

            xpmf = jpmf.sum(axis=0)
            ypmf = jpmf.sum(axis=1)

            h_x = self.entropy(xpmf)
            h_y = self.entropy(ypmf)

            cond_x_y = h_x_y - h_y
            cond_y_x = h_x_y - h_x

            i_x_y = h_x - cond_x_y

            mi_scores.append(i_x_y)
        
        mi_scores = sorted(zip(mi_scores, self.feature_names), key=lambda x : x[0], reverse=True)
        self.mis = {fn: mi for mi, fn in mi_scores}

    def report(self):

        for fname, mi in self.mis.items():
            
            print(f"{fname} mutual information with target : {mi}")


In [3]:
data = pd.read_csv("./ACME-HappinessSurvey2020.csv")

In [4]:
feature_names = ['X1', 'X2', 'X3', 'X4', 'X5', 'X6']

In [5]:
cat_mi = CategoricalMI(data, "Y", feature_names)

In [6]:
cat_mi.run()

The following cell displays the features and their associated mutual information with the target variable in descending order.

In [7]:
cat_mi.report()

X1 mutual information with target : 0.6022187855318999
X6 mutual information with target : 0.5646106714581822
X5 mutual information with target : 0.4794815437913742
X4 mutual information with target : 0.39708921405515984
X3 mutual information with target : 0.3322625056186468
X2 mutual information with target : 0.17166808026614677
