In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import mutual_info_classif
from sklearn.utils.parallel import Parallel, delayed
from sklearn.preprocessing import OrdinalEncoder
import numpy as np
import pandas as pd

In [2]:
# Load the breast-cancer dataset that ships with scikit-learn and assemble a
# single frame: one column per feature name, with the class label appended
# last under the name 'target'.
breast = load_breast_cancer()
data = pd.DataFrame(breast.data, columns=breast.feature_names).assign(target=breast.target)

In [4]:
def mi(x,y,n_iter=5):
    """Estimate the mutual information between one feature and a class label.

    The estimate is repeated ``n_iter`` times with seeds ``42, 43, ...`` and
    summarised, because ``mutual_info_classif`` adds seeded random noise to
    continuous features and therefore varies from seed to seed.

    Parameters
    ----------
    x : array-like of shape (n_samples,)
        Values of a single feature. Object-dtype input is treated as
        categorical: it is ordinal-encoded and flagged as discrete.
    y : array-like of shape (n_samples,)
        Class labels.
    n_iter : int, default=5
        Number of repeated estimates.

    Returns
    -------
    numpy.ndarray of shape (2,)
        ``[mean, std]`` of the ``n_iter`` estimates, in the units returned by
        ``mutual_info_classif`` (documented by scikit-learn as nats).
    """
    X = np.array(x).reshape((-1,1))
    y = np.array(y)
    is_categorical = X.dtype == 'object'
    if is_categorical:
        X = OrdinalEncoder().fit_transform(X)
    # The encoded category codes are arbitrary integers. Without an explicit
    # flag, 'auto' resolves to "continuous" for a dense array and the codes
    # would be fed to the nearest-neighbour estimator as if they were
    # measurements. For numeric input, False is what 'auto' already means.
    scores = Parallel(n_jobs=-1)(
        delayed(mutual_info_classif)(
            X, y, discrete_features=is_categorical, random_state=42+i
        )
        for i in range(n_iter)
    )
    return np.array([np.mean(scores), np.std(scores)])

def theil_u(train,target,comment=''):
    """Print Theil's uncertainty coefficient U(y|x), in percent, per feature.

    For every column ``c`` of ``train`` other than ``target`` one line
    ``"<c>: <mean> ± <std>"`` is printed, where the two numbers are the
    result of ``mi(train[c], train[target])`` divided by the entropy of the
    target and multiplied by 100.

    Parameters
    ----------
    train : pandas.DataFrame
        Frame containing the feature columns and the target column.
    target : str
        Name of the class-label column in ``train``.
    comment : str, default=''
        Free text inserted into the header line.

    Returns
    -------
    None
        The table is written to standard output.

    Raises
    ------
    ValueError
        If the target has fewer than two observed classes: its entropy is
        zero and the coefficient is undefined.
    """
    # H(y) has a closed form for a discrete label, so compute it exactly from
    # the empirical class frequencies instead of estimating it as I(y; y).
    # Natural log keeps the units (nats) consistent with mutual_info_classif.
    probs = train[target].value_counts(normalize=True).to_numpy()
    probs = probs[probs > 0]  # unobserved categories contribute nothing
    entropy = -np.sum(probs * np.log(probs))
    if not entropy > 0:
        raise ValueError(
            f"target column {target!r} has fewer than two classes; "
            "U(y|x) is undefined when H(y) = 0"
        )

    print(F'*** U(y|x) in % {comment} ***')
    for c in train.columns:
        if c != target:
            mu, sigma = mi(train[c],train[target])/entropy*100
            print(F"{c}: {mu:.2f} ± {sigma:.2f}")

# Baseline table: uncertainty coefficient of every column of `data` with
# respect to the 'target' column, before any further processing.
theil_u(train=data, target='target', comment='(Original data)')

*** U(y|x) in % (Original data) ***


mean radius: 55.32 ± 0.32
mean texture: 14.29 ± 0.27
mean perimeter: 61.04 ± 0.17
mean area: 54.46 ± 0.33
mean smoothness: 12.04 ± 0.49
mean compactness: 32.29 ± 0.19
mean concavity: 56.44 ± 0.04
mean concave points: 66.62 ± 0.17
mean symmetry: 9.92 ± 0.56
mean fractal dimension: 1.02 ± 0.18
radius error: 37.54 ± 0.23
texture error: 0.17 ± 0.09
perimeter error: 41.64 ± 0.31
area error: 51.45 ± 0.12
smoothness error: 2.32 ± 0.16
compactness error: 11.35 ± 0.23
concavity error: 17.78 ± 0.14
concave points error: 19.36 ± 0.16
symmetry error: 1.89 ± 0.32
fractal dimension error: 5.91 ± 0.18
worst radius: 68.68 ± 0.14
worst texture: 18.07 ± 0.18
worst perimeter: 71.92 ± 0.06
worst area: 70.20 ± 0.09
worst smoothness: 14.93 ± 0.59
worst compactness: 34.05 ± 0.12
worst concavity: 47.82 ± 0.07
worst concave points: 66.14 ± 0.20
worst symmetry: 14.41 ± 0.34
worst fractal dimension: 10.16 ± 0.34
