In [None]:
### import libraries ###

import numpy as np
import pandas as pd
from scipy.stats import kruskal, pearsonr, chi2_contingency, kruskal, pearsonr, chi2_contingency
from sklearn.feature_selection import mutual_info_classif
from sklearn.preprocessing import MinMaxScaler, KBinsDiscretizer

In [None]:
### Mutual Information Technique ###

def mutual_information(X, y):
    """Rank the columns of ``X`` by their mutual information with ``y``.

    Scores come from ``mutual_info_classif`` with ``random_state=42`` so that
    repeated calls on the same data give the same scores.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table; its column labels become the ``Feature`` values.
    y : array-like
        Target passed straight through to ``mutual_info_classif``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Feature``, ``MI_Score`` and ``Rank``, ordered from the
        highest score (``Rank`` 1) to the lowest, with a fresh 0..n-1 index.
    """
    scores = mutual_info_classif(X, y, random_state=42)
    ranking = (
        pd.DataFrame({"Feature": X.columns, "MI_Score": scores})
        .sort_values("MI_Score", ascending=False)
        .reset_index(drop=True)
    )
    # The index is 0-based after the reset, so shifting it by one gives the rank.
    return ranking.assign(Rank=lambda frame: frame.index + 1)

In [None]:
### Kruskal Wallis Technique ###

def kruskal_wallis(X, y):
    """Rank the columns of ``X`` with the Kruskal-Wallis H-test across classes.

    For every feature the values are split into one sample per distinct label
    in ``y`` and handed to ``scipy.stats.kruskal`` with ``nan_policy="omit"``,
    so missing feature values are left out of the test.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table; its column labels become the ``Feature`` values.
    y : array-like, pandas.Series or pandas.DataFrame
        Class labels, one per row of ``X``. Labels are matched to the rows of
        ``X`` by position, not by index. For a DataFrame only the first column
        is used. Rows whose label is missing are ignored.

    Returns
    -------
    pandas.DataFrame
        Columns ``Feature``, ``H_statistic``, ``p_value`` and ``Rank``, ordered
        by ascending p-value (``Rank`` 1 is the smallest), with a fresh
        0..n-1 index.

    Notes
    -----
    Exceptions raised by ``scipy.stats.kruskal`` propagate unchanged.
    """
    if isinstance(y, pd.DataFrame):
        y = y.iloc[:, 0]
    # Work on a plain array so labels pair with rows positionally; a Series
    # mask would be aligned on its index and fail when it differs from X's.
    labels = np.ravel(y)

    # A missing label never compares equal to itself, so keeping it would add
    # an empty "class" whose empty sample makes the whole test undefined.
    labelled = ~pd.isna(labels)
    labels = labels[labelled]
    classes = np.unique(labels)

    rows = []
    for position, feature in enumerate(X.columns):
        values = X.iloc[:, position].to_numpy()[labelled]
        groups = [values[labels == c] for c in classes]
        H, p = kruskal(*groups, nan_policy="omit")
        rows.append([feature, H, p])

    result = pd.DataFrame(rows, columns=["Feature", "H_statistic", "p_value"])
    result = result.sort_values("p_value", ascending=True).reset_index(drop=True)
    result["Rank"] = result.index + 1
    return result

In [None]:
### Chi-squared Technique ###

def chi_squared(X, y, n_bins=10):
    """Rank the columns of ``X`` with a chi-squared test of independence.

    Each feature is coerced to numeric, cut into ``n_bins`` equal-width
    ordinal bins with ``KBinsDiscretizer`` and cross-tabulated against the
    target; ``scipy.stats.chi2_contingency`` is run on that table.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table; its column labels become the ``Feature`` values.
    y : array-like, pandas.Series or pandas.DataFrame
        Target, one value per row of ``X`` (matched by position). For a
        DataFrame only the first column is used. Values are coerced to
        numeric; rows whose target cannot be converted are dropped.
    n_bins : int, default 10
        Number of equal-width bins requested for every feature.

    Returns
    -------
    pandas.DataFrame
        Columns ``Feature``, ``Chi2_stat`` and ``p_value``, ordered by
        ascending p-value. A feature with no usable numeric value gets NaN
        for both statistics.
    """
    target = y.iloc[:, 0] if isinstance(y, pd.DataFrame) else np.ravel(y)
    y_numeric = np.asarray(pd.to_numeric(target, errors='coerce'), dtype=float)

    # Keep only the rows that have a usable target value.
    labelled = ~np.isnan(y_numeric)
    y_valid = y_numeric[labelled]
    X_valid = X.loc[labelled].apply(pd.to_numeric, errors='coerce')

    rows = []
    for position, feature in enumerate(X_valid.columns):
        column = np.asarray(X_valid.iloc[:, position], dtype=float)
        observed = ~np.isnan(column)
        if not observed.any():
            rows.append([feature, np.nan, np.nan])
            continue

        # KBinsDiscretizer refuses input containing NaN, so each feature is
        # binned on its own observed rows instead of binning the whole table
        # at once; bin edges are per-feature either way.
        binner = KBinsDiscretizer(n_bins=n_bins, encode='ordinal', strategy='uniform')
        binned = binner.fit_transform(column[observed].reshape(-1, 1)).ravel()

        table = pd.crosstab(binned, y_valid[observed])
        chi2, p, _, _ = chi2_contingency(table)
        rows.append([feature, chi2, p])

    chi_df = pd.DataFrame(rows, columns=['Feature', 'Chi2_stat', 'p_value'])
    chi_df = chi_df.sort_values(by='p_value', ascending=True)
    return chi_df

In [None]:
### Pearson's r Technique ###

def pearson_r(X, y):
    """Rank the columns of ``X`` by their Pearson correlation with ``y``.

    Feature and target values are coerced to numeric; for each feature only
    the rows where both the feature and the target are present are passed to
    ``scipy.stats.pearsonr``.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table; its column labels become the ``Feature`` values.
    y : array-like, pandas.Series or pandas.DataFrame
        Target, one value per row of ``X`` (matched by position). For a
        DataFrame only the first column is used.

    Returns
    -------
    pandas.DataFrame
        Columns ``Feature``, ``Correlation`` (absolute value of r) and
        ``p_value``, ordered by ascending p-value. A feature with fewer than
        two paired observations gets NaN for both statistics.
    """
    target = y.iloc[:, 0] if isinstance(y, pd.DataFrame) else np.ravel(y)
    y_numeric = np.asarray(pd.to_numeric(target, errors='coerce'), dtype=float)
    target_missing = np.isnan(y_numeric)

    rows = []
    for position, feature in enumerate(X.columns):
        x_numeric = np.asarray(
            pd.to_numeric(X.iloc[:, position], errors='coerce'), dtype=float
        )
        paired = ~(np.isnan(x_numeric) | target_missing)
        # pearsonr needs at least two points; a single surviving pair would
        # raise and abort the ranking of every other feature.
        if paired.sum() < 2:
            rows.append([feature, np.nan, np.nan])
            continue
        corr, p = pearsonr(x_numeric[paired], y_numeric[paired])
        rows.append([feature, abs(corr), p])

    pearson_df = pd.DataFrame(rows, columns=['Feature', 'Correlation', 'p_value'])
    pearson_df = pearson_df.sort_values(by='p_value', ascending=True)
    return pearson_df

In [None]:
### Load Dataset ###

# Read the dataset workbook; replace the placeholder string with the real path.
Df = pd.read_excel("Input the path of Internal Dataset")

# Every column except the last one forms the feature table X;
# the last column is used as y.
X = Df.iloc[:, :-1]
y = Df.iloc[:, -1]

In [None]:
# Print a header, then the ranking table returned by mutual_information(X, y).
print("*** Mutual Information ***")
print(mutual_information(X, y))

In [None]:
# Print a header, then the ranking table returned by kruskal_wallis(X, y).
print("*** Kruskal Wallis ***")
print(kruskal_wallis(X, y))

In [None]:
# Print a header, then the table returned by chi_squared(X, y) with its default n_bins.
print("*** Chi squared ***")
print(chi_squared(X, y))

In [None]:
# Print a header, then the table returned by pearson_r(X, y).
print("*** Pearson Correlation ***")
print(pearson_r(X, y))