In [231]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

In [232]:
# Load the first 279 rows of the spreadsheet.
df = pd.read_excel('dataset.xlsx', nrows=279)
# Encode the SESSO column numerically (M -> 0.0, F -> 1.0).
# The result is assigned back instead of calling replace(inplace=True) on
# df["SESSO"]: that form mutates an intermediate Series and, with pandas
# Copy-on-Write semantics, leaves df itself unchanged.
df["SESSO"] = df["SESSO"].replace({'M': 0., 'F': 1.})
# Force every column to a numeric dtype; anything that still cannot be parsed
# becomes NaN (errors='coerce').
# NOTE(review): commas are removed before parsing, i.e. they are treated as
# thousands separators. If the sheet uses a decimal comma this would turn
# "3,5" into 35 -- confirm against the source file.
for key in df.keys():
    df[key] = pd.to_numeric(df[key].astype(str).str.replace(",", ""), errors='coerce')
# Feature matrix (every column except TARGET) and target vector, both float64.
X = df.drop(columns='TARGET').to_numpy(dtype=np.float64)
y = df['TARGET'].to_numpy(dtype=np.float64)

<h3>Data pre-processing</h3>
<h4>Missing values</h4>
<p>
To address data incompleteness, we imputed the missing values using Multivariate Imputation by Chained Equations (MICE), implemented here with scikit-learn's <code>IterativeImputer</code>.
</p>

In [233]:
# Fit the iterative imputer on X, fill in the missing entries, and keep one
# decimal place in the imputed matrix.
# NOTE(review): the imputer is fitted on the full X, before the train/test
# split that happens in a later cell -- confirm this is intended.
imp = IterativeImputer(max_iter=300, random_state=0).fit(X)
X_imputed = imp.transform(X)
X = np.round(X_imputed, 1)

<h4>Data normalization</h4>
<p>
We used the min-max scaler (<code>MinMaxScaler</code>), which rescales each feature to the range [0, 1]. Min-max normalization preserves the relationships among the original data values. The cost of having this bounded range is that we end up with smaller standard deviations, which can suppress the effect of outliers.
</p>

In [234]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale every feature column of X and keep two decimal places.
# Alternative (not used): sklearn.preprocessing.normalize(X, axis=0, norm='max').
# NOTE(review): the scaler is fitted on the full X, before the train/test
# split that happens in a later cell -- confirm this is intended.
scaler = MinMaxScaler().fit(X)
X_scaled = scaler.transform(X)
X = np.round(X_scaled, 2)


[[  0.   56.    2.9 ...  43.   21.  257. ]
 [  0.   56.    3.5 ...  50.   17.  207. ]
 [  0.   72.    4.6 ...  61.1  31.5 404.8]
 ...
 [  1.   44.   15.7 ... 111.   76.2 272. ]
 [  1.   41.   12.  ... 201.   14.  266. ]
 [  0.   53.   17.9 ...  75.2  13.9 258. ]]


In [235]:
# Stratified hold-out split: 15% of the samples go to the test set, with the
# class proportions of y preserved in both parts.
# random_state pins the shuffle so that re-running the notebook yields the
# same partition (same seed value as the imputer above).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.15, random_state=0
)

In [236]:
def max_min(x1, x2):
    """Return the largest of the position-wise minima of ``x1`` and ``x2``.

    The inputs are stacked row-wise with ``np.vstack``, the minimum is taken
    down each column, and the maximum of those minima is returned.
    """
    stacked = np.vstack((x1, x2))
    return stacked.min(axis=0).max()


def union(R1, R2):
    """Combine two relations by taking the larger value at each position.

    ``R1`` and ``R2`` are stacked depth-wise with ``np.dstack`` and reduced
    with ``max`` over the third axis, so two 2-D inputs of equal shape give a
    result of that same shape.
    """
    layered = np.dstack((R1, R2))
    return layered.max(axis=2)

def maxmin_composition(R1, R2):
    """Build the matrix of ``max_min`` values for every pair of rows.

    Entry ``[i, j]`` of the returned float array is ``max_min`` applied to the
    i-th item of ``R1`` and the j-th item of ``R2``; the result has shape
    ``(len(R1), len(R2))``.

    Note that items (rows) of ``R2`` are used as-is, not its columns.
    """
    composed = np.zeros((len(R1), len(R2)))
    for row_idx, left in enumerate(R1):
        for col_idx, right in enumerate(R2):
            composed[row_idx, col_idx] = max_min(left, right)
    return composed