In [1]:
import pandas as pd
import os
import sys

# Make the project's ``src`` directory importable. It is resolved relative to
# the current working directory as <cwd>/../src, so the notebook must be
# started from a directory that sits next to ``src``.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
src_dir = os.path.join(parent_dir, 'src')

# Guard the append so that re-running this cell does not pile duplicate
# entries onto sys.path.
if src_dir not in sys.path:
    sys.path.append(src_dir)

from TLogicToMLP import *
from MLPToTLogic import *
from utils.Tree import level_order_traversal


In [2]:
# Load the dataset; the relative path is resolved against the current
# working directory.
df = pd.read_csv(filepath_or_buffer='diabetes.csv')

# Preview the first five rows.
df.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
# Report the dataset dimensions. One column is subtracted from the feature
# count (presumably the 'Outcome' label column, which is not a feature).
print(f'Number of observations: {len(df.index)}')
print(f'Number of features: {len(df.columns) - 1}')


Number of observations: 768
Number of features: 8


In [4]:
# Separate the 'Outcome' label column (y) from the remaining columns (X).
y = df.loc[:, 'Outcome']
X = df.drop('Outcome', axis=1)

# Preview the feature table.
X.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33


In [5]:
from sklearn.preprocessing import MinMaxScaler

# Rescale every feature column with a default-configured MinMaxScaler
# (default target range is [0, 1]) and wrap the resulting array back into a
# DataFrame that carries the original column labels.
scaler = MinMaxScaler()
feature_names = X.columns
X = pd.DataFrame(data=scaler.fit_transform(X), columns=feature_names)

# Preview the scaled features.
X.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,0.352941,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,0.483333
1,0.058824,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,0.166667
2,0.470588,0.919598,0.52459,0.0,0.0,0.347243,0.253629,0.183333
3,0.058824,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,0.0
4,0.0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,0.2


In [6]:
# Pairwise correlation of every column in df (pandas' default method), then
# the 'Outcome' column of that matrix ranked from highest to lowest value.
correlation_matrix = df.corr()
correlation = (
    correlation_matrix
    .loc[:, 'Outcome']
    .sort_values(ascending=False)
)

print(correlation)

Outcome                     1.000000
Glucose                     0.466581
BMI                         0.292695
Age                         0.238356
Pregnancies                 0.221898
DiabetesPedigreeFunction    0.173844
Insulin                     0.130548
SkinThickness               0.074752
BloodPressure               0.065068
Name: Outcome, dtype: float64


In [7]:
# Every unordered pair of feature columns, in the order the columns appear in
# df. The 'Outcome' label is filtered out once, up front, instead of inside
# the pairing loop.
feature_columns = [col for col in df.columns if col != 'Outcome']
feature_pairs = [
    (col1, col2)
    for i, col1 in enumerate(feature_columns)
    for col2 in feature_columns[i + 1:]
]

# Correlate the element-wise product of each pair (an interaction term) with
# the label. The per-pair value is written straight into the dict rather than
# into a variable named `correlation`, so the per-feature `correlation` Series
# computed in the previous cell is not silently replaced by a float.
pair_correlations = {}
for col1, col2 in feature_pairs:
    interaction_term = df[col1] * df[col2]
    pair_correlations[(col1, col2)] = interaction_term.corr(df['Outcome'])

# Ascending by absolute correlation: the strongest pairs come last.
sorted_pairs = sorted(pair_correlations.items(), key=lambda item: abs(item[1]))

print(sorted_pairs)

[(('BloodPressure', 'SkinThickness'), 0.10205351331987537), (('Insulin', 'DiabetesPedigreeFunction'), 0.11612400891968465), (('SkinThickness', 'BMI'), 0.13039743579326546), (('BloodPressure', 'Insulin'), 0.14321188302324858), (('Insulin', 'BMI'), 0.14740696156525737), (('SkinThickness', 'Insulin'), 0.16530715583253192), (('SkinThickness', 'Age'), 0.17020254654136296), (('Glucose', 'Insulin'), 0.1777859210068005), (('SkinThickness', 'DiabetesPedigreeFunction'), 0.17991425848587017), (('BloodPressure', 'DiabetesPedigreeFunction'), 0.18292522010223677), (('Pregnancies', 'SkinThickness'), 0.18907749199318216), (('Insulin', 'Age'), 0.1939828658575503), (('BloodPressure', 'Age'), 0.2061980830562266), (('BloodPressure', 'BMI'), 0.20792151052013763), (('Pregnancies', 'Insulin'), 0.2083333543301126), (('Glucose', 'SkinThickness'), 0.20868927349287592), (('Pregnancies', 'BloodPressure'), 0.21740729481054016), (('Pregnancies', 'Age'), 0.22399202858805636), (('DiabetesPedigreeFunction', 'Age'), 0.

In [8]:
# Formula to evaluate, written over the feature names BMI, Age and Glucose.
# NOTE(review): ⊕ and ⊙ are presumably the Łukasiewicz strong disjunction and
# strong conjunction, given that Lukasiewicz.connectives is passed to the
# parser below — confirm against the parser in src.
phi_1 = '(((BMI⊕Age)⊙Glucose)⊕(BMI⊙Age))'
# Parse the formula. generate_ast returns two values: the syntax tree and a
# second value bound here to max_depth (presumably the tree's depth — confirm).
# NOTE(review): `ast` has the same name as the standard-library `ast` module
# and would shadow it if that module were ever imported in this notebook.
ast, max_depth = generate_ast(phi_1, Lukasiewicz.connectives)

In [9]:
from sklearn.metrics import accuracy_score, precision_score, f1_score

# Network outputs strictly above this value are classified as positive (1).
DECISION_THRESHOLD = 0.5

# Build the ReLU network from the parsed formula and materialise its layers.
# NOTE(review): `torch` is not imported explicitly in this notebook; it is
# presumably brought into scope by the star imports in the first cell.
ReLUNet = ast_to_ReLU(ast, max_depth, Lukasiewicz_connectives_to_ReLU)
ReLUNet.construct_layers()

# One input column per variable occurrence in phi_1, in reading order
# (presumably the order in which the network consumes its inputs — confirm).
input_columns = ["BMI", "Age", "Glucose", "BMI", "Age"]

# Drop repeated column labels before selecting, so that re-running this cell
# (when X already holds the duplicated columns) still yields exactly
# len(input_columns) columns instead of multiplying them.
X = X.loc[:, ~X.columns.duplicated()][input_columns]

# Re-derive the labels from df instead of calling y.to_numpy(), which would
# fail on a second run once y is already a NumPy array.
y = df['Outcome'].to_numpy()

# Derive the sample count from the data rather than hard-coding it, and make
# sure features and labels line up.
num_obs = len(X)
assert len(y) == num_obs, 'features and labels must have the same number of rows'

# Convert the feature table to a tensor once; iterating over it yields one
# 1-D float64 tensor per observation.
inputs = torch.tensor(X.to_numpy(), dtype=torch.float64)
predictions = [
    1 if ReLUNet(features).item() > DECISION_THRESHOLD else 0
    for features in inputs
]

# Predictions and labels are compared by position, so the metrics do not
# depend on the index labels of X.
accuracy = accuracy_score(y, predictions)
precision = precision_score(y, predictions)
f1 = f1_score(y, predictions)

print(f'Accuracy: {accuracy * 100:.2f}%')
print(f'Precision: {precision:.4f}')
print(f'F1 Score: {f1:.4f}')


Accuracy: 73.96%
Precision: 0.6650
F1 Score: 0.5781
