In [1]:
from itertools import combinations

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from autogluon.tabular import TabularPredictor
from sklearn.metrics import roc_curve, roc_auc_score, accuracy_score, recall_score, precision_score, balanced_accuracy_score
from sklearn.model_selection import train_test_split

from diversity_measures import generalized_diversity, entropy_measure, KW_variance, ia_measure, difficulty_measure

In [2]:
# Load the raw table and turn the '?' placeholder into a real missing value.
data = pd.read_csv('adult-income.csv').replace(['?'], np.nan)
# Binary target: True when the income label is exactly '>50K'.
data['income'] = data['income'] == '>50K'

In [3]:
# Reproducible 70/30 split: sample the training rows with a fixed seed, use
# the remaining rows for testing, then renumber both frames from 0.
train_data = data.sample(frac=0.7, random_state=123)
test_data = data.drop(index=train_data.index).reset_index(drop=True)
train_data = train_data.reset_index(drop=True)

In [4]:
# Fit a fresh AutoGluon predictor on the training split, optimising balanced
# accuracy with the 'good' preset and a 400 second time budget.
predictor = TabularPredictor(
    label='income',
    eval_metric='balanced_accuracy',
).fit(train_data=train_data, presets='good', time_limit=400)

No path specified. Models will be saved in: "AutogluonModels/ag-20250518_105907"
Preset alias specified: 'good' maps to 'good_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.0
Python Version:     3.12.3
Operating System:   Darwin
Platform Machine:   arm64
Platform Version:   Darwin Kernel Version 24.3.0: Thu Jan  2 20:24:23 PST 2025; root:xnu-11215.81.4~3/RELEASE_ARM64_T8122
CPU Count:          8
Memory Avail:       1.60 GB / 8.00 GB (20.0%)
Disk Space Avail:   37.35 GB / 228.27 GB (16.4%)
Presets specified: ['good']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
Note: `save_bag_folds=False`! This will greatly reduce peak disk usage during fit (by ~8x), but runs the risk of an out-of-memory error during model refit if memory is small relative to the data size.
	You can avoid this risk by set

KeyboardInterrupt: 

In [5]:
# Load the previously trained predictor used for the whole analysis below.
# The path is relative to the notebook's working directory (the project root),
# the same location AutoGluon uses by default when saving ("AutogluonModels/..."),
# so the notebook no longer depends on one user's absolute home directory.
predictor1 = TabularPredictor.load("AutogluonModels/ag-20250517_134150")

In [None]:
# Open question: why do the L1 (etc.) models not work?
# NOTE(review): the leaderboard output below reports can_infer=False for the
# listed models — presumably only the refit "*_FULL" models can predict; confirm.
predictor1.leaderboard()

Unnamed: 0,model,score_val,eval_metric,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,CatBoost_BAG_L2,0.813387,balanced_accuracy,6.101192,163.858719,0.018226,11.913323,2,False,17
1,WeightedEnsemble_L3,0.813387,balanced_accuracy,6.102802,165.831662,0.001611,1.972943,3,False,23
2,NeuralNetTorch_BAG_L2,0.809694,balanced_accuracy,6.407161,166.935099,0.324195,14.989703,2,False,22
3,WeightedEnsemble_L2,0.804464,balanced_accuracy,0.603134,37.309594,0.001344,0.404588,2,False,12
4,LightGBM_BAG_L2,0.800229,balanced_accuracy,6.37434,154.093761,0.291374,2.148365,2,False,14
5,RandomForestEntr_BAG_L2,0.799738,balanced_accuracy,6.561624,155.89305,0.478658,3.947654,2,False,16
6,XGBoost_BAG_L1,0.799313,balanced_accuracy,0.378362,5.618183,0.378362,5.618183,1,False,9
7,XGBoost_BAG_L2,0.799119,balanced_accuracy,6.215682,155.472842,0.132716,3.527446,2,False,21
8,RandomForestGini_BAG_L2,0.798786,balanced_accuracy,6.563464,155.845161,0.480498,3.899764,2,False,15
9,LightGBM_BAG_L1,0.798722,balanced_accuracy,0.497516,1.718541,0.497516,1.718541,1,False,2


In [6]:
# Candidate committee members: every refit ("*_FULL") model except the two
# refit weighted ensembles. Selecting by name suffix replaces the former
# positional slice `[23:]`, which silently picked the wrong models whenever
# the predictor contained a different number of models.
weighted_ensembles = {'WeightedEnsemble_L2_FULL', 'WeightedEnsemble_L3_FULL'}
model_names = [name for name in predictor1.model_names() if name not in weighted_ensembles]
mdls = [name for name in model_names if name.endswith('_FULL')]

In [8]:
# All 7-member and 5-member committees, expressed as tuples of model names.
comb7, comb5 = (list(combinations(mdls, size)) for size in (7, 5))

In [7]:
# Preview the class-probability array of the first candidate model on the test features.
predictor1.predict_proba(test_data.drop(columns='income'), model=mdls[0]).to_numpy()

array([[9.98902619e-01, 1.09735737e-03],
       [3.52998376e-01, 6.47001624e-01],
       [9.99639392e-01, 3.60627891e-04],
       ...,
       [6.32667542e-01, 3.67332488e-01],
       [7.44086742e-01, 2.55913228e-01],
       [9.73890841e-01, 2.61091813e-02]])

In [8]:
# Number of candidate models available for building committees.
len(mdls)

21

In [None]:
# Store, for every candidate model, its class probabilities and its hard
# predictions on the test set (one list entry per model, in `mdls` order).
# The probability collection had been commented out, which left `pred_prob`
# empty and made the diversity-measure cell fail with an IndexError.
# NOTE(review): predict_proba(...).to_numpy() has one column per class (see the
# two-column array displayed above) — confirm the diversity measures expect
# that layout rather than only the positive-class probability.
features_test = test_data.drop('income', axis=1)  # loop-invariant, computed once
pred_prob = []
pred = []
for mdl in mdls:
    pred_prob.append(predictor1.predict_proba(features_test, model=mdl).to_numpy())
    pred.append(predictor1.predict(features_test, model=mdl).to_numpy())

In [None]:
# Committees as tuples of positions into `mdls` (and into the per-model
# prediction lists). The index range follows the actual number of candidate
# models instead of a hard-coded 21.
num = list(range(len(mdls)))
comb_num = list(combinations(num, 5))

In [None]:
# Store the value of every diversity measure for each committee.
# Each measure is called as measure(y_true, 0.5, *member_probabilities);
# 0.5 is presumably the decision threshold — confirm in diversity_measures.
# Unpacking the members avoids repeating five positional indexes per call and
# works for committees of any size.
entropy_results = []
KW_results = []
ia_results = []
diff_results = []
gd_results = []

for committee in comb_num:
    member_probs = [pred_prob[idx] for idx in committee]
    gd_results.append(generalized_diversity(test_data.income, 0.5, *member_probs))
    entropy_results.append(entropy_measure(test_data.income, 0.5, *member_probs))
    KW_results.append(KW_variance(test_data.income, 0.5, *member_probs))
    ia_results.append(ia_measure(test_data.income, 0.5, *member_probs))
    diff_results.append(difficulty_measure(test_data.income, 0.5, *member_probs))
 

In [None]:
# Majority voting: a committee predicts 1 for a sample when more than half of
# its members predict the positive class (for 5 members: more than 2 votes).
# Vectorised with numpy instead of a Python loop over every
# (committee, sample) pair; each entry of `res` is a compact int8 array of
# 0/1 votes, one value per test sample, in `comb_num` order.
pred_matrix = np.asarray(pred).astype(int)  # rows: models, columns: test samples
res = []
for committee in comb_num:
    votes = pred_matrix[list(committee)].sum(axis=0)
    res.append((votes > len(committee) // 2).astype(np.int8))

In [None]:
# Store the metric values of every committee's voted predictions, in the
# order: accuracy, recall, precision, ROC AUC, balanced accuracy.
# NOTE(review): roc_auc_score is fed hard 0/1 votes here, not scores; for
# binary labels that should coincide with balanced accuracy — confirm intent.
scores = [
    [
        accuracy_score(test_data.income, votes),
        recall_score(test_data.income, votes),
        precision_score(test_data.income, votes),
        roc_auc_score(test_data.income, votes),
        balanced_accuracy_score(test_data.income, votes),
    ]
    for votes in res
]

In [None]:
# df1: raw metric table (integer column labels) plus the generalized
# diversity of each committee.
df1 = pd.DataFrame(scores)
df1['gd'] = gd_results
# df2: same rows with readable metric names, ordered by increasing
# generalized diversity and re-indexed from 0.
df2 = (
    df1.rename(columns={0: "acc", 1: "recall", 2: "prec", 3: 'auc', 4: 'bal_acc'})
    .sort_values(by='gd')
    .reset_index(drop=True)
)

In [None]:
# Quick look at all columns of df2 (metrics and gd) against the row index,
# i.e. committees ordered by increasing generalized diversity.
df2.plot()

In [None]:
# Plot every metric together with (1 - gd) for committees ordered by
# increasing generalized diversity (df2 is sorted by 'gd').
# Imports live in the first cell; the explicit Axes interface is used so the
# figure carries its own title and axis labels.
fig, ax = plt.subplots()
ax.plot(1 - df2.gd, label="1 - gd")
for column, label in [
    ('acc', 'acc'),
    ('recall', 'recall'),
    ('prec', 'precision'),
    ('auc', 'auc'),
    ('bal_acc', 'bal acc'),
]:
    ax.plot(df2[column], label=label)

ax.set_xlabel("committee rank (ascending generalized diversity)")
ax.set_ylabel("value")
ax.set_title("Committee metrics vs. generalized diversity")
ax.legend(loc="upper right")
plt.show()

In [None]:
# Number of committees for which voted predictions were produced.
len(res)

In [None]:
# Largest generalized-diversity value observed over all committees.
max(gd_results)

In [None]:
# Inspect the L3 weighted ensemble of the loaded predictor. The analysis uses
# `predictor1` throughout; `predictor` is only bound if the training cell runs
# to completion, which it did not in the recorded session (KeyboardInterrupt).
predictor1.model_info('WeightedEnsemble_L3')