In [1]:
# 정의 함수 load
from utils.data import *
from utils.model import *
from utils.metric import *
from utils.analysis import *

In [2]:
# Dataset List: dt_config/datasets
dt_config = dataset_config()
datasets= list(dt_config.keys())

# print('Dataset List:')
# print(datasets)

In [3]:
# Dataset Load: dt_dict
# datasets = [
#     'region_job_r', 'region_job_g', 'region_job_2_r', 'region_job_2_g', 
#     'nba_p', 'nba_m', 'german_g', 'german_f', 'german_s', 'german_t', 'german_h', 'german_e'
# ]

dt_dict = {}
for ds in datasets:
    data, df, cfg = load_and_prepare_dataset(dt_config, config_name=ds, seed=1127)
    dt_dict[ds] = {
        'data': data,
        'df': df,
        'cfg': cfg
    }

# Check Dataset: Train/Val/Test 민감속성 비율 동일하게 설정
# print_sensitive_attr_distribution(dt_dict['nba_p']['data'])

Loading region_job dataset from ./dataset/pokec
[INFO] 유효하지 않은 user_id로 인해 제거된 edge 수: 0
[region_job] sens=0: 43962, sens=1: 23834
--------------------------------------------------
Loading region_job dataset from ./dataset/pokec
[INFO] 유효하지 않은 user_id로 인해 제거된 edge 수: 0
[region_job] sens=0: 34308, sens=1: 33488
--------------------------------------------------
Loading region_job_2 dataset from ./dataset/pokec
[INFO] 유효하지 않은 user_id로 인해 제거된 edge 수: 0
[region_job_2] sens=0: 47338, sens=1: 19231
--------------------------------------------------
Loading region_job_2 dataset from ./dataset/pokec
[INFO] 유효하지 않은 user_id로 인해 제거된 edge 수: 0
[region_job_2] sens=0: 34125, sens=1: 32444
--------------------------------------------------
Loading nba dataset from ./dataset/NBA
[INFO] 유효하지 않은 user_id로 인해 제거된 edge 수: 1641
[nba] sens=0: 296, sens=1: 107
--------------------------------------------------
Loading nba dataset from ./dataset/NBA
[INFO] 유효하지 않은 user_id로 인해 제거된 edge 수: 1641
[nba] sens=0: 29

  return torch.sparse.FloatTensor(indices, values, shape)


[german] sens=0: 310, sens=1: 690
--------------------------------------------------
Loading german dataset from ./dataset/NIFTY
[german] sens=0: 310, sens=1: 690
--------------------------------------------------
Loading german dataset from ./dataset/NIFTY
[german] sens=0: 310, sens=1: 690
--------------------------------------------------
Loading german dataset from ./dataset/NIFTY
[german] sens=0: 310, sens=1: 690
--------------------------------------------------
Loading german dataset from ./dataset/NIFTY
[german] sens=0: 310, sens=1: 690
--------------------------------------------------
Loading german dataset from ./dataset/NIFTY
[german] sens=0: 310, sens=1: 690
--------------------------------------------------


In [None]:
# Evluate Dataset: eval_dt_df
eval_dt = {}
ev_dt_list = ['region_job_r', 'region_job_g', 'region_job_2_r', 'region_job_2_g', 'nba_p', 'german_g']
for ds in ev_dt_list:
    data = dt_dict[ds]['data']
    features = data.x
    edge_index = data.edge_index
    sens = data.sensitive_attr

    eval_dt[ds] = {
        "Homophily Ratio": homophily_ratio(edge_index, sens),
        "Assortativity Coefficient": assortativity_coefficient(data),
        "Local Neighborhood Fairness": local_neighborhood_fairness(edge_index, sens),
        "Degree Balance": degree_balance(edge_index, sens),
        'Structural Bias': structural_bias(features, edge_index, sens)
    }

eval_dt_df = pd.DataFrame(eval_dt).T
print(eval_dt_df)

In [4]:
# Experiments Setting
runs=5
epochs=500

cuda=torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')
print(f"Using device: {device}")

seed=1127
np.random.seed(seed)
torch.manual_seed(seed)
if cuda:
    torch.cuda.manual_seed(seed)

# 공통
lr=0.001
weight_decay=1e-5

# model mapping
model_map = {
    'FnRGNN': lambda data: FnRGNN(nfeat=data.x.size(1), hidden_dim=64, dropout=0.5, lm=3, ld=1, mmd_sample=500, lr=lr, weight_decay=weight_decay),
    'FairGNN': lambda data: FairGNN(nfeat=data.x.size(1), hidden_dim=64, model='GCN', dropout=0.5, hidden=128, alpha=4, beta=0.01, lr=lr, weight_decay=weight_decay),
    'FMP': lambda data: FMP(data, num_hidden=64, num_layers=5, num_gnn_layer=2, lambda1=3, lambda2=3, dropout=0.5, num_classes=1, L2=True, cached=False),
    'GMMD': lambda data: GMMD(in_channels=data.x.size(1), hidden_channels=64),
    'EDITS': lambda data: EDITS(nfeat=data.x.size(1), node_num=data.x.size(0), nfeat_out=int(data.x.size(0) / 10), adj_lambda=1e-1, layer_threshold=2, dropout=0.2, lr=lr, weight_decay=weight_decay),
    'MLPRegressor': lambda data: MLPRegressor(in_dim=data.x.size(1), hidden_dim=64),
    'GCNRegressor': lambda data: GCNRegressor(in_dim=data.x.size(1), hidden_dim=64),
    'GATRegressor': lambda data: GATRegressor(in_dim=data.x.size(1), hidden_dim=64, heads=1),
    'GraphSAGERegressor': lambda data: GraphSAGERegressor(in_dim=data.x.size(1), hidden_dim=64),
    'GINRegressor': lambda data: GINRegressor(in_dim=data.x.size(1), hidden_dim=64),
}


Using device: cuda:0


In [None]:
# Training
tr_md_list = [
    'FnRGNN', 'FairGNN', 'FMP', 'GMMD', 
    # 'EDITS',
    # 'MLPRegressor', 'GCNRegressor', 'GATRegressor', 'GraphSAGERegressor', 'GINRegressor', 
    ]
tr_dt_list = [
    # 'region_job_r', 
    'region_job_2_r', 
    # 'nba_p', 'nba_m', 'german_g', 'german_f', 'german_s', 'german_t', 'german_h', 'german_e'
    ]

# # FairGNN Sensitive Model
# for ds in tr_dt_list:
#     data = dt_dict[ds]['data']
#     data = data.to(device)

#     cfg = dt_dict[ds]['cfg']
#     dn = cfg['dn']

#     print(f'Train FairGNN Sensitive Model dataset from {ds}')
    
#     fair_sen_model = GCN(nfeat=data.x.shape[1], nhid=128, nclass=1, dropout=0.5).to(device)
#     fair_optimizer = optim.Adam(fair_sen_model.parameters(), lr=lr, weight_decay=weight_decay)
#     criterion = torch.nn.MSELoss()
#     best_mse = float('inf')
#     best_result = {}

#     for epoch in range(epochs + 1):    
#         fair_sen_model.train()
#         fair_optimizer.zero_grad()
#         output = fair_sen_model(data.x, data.edge_index)
#         loss = criterion(output[data.idx_train], data.y[data.idx_train].unsqueeze(1))
#         loss.backward()
#         fair_optimizer.step()

#         fair_fastmode=False
#         if not fair_fastmode:
#             fair_sen_model.eval()
#             with torch.no_grad():
#                 output = fair_sen_model(data.x, data.edge_index)
#                 mse_val = mean_squared_error(data.sensitive_attr[data.idx_val].cpu().numpy(), output[data.idx_val].cpu().numpy())
#                 mse_test = mean_squared_error(data.sensitive_attr[data.idx_test].cpu().numpy(), output[data.idx_test].cpu().numpy())

#         if epoch % 50 == 0:
#             print(f"Epoch [{epoch}] Test set results:",
#                 f"mse_val={mse_val:.4f}, mse_test={mse_test:.4f}")

#             if mse_val < best_mse:
#                 best_mse = mse_val
#                 best_result = {'mse': mse_test}
#                 torch.save(fair_sen_model.state_dict(), f"./checkpoint/GCN_sens_{ds}_ns_{cfg['sens_number']}")

#     print(f"The best MSE of estimator: {best_result['mse']:.4f}")
#     print("Optimization Finished!")

# All Model
for ds in tr_dt_list:
    data = dt_dict[ds]['data']
    data = data.to(device)

    cfg = dt_dict[ds]['cfg']
    dn = cfg['dn']

    for md in tr_md_list:
        print(f'Train {md} dataset from {ds}')
        os.makedirs(f'./model/{md}', exist_ok=True) 
        model_path = f'./model/{md}/{dn}_md.pth'
        
        for run in range(runs + 1):
            model = model_map[md](data).to(device)

            if md not in ['FnRGNN', 'FairGNN', 'EDITS']:
                optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
                criterion = torch.nn.MSELoss()
            elif md == 'FairGNN':
                try:
                    model.estimator.load_state_dict(torch.load( f"./checkpoint/GCN_sens_{ds}_ns_{cfg['sens_number']}", map_location=torch.device(device) ))
                except Exception as e:
                    print(f"Checkpoint load failed: {e}")
            elif md == 'EDITS':
                features = data.x.to(device).to(torch.float32)
                labels = data.y.to(device).to(torch.float32)
                sens = data.sensitive_attr.to(device).to(torch.float32)

                adj = data.adj
                if isinstance(adj, torch.Tensor):
                    adj = adj.to(device).to(torch.float32)
                else:
                    adj = torch.FloatTensor(adj.toarray()).to(device).to(torch.float32)

                idx_train = data.idx_train
                idx_val = data.idx_val
                idx_test = data.idx_test
            
            best_score = float('inf')
            best_model_state = None

            for epoch in range(epochs + 1):
                if md == 'FnRGNN':
                    loss = model.optimize(data)
                elif md == 'FairGNN':
                    model.optimize(data)
                elif md == 'EDITS':
                    lr_adj = 0.001 if epoch > 400 else 0.003
                    model.train()
                    model.optimize(adj, features, idx_train, sens, epoch, lr_adj)
                elif md == 'GMMD':
                    model.train()
                    optimizer.zero_grad()
                    pred, mmd_loss = model(data)
                    pred = pred.squeeze()
                    loss = mmd_loss + criterion(pred[data.idx_train], data.y[data.idx_train].squeeze())
                    loss.backward()
                    optimizer.step()
                else:
                    model.train()
                    optimizer.zero_grad()
                    output = model(data)
                    if md == 'FMP':
                        loss = criterion(output[data.idx_train], data.y[data.idx_train].unsqueeze(1))
                    else:
                        loss = criterion(output[data.idx_train].unsqueeze(1), data.y[data.idx_train].unsqueeze(1))
                    loss.backward()
                    optimizer.step()

                # validation
                model.eval()
                with torch.no_grad():
                    if md == 'EDITS':
                        adj_sparse = to_scipy_sparse_matrix(data.edge_index).tocoo()
                        adj = sparse_mx_to_torch_sparse_tensor(adj_sparse).to(device)
                        features = data.x
                        if not hasattr(model.adj_renew, "estimator"):
                            model.adj_renew.fit(adj, lr=0.003)
                        output = model(adj, features)[0] 
                    elif md in ['FnRGNN', 'FairGNN', 'GMMD']:
                        output, _ = model(data)
                    else:
                        output = model(data)

                    y_true, idx_val, sensitive_attr = data.y, data.idx_val, data.sensitive_attr
                    mse_val = mean_squared_error(y_true[idx_val].cpu().numpy().squeeze(), output[idx_val].cpu().numpy().squeeze())
                    mae_val = mean_absolute_error(y_true[idx_val].cpu().numpy().squeeze(), output[idx_val].cpu().numpy().squeeze())
                    mse_diff, mae_diff, mean_diff, var_diff = fair_metric_regression(output[idx_val].cpu(), y_true[idx_val].cpu(), sensitive_attr[idx_val].cpu())
                    dist_val = group_distribution_metrics(y_true[idx_val].cpu().numpy().squeeze(), output[idx_val].cpu().numpy().squeeze(), sensitive_attr[idx_val].cpu().numpy())
                    
                    if md == 'FnRGNN':
                        val_score = mse_val + 0.5 * mean_diff
                    else:
                        val_score = mse_val

                    if val_score < best_score:
                        best_mse = val_score
                        best_model_state = copy.deepcopy(model.state_dict())
                        torch.save(best_model_state, model_path)
                    
                    if epoch % 100 == 0:
                        print(f"[{md}] Run {run}, Epoch {epoch} | "
                            f"Val MSE: {mse_val:.2f}, MAE: {mae_val:.2f}, \n"
                            f"MSE Diff: {mse_diff:.2f}, MAE Diff: {mae_diff:.2f}, MEAN Diff: {mean_diff:.2f}, Var Diff: {var_diff:.2f}, "
                            f"Wasserstein Diff: {dist_val['wasserstein']:.2f}, JS Diff: {dist_val['js']:.2f}"
                        )

In [None]:
# Testing
ts_md_list = ['FnRGNN', 'FairGNN', 'FMP', 'GMMD', 'EDITS', 'MLPRegressor', 'GCNRegressor', 'GATRegressor', 'GraphSAGERegressor', 'GINRegressor']
ts_dt_list = ['region_job_r', 'region_job_2_r', 'nba_p', 'nba_m', 'german_g', 'german_f', 'german_s', 'german_t', 'german_h', 'german_e']

ts_results = {}
for ds in ts_dt_list:
    print(f'\n>>> Evaluating Dataset: {ds}')
    data = dt_dict[ds]['data']
    data = data.to(device)

    cfg = dt_dict[ds]['cfg']
    dn = cfg['dn']

    for md in ts_md_list:
        print(f'Test {md} dataset from {ds}')
        model_path = f'./model/{md}/{dn}_md.pth'

        if not os.path.exists(model_path):
            print(f"[{md} - {ds}] 모델 파일 없음: {model_path}")
            ts_results.setdefault(md, {})[ds] = None
            continue

        result = []
        for run in range(runs + 1):
            try:
                model = model_map[md](data).to(device)

                state_dict = torch.load(model_path)
                state_dict.pop("adj_renew.estimator.estimated_adj", None)  # EDITS 대응
                model.load_state_dict(state_dict, strict=False)

                model.eval()
                with torch.no_grad():
                    if md == 'EDITS':
                        adj_sparse = to_scipy_sparse_matrix(data.edge_index).tocoo()
                        adj = sparse_mx_to_torch_sparse_tensor(adj_sparse).to(device)
                        features = data.x
                        if not hasattr(model.adj_renew, "estimator"):
                            model.adj_renew.fit(adj, lr=0.003)
                        output = model(adj, features)[0]
                    elif md in ['FnRGNN', 'FairGNN', 'GMMD']:
                        output, _ = model(data)
                    else:
                        output = model(data)

                    y_true, idx_test, sensitive_attr = data.y, data.idx_test, data.sensitive_attr
                    mse_test = mean_squared_error(y_true[idx_test].cpu(), output[idx_test].cpu())
                    mae_test = mean_absolute_error(y_true[idx_test].cpu(), output[idx_test].cpu())
                    mse_diff, mae_diff, mean_diff, var_diff = fair_metric_regression(
                        output[idx_test].cpu(), y_true[idx_test].cpu(), sensitive_attr[idx_test].cpu()
                    )
                    dist_test = group_distribution_metrics(
                        y_true[idx_test].cpu().numpy().squeeze(),
                        output[idx_test].cpu().numpy().squeeze(),
                        sensitive_attr[idx_test].cpu().numpy()
                    )

                result.append([mse_test, mse_diff, mae_test, mae_diff, mean_diff, var_diff,
                               dist_test['wasserstein'], dist_test['js'], dist_test['kl'], dist_test['ks'], dist_test['cvm'], dist_test['tv']])
            except Exception as e:
                print(f"[{md} - {ds} - Run {run}] 오류 발생: {e}")
                result.append(None)

        ts_results.setdefault(md, {})[ds] = result


In [None]:
# Results Check
metric_names = ['mse', 'mse_diff',  'mae', 'mae_diff', 'mean_diff', 'var_diff', 'wasserstein', 'js', 'kl', 'ks', 'cvm', 'tv']

for md in ts_md_list:
    md_results = ts_results[md]

    summary_table = {}
    for dataset, runs_rst in md_results.items():
        if runs_rst is None:
            continue
        valid_runs = [r for r in runs_rst if r is not None]
        if not valid_runs:
            continue

        arr = np.array(valid_runs)
        means = np.mean(arr, axis=0)
        stds = np.std(arr, axis=0)

        # summary_table[dataset] = [
        #     f"{mean:.4f} ± {std:.4f}" for mean, std in zip(means, stds)
        # ]
        summary_table[dataset] = [
            f"{mean:.4f}" for mean, std in zip(means, stds)
        ]

    df_summary = pd.DataFrame(summary_table, index=metric_names)
    globals()[f'df_{md}_summary'] = df_summary

selected_metrics = ['mse', 'mae', 'mean_diff', 'var_diff', 'wasserstein']
selected_dt_list = [
    'nba_p', 'nba_m', 
    'german_g', 'german_f', 'german_s', 
    'german_t', 'german_h', 'german_e',
    'region_job_r', 'region_job_2_r', 
    ]
selected_md_list = ['GCNRegressor', 'FairGNN', 'FMP', 'GMMD', 'EDITS', 'FnRGNN']

all_tables = {}
for ds in selected_dt_list:
    result_table = {}
    for md in selected_md_list:
        df = globals().get(f'df_{md}_summary')
        if df is not None:
            if isinstance(df.index, pd.Index):
                filtered_df = df.loc[df.index.intersection(selected_metrics)]
                if ds in filtered_df.columns:
                    result_table[md] = filtered_df[ds]
    if result_table:
        df_combined = pd.DataFrame(result_table)
        df_combined.index.name = "Metric"
        all_tables[ds] = df_combined

        print(f'{ds} Results:')
        print(df_combined)


In [None]:
# latex 테이블 생성
df_latex = all_tables['german_e']
dt = '\cellcolor{lightblue}Loan/Employ.'
df_latex.index.name = dt
df_latex = df_latex.reset_index()
df_latex.rename(columns={'GCNRegressor': 'GCN'}, inplace=True)
df_latex[dt] = df_latex[dt].replace({'mse': 'MSE', 'mae': 'MAE', 'mean_diff': 'Mean Gap', 'var_diff': 'Var Gap', 'wasserstein': 'WD'})

# LaTeX 테이블로 변환
print(df_latex.to_latex(index=False, float_format="%.4f"))

In [None]:
# 결과 시각화
selected_md_list = [
    "GCNRegressor", 
    "FairGNN", "FMP", "GMMD", 
    "EDITS", 
    "FnRGNN"
                    ]
selected_dt_list = [
    'nba_p', 'nba_m', 
    'german_g', 'german_f', 'german_s', 
    'german_t', 'german_h', 'german_e',
    'region_job_r', 'region_job_2_r', 
    ]
legend_names = {
    'GCNRegressor': 'GCN',
    'FairGNN': 'FairGNN',
    'FMP': 'FMP',
    'GMMD': 'GMMD',
    'EDITS': 'EDITS',
    'FnRGNN': r'$\mathbf{FnRGNN}$'  # LaTeX 강조
}
markers = {
    'GCNRegressor': 'o',
    'FairGNN': 'o',
    'FMP': 'o',
    'GMMD': 'o',
    'EDITS': 'o',
    'FnRGNN': '*'  # 또는 'x'
}

sns.set(style="whitegrid", font_scale=0.6)
palette = sns.color_palette("Set2", len(selected_md_list))
model_colors = {md: color for md, color in zip(selected_md_list, palette)}
plt.figure(figsize=(3.5, 2.5))  # 논문용 크기

scatter_data = []

for md in selected_md_list:
    for dt in selected_dt_list:
        if md in ts_results and dt in ts_results[md] and ts_results[md][dt] is not None:
            for r in ts_results[md][dt]:
                if r is not None:
                    mse = r[0]
                    wd = r[6]
                    plt.scatter(
                        mse, wd,
                        color=model_colors[md],
                        marker=markers.get(md, 'o'),
                        label=legend_names.get(md, md),
                        alpha=0.7,
                        s=100 if md == 'FnRGNN' else 30
                    )
                    scatter_data.append(legend_names.get(md, md))

handles, labels = plt.gca().get_legend_handles_labels()
unique = dict(zip(labels, handles))
plt.legend(unique.values(), unique.keys(), bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=8)

plt.xlabel('MSE (Accuracy)', size=8)
plt.ylabel('Wasserstein Distance (Fairness)', size=8)
plt.xlim(-0.1, 2.1)
plt.ylim(-0.01, 0.16)
plt.tight_layout()
plt.show()

plt.savefig("exp_results.pdf", bbox_inches='tight')

In [4]:
# Ablation study
runs=5
epochs=500

cuda=torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')
print(f"Using device: {device}")

seed=1127
np.random.seed(seed)
torch.manual_seed(seed)
if cuda:
    torch.cuda.manual_seed(seed)

lr=0.001
weight_decay=1e-5

# model mapping
model_map = {
    'Full': lambda data: FnRGNN(nfeat=data.x.size(1), hidden_dim=64, dropout=0.5, lm=3, ld=1, mmd_sample=500, lr=lr, weight_decay=weight_decay, use_mmd=True, use_gwn=True, use_edge_weight=True),
    'No_MMD': lambda data: FnRGNN(nfeat=data.x.size(1), hidden_dim=64, dropout=0.5, lm=3, ld=1, mmd_sample=500, lr=lr, weight_decay=weight_decay, use_mmd=False, use_gwn=True, use_edge_weight=True),
    'No_GWN': lambda data: FnRGNN(nfeat=data.x.size(1), hidden_dim=64, dropout=0.5, lm=3, ld=1, mmd_sample=500, lr=lr, weight_decay=weight_decay, use_mmd=True, use_gwn=False, use_edge_weight=True),
    'No_Edge': lambda data: FnRGNN(nfeat=data.x.size(1), hidden_dim=64, dropout=0.5, lm=3, ld=1, mmd_sample=500, lr=lr, weight_decay=weight_decay, use_mmd=True, use_gwn=True, use_edge_weight=False),
    'Vanilla': lambda data: FnRGNN(nfeat=data.x.size(1), hidden_dim=64, dropout=0.5, lm=3, ld=1, mmd_sample=500, lr=lr, weight_decay=weight_decay, use_mmd=False, use_gwn=False, use_edge_weight=False),
}

ab_md_list = ['Full', 'No_MMD', 'No_GWN', 'No_Edge', 'Vanilla']
ab_dt_list = ['region_job_r', 'region_job_2_r', 'nba_p', 'nba_m', 'german_g', 'german_f', 'german_s', 'german_t', 'german_h', 'german_e']

# train
for ds in ab_dt_list:
    data = dt_dict[ds]['data']
    data = data.to(device)

    cfg = dt_dict[ds]['cfg']
    dn = cfg['dn']

    for md in ab_md_list:
        print(f'Train Ablation study ({md}) dataset from {ds}')
        os.makedirs(f'./model/ablation', exist_ok=True) 
        model_path = f'./model/ablation/{md}_{dn}_md.pth'
        
        for run in range(runs + 1):
            model = model_map[md](data).to(device)

            best_score = float('inf')
            best_model_state = None

            for epoch in range(epochs + 1):
                loss = model.optimize(data)

                # validation
                model.eval()
                with torch.no_grad():
                    output, _ = model(data)

                    y_true, idx_val, sensitive_attr = data.y, data.idx_val, data.sensitive_attr
                    mse_val = mean_squared_error(y_true[idx_val].cpu().numpy().squeeze(), output[idx_val].cpu().numpy().squeeze())
                    mae_val = mean_absolute_error(y_true[idx_val].cpu().numpy().squeeze(), output[idx_val].cpu().numpy().squeeze())
                    mse_diff, mae_diff, mean_diff, var_diff = fair_metric_regression(output[idx_val].cpu(), y_true[idx_val].cpu(), sensitive_attr[idx_val].cpu())
                    dist_val = group_distribution_metrics(y_true[idx_val].cpu().numpy().squeeze(), output[idx_val].cpu().numpy().squeeze(), sensitive_attr[idx_val].cpu().numpy())
                    
                    val_score = mse_val + 0.5 * mean_diff
                    if val_score < best_score:
                        best_mse = val_score
                        best_model_state = copy.deepcopy(model.state_dict())
                        torch.save(best_model_state, model_path)
                    
                    if epoch % 100 == 0:
                        print(f"[{md}] Run {run}, Epoch {epoch} | "
                            f"Val MSE: {mse_val:.2f}, MAE: {mae_val:.2f}, \n"
                            f"MEAN Diff: {mean_diff:.2f}, Var Diff: {var_diff:.2f}, Wasserstein Diff: {dist_val['wasserstein']:.2f}")
                        

# test
ab_results = {}
for ds in ab_dt_list:
    print(f'\n>>> Evaluating Ablation Study Dataset: {ds}')
    data = dt_dict[ds]['data']
    data = data.to(device)

    cfg = dt_dict[ds]['cfg']
    dn = cfg['dn']

    for md in ab_md_list:
        print(f'Test {md} dataset from {ds}')
        model_path = f'./model/ablation/{md}_{dn}_md.pth'

        if not os.path.exists(model_path):
            print(f"[{md} - {ds}] 모델 파일 없음: {model_path}")
            ts_results.setdefault(md, {})[ds] = None
            continue

        result = []
        for run in range(runs + 1):
            try:
                model = model_map[md](data).to(device)
                state_dict = torch.load(model_path)
                model.load_state_dict(state_dict, strict=False)

                model.eval()
                with torch.no_grad():
                    output, _ = model(data)

                    y_true, idx_test, sensitive_attr = data.y, data.idx_test, data.sensitive_attr
                    mse_test = mean_squared_error(y_true[idx_test].cpu(), output[idx_test].cpu())
                    mae_test = mean_absolute_error(y_true[idx_test].cpu(), output[idx_test].cpu())
                    mse_diff, mae_diff, mean_diff, var_diff = fair_metric_regression(output[idx_test].cpu(), y_true[idx_test].cpu(), sensitive_attr[idx_test].cpu())
                    dist_test = group_distribution_metrics(y_true[idx_test].cpu().numpy().squeeze(), output[idx_test].cpu().numpy().squeeze(), sensitive_attr[idx_test].cpu().numpy())

                result.append([mse_test, mae_test, mean_diff, var_diff, dist_test['wasserstein']])

            except Exception as e:
                print(f"[{md} - {ds} - Run {run}] 오류 발생: {e}")
                result.append(None)

        ab_results.setdefault(md, {})[ds] = result
  

Using device: cuda:0
Train Ablation study (Full) dataset from region_job_r
[Full] Run 0, Epoch 0 | Val MSE: 0.96, MAE: 0.90, 
MEAN Diff: 0.00, Var Diff: 0.00, Wasserstein Diff: 0.00
[Full] Run 0, Epoch 100 | Val MSE: 0.54, MAE: 0.62, 
MEAN Diff: 0.00, Var Diff: 0.01, Wasserstein Diff: 0.01
[Full] Run 0, Epoch 200 | Val MSE: 0.50, MAE: 0.58, 
MEAN Diff: 0.00, Var Diff: 0.00, Wasserstein Diff: 0.01
[Full] Run 0, Epoch 300 | Val MSE: 0.48, MAE: 0.56, 
MEAN Diff: 0.00, Var Diff: 0.00, Wasserstein Diff: 0.01
[Full] Run 0, Epoch 400 | Val MSE: 0.47, MAE: 0.55, 
MEAN Diff: 0.02, Var Diff: 0.00, Wasserstein Diff: 0.01
[Full] Run 0, Epoch 500 | Val MSE: 0.46, MAE: 0.54, 
MEAN Diff: 0.01, Var Diff: 0.00, Wasserstein Diff: 0.00
[Full] Run 1, Epoch 0 | Val MSE: 0.97, MAE: 0.89, 
MEAN Diff: 0.01, Var Diff: 0.00, Wasserstein Diff: 0.01
[Full] Run 1, Epoch 100 | Val MSE: 0.54, MAE: 0.61, 
MEAN Diff: 0.00, Var Diff: 0.01, Wasserstein Diff: 0.00
[Full] Run 1, Epoch 200 | Val MSE: 0.50, MAE: 0.58, 
MEAN

In [20]:
# Ablation Results Check
metric_names = ['mse', 'mae', 'mean_diff', 'var_diff', 'wasserstein']

for md in ab_md_list:
    md_results = ab_results[md]

    summary_table = {}
    for dataset, runs_rst in md_results.items():
        if runs_rst is None:
            continue
        valid_runs = [r for r in runs_rst if r is not None]
        if not valid_runs:
            continue

        arr = np.array(valid_runs)
        means = np.mean(arr, axis=0)
        stds = np.std(arr, axis=0)
        summary_table[dataset] = [f"{mean:.4f}" for mean, std in zip(means, stds)]

    df_summary = pd.DataFrame(summary_table, index=metric_names)
    globals()[f'df_{md}_summary'] = df_summary

all_tables = {}
for ds in ab_dt_list:
    result_table = {}
    for md in [ 'Vanilla', 'No_Edge', 'No_MMD', 'No_GWN', 'Full']:
        df = globals().get(f'df_{md}_summary')
        if df is not None:
            if isinstance(df.index, pd.Index):
                filtered_df = df.loc[df.index.intersection(metric_names)]
                if ds in filtered_df.columns:
                    result_table[md] = filtered_df[ds]
    if result_table:
        df_combined = pd.DataFrame(result_table)
        df_combined.index.name = "Metric"
        all_tables[ds] = df_combined

        print(f'{ds} Results:')
        print(df_combined)


region_job_r Results:
            Vanilla No_Edge  No_MMD  No_GWN    Full
Metric                                             
mse          0.4092  0.4566  0.4589  0.4103  0.4581
mae          0.4934  0.5368  0.5425  0.4939  0.5372
mean_diff    0.0828  0.0023  0.0025  0.0635  0.0122
var_diff     0.0455  0.0075  0.0115  0.0420  0.0032
wasserstein  0.0182  0.0088  0.0119  0.0183  0.0168
region_job_2_r Results:
            Vanilla No_Edge  No_MMD  No_GWN    Full
Metric                                             
mse          0.4192  0.4645  0.4669  0.4193  0.4683
mae          0.5002  0.5403  0.5451  0.4994  0.5506
mean_diff    0.1433  0.0255  0.0019  0.1119  0.0150
var_diff     0.0205  0.0030  0.0093  0.0351  0.0013
wasserstein  0.0155  0.0034  0.0185  0.0266  0.0076
nba_p Results:
            Vanilla No_Edge  No_MMD  No_GWN    Full
Metric                                             
mse          0.3599  0.4939  0.5210  0.3697  0.5425
mae          0.4442  0.5057  0.5136  0.4502  0.5279
mea

In [21]:
# 모든 값을 숫자형(float)으로 변환 (변환 불가한 값은 NaN 처리)
nba_1 = all_tables['nba_m'].apply(pd.to_numeric, errors='coerce')
nba_2 = all_tables['nba_p'].apply(pd.to_numeric, errors='coerce')
# nba_3 = all_tables['german_s'].apply(pd.to_numeric, errors='coerce')
# nba_4 = all_tables['german_t'].apply(pd.to_numeric, errors='coerce')
# nba_5 = all_tables['german_g'].apply(pd.to_numeric, errors='coerce')
# nba_6 = all_tables['german_h'].apply(pd.to_numeric, errors='coerce')

# 두 DataFrame 더하기
combined_df = nba_1 + nba_2 
# + nba_3 + nba_4 + nba_5 + nba_6 

# 평균 계산
average_df = combined_df / 2

# 결과 출력
print(average_df)


             Vanilla  No_Edge   No_MMD   No_GWN     Full
Metric                                                  
mse          0.39185  0.55505  0.58305  0.39860  0.60095
mae          0.47955  0.56905  0.58055  0.48385  0.59550
mean_diff    0.11330  0.04745  0.05690  0.10870  0.05285
var_diff     0.11275  0.03790  0.03200  0.05700  0.01795
wasserstein  0.04765  0.05630  0.04665  0.05900  0.04005


In [26]:
all_tables['nba_m']

Unnamed: 0_level_0,Vanilla,No_Edge,No_MMD,No_GWN,Full
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
mse,0.4238,0.6162,0.6451,0.4275,0.6594
mae,0.5149,0.6324,0.6475,0.5175,0.6631
mean_diff,0.1692,0.0087,0.0265,0.149,0.0211
var_diff,0.1072,0.0578,0.0444,0.0862,0.0245
wasserstein,0.0705,0.0395,0.0229,0.0699,0.0077


In [22]:
# latex 테이블 생성
df_latex = average_df.reset_index()
df_latex['Metric'] = df_latex['Metric'].replace({'mse': 'MSE', 'mae': 'MAE', 'mean_diff': 'Mean Gap', 'var_diff': 'Var Gap', 'wasserstein': 'WD'})
print(df_latex.to_latex(index=False, float_format="%.4f"))

\begin{tabular}{lrrrrr}
\toprule
Metric & Vanilla & No_Edge & No_MMD & No_GWN & Full \\
\midrule
MSE & 0.3919 & 0.5551 & 0.5831 & 0.3986 & 0.6009 \\
MAE & 0.4796 & 0.5691 & 0.5805 & 0.4839 & 0.5955 \\
Mean Gap & 0.1133 & 0.0474 & 0.0569 & 0.1087 & 0.0528 \\
Var Gap & 0.1128 & 0.0379 & 0.0320 & 0.0570 & 0.0180 \\
WD & 0.0476 & 0.0563 & 0.0467 & 0.0590 & 0.0401 \\
\bottomrule
\end{tabular}

