In [1]:
cd ..

m:\OneDrive\Projects\federated_imputation


In [2]:
%load_ext autoreload
%autoreload
import os
import numpy as np
import json
from src.fed_imp.sub_modules.client.simple_client import SimpleClient
from src.modules.data_preprocessing import load_data
from src.modules.data_spliting import split_train_test
from src.fed_imp.sub_modules.server.load_server import load_server
import multiprocessing as mp


## code

In [27]:
# get all directories under root_dir
def get_all_dirs(root_dir, method):
    """Find the result folder and the experiment JSON file for ``method``.

    ``root_dir`` is walked recursively. A directory is selected when
    ``method`` occurs in its path *relative to* ``root_dir``; a file is
    selected when it ends with ``.json`` and ``method`` occurs in its path
    relative to ``root_dir``. Matching on the relative path keeps components
    of ``root_dir`` itself (data name, scenario, ...) from producing false
    matches. When several entries match, the last one in ``os.walk`` order
    is returned.

    Args:
        root_dir: Directory to search.
        method: Substring identifying the method's folder / JSON file.

    Returns:
        Tuple ``(data_dir, exp_file)`` of paths built with ``os.path.join``
        from ``root_dir``.

    Raises:
        ValueError: If no matching directory or no matching ``.json`` file
            exists under ``root_dir``.
    """
    data_dir, exp_file = None, None
    for current, subdirs, filenames in os.walk(root_dir):
        for name in subdirs:
            path = os.path.join(current, name)
            # Compare against the part below root_dir only.
            if method in os.path.relpath(path, root_dir):
                data_dir = path
        for name in filenames:
            path = os.path.join(current, name)
            if name.endswith(".json") and method in os.path.relpath(path, root_dir):
                exp_file = path

    if data_dir is None:
        raise ValueError("No folder for such method: {}".format(method))
    if exp_file is None:
        raise ValueError("No experiment .json file for such method: {}".format(method))
    return data_dir, exp_file


In [5]:
def main_prediction(data_dir, server_name, server_config, server_pred_config, test_data, n_clients, round_):
    """Run the prediction step for one round and return the server's result.

    Loads ``imputed_data_<round_>.npy``, ``origin_data_<round_>.npy`` and
    ``missing_mask_<round_>.npy`` from ``data_dir``, builds one
    ``SimpleClient`` per client id (indexing the arrays by client id on their
    first axis), creates the server through ``load_server`` and returns
    whatever ``server.prediction()`` returns. Progress is printed along the
    way.
    """
    print("round: {}".format(round_))

    # Load the three per-round arrays in a fixed order before reporting shapes.
    file_templates = ("imputed_data_{}.npy", "origin_data_{}.npy", "missing_mask_{}.npy")
    data_imp, data_true, missing_mask = [
        np.load(os.path.join(data_dir, template.format(round_)))
        for template in file_templates
    ]

    print("data_imp.shape: {}".format(data_imp.shape))
    print("data_true.shape: {}".format(data_true.shape))
    print("missing_mask.shape: {}".format(missing_mask.shape))

    # One client per id, each holding its slice of the arrays plus the test set.
    clients = {
        idx: SimpleClient(
            client_id=idx,
            data_imp=data_imp[idx],
            missing_mask=missing_mask[idx],
            data_true=data_true[idx],
            data_test=test_data.values,
        )
        for idx in range(n_clients)
    }

    server = load_server(
        server_name,
        clients=clients,
        server_config=server_config,
        pred_config=server_pred_config,
        test_data=test_data.values,
    )

    result = server.prediction()
    print(result)
    return result


def prediction(main_config, server_config_, pred_rounds, seed, mtp = False):
    """Evaluate imputed data for every stored round and aggregate the metrics.

    Args:
        main_config: Dict with keys "data", "n_clients", "sample_size",
            "scenario", "mr" and "method"; used to build the results
            directory and to pick the method's folder / JSON file.
        server_config_: Dict with keys "server_name", "server_pred_config"
            and "server_config". It is not modified; the nested
            "server_config" dict is copied before "pred_rounds" and "seed"
            are written into it.
        pred_rounds: Value stored under server_config["pred_rounds"].
        seed: Value stored under server_config["seed"]. The train/test split
            uses the seed recorded in the experiment JSON, not this one.
        mtp: If true, rounds are evaluated in a ``multiprocessing.Pool``.
            NOTE(review): pool workers must be able to import
            ``main_prediction``; with the "spawn" start method a function
            defined in a notebook cell may not be usable - verify before
            enabling.

    Returns:
        Dict holding, for every key of a per-round result, the mean over
        rounds under that key and the standard deviation under
        "<key>_std".

    Raises:
        ValueError: If the experiment JSON reports fewer than one round, or
            (from ``get_all_dirs``) if no folder / JSON matches the method.
    """
    dataset_name = main_config["data"]
    n_clients = main_config["n_clients"]
    sample_size = main_config["sample_size"]
    scenario = main_config["scenario"]
    mr = main_config["mr"]
    method = main_config["method"]
    # server
    server_name = server_config_["server_name"]
    server_pred_config = server_config_["server_pred_config"]
    # Work on a copy so the caller's template dict is left untouched.
    server_config = dict(server_config_["server_config"])
    server_config["pred_rounds"] = pred_rounds
    server_config["seed"] = seed

    ###################################################################################
    # Main part
    ###################################################################################
    root_dir = "./results/raw_results/{}/{}/sample@p={}/{}/{}/".format(
        dataset_name, n_clients, sample_size, scenario, mr
    )
    data_dir, exp_file = get_all_dirs(root_dir, method)

    ####################################################################################
    # Find overall train and test data
    ####################################################################################
    with open(exp_file, 'r') as fp:
        exp_ret = json.load(fp)

    exp_config = exp_ret["params"]["config"]

    dataset, _ = load_data(**exp_config['data'])
    # Seed recorded with the experiment; kept separate from the `seed` argument.
    split_seed = exp_config['experiment']['seed']
    n_rounds = exp_config['experiment']['n_rounds']
    if n_rounds < 1:
        raise ValueError("Experiment reports no rounds to evaluate: n_rounds={}".format(n_rounds))

    # A single round is still split into two folds.
    n_folds = 2 if n_rounds == 1 else n_rounds
    n_rounds_data = split_train_test(dataset, n_folds=n_folds, seed=split_seed)

    # NOTE(review): the test split of fold 0 is used for every round -
    # confirm this is intended rather than n_rounds_data[round_].
    train_data, test_data = n_rounds_data[0]
    print("train_data.shape: {}".format(train_data.shape))
    print("test_data.shape: {}".format(test_data.shape))

    ####################################################################################
    # load clients imputed and original datas
    ####################################################################################
    print("n_rounds: {}".format(n_rounds))

    process_args = [
        (data_dir, server_name, server_config, server_pred_config, test_data, n_clients, round_)
        for round_ in range(n_rounds)
    ]
    if not mtp:
        # Same per-round steps as the parallel path, executed in order.
        rets = [main_prediction(*args) for args in process_args]
    else:
        # One worker per round, but never more workers than CPUs.
        n_process = max(1, min(n_rounds, os.cpu_count() or 1))
        with mp.Pool(n_process) as pool:
            rets = pool.starmap(main_prediction, process_args, chunksize=1)

    # average results
    average_ret = {}
    for key in rets[0].keys():
        values = [ret[key] for ret in rets]
        average_ret[key] = np.mean(values)
        average_ret['{}_std'.format(key)] = np.std(values)

    print(average_ret)
    return average_ret
    


## fedavg-s

In [14]:
# Values shared by the templates and by the prediction(...) calls.
pred_rounds = 3
seed = 21

# Selects which stored experiment is evaluated (pieces of the results path).
main_config_tmpl = dict(
    data="fed_imp10/0716/ijcnn_balanced",
    n_clients=20,
    sample_size=0.01,
    scenario="mary_lr",
    mr="random_in_group2",
    method="fedavg-s",
)

# Server used for the downstream prediction plus its model/training settings.
server_config_tmpl = dict(
    server_name='central_mlp_pytorch_pred',
    server_pred_config=dict(
        model_params=dict(
            model="2nn",
            num_hiddens=128,
            model_init_config=None,
            model_other_params=None,
        ),
        train_params=dict(
            batch_size=128,
            learning_rate=0.001,
            weight_decay=0.0001,
            pred_round=200,
            pred_local_epochs=3,
        ),
    ),
    server_config=dict(pred_rounds=pred_rounds, seed=seed),
)

In [8]:
# Run with the templates defined above; `main_config` / `server_config` are
# not defined anywhere in this notebook and fail on a fresh kernel.
prediction(main_config_tmpl, server_config_tmpl, pred_rounds=3, seed = 21, mtp = False)

train_data.shape: (25785, 23)
test_data.shape: (11051, 23)
n_rounds: 5
round: 0
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
INFO: Early stopping
INFO: Early stopping
INFO: Early stopping
{'accu_mean': 0.929689620848792, 'f1_mean': 0.9296262850228557, 'accu_std': 0.010404593057343252, 'f1_std': 0.01044335286537278}
round: 1
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
INFO: Early stopping
INFO: Early stopping
INFO: Early stopping
{'accu_mean': 0.9248635115977438, 'f1_mean': 0.9246560341113165, 'accu_std': 0.013271320562889263, 'f1_std': 0.013405751675288683}
round: 2
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
INFO: Early stopping
INFO: Early stopping
INFO: Early stopping
{'accu_mean': 0.928543419901668, 'f1_mean': 0.9285219829870274, 'accu_std': 0.005998910339587672, 'f1_std': 0.005978101210211205}
round: 3
data_imp.shape: (20, 258, 23)

{'accu_mean': 0.9275299369589479,
 'accu_mean_std': 0.0022169918852821134,
 'f1_mean': 0.9274173242510744,
 'f1_mean_std': 0.0022903594817103856,
 'accu_std': 0.01067014751601054,
 'accu_std_std': 0.0026643459812512307,
 'f1_std': 0.010735262058306827,
 'f1_std_std': 0.0027226469061304467}

In [15]:
# mary_lr / fedavg-s / central_lr_pytorch_pred
# NOTE(review): the other cells use "fed_imp10/0716/ijcnn_balanced" as the
# data path - confirm the bare 'ijcnn_balanced' here is intended.
data = 'ijcnn_balanced'
n_clients = 20
sample_size = 0.01
scenario = 'mary_lr'
method = 'fedavg-s'
server_name = 'central_lr_pytorch_pred'

# The templates are updated in place; later cells see these values.
main_config_tmpl.update({
    "data":data,
    "n_clients":n_clients,
    "sample_size":sample_size,
    "scenario" :scenario,
    "method":method
})

server_config_tmpl.update({
    "server_name": server_name
})

prediction(main_config_tmpl, server_config_tmpl, pred_rounds=3, seed = 21, mtp = False)

In [10]:
# Uses whatever the templates currently hold (cells update them in place).
prediction(
    main_config_tmpl,
    server_config_tmpl,
    pred_rounds=3,
    seed=21,
    mtp=False,
)

train_data.shape: (25785, 23)
test_data.shape: (11051, 23)
n_rounds: 5
round: 0
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
{'accu_mean': 0.8183573130636744, 'f1_mean': 0.818150640954534, 'accu_std': 0.002034617917418031, 'f1_std': 0.0020547695732984817}
round: 1
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
INFO: Early stopping
{'accu_mean': 0.8031852321056917, 'f1_mean': 0.8024093679255166, 'accu_std': 0.007423820499099393, 'f1_std': 0.007631806449603853}
round: 2
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
{'accu_mean': 0.7705185051126596, 'f1_mean': 0.7692518167582009, 'accu_std': 0.0031407391478889017, 'f1_std': 0.003180115730963238}
round: 3
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
{'accu_mean': 0.8183573130636744, 'f1_mean': 0.818150640954534, 'accu_std': 0.002034617917418031, 

{'accu_mean': 0.8027207190902784,
 'accu_mean_std': 0.017472379757325632,
 'f1_mean': 0.8020743669036605,
 'f1_mean_std': 0.017857421001578486,
 'accu_std': 0.00441152319618475,
 'accu_std_std': 0.0024924734537465003,
 'f1_std': 0.004510653555353582,
 'f1_std_std': 0.0025813273309418434}

## fedmechw

In [24]:
# mary_lr / fedmechw / central_mlp_pytorch_pred
data = "fed_imp10/0716/ijcnn_balanced"
n_clients = 20
sample_size = 0.01
scenario = 'mary_lr'
method = 'fedmechw'
server_name = 'central_mlp_pytorch_pred'

# The templates are updated in place; later cells see these values.
main_config_tmpl.update({
    "data":data,
    "n_clients":n_clients,
    "sample_size":sample_size,
    "scenario" :scenario,
    "method":method
})

server_config_tmpl.update({
    "server_name": server_name
})

prediction(main_config_tmpl, server_config_tmpl, pred_rounds=3, seed = 21, mtp = False)

./results/raw_results/fed_imp10/0716/ijcnn_balanced/20/sample@p=0.01/mary_lr/random_in_group2/as_fedmechw@s_102931466@s_50@p_False ./results/raw_results/fed_imp10/0716/ijcnn_balanced/20/sample@p=0.01/mary_lr/random_in_group2/as_fedmechw@s_102931466@s_50@p_False.json
['./results/raw_results/fed_imp10/0716/ijcnn_balanced/20/sample@p=0.01/mary_lr/random_in_group2/as_fedavg-s@s_102931466@s_50@p_False', './results/raw_results/fed_imp10/0716/ijcnn_balanced/20/sample@p=0.01/mary_lr/random_in_group2/as_fedmechclw@s_102931466@s_50@p_False', './results/raw_results/fed_imp10/0716/ijcnn_balanced/20/sample@p=0.01/mary_lr/random_in_group2/as_fedmechw@s_102931466@s_50@p_False', './results/raw_results/fed_imp10/0716/ijcnn_balanced/20/sample@p=0.01/mary_lr/random_in_group2/as_local@s_102931466@s_50@p_False']
train_data.shape: (25785, 23)
test_data.shape: (11051, 23)
n_rounds: 5
round: 0
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
INFO: Early stopping
I

{'accu_mean': 0.929593098663771,
 'accu_mean_std': 0.005458799458961521,
 'f1_mean': 0.929546608764673,
 'f1_mean_std': 0.005477968673047355,
 'accu_std': 0.005559847464740322,
 'accu_std_std': 0.004093985668888116,
 'f1_std': 0.005575268888033823,
 'f1_std_std': 0.004067474476335251}

In [25]:
# mary_lr / fedmechw / central_lr_pytorch_pred
data = "fed_imp10/0716/ijcnn_balanced"
n_clients = 20
sample_size = 0.01
scenario = 'mary_lr'
method = 'fedmechw'
server_name = 'central_lr_pytorch_pred'

# The templates are updated in place; later cells see these values.
main_config_tmpl.update({
    "data":data,
    "n_clients":n_clients,
    "sample_size":sample_size,
    "scenario" :scenario,
    "method":method
})

server_config_tmpl.update({
    "server_name": server_name
})

prediction(main_config_tmpl, server_config_tmpl, pred_rounds=3, seed = 21, mtp = False)

./results/raw_results/fed_imp10/0716/ijcnn_balanced/20/sample@p=0.01/mary_lr/random_in_group2/as_fedmechw@s_102931466@s_50@p_False ./results/raw_results/fed_imp10/0716/ijcnn_balanced/20/sample@p=0.01/mary_lr/random_in_group2/as_fedmechw@s_102931466@s_50@p_False.json
['./results/raw_results/fed_imp10/0716/ijcnn_balanced/20/sample@p=0.01/mary_lr/random_in_group2/as_fedavg-s@s_102931466@s_50@p_False', './results/raw_results/fed_imp10/0716/ijcnn_balanced/20/sample@p=0.01/mary_lr/random_in_group2/as_fedmechclw@s_102931466@s_50@p_False', './results/raw_results/fed_imp10/0716/ijcnn_balanced/20/sample@p=0.01/mary_lr/random_in_group2/as_fedmechw@s_102931466@s_50@p_False', './results/raw_results/fed_imp10/0716/ijcnn_balanced/20/sample@p=0.01/mary_lr/random_in_group2/as_local@s_102931466@s_50@p_False']
train_data.shape: (25785, 23)
test_data.shape: (11051, 23)
n_rounds: 5
round: 0
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
{'accu_mean': 0.811449

{'accu_mean': 0.7958555786806623,
 'accu_mean_std': 0.02564375811858789,
 'f1_mean': 0.7953998829861101,
 'f1_mean_std': 0.026083408428633773,
 'accu_std': 0.0016608144515552529,
 'accu_std_std': 0.0003794910992487674,
 'f1_std': 0.0016761135706620603,
 'f1_std_std': 0.00037514621774264073}

## mnar

In [28]:
# mnar_lr / fedavg-s / central_mlp_pytorch_pred
data = "fed_imp10/0716/ijcnn_balanced"
n_clients = 20
sample_size = 0.01
scenario = 'mnar_lr'
method = 'fedavg-s'
server_name = 'central_mlp_pytorch_pred'

# The templates are updated in place; later cells see these values.
main_config_tmpl.update({
    "data":data,
    "n_clients":n_clients,
    "sample_size":sample_size,
    "scenario" :scenario,
    "method":method
})

server_config_tmpl.update({
    "server_name": server_name
})

prediction(main_config_tmpl, server_config_tmpl, pred_rounds=3, seed = 21, mtp = False)

train_data.shape: (25785, 23)
test_data.shape: (11051, 23)
n_rounds: 5
round: 0
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
INFO: Early stopping
INFO: Early stopping
INFO: Early stopping
{'accu_mean': 0.8496666968298494, 'f1_mean': 0.8482336387421978, 'accu_std': 0.005481572400701579, 'f1_std': 0.006490153436981167}
round: 1
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
INFO: Early stopping
INFO: Early stopping
INFO: Early stopping
{'accu_mean': 0.8784122100564051, 'f1_mean': 0.8774162295224558, 'accu_std': 0.017909058000727028, 'f1_std': 0.018542305644729893}
round: 2
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
INFO: Early stopping
INFO: Early stopping
INFO: Early stopping
{'accu_mean': 0.8372696286912195, 'f1_mean': 0.8342277432603958, 'accu_std': 0.015619054113394036, 'f1_std': 0.01666647117040593}
round: 3
data_imp.shape: (20, 258, 2

{'accu_mean': 0.8586854884927458,
 'accu_mean_std': 0.016730829892485158,
 'f1_mean': 0.8571054959579406,
 'f1_mean_std': 0.017354325485368305,
 'accu_std': 0.01248006298325025,
 'accu_std_std': 0.005775101384092605,
 'f1_std': 0.01334627786676561,
 'f1_std_std': 0.005639751365187388}

In [29]:
# mnar_lr / fedmechw / central_mlp_pytorch_pred
data = "fed_imp10/0716/ijcnn_balanced"
n_clients = 20
sample_size = 0.01
scenario = 'mnar_lr'
method = 'fedmechw'
server_name = 'central_mlp_pytorch_pred'

# The templates are updated in place; later cells see these values.
main_config_tmpl.update({
    "data":data,
    "n_clients":n_clients,
    "sample_size":sample_size,
    "scenario" :scenario,
    "method":method
})

server_config_tmpl.update({
    "server_name": server_name
})

prediction(main_config_tmpl, server_config_tmpl, pred_rounds=3, seed = 21, mtp = False)

train_data.shape: (25785, 23)
test_data.shape: (11051, 23)
n_rounds: 5
round: 0
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
INFO: Early stopping
INFO: Early stopping
INFO: Early stopping
{'accu_mean': 0.8755768708714143, 'f1_mean': 0.8752631161518317, 'accu_std': 0.009425982565672656, 'f1_std': 0.00951227865222609}
round: 1
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
INFO: Early stopping
INFO: Early stopping
INFO: Early stopping
{'accu_mean': 0.9008837812565981, 'f1_mean': 0.9007866687854449, 'accu_std': 0.007314680669036828, 'f1_std': 0.007275863118102666}
round: 2
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
INFO: Early stopping
INFO: Early stopping
INFO: Early stopping
{'accu_mean': 0.887762796730311, 'f1_mean': 0.887059641574278, 'accu_std': 0.017178773403524954, 'f1_std': 0.017888501513687215}
round: 3
data_imp.shape: (20, 258, 23)

{'accu_mean': 0.8881368201972671,
 'accu_mean_std': 0.011319139368220649,
 'f1_mean': 0.8878318422897662,
 'f1_mean_std': 0.01142100789310709,
 'accu_std': 0.010132019974588784,
 'accu_std_std': 0.003647698262165269,
 'f1_std': 0.010292957010868947,
 'f1_std_std': 0.003927261763853654}

In [30]:
# mnar_lr / fedavg-s / central_lr_pytorch_pred
data = "fed_imp10/0716/ijcnn_balanced"
n_clients = 20
sample_size = 0.01
scenario = 'mnar_lr'
method = 'fedavg-s'
server_name = 'central_lr_pytorch_pred'

# The templates are updated in place; later cells see these values.
main_config_tmpl.update({
    "data":data,
    "n_clients":n_clients,
    "sample_size":sample_size,
    "scenario" :scenario,
    "method":method
})

server_config_tmpl.update({
    "server_name": server_name
})

prediction(main_config_tmpl, server_config_tmpl, pred_rounds=3, seed = 21, mtp = False)

train_data.shape: (25785, 23)
test_data.shape: (11051, 23)
n_rounds: 5
round: 0
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
{'accu_mean': 0.7964286791542244, 'f1_mean': 0.796358710441727, 'accu_std': 0.002813757760689837, 'f1_std': 0.0028623041695184062}
round: 1
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
{'accu_mean': 0.7958555786806624, 'f1_mean': 0.7956081903758462, 'accu_std': 0.0015267457828665755, 'f1_std': 0.0016327876399726935}
round: 2
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
{'accu_mean': 0.7541097336590957, 'f1_mean': 0.7540715614381358, 'accu_std': 0.0037545590766338427, 'f1_std': 0.003701582281981298}
round: 3
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
{'accu_mean': 0.7964286791542244, 'f1_mean': 0.796358710441727, 'accu_std': 0.002813757760689837, 'f1_std': 0.0028623

{'accu_mean': 0.7877356498657739,
 'accu_mean_std': 0.016814911507801473,
 'f1_mean': 0.7876010726146564,
 'f1_mean_std': 0.016768115159670256,
 'accu_std': 0.0024871132327493337,
 'accu_std_std': 0.0008560868577470031,
 'f1_std': 0.002538353180192699,
 'f1_std_std': 0.0008003859475885393}

In [31]:
# mnar_lr / fedmechw / central_lr_pytorch_pred
data = "fed_imp10/0716/ijcnn_balanced"
n_clients = 20
sample_size = 0.01
scenario = 'mnar_lr'
method = 'fedmechw'
server_name = 'central_lr_pytorch_pred'

# The templates are updated in place; later cells see these values.
main_config_tmpl.update({
    "data":data,
    "n_clients":n_clients,
    "sample_size":sample_size,
    "scenario" :scenario,
    "method":method
})

server_config_tmpl.update({
    "server_name": server_name
})

prediction(main_config_tmpl, server_config_tmpl, pred_rounds=3, seed = 21, mtp = False)

train_data.shape: (25785, 23)
test_data.shape: (11051, 23)
n_rounds: 5
round: 0
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
{'accu_mean': 0.822218200464513, 'f1_mean': 0.8221254090198817, 'accu_std': 0.001831280335011687, 'f1_std': 0.0018488596414887519}
round: 1
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
{'accu_mean': 0.7996259765330439, 'f1_mean': 0.799408219637081, 'accu_std': 0.0027727168447576776, 'f1_std': 0.002820680882427079}
round: 2
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
{'accu_mean': 0.8000180979096915, 'f1_mean': 0.7999372672783912, 'accu_std': 0.0033033849711650532, 'f1_std': 0.0033031205642740295}
round: 3
data_imp.shape: (20, 258, 23)
data_true.shape: (20, 258, 23)
missing_mask.shape: (20, 258, 22)
{'accu_mean': 0.822218200464513, 'f1_mean': 0.8221254090198817, 'accu_std': 0.001831280335011687, 'f1_std': 0.00184885

{'accu_mean': 0.8087412903809611,
 'accu_mean_std': 0.011004782512918304,
 'f1_mean': 0.8086009049184634,
 'f1_mean_std': 0.011044400978795806,
 'accu_std': 0.0025022758661407568,
 'accu_std_std': 0.0005811234721767397,
 'f1_std': 0.0025284403224211383,
 'f1_std_std': 0.0005821680806666538}