-
Notifications
You must be signed in to change notification settings - Fork 0
/
eval_mc.py
135 lines (115 loc) · 5.51 KB
/
eval_mc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
from utils.eval_utils import model_fn, model_fn_eval, eval, eval_with_training_dataset
import utils.train_utils as tu
from utils.dataset import *
from utils.log_utils import create_tb_logger
from utils.utils import *
if __name__ == "__main__":
    # Evaluation driver for MC-dropout models on the UCI regression benchmarks:
    # loads the final checkpoint per dataset, runs eval on the test split,
    # and writes an RMSE/NLL summary table plus NLL-cap-count plots.
    # TODO: Simplify and automate the process
    # TODO: put all kinds of cfgs and hyperparameters into a config file, e.g. yaml

    # Datasets under evaluation; each gets its own output/checkpoint directory.
    dataset_names = [
        "boston", "concrete", "energy", "kin8nm", "naval",
        "power_plant", "protein", "wine", "yacht",
        # "year",  # disabled in the original run
    ]

    # Create directories for storing results and checkpoints.
    output_dirs = {name: os.path.join("./", "output_mc", name) for name in dataset_names}
    ckpt_dirs = {}
    for key, val in output_dirs.items():
        os.makedirs(val, exist_ok=True)
        ckpt_dirs[key] = os.path.join(val, 'ckpts')
        os.makedirs(ckpt_dirs[key], exist_ok=True)

    # Evaluation config. Some entries are reserved for future use.
    cfg = {
        "ckpt": None,
        "num_epochs": 40,          # checkpoint epoch to load (see cur_ckpts below)
        "ckpt_save_interval": 20,
        "batch_size": 100,
        "pdrop": 0.1,              # dropout rate kept active at test time for MC sampling
        "grad_norm_clip": None,
        "num_networks": 50,
    }

    # Per-dataset data locations: ./data/<name>/<name>_{train,eval,test}.csv
    data_dirs = {key: os.path.join("./data", key) for key in output_dirs}
    data_files = {
        key: ["{}_train.csv".format(key), "{}_eval.csv".format(key), "{}_test.csv".format(key)]
        for key in data_dirs
    }

    # Training-split loaders (used below to generate figures on the training data).
    train_datasets = {}
    train_loaders = {}
    for key, fname in data_files.items():
        train_datasets[key] = UCIDataset(os.path.join(data_dirs[key], fname[0]), testing=True)
        train_loaders[key] = torch.utils.data.DataLoader(train_datasets[key],
                                                         batch_size=cfg["batch_size"],
                                                         num_workers=0,
                                                         collate_fn=train_datasets[key].collate_batch)

    # Prepare model
    print("Prepare model")
    from model.fc import FC, FC2
    models = {}
    for key, dataset in train_datasets.items():
        # The larger datasets (protein, year) use the bigger FC2 architecture.
        model_cls = FC2 if key in ["protein", "year"] else FC
        models[key] = model_cls(dataset.input_dim, cfg["pdrop"])
        models[key].cuda()

    # Logging
    tb_loggers = {key: create_tb_logger(val) for key, val in output_dirs.items()}

    # Testing
    print("Start testing")
    # Currently the test dataset is the same as training set. TODO: K-Fold cross validation
    test_datasets = {}
    test_loaders = {}
    for key, fname in data_files.items():
        test_datasets[key] = UCIDataset(os.path.join(data_dirs[key], fname[2]), testing=True)
        test_loaders[key] = torch.utils.data.DataLoader(test_datasets[key],
                                                        batch_size=cfg["batch_size"],
                                                        num_workers=0,
                                                        collate_fn=test_datasets[key].collate_batch)

    # Load the final checkpoint for each model.
    # FIX: the epoch was hard-coded as 40 here; tie it to cfg["num_epochs"]
    # so changing the training length cannot silently load a stale checkpoint.
    ckpt_epoch = cfg["num_epochs"]
    cur_ckpts = {}
    for key, ckpt_dir in ckpt_dirs.items():
        cur_ckpts[key] = '{}.pth'.format(os.path.join(ckpt_dir, "ckpt_e{}".format(ckpt_epoch)))
        print("loading checkpoint ckpt_e{}".format(ckpt_epoch))
        models[key].load_state_dict(torch.load(cur_ckpts[key])["model_state"])
        # NOTE(review): .train() (not .eval()) keeps dropout stochastic at test
        # time — presumably intentional for MC-dropout sampling; confirm.
        models[key].train()

    # Evaluate each model and collect the per-dataset result tuples.
    results = {}
    for key, model in models.items():
        print("==================================Evaluating {}==========================================".format(key))
        result = eval(model, test_loader=test_loaders[key], cfg=cfg, output_dir=output_dirs[key], tb_logger=tb_loggers[key], title='test-'+key)
        results[key] = result
        # TODO: below function generates figures for the training dataset as requested by Joachim
        eval_with_training_dataset(model, train_loaders[key], cfg=cfg, output_dir=output_dirs[key], tb_logger=tb_loggers[key], title='train-'+key)
        print("Finished\n")

    # Summarize the results into a table and save it.
    # Result layout (from eval): val[0] = (NLL, RMSE), val[2] = (over-cap count, cap),
    # val[3] = (NLL without v-noise,) — inferred from the indexing below; verify in eval_utils.
    dataset_list = []
    NLL_list = []
    NLL_without_v_Noise_list = []
    RMSE_list = []
    NLL_over_cap_cnt = []
    cap = 0
    for key, val in results.items():
        dataset_list.append(key)
        NLL_list.append(val[0][0])
        RMSE_list.append(val[0][1])
        NLL_over_cap_cnt.append(val[2][0])
        cap = val[2][1]  # same cap for all datasets; last value wins
        NLL_without_v_Noise_list.append(val[3][0])
    err_df = pd.DataFrame(index=range(len(dataset_list)), columns=["Datasets", "RMSE", "NLL", "NLL_no_v_noise"])
    err_df["Datasets"] = pd.DataFrame(dataset_list)
    err_df["RMSE"] = pd.DataFrame(RMSE_list)
    err_df["NLL"] = pd.DataFrame(NLL_list)
    err_df["NLL_no_v_noise"] = pd.DataFrame(NLL_without_v_Noise_list)
    err_sum_dir = "./output_mc/err_summary"
    os.makedirs(err_sum_dir, exist_ok=True)
    err_df.to_csv(os.path.join(err_sum_dir, "err_summary.csv"))
    plot_NLL_cap_cnt(dataset_list, NLL_over_cap_cnt, cap, err_sum_dir)

    # Finalizing
    print("Analysis finished\n")
    # TODO: integrate logging, visualization, GPU data parallel etc. in the future