
Commit

update
jbkoh committed Jun 21, 2018
1 parent 486037a commit 6d57f0a
Showing 9 changed files with 256 additions and 28 deletions.
1 change: 1 addition & 0 deletions plastering/inferencers/inferencer.py
@@ -1,4 +1,5 @@
import os
import time
import pdb
import random
from copy import deepcopy
16 changes: 9 additions & 7 deletions plastering/inferencers/scrabble_new.py
@@ -185,32 +185,34 @@ def apply_filter_by_zodiac(self, pred):
triple = (BASE[srcid], RDF.type, BRICK[point_tagset])
if self.prior_confidences[triple] > 0.8:
self.zodiac_good_preds[srcid] = point_tagset
fixed_cnt = 0
for srcid, pred_tagsets in pred.items():
pred_point_tagset = sel_point_tagset(pred_tagsets, srcid)
good_point_tagset = self.zodiac_good_preds.get(srcid, None)
if not good_point_tagset:
continue
if not self.is_same_tagset(pred_point_tagset, good_point_tagset):
pred_tagsets = [tagset for tagset in pred_tagsets
if self.is_same_tagset(tagset,
pred_point_tagset)]
if not is_point_tagset(tagset)]
pred_tagsets.append(good_point_tagset)
print('FIXED {0}, {1} -> {2}'.format(srcid,
pred_point_tagset,
good_point_tagset))
fixed_cnt += 1
pred[srcid] = pred_tagsets
print('TOTAL_FIXED_POINTS: {0}'.format(fixed_cnt))
return pred

def select_informative_samples(self, sample_num=10):
# Use prior (e.g., from Zodiac.)
new_srcids = []
if self.apply_validating_samples:
new_srcids += self.apply_prior_zodiac(sample_num)
#if self.apply_validating_samples:
# new_srcids += self.apply_prior_zodiac(sample_num)
if len(new_srcids) < sample_num:
new_srcids += self.scrabble.select_informative_samples(
sample_num * 3 - len(new_srcids))
new_srcids = [srcid for srcid in new_srcids
if srcid not in self.zodiac_good_preds][0:sample_num]
sample_num - len(new_srcids))
#new_srcids = [srcid for srcid in new_srcids
# if srcid not in self.zodiac_good_preds][0:sample_num]
return new_srcids


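For readers skimming the diff, a minimal, self-contained sketch of the filtering idea in apply_filter_by_zodiac follows. The is_point_tagset helper and the zodiac_good_preds mapping here are illustrative stand-ins for the class attributes used above (the real code goes through self.is_same_tagset and the prior-confidence check); this is a sketch of the technique, not the project's implementation.

def is_point_tagset(tagset):
    # Assumption for this sketch: a point tagset ends in a point-class suffix.
    return tagset.split('_')[-1] in {'sensor', 'setpoint', 'status', 'command', 'alarm'}

def filter_by_zodiac(pred, zodiac_good_preds):
    # Replace each entity's point tagset with the high-confidence Zodiac
    # prediction while leaving its non-point tagsets untouched.
    fixed_cnt = 0
    for srcid, pred_tagsets in pred.items():
        good_point_tagset = zodiac_good_preds.get(srcid)
        if good_point_tagset is None:
            continue
        if good_point_tagset not in pred_tagsets:
            pred_tagsets = [t for t in pred_tagsets if not is_point_tagset(t)]
            pred_tagsets.append(good_point_tagset)
            fixed_cnt += 1
        pred[srcid] = pred_tagsets
    print('TOTAL_FIXED_POINTS: {0}'.format(fixed_cnt))
    return pred

# Example: the predicted point tagset is overridden by the trusted Zodiac label.
pred = {'rm101': ['zone_temperature_sensor', 'hvac_zone']}
print(filter_by_zodiac(pred, {'rm101': 'zone_air_temperature_sensor'}))
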
6 changes: 4 additions & 2 deletions plastering/inferencers/zodiac_new.py
@@ -82,6 +82,10 @@ def __init__(self,
sample_num_list = config['sample_num_list']
else:
sample_num_list = [0] * (len(source_buildings) + 1) # +1 for target
if 'use_quiver' in config:
self.use_quiver = config['use_quiver']
else:
self.use_quiver = False
if len(self.source_buildings) > len(sample_num_list):
sample_num_list.append(0)

@@ -301,8 +305,6 @@ def calc_prior_g_acc(self):
acc += 1
acc = 0 if not cnt else acc / cnt
print('Accuracy: {0}'.format(acc))
pdb.set_trace()


def apply_prior_augment_samples(self):
prior_preds = {}
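The new use_quiver option follows the same read-or-default pattern as the other config keys. A hedged sketch of the equivalent logic using dict.get, with made-up config values for illustration:

source_buildings = ['ebu3b']                      # illustrative value
config = {'sample_num_list': [10, 0], 'use_quiver': True}

# Equivalent to the if/else blocks added in the diff: use the configured
# value when present, otherwise fall back to a default.
sample_num_list = config.get('sample_num_list', [0] * (len(source_buildings) + 1))
use_quiver = config.get('use_quiver', False)      # defaults to False, as above
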
141 changes: 137 additions & 4 deletions result/arka/parse_result.py
@@ -1,17 +1,150 @@
import os
import sys
import pdb
import re
from copy import deepcopy
from operator import itemgetter
import json

import pandas as pd

dir_path = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, dir_path + '/../..')
from plastering.metadata_interface import *
from plastering.evaluator import *

target_building = 'sdh'
currfile = __file__
base_dir = os.path.dirname(currfile)
target_dir = base_dir + '/' + target_building
cluster_sizes = {}


def get_number(s):
return int(re.findall('\\d+', s)[0])

def is_finished():
for cid, curr_eid in curr_eids.items():
if curr_eid < len(qualified_examples_nums[cid]) - 1:
return False
return True

def select_next_cid():
ordered_cids = [row[0] for row in
sorted(curr_cluster_sizes.items(),
key=itemgetter(1),
reverse=True)]
for cid in ordered_cids:
curr_eid = curr_eids[cid]
if curr_eid < len(qualified_examples_nums[cid]) - 1:
return cid
raise Exception('cannot find cids without finishing the algorithm. A bug')

def get_srcid(name):
return '_'.join(re.findall('[a-zA-Z0-9]+', name))

orig_cluster_sizes = {}
total_names = []
for filename in os.listdir(target_dir):
if not re.match('{0}-ORIGINAL-METADATA-\\d+$'.format(target_building.upper()),
filename):
continue
cid = get_number(filename)
with open(target_dir + '/' + filename, 'r') as fp:
names = fp.readlines()
orig_cluster_sizes[cid] = len(names)
total_names += names
total_names = list(set(total_names))
total_srcids = [get_srcid(name) for name in total_names]
curr_cluster_sizes = deepcopy(orig_cluster_sizes)

true_tagsets = {srcid: LabeledMetadata.objects(srcid=srcid).first().tagsets
for srcid in total_srcids}
true_points = {srcid: LabeledMetadata.objects(srcid=srcid).first().point_tagset
for srcid in total_srcids}

qualified_examples_nums = {}
for filename in os.listdir(target_dir):
if not re.match('l-ex-\\d+-out$', filename):
continue
cid = get_number(filename)
df = pd.read_csv(target_dir + '/' + filename)
df.columns = df.columns.str.strip()
cluster_id = int(re.findall('\\d+', filename)[0])
coverages = df['fullyQualified'].tolist()
pdb.set_trace()
coverages = df['Num Examples Thought to be fully qualified'].tolist()
qualified_examples_nums[cid] = coverages


inferred_points_dict = {i: {} for i in curr_cluster_sizes.keys()}
for filename in os.listdir(target_dir):
if not re.match('l-ex-\\d+-out-points-qualified$', filename):
continue
cid = get_number(filename)
with open(target_dir + '/' + filename, 'r') as fp:
lines = fp.readlines()
for line in lines:
ex_id = int(line.split(' ')[0])
if "'" not in line:
items = []
else:
items = line.split('[')[-1].split(']')[0][1:-1].split("', '")
inferred_points_dict[cid][ex_id] = items

pred = {}

curr_eids = {i: 0 for i in curr_cluster_sizes.keys()}


total_num = sum(orig_cluster_sizes.values())

pred_names = set()
cnt = 0
accs = []
f1s = []
mf1s = []
anymf1s = []
srcids = []
pred = {srcid: [] for srcid in total_srcids}
point_pred = {srcid: [] for srcid in total_srcids}
res = []

while not is_finished():
# select cluster
#max_cid = max(curr_cluster_sizes.items(), key=itemgetter(1))[0]
cnt += 1
max_cid = select_next_cid()
curr_eids[max_cid] += 1
curr_eid = curr_eids[max_cid]
found_names = set(inferred_points_dict[max_cid][curr_eid])
new_names = found_names - pred_names
new_srcids = [get_srcid(name) for name in new_names]
pred_names = pred_names.union(new_names)
curr_cluster_sizes[max_cid] = orig_cluster_sizes[max_cid] - len(found_names)
acc = len(pred_names) / total_num
print('{0}\tacc: {1}'.format(cnt, acc))
pred.update({srcid: LabeledMetadata.objects(srcid=srcid).first().tagsets
for srcid in new_srcids})
point_pred.update({
srcid: LabeledMetadata.objects(srcid=srcid).first().point_tagset
for srcid in new_srcids})
anymf1 = get_macro_f1(true_tagsets, pred)
mf1 = get_macro_f1(true_points, point_pred)
f1 = get_micro_f1(true_points, point_pred)
#mf1s.append(mf1)
#f1s.append(f1)
#anymf1s.append(anymf1)
#accs.append(acc)
#srcids.append(len(pred_names))
row = {
'metrics': {
'f1': f1,
'macrof1': mf1,
'accuracy': acc,
'macrof1-all': anymf1
},
'learning_srcids': cnt
}
res.append(row)


with open('result/pointonly_notransfer_arka_{0}_0.json'.format(target_building),
'w') as fp:
json.dump(res, fp)
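
For reference, each record the script appends to res has the shape sketched below (the metric values are made up), and the dumped file can be read back the same way. The path mirrors the one written above with target_building = 'sdh'.

import json

# One record per iteration of the while-loop above; learning_srcids is the
# loop counter cnt.
example_row = {
    'metrics': {
        'f1': 0.71,          # micro-F1 over point tagsets
        'macrof1': 0.54,     # macro-F1 over point tagsets
        'accuracy': 0.63,    # fraction of names covered so far
        'macrof1-all': 0.49  # macro-F1 over all tagsets
    },
    'learning_srcids': 42
}

with open('result/pointonly_notransfer_arka_sdh_0.json') as fp:
    res = json.load(fp)
micro_f1s = [row['metrics']['f1'] for row in res]
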
2 changes: 1 addition & 1 deletion scripts/exp_scrabble_zodiac.py
@@ -68,6 +68,6 @@
'metrics': hist['metrics'],
'learning_srcids': len(hist['total_training_srcids'])
} for hist in workflow.history]
with open('result/scrabble_zodiac_{0}_{1}.json'
with open('result/scrabble_zodiac_{0}_{1}_onlyfiltering.json'
.format(target_building, exp_id), 'w') as fp:
json.dump(history, fp)
3 changes: 2 additions & 1 deletion scripts/exp_zodiac.py
@@ -1,6 +1,7 @@
import sys, os
import pdb
import json
os.environ['TRIPLE_STORE_TYPE'] = "rdflib"
dir_path = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, dir_path + '/..')
#sys.path.append(os.path.abspath(os.path.join(dir_path + '/..', 'config')))
@@ -10,7 +11,7 @@
from plastering.metadata_interface import *
import pdb

EXP_NUM = 4
EXP_NUM = 2

target_building = sys.argv[1]
try:
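The added TRIPLE_STORE_TYPE assignment sits before the plastering imports, presumably because the backend choice is read when those modules are imported. A minimal sketch of that ordering; the fallback value shown is an assumption, not the library's documented default.

import os

# 1. Pick the triple-store backend first ...
os.environ['TRIPLE_STORE_TYPE'] = 'rdflib'

# 2. ... then import modules that may read the variable at import time,
#    e.g. `from plastering.metadata_interface import *` as in the script above.

# A module reading it at import time would look roughly like this:
backend = os.environ.get('TRIPLE_STORE_TYPE', 'rdflib')  # assumed default
print(backend)
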
91 changes: 87 additions & 4 deletions scripts/result_drawer.py
@@ -182,6 +182,8 @@ def plot_entities():
if target_building != 'sdh':
continue
exp_num = 1
elif inferencer_name == 'scrabble':
exp_num = 2
else:
exp_num = EXP_NUM
# Notransfer
@@ -407,8 +409,8 @@ def plot_scrabble_zodiac():
fig, ax = plt.subplots(1, 1)
xticks, xticks_labels, yticks, yticks_labels, xlim, ylim, interp_x, \
xlabel, ylabel, linestyles, xtickRotate = get_grid_params(
ymin = 0, ymax = 40, ydelta = 5,
xmin = 10, xmin2=50, xmax = 150, xdelta=50)
ymin = 0, ymax = 35, ydelta = 5,
xmin = 10, xmin2=50, xmax = 250, xdelta=50)
ylabel = 'Count'
# Baseline (Naive Zodiac)
with open('result/scrabble_zodiac.json', 'r') as fp:
@@ -435,6 +437,86 @@ def plot_quiver_zodiac():
fig.set_size_inches((1.5,1.7))
save_fig(fig, outputfile)

def plot_ba_zodiac():
EXP_NUM = 2
building = 'ebu3b'
outputfile = FIG_DIR + '/ba_zodiac.pdf'
fig, ax = plt.subplots(1, 1)
xticks, xticks_labels, yticks, yticks_labels, xlim, ylim, interp_x, \
xlabel, ylabel, linestyles, xtickRotate = get_grid_params()

title = building_anon_map[building]

# Baseline (Naive Zodiac)
xs = []
ys = []
xss = []
f1s = []
mf1s = []
for i in range(0, EXP_NUM):
with open('result/pointonly_notransfer_zodiac_{0}_{1}.json'
.format(building, i)) as fp:
data = json.load(fp)
xss.append([datum['learning_srcids'] for datum in data])
f1s.append([datum['metrics']['f1'] for datum in data])
mf1s.append([datum['metrics']['macrof1'] for datum in data])
xs = xss[0] # Assuming all xss are same.
f1 = average_data(xss, f1s, interp_x)
mf1 = average_data(xss, mf1s, interp_x)
x = interp_x
ys = [f1, mf1]
legends = ['MicroF1, {0}'.format('Zodiac'),
'MacroF1, {0}'.format('Zodiac')
]

_, plots = plotter.plot_multiple_2dline(
x, ys, xlabel, ylabel, xticks, xticks_labels,
yticks, yticks_labels, title, ax, fig, ylim, xlim, legends,
linestyles=[linestyles.pop()]*len(ys), cs=colors,
xtickRotate=xtickRotate)

# Baseline (Naive Zodiac)
xs = []
ys = []
xss = []
f1s = []
mf1s = []
for i in range(0, EXP_NUM):
with open('result/ba_zodiac_{0}_{1}.json'
.format(building, i)) as fp:
data = json.load(fp)
xss.append([datum['learning_srcids'] for datum in data])
f1s.append([datum['metrics']['f1'] for datum in data])
mf1s.append([datum['metrics']['macrof1'] for datum in data])
xs = xss[0] # Assuming all xss are same.
f1 = average_data(xss, f1s, interp_x)
mf1 = average_data(xss, mf1s, interp_x)
x = interp_x
ys = [f1, mf1]
legends = ['MicroF1, {0}'.format('BA/Zodiac'),
'MacroF1, {0}'.format('BA/Zodiac')
]
xtickRotate = 45

_, plots = plotter.plot_multiple_2dline(
x, ys, xlabel, ylabel, xticks, xticks_labels,
yticks, yticks_labels, title, ax, fig,
ylim=ylim, xlim=xlim,
dataLabels=legends,
linestyles=[linestyles.pop()]*len(ys), cs=colors,
xtickRotate=xtickRotate)



ax.grid(True)
ax.tick_params(axis='x', pad=-1.5)
#ax.xaxis.set_label_coords(1.1, -0.2)

ax.legend(bbox_to_anchor=(1.26, 1.75), ncol=1, frameon=False, fontsize='small')
#fig.set_size_inches((8,2))
fig.set_size_inches((1.5,1.7))
save_fig(fig, outputfile)

def plot_quiver_zodiac():
EXP_NUM = 2
building = 'ebu3b'
@@ -520,5 +602,6 @@ def plot_quiver_zodiac():
#plot_pointonly_notransfer()
#plot_pointonly_transfer()
#plot_quiver_zodiac()
plot_entities()
#plot_scrabble_zodiac()
#plot_entities()
plot_scrabble_zodiac()
#plot_ba_zodiac()
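
plot_ba_zodiac, like the other plot helpers, averages several experiment runs onto a common x grid before plotting. The real average_data helper lives elsewhere in the repo; below is a hedged numpy sketch of what such an interpolate-then-average step typically looks like, not the project's implementation.

import numpy as np

def average_curves(xss, yss, interp_x):
    # Interpolate each run's (x, y) curve onto interp_x, then average point-wise.
    # Illustrative stand-in for the average_data helper used above.
    interped = [np.interp(interp_x, xs, ys) for xs, ys in zip(xss, yss)]
    return np.mean(interped, axis=0)

# Two runs sampled at slightly different labeling budgets:
xss = [[10, 50, 100], [10, 60, 100]]
f1s = [[0.40, 0.70, 0.80], [0.45, 0.72, 0.82]]
interp_x = np.arange(10, 101, 10)
f1 = average_curves(xss, f1s, interp_x)
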
11 changes: 8 additions & 3 deletions scripts/run_hong_all.sh
@@ -1,4 +1,9 @@
#!/usr/bin/env bash
(nohup python -u scripts/exp_hong_al.py ebu3b sdh > nohup.hong.ebu3b.sdh; slack_notify --msg 'hong ebu3b sdh at ozone') &
(nohup python -u scripts/exp_hong_al.py sdh ebu3b > nohup.hong.sdh.ebu3b; slack_notify --msg 'hong sdh ebu3b at ozone') &
(nohup python -u scripts/exp_hong_al.py ebu3b ap_m > nohup.hong.ebu3b.ap_m; slack_notify --msg 'hong ebu3b ap_m at ozone') &
(nohup python -u scripts/exp_hong_al.py ebu3b sdh > nohup.hong.ebu3b.sdh; slack_notify --msg 'nohup.hong.ebu3b.sdh at labpc')
(nohup python -u scripts/exp_hong_al.py sdh ebu3b > nohup.hong.sdh.ebu3b; slack_notify --msg 'nohup.hong.sdh.ebu3b at labpc')
(nohup python -u scripts/exp_hong_al.py ebu3b ap_m > nohup.hong.ebu3b.ap_m; slack_notify --msg 'nohup.hong.ebu3b.ap_m at labpc')

(nohup python -u scripts/exp_hong_al.py ebu3b > nohup.hong.ebu3b; slack_notify --msg 'nohup.hong.ebu3b at labpc')
(nohup python -u scripts/exp_hong_al.py sdh > nohup.hong.sdh; slack_notify --msg 'nohup.hong.sdh at labpc')
(nohup python -u scripts/exp_hong_al.py ghc > nohup.hong.ghc; slack_notify --msg 'nohup.hong.ghc at labpc')
(nohup python -u scripts/exp_hong_al.py uva_cse > nohup.hong.uva_cse; slack_notify --msg 'nohup.hong.uva_cse at labpc')
13 changes: 7 additions & 6 deletions scripts/run_zodiac_all.sh
@@ -1,8 +1,9 @@
#!/usr/bin/env bash
(nohup python -u scripts/exp_zodiac.py ebu3b sdh > nohup.zodiac.ebu3b.sdh; slack_notify --msg 'zodiac ebu3b sdh at ozone') &
(nohup python -u scripts/exp_zodiac.py sdh ebu3b > nohup.zodiac.sdh.ebu3b; slack_notify --msg 'zodiac sdh ebu3b at ozone') &
(nohup python -u scripts/exp_zodiac.py ebu3b ap_m > nohup.zodiac.ebu3b.ap_m; slack_notify --msg 'zodiac ebu3b ap_m at ozone') &
(nohup python -u scripts/exp_zodiac.py ebu3b sdh > nohup.zodiac.ebu3b.sdh; slack_notify --msg 'nohup.zodiac.ebu3b.sdh at lab-pc') &
(nohup python -u scripts/exp_zodiac.py sdh ebu3b > nohup.zodiac.sdh.ebu3b; slack_notify --msg 'nohup.zodiac.sdh.ebu3b at lab-pc') &
(nohup python -u scripts/exp_zodiac.py ebu3b ap_m > nohup.zodiac.ebu3b.ap_m; slack_notify --msg 'nohup.zodiac.ebu3b.ap_m') &

(nohup python -u scripts/exp_zodiac.py ebu3b sdh > nohup.zodiac.ebu3b.sdh; slack_notify --msg 'zodiac ebu3b sdh at ozone') &
(nohup python -u scripts/exp_zodiac.py sdh ebu3b > nohup.zodiac.sdh.ebu3b; slack_notify --msg 'zodiac sdh ebu3b at ozone') &
(nohup python -u scripts/exp_zodiac.py ebu3b ap_m > nohup.zodiac.ebu3b.ap_m; slack_notify --msg 'zodiac ebu3b ap_m at ozone') &
(nohup python -u scripts/exp_zodiac.py ebu3b > nohup.zodiac.ebu3b; slack_notify --msg 'zodiac ebu3b sdh at ozone') &
(nohup python -u scripts/exp_zodiac.py sdh > nohup.zodiac.sdh; slack_notify --msg 'nohup.zodiac.sdh') &
(nohup python -u scripts/exp_zodiac.py uva_cse > nohup.zodiac.uva_cse; slack_notify --msg 'nohup.zodiac.uva_cse') &
(nohup python -u scripts/exp_zodiac.py ghc > nohup.zodiac.ghc ; slack_notify --msg 'nohup.zodiac.ghc') &
