Skip to content

Commit

Permalink
a runnable version
Browse files Browse the repository at this point in the history
  • Loading branch information
yangarbiter authored and lsc36 committed Oct 7, 2015
1 parent 7074f0d commit ef7dbd2
Showing 1 changed file with 58 additions and 29 deletions.
87 changes: 58 additions & 29 deletions libact/query_strategies/active_learning_by_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,20 @@
"""
from libact.base.interfaces import QueryStrategy
import numpy as np
import copy


class ActiveLearmingByLearning(QueryStrategy):
class ActiveLearningByLearning(QueryStrategy):

def __init__(self, *args, **kwargs):
""" """
""" ALBL """
super(ActiveLearningByLearning, self).__init__(*args, **kwargs)
self.models = kwargs.pop('models', None)
if self.models is none:
if self.models is None:
raise TypeError(
"__init__() missing required keyword-only argument: 'models'"
)
elif not models:
elif not self.models:
raise ValueError("models list is empty")

# parameters for Exp4.p
Expand All @@ -23,22 +24,29 @@ def __init__(self, *args, **kwargs):
# query budget
self.T = kwargs.pop('T', 100)

self.unlabeled_entry_ids, X_pool = zip(*self.dataset.get_unlabeled_entries())
self.invert_id_idx = {}
for i, entry in enumerate(self.dataset.get_unlabeled_entries()):
self.invert_id_idx[entry[0]] = i

self.exp4p = Exp4P(
experts = self.models,
T = self.T,
delta = self.delta,
K = self.dataset.len_unlabeled()
K = self.dataset.len_unlabeled(),
invert_id_idx = self.invert_id_idx,
idx_id = self.unlabeled_entry_ids
)
self.budget_used = 0

# Classifier instance
sef.clf = kwargs.pop('clf', None)
if self.clf is none:
self.clf = kwargs.pop('clf', None)
if self.clf is None:
raise TypeError(
"__init__() missing required keyword-only argument: 'models'"
)

self.reward = -1
self.reward = -1.
self.W = []
self.queried_hist = []

Expand All @@ -48,9 +56,9 @@ def calc_reward_fn(self):

# reward function: Importance-Weighted-Accuracy (IW-ACC) (tau, f)
self.reward = 0.
for i in range(self.exp4p.t):
self.reward += self.W *\
(clf.predict(self.dataset.data[self.queried_hist[i]][0]) == \
for i in range(len(self.queried_hist)):
self.reward += self.W[i] *\
(clf.predict(self.dataset.data[self.queried_hist[i]][0])[0] == \
self.dataset.data[self.queried_hist[i]][1])
self.reward /= (self.dataset.len_labeled() + self.dataset.len_unlabeled())
self.reward /= self.T
Expand All @@ -66,11 +74,20 @@ def make_query(self):
unlabeled_entry_ids, X_pool = zip(*dataset.get_unlabeled_entries())

while self.budget_used < self.T:
# query vector on unlabeled instances
p = exp4p.next(self.reward)
ask_id = np.random.choice(np.arange(self.K), size=1, p=p)
# query vector on unlabeled instances
if self.reward == -1.:
p = self.exp4p.next(self.reward, None, None)
else:
p = self.exp4p.next(
self.reward,
self.queried_hist[-1],
self.dataset.data[self.queried_hist[-1]][1]
)
p = p/np.sum(p)
ask_idx = np.random.choice(np.arange(self.exp4p.K), size=1, p=p)[0]
ask_id = self.unlabeled_entry_ids[ask_idx]

self.W.append(1./p[ask_id])
self.W.append(1./p[ask_idx])
self.queried_hist.append(ask_id)
if ask_id in unlabeled_entry_ids:
self.budget_used += 1
Expand All @@ -85,11 +102,11 @@ def __init__(self, *args, **kwargs):
""" """
# QueryStrategy class object instances
self.experts = kwargs.pop('experts', None)
if experts is None:
if self.experts is None:
raise TypeError(
"__init__() missing required keyword-only argument: 'experts'"
)
elif not experts:
elif not self.experts:
raise ValueError("experts list is empty")

self.N = len(self.experts)
Expand All @@ -100,7 +117,7 @@ def __init__(self, *args, **kwargs):
# delta > 0
self.delta = kwargs.pop('delta', 1.0)

# n_arms
# n_arms --> n_unlabeled_data
self.K = kwargs.pop('K', None)
if not self.K:
raise TypeError(
Expand All @@ -110,35 +127,47 @@ def __init__(self, *args, **kwargs):
# p_min in [0, 1/n_arms]
self.pmin = kwargs.pop('pmin', np.sqrt(np.log(self.N) / self.K / self.T))

self.exp4p_gen = exp4p()
self.exp4p_gen = self.exp4p()

self.invert_id_idx = kwargs.pop('invert_id_idx')
if not self.invert_id_idx:
raise TypeError(
"__init__() missing required keyword-only argument: 'invert_id_idx'"
)
self.idx_id = kwargs.pop('idx_id')
if not self.idx_id:
raise TypeError(
"__init__() missing required keyword-only argument: 'idx_id'"
)

# Python3 compatibility
def __next__(self, reward):
def __next__(self, reward, ask_id, lbl):
""" """
return self.next(reward)
return self.next(reward ,ask_id, lbl)

def next(self, reward):
def next(self, reward, ask_id, lbl):
# The first run has no reward yet. TODO: accept reward == -1 only once and raise an exception otherwise
if reward == -1:
return self.exp4p_gen.next()
return next(self.exp4p_gen)
else:
# TODO: raise an exception if reward is not in [0, 1]
return self.exp4p_gen.send(reward)
return self.exp4p_gen.send((reward, ask_id, lbl))

def update_experts(qid, lbl):
def update_experts(self, qid, lbl):
for expert in self.experts:
expert.update(qid, lbl)

def exp4p(self):
#TODO probabilistic active learning algorithm
self.t = 0

rhat = np.zeros((self.K,))
yhat = np.zeros((self.N,))
vhat = np.zeros((self.N,))
while self.t < self.T:
advice = np.zeros((self.N, self.K))
for i, expert in enumerate(experts):
advice[i, :] = expert.make_query()
for i, expert in enumerate(self.experts):
advice[i][self.invert_id_idx[expert.make_query()]] = 1
W = np.sum(self.w)

# shape = (self.N, )
Expand All @@ -148,9 +177,9 @@ def exp4p(self):

reward, ask_id, lbl = yield p
self.update_experts(ask_id, lbl)
ask_idx = self.invert_id_idx[ask_id]

rhat[ask_id] = reward / p[ask_id]

rhat[ask_idx] = reward / p[ask_idx]
for i in range(self.N):
yhat[i] = np.dot(advice[i], rhat)
vhat[i] = np.sum(advice[i] / p)
Expand Down

0 comments on commit ef7dbd2

Please sign in to comment.