Skip to content

Commit

Permalink
提取 feature function
Browse files Browse the repository at this point in the history
  • Loading branch information
laike9m committed Mar 22, 2016
1 parent f8da362 commit cdf62e9
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 7 deletions.
41 changes: 35 additions & 6 deletions infer/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,8 @@ def gen_edges(self):
# 1. 多个tail uid 相同,type 不同,直接 |
# 2. 把uid 相同的 commenters 和 collectors 合并到 upvoters 里面
# 3. 能添加 TimeRange 的添加 TimeRange
for head_index in range(len(self.upvoters) + 1):
head = self.affecters[head_index]
for tail_index in range(head_index+1, len(self.affecters)):
tail = self.affecters[tail_index]
for i, head in enumerate(self.affecters[:len(self.upvoters) + 1]):
for tail in self.affecters[i+1:]:
if self.has_follow_relation(head, tail):
self.cand_edges.append(FollowEdge(head, tail))

Expand All @@ -132,9 +130,40 @@ def gen_features(self):
生成 features
:return: n_samples * n_features vector
"""
pass
return [
[self.feature_head_rank(edge),
*self.feature_node_type(edge),
self.feature_relative_order(edge)] for edge in self.cand_edges
]

def feature_head_rank(self, edge: FollowEdge) -> int:
    """
    Return the 0-based rank of ``edge.head`` among the candidates of ``edge.tail``.

    Walks ``self.cand_edges`` in order and counts the candidate edges that
    share the same tail object (identity comparison) and come before the
    one whose head is ``edge.head``.

    :param edge: the edge whose head should be ranked
    :return: number of same-tail candidates preceding the matching head;
        falls through (implicitly returning ``None``) when no candidate
        with this head/tail pair exists in ``self.cand_edges``
    """
    wanted_head = edge.head
    wanted_tail = edge.tail
    position = 0
    for candidate in self.cand_edges:
        # Only candidates pointing at the same tail take part in the ranking.
        if candidate.tail is not wanted_tail:
            continue
        if candidate.head is wanted_head:
            return position
        position += 1

def feature_node_type(self, edge: FollowEdge) -> list:
    """
    Build the node-type flags for an edge.

    Per the original author's note, the head is always either an answer or
    an upvote, so a single ``is_answer`` flag is enough to describe it.

    :param edge: the edge whose endpoints are classified
    :return:
        [head_is_answer, tail_is_upvote, tail_is_comment, tail_is_collect]
    """
    source, target = edge.head, edge.tail
    flags = [is_answer(source)]
    # The tail is checked against each action type, in the documented order.
    for tail_check in (is_upvote, is_comment, is_collect):
        flags.append(tail_check(target))
    return flags

def feature_relative_order(self, edge: FollowEdge):
def feature_relative_order(self, edge: FollowEdge) -> int:
"""
判断 edge.head, edge.tail 相对顺序
head 只可能 answer or upvote
Expand Down
23 changes: 22 additions & 1 deletion infer/iutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,11 +254,32 @@ def trans_before_save(tree_data):
link['reltype'] = str(link['reltype'])
return tree_data


def is_upvote(action: UserAction):
    """Return True when the upvote bit (mask 0b001000) is set in ``action.acttype``."""
    return bool(action.acttype & 0b001000)


def is_comment(action: UserAction):
    """Return True when the comment bit (mask 0b010000) is set in ``action.acttype``."""
    return bool(action.acttype & 0b010000)


def is_collect(action: UserAction):
    """Return True when the collect bit (mask 0b100000) is set in ``action.acttype``."""
    return bool(action.acttype & 0b100000)


def is_answer(action: UserAction):
    """Return True when the answer bit (mask 0b000100) is set in ``action.acttype``."""
    return bool(action.acttype & 0b000100)


# Names exported by a star-import of this module.
# Every entry must end with a comma: two adjacent string literals are
# silently concatenated into a single (non-existent) name.
__all__ = [
    'a_col', 'q_col', 'get_time_string', 'now_string',
    'get_datetime_day_month_year', 'get_datetime_hour_min_sec',
    'get_datetime_full_string', 'validate_config', 'validate_cookie',
    'dict_equal', 'is_a_col', 'is_q_col', 'config_smtp_handler', 'interpolate',
    'get_action_type', 'MyEncoder', 'a_to_q', 'q_to_a', 'Transform',
    'trans_before_save', 'is_upvote', 'is_comment', 'is_collect', 'is_answer',
]

0 comments on commit cdf62e9

Please sign in to comment.