/
bookmarks.rb
89 lines (76 loc) · 2.2 KB
/
bookmarks.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# encoding: UTF-8
# Collects the Hatena Bookmark entries that point at one user's Hatena Diary.
#
# The bookmark count is requested through the XML-RPC endpoint, the paginated
# bookmark-list pages are downloaded and scraped with Nokogiri, and each
# bookmark is turned into a Hash with the keys :url, :title, :marker, :tags,
# :note and :time.
class HateDa::Bookmarks
  # @param username [String] user name interpolated into the diary URL
  # @param total [Integer, nil] already-known number of bookmarks; when nil
  #   it is looked up lazily by #total
  def initialize(username, total=nil)
    @username = username
    @total = total # total number of bookmarks
    @dataset = nil
  end

  # Memoized list of bookmark hashes, newest first.
  #
  # The page URLs are only built (and #total only consulted) when nothing is
  # cached yet. A nil total (failed lookup) is treated as 0, so page 0 is
  # still requested instead of raising NoMethodError.
  #
  # @return [Array<Hash>, nil] nil when the download failed; a later call
  #   then tries again because nil is not memoized
  def dataset
    @dataset ||= begin
      last_page = [total.to_i / PER_PAGE(), MAX_PAGES()].min
      urls = (0..last_page).map { |page| URL(page) }
      get_dataset(urls)
    end
  end

  # Memoized number of bookmarks for +url+ (defaults to the diary URL).
  #
  # @return [Integer, nil] whatever the XML-RPC call returned, or nil when
  #   the lookup failed
  def total(url=HOST(:diary))
    @total ||= get_total(url)
  end

  # Drops the cached dataset and total so that both are fetched again.
  def clear
    @dataset, @total = nil, nil
  end

  # Downloads every path, parses the pages and merges the bookmarks.
  #
  # @param paths [String, Array<String>] one or more locations to open
  # @return [Array<Hash>, nil] unique bookmarks sorted by :time, newest
  #   first; nil after an error has been reported on STDERR
  def get_dataset(paths)
    pages = Array(paths).thread_with { |path| open_page(path) }
    pages.flat_map { |page| parse(page) }
         .uniq.sort_by { |h| h[:time] }.reverse
  rescue OpenURI::HTTPError => e
    STDERR.puts "HTTP Access Error:#{e}"
    nil
  rescue StandardError => e
    # StandardError rather than Exception: Interrupt, SystemExit and friends
    # must keep propagating instead of being printed and swallowed.
    STDERR.puts e
    nil
  end

  # Groups bookmarks by +key+ and orders the groups by size, largest first.
  #
  # @param key [Symbol] bookmark hash key to group on
  # @param top [Integer, nil] keep only this many groups when given
  # @param dataset [Array<Hash>, nil] bookmarks to group; defaults to the
  #   memoized #dataset so the method also works before anything was loaded
  # @return [Hash] group value => bookmarks; empty when there is no dataset
  def group_by_top(key, top=nil, dataset=self.dataset)
    grouped = (dataset || []).group_by { |h| h[key] }.sort_by { |_, v| -v.size }
    grouped = grouped.take(top) if top
    Hash[grouped]
  end

  private

  # Asks the XML-RPC endpoint for the bookmark count of +url+.
  # Failures are reported on STDERR and yield nil.
  def get_total(url)
    client = XMLRPC::Client.new2(HOST(:xmlrpc))
    client.call("bookmark.getTotalCount", url)
  rescue => e
    STDERR.puts "Fail to get Total number of Bookmarks: #{e}"
    nil
  end

  # Opens +path+ for reading. URI.open is preferred when open-uri provides
  # it, because Kernel#open stopped accepting URLs in Ruby 3.0; otherwise the
  # plain open call is used as before.
  def open_page(path)
    if defined?(URI) && URI.respond_to?(:open)
      URI.open(path)
    else
      open(path)
    end
  end

  # Endpoint lookup. Kept as a method because :diary depends on @username.
  def HOST(target)
    { bmlist: "http://b.hatena.ne.jp/bookmarklist",
      diary: "http://d.hatena.ne.jp/#{@username}",
      xmlrpc: "http://b.hatena.ne.jp/xmlrpc" }[target]
  end

  # Bookmark-list URL for +page+: the diary URL is re-encoded to EUC-JP and
  # CGI-escaped into +url+, and +of+ is set to page * PER_PAGE.
  def URL(page=0)
    url = CGI.escape(HOST(:diary).encode("EUC-JP"))
    "%s?url=%s&of=%s" % [HOST(:bmlist), url, page * PER_PAGE()]
  end

  # Step of the +of+ query value between consecutive pages.
  def PER_PAGE
    20
  end

  # Upper bound for the highest page index that is requested.
  def MAX_PAGES
    200
  end

  # Scrapes one bookmark-list page.
  #
  # @param html [#close, String] opened page (closed afterwards when it
  #   responds to #close) or raw HTML
  # @return [Array<Hash>] one hash per entry
  def parse(html)
    entries = Nokogiri::HTML(html).search(CSS(:entry))
    entries.map do |entry|
      link = entry.at(CSS(:site)).attributes
      href, title = %w(href title).map { |attr| link[attr].value }
      comment = entry.at(CSS(:comment))
      marker, tags, note, time = %w(username tags comment timestamp).map do |css_class|
        # A missing comment block or field yields '' (explicit nil checks
        # instead of a catch-all inline rescue).
        node = comment && comment.at(".#{css_class}")
        node ? node.content : ''
      end
      # NOTE(review): Time.parse is still called on an empty timestamp and
      # presumably raises ArgumentError there, as before - confirm whether
      # such entries should be skipped instead.
      { url: href, title: title, marker: marker,
        tags: tags.split(','), note: note, time: Time.parse(time) }
    end
  ensure
    html.close if html.respond_to?(:close)
  end

  # CSS selectors used by #parse.
  def CSS(at)
    { entry: '.bookmarklist .entry-body',
      site: 'a.entry-link',
      comment: 'ul.comment>li' }[at]
  end
end