/
identica.rb
58 lines (49 loc) · 1.38 KB
/
identica.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/ruby
# Script bootstrap: put the script's own directory and its lib/ subdirectory
# on the load path, make the script directory the working directory, then
# load the libraries the rest of the file uses.
#
# The directory is expanded to an absolute path once, *before* the chdir.
# __FILE__ may be relative to the directory the script was launched from;
# deriving paths from it after the chdir (or leaving a relative entry in $:)
# would resolve against the wrong base.
script_dir = ::File.expand_path(::File.dirname(__FILE__))
$: << script_dir
$: << ::File.join(script_dir, 'lib')
::Dir.chdir(script_dir)
require 'htmlentities'
require 'util'
require 'xml_feed_polyglot'
require 'nokogiri'
require 'xapian_schema'
# Turns a markup fragment into plain text.
#
# The argument is converted with #to_s (so nil becomes ""), every substring
# matching /<[^<]*>/ is replaced by a single space, and the result is handed
# to HTMLEntities.decode_entities, whose return value is returned.
#
# NOTE(review): `[^<]*` also matches '>', so a match extends to the last '>'
# that precedes the next '<' (or the end of the string), not the first one.
def strip_tags(s)
  without_markup = s.to_s.gsub(/<[^<]*>/) { ' ' }
  HTMLEntities.decode_entities(without_markup)
end
# Main script body: fetch the identi.ca public timeline feed, add every entry
# that precedes the previously recorded entry id to the index object returned
# by xapian_schema, flush it, and record the id of the first entry processed
# in db/identica_last_id for the next run.

# The script changes into its own directory at startup, so the state file is
# resolved against the working directory. Re-deriving it from __FILE__ here
# would break when __FILE__ is a relative path from the launch directory.
last_id_path = File.expand_path('db/identica_last_id')

xa = xapian_schema
res = fetch('http://identi.ca/api/statuses/public_timeline.atom')[1]

# File.read closes the handle itself. A missing file is treated as "nothing
# seen yet"; surrounding whitespace is stripped so a trailing newline in the
# file cannot defeat the equality check below.
last_id = File.exist?(last_id_path) ? File.read(last_id_path).strip : ''
new_last_id = last_id

_meta, feed = xml_feed_polyglot(res.body)
feed.each do |item|
  # Stop at the entry recorded by the previous run.
  break if item[:id] == last_id

  # Only the first entry processed updates the marker; later iterations see
  # new_last_id != last_id and leave it alone.
  # NOTE(review): this presumes the feed is ordered newest first - confirm.
  new_last_id = item[:id] if new_last_id == last_id

  # Interpolation tolerates nil content where String#+ would raise.
  vcards = Nokogiri::parse("<span>#{item[:content]}</span>").search('.vcard')
  to = vcards.map do |el|
    url_el = el.at('.url')
    fn_el = el.at('.fn')
    # Skip vcards lacking either part instead of raising NoMethodError and
    # aborting the whole run before anything is flushed.
    next unless url_el && fn_el

    {
      :url => url_el.attributes['href'].to_s,
      :fn => strip_tags(fn_el.inner_html).strip
    }
  end.compact

  xa << {
    :content_full => item[:content],
    :content => strip_tags(item[:content]),
    :category => item[:category],
    :in_reply_to => item[:in_reply_to],
    :bookmark => item[:bookmark],
    :id => item[:id],
    :author => item[:author],
    :to => to,
    :published => item[:published],
    :source => item[:source],
  }
end

# Flush with a bounded retry: up to 15 attempts, 5 seconds apart, re-raising
# the last error once the limit is reached.
# NOTE(review): presumably this covers transient failures such as the index
# being held by another writer - confirm.
# Only StandardError is retried, so Interrupt and SystemExit still terminate
# the script immediately.
max_flush_attempts = 15
flush_attempts = 0
begin
  xa.flush
rescue StandardError
  flush_attempts += 1
  raise if flush_attempts >= max_flush_attempts
  sleep 5
  retry
end

# Persist the marker only after the flush has succeeded.
File.open(last_id_path, 'w') { |fh| fh.write new_last_id }