-
Notifications
You must be signed in to change notification settings - Fork 0
/
headlines.tcl
388 lines (386 loc) · 16.6 KB
/
headlines.tcl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
namespace eval headlines {
set ver 0.2.2
##############################################################################
# Copyright 2012 lee8oi@gmail.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# http://www.gnu.org/licenses/
#
# -----------------------------------------------------------------------
# headlines eggdrop script by lee8oi@gmail.com
# https://github.com/lee8oi/headlines/blob/master/headlines.tcl
#
# There's plenty of eggdrop news syndication scripts if you are looking for
# something automatic. This script is for retrieving news headlines(titles w/links)
# Right from the source. The source can be a feed url directly or it can be the
# name of one of the preconfigured feeds followed by the number of headlines you would
# like. The output is sent to your nick through the irc notice system keeping the
# public channels spam free. The direct approach to handling feeds allows the script
# manage more news sources and it keeps a small foot print in memory because nothing
# is stored, and nothing is running when nobody is reading the news. We only get the
# headlines we ask for, when we ask for them, nothing else.
#
# This script is currently written for utf-8 patched bots and assumes the users
# system is utf-8. It currenly supports RSS and Atom feeds.
#
# Note: Encoding issues are common. If you have problems please make sure your
# bot is patched for utf-8 as outlined here: http://eggwiki.org/Utf-8
#
# If you still have problems consider updating to the latest tcl8.6 and
# recompiling your bot. This script DOES WORK. Its just hard to detect & fix all the
# possible encoding issues up front. I'll work in the fixes as I go. Check the
# 'Custom Charsets' section in 'Configuration' for more information about
# setting charsets for specific feeds.
#
# Currently you can call the rss script from any channel the bot resides in or
# by using /msg or /query. The output is 'noticed' directly to you.
#
# Usage:
#
# Retrieve news:
# !news <feedname-or-url> ?how-many?
#
# List the available feeds
# !feeds
#
#
##############################################################################
# Configuration:
#
# Default number of headlines to show when ?how-many? is not specified.
variable numberOfheadlines 5
#
# Feeds
set feeds(google) "http://news.google.com/news?ned=us&topic=h&output=rss"
set feeds(linuxtoday) "http://feeds.feedburner.com/linuxtoday/linux?format=xml"
set feeds(linuxjournal) "http://feeds.feedburner.com/linuxjournalcom?format=xml"
set feeds(slashdot) "http://rss.slashdot.org/Slashdot/slashdotLinux"
set feeds(securitynow) "http://leoville.tv/podcasts/sn.xml"
set feeds(krotkie) "http://www.joemonster.org/backend.php?channel=krotkie"
set feeds(bfh-alerts) "http://www.battlefieldheroes.com/en/forum/syndication.php?fid=43&limit=5"
set feeds(bfh-main) "http://www.battlefieldheroes.com/en/forum/syndication.php?fid=94&type=atom1.0&limit=15"
set feeds(rususa) "http://www.rususa.com/tools/rss/feed.asp-rss-newsrus"
set feeds(google-china) "http://news.google.com/news?ned=cn&topic=po&output=rss"
set feeds(apple-japan) "http://rss.support.apple.com/ja_JP/"
set feeds(mageia-group) "http://identi.ca/api/statusnet/groups/timeline/16485.rss"
set feeds(lxer) "http://lxer.com/module/newswire/headlines.rss"
set feeds(yahoo) "http://news.yahoo.com/rss/"
set feeds(testing) "not.a.valid.url"
# ~Custom Charsets~
#
# Usage: set charset(feedname) "charset"
#
# Use this section to specify the charset for a specific feed to be converted from.
# This will convert the feed to unicode, from the specified charset, so that special
# characters & other languages can be properly displayed.
#
# The script attempts to automattically detect and resolve the charsets needed to
# display the feed but its hard to correctly resolve all charsets this way. Hence
# the addition of this section for specifying the charset manually.
#
# This section currently is only used by feeds listed as 'utf-8' but don't
# display correctly (mixed characters, japanese, etc). If script detects *"utf-8-plain"
# charset it will skip encoding as well as htmldecoding in order to display characters
# correctly in output.
#
# *utf-8-plain is NOT a real charset. Its simply a fake charset that tells the script
# to skip encoding AND htmldecode.
#
# For example, to set the charset for the japanese rss feed 'feedname':
# set charset(feedname) "utf-8-plain"
#
set charset(apple-japan) "utf-8-plain"
set charset(yahoo) "utf-8-plain"
#
#
# END OF FEED CONFIGURATION
##############################################################################
}
package require http
if {![catch {package require tls}]} { ::http::register https 443 ::tls::socket }
bind pub - !rss ::headlines::pub_news
bind pub - !atom ::headlines::pub_news
bind pub - !news ::headlines::pub_news
bind pub - !feeds ::headlines::grabflist
bind msg - !feeds ::headlines::grabflist
bind msg - !news ::headlines::msg_news
bind pub - !test ::headlines::pub_news
namespace eval headlines {
proc msg_news {nick userhost handle text} {
::headlines::grabnews $nick $text
}
proc pub_news {nick host user chan text} {
::headlines::grabnews $nick $text
}
proc grabflist {nick args} {
set result [::headlines::flist]
puthelp "notice $nick : Available feeds: $result"
}
proc flist {args} {
#:get list of feeds available:::::::::::::::::::::::::::::::::::::::::::
variable feeds; set result ""
foreach item [array names ::headlines::feeds] {
append result "$item "
}
return $result
}
proc grabnews {target text} {
set arr [split $text]
set feed [string tolower [lindex $arr 0]]
set numb [string tolower [lindex $arr 1]]
if {[string length $feed] >= 10 && [regexp {^(f|ht)tp(s|)://} $feed] && ![regexp {://([^/:]*:([^/]*@|\d+(/|$))|.*/\.)} $feed]} {
puthelp "notice $target : Url detected : $feed"
set url $feed
} elseif {[info exists ::headlines::feeds($feed)]} {
set url $::headlines::feeds($feed)
if {![regexp {^(f|ht)tp(s|)://} $url] || [regexp {://([^/:]*:([^/]*@|\d+(/|$))|.*/\.)} $url]} {
puthelp "notice $target : Feed Error: invalid url format specified for $feed feed ($url)"
return
}
} else {
set result [::headlines::flist]
set available "Available feeds: $result"
if {$feed == ""} {
puthelp "notice $target : Usage: !news <feed-or-url> ?num? ~~ $available"
return
} else {
puthelp "notice $target : Invalid feed ~~ $available"
return
}
}
if (![string is integer -strict $numb]) {
set numb [set ::headlines::numberOfheadlines]
}
set data [::headlines::fetch $feed $url]
regexp {(?i)<rss.*>(.*?)</rss>} $data rssdata none
regexp {(?i)<feed.*>(.*?)</feed>} $data atomdata none
regexp {(?i)<rdf:RDF.*>(.*?)</rdf:RDF>} $data rdfdata none
if {([info exists rssdata]) || ([info exists rdfdata])} {
if {[info exists rssdata]} {
set data $rssdata
} elseif {[info exists rdfdata]} {
set data $rdfdata
}
regsub -all {(?i)<items.*?>.*?</items>} $data {} data
set count 1
foreach {foo item} [regexp -all -inline {(?i)<item.*?>(.*?)</item>} $data] {
set item [string map {"<![CDATA[" "" "]]>" ""} $item]
regexp {<title.*?>(.*?)</title>} $item subt title
regexp {<link.*?>(.*?)</link} $item subl link
if {![info exists title]} {set title "(none)"} {set title [unhtml [join [split $title]]]}
if {![info exists link]} {set link "(none)"} {set link [unhtml [join [split $link]]]}
set tinyurl [::headlines::tinyurl $link]
puthelp "notice $target : $title ($tinyurl)"
if {($count == $numb)} {
return
} else {
incr count
}
}
} elseif {[info exists atomdata]} {
set count 1
foreach {foo item} [regexp -all -inline {(?i)<entry.*?>(.*?)</entry>} $atomdata] {
set item [string map {"<![CDATA[" "" "]]>" ""} $item]
regexp {<title.*?>(.*?)</title>} $item subt title
regexp {<link.*?href=\"(.*?)\"} $item sub1 link
if {![info exists title]} {set title "(none)"} {set title [unhtml [join [split $title]]]}
if {![info exists link]} {set link "(none)"} {set link [unhtml [join [split $link]]]}
set tinyurl [::headlines::tinyurl $link]
puthelp "notice $target : $feed $title ($tinyurl)"
if {($count == $numb)} {
return
} else {
incr count
}
}
} else {
puthelp "notice $target : No news data found."
}
}
proc fetch {feed {url ""}} {
set ua "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5"
set http [::http::config -useragent $ua]
catch {set http [::http::geturl $url -timeout 60000]} error
if {[info exists http]} {
if { [::http::status $http] == "timeout" } {
return 0
}
upvar #0 $http state
array set meta $state(meta)
set url $state(url)
set data [::http::data $http]
#:handle redirects::::::::::::::::::::::::::::::::::::::::::::::
foreach {name value} $state(meta) {
if {[regexp -nocase ^location$ $name]} {
set mapvar [list " " "%20"]
::http::cleanup $http
catch {set http [::http::geturl $value -timeout 60000]} error
if {![string match -nocase "::http::*" $error]} {
puthelp "notice $nick : http error: [string totitle $error] \( $value \)"
return
}
if {![string equal -nocase [::http::status $http] "ok"]} {
puthelp "notice $nick : http status: [::http::status $http]"
return
}
set url [string map {" " "%20"} $value]
upvar #0 $http state
if {[incr r] > 10} { puthelp "notice $nick : redirect error (>10 too deep) \( $url \)" ; return 0}
set data [::http::data $http]
}
}
::http::cleanup $http
set html $data
if {[regexp -nocase {"Content-Type" content=".*?; charset=(.*?)".*?>} $html - char]} {
set char [string trim [string trim $char "\"' /"] {;}]
regexp {^(.*?)"} $char - char
set mset $char
if {![string length $char]} { set char "None Given" ; set char2 "None Given" }
set char2 [string tolower [string map -nocase {"UTF-" "utf-" "iso-" "iso" "windows-" "cp" "shift_jis" "shiftjis"} $char]]
} else {
if {[regexp -nocase {<meta content=".*?; charset=(.*?)".*?>} $html - char]} {
set char [string trim $char "\"' /"]
regexp {^(.*?)"} $char - char
set mset $char
if {![string length $char]} { set char "None Given" ; set char2 "None Given" }
set char2 [string tolower [string map -nocase {"UTF-" "utf-" "iso-" "iso" "windows-" "cp" "shift_jis" "shiftjis"} $char]]
} elseif {[regexp -nocase {encoding="(.*?)"} $html - char]} {
set mset $char ; set char [string trim $char]
if {![string length $char]} { set char "None Given" ; set char2 "None Given" }
set char2 [string tolower [string map -nocase {"UTF-" "utf-" "iso-" "iso" "windows-" "cp" "shift_jis" "shiftjis"} $char]]
} else {
set char "None Given" ; set char2 "None Given" ; set mset "None Given"
}
}
if {[info exists state(charset)]} {
set char3 [string tolower [string map -nocase {"UTF-" "utf-" "iso-" "iso" "windows-" "cp" "shift_jis" "shiftjis"} $state(charset)]]
} else {
set char3 "utf-8"
}
set char [string trim $char2 {;}]
if {($char2 == "None Given")} {
set char $char3
} else {
set char $char2
}
variable ::headlines::charset
if {[info exists charset($feed)]} {
set char $charset($feed)
}
switch $char {
"euc-jp" {
#do nothing.
}
"utf-8-plain" {
#do nothing.
}
default {
if {[string equal -nocase "utf-8" [encoding system]]} {
set html [encoding convertfrom $char $html]
set data [htmldecode $html]
}
}
}
return $data
}
}
proc unhtml {{data ""}} {
regsub -all "(?:<b>|</b>|<b />|<em>|</em>|<strong>|</strong>)" $data"\002" data
regsub -all "(?:<u>|</u>|<u />)" $data "\037" data
regsub -all "(?:<br>|<br/>|<br />)" $data ". " data
regsub -all "<script.*?>.*?</script>" $data "" data
regsub -all "<style.*?>.*?</style>" $data "" data
regsub -all -- {<.*?>} $data " " data
while {[string match "* *" $data]} { regsub -all " " $data " " data }
return [string trim $data]
}
proc tinyurl {url} {
set ua "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5"
set http [::http::config -useragent $ua -urlencoding "utf-8"]
set query "http://tinyurl.com/api-create.php?[http::formatQuery url $url]"
set token [http::geturl $query -timeout 3000]
upvar #0 $token state
if {[string length $state(body)]} {
set result [string map {"\n" ""} $state(body)]
}
if {($result == "Error")} {
set result [::headlines::tinyurl $url]
}
return $result
}
proc htmldecode {{data ""}} {
if {![string match *&* $data]} {return $data}
set escapes {
\xa0 ¡ \xa1 ¢ \xa2 £ \xa3 ¤ \xa4
¥ \xa5 ¦ \xa6 § \xa7 ¨ \xa8 © \xa9
ª \xaa « \xab ¬ \xac ­ \xad ® \xae
¯ \xaf ° \xb0 ± \xb1 ² \xb2 ³ \xb3
´ \xb4 µ \xb5 ¶ \xb6 · \xb7 ¸ \xb8
¹ \xb9 º \xba » \xbb ¼ \xbc ½ \xbd
¾ \xbe ¿ \xbf À \xc0 Á \xc1 Â \xc2
à \xc3 Ä \xc4 Å \xc5 Æ \xc6 Ç \xc7
È \xc8 É \xc9 Ê \xca Ë \xcb Ì \xcc
Í \xcd Î \xce Ï \xcf Ð \xd0 Ñ \xd1
Ò \xd2 Ó \xd3 Ô \xd4 Õ \xd5 Ö \xd6
× \xd7 Ø \xd8 Ù \xd9 Ú \xda Û \xdb
Ü \xdc Ý \xdd Þ \xde ß \xdf à \xe0
á \xe1 â \xe2 ã \xe3 ä \xe4 å \xe5
æ \xe6 ç \xe7 è \xe8 é \xe9 ê \xea
ë \xeb ì \xec í \xed î \xee ï \xef
ð \xf0 ñ \xf1 ò \xf2 ó \xf3 ô \xf4
õ \xf5 ö \xf6 ÷ \xf7 ø \xf8 ù \xf9
ú \xfa û \xfb ü \xfc ý \xfd þ \xfe
ÿ \xff ƒ \u192 Α \u391 Β \u392 Γ \u393 Δ \u394
Ε \u395 Ζ \u396 Η \u397 Θ \u398 Ι \u399
Κ \u39A Λ \u39B Μ \u39C Ν \u39D Ξ \u39E
Ο \u39F Π \u3A0 Ρ \u3A1 Σ \u3A3 Τ \u3A4
Υ \u3A5 Φ \u3A6 Χ \u3A7 Ψ \u3A8 Ω \u3A9
α \u3B1 β \u3B2 γ \u3B3 δ \u3B4 ε \u3B5
ζ \u3B6 η \u3B7 θ \u3B8 ι \u3B9 κ \u3BA
λ \u3BB μ \u3BC ν \u3BD ξ \u3BE ο \u3BF
π \u3C0 ρ \u3C1 ς \u3C2 σ \u3C3 τ \u3C4
υ \u3C5 φ \u3C6 χ \u3C7 ψ \u3C8 ω \u3C9
ϑ \u3D1 ϒ \u3D2 ϖ \u3D6 • \u2022
… \u2026 ′ \u2032 ″ \u2033 ‾ \u203E
⁄ \u2044 ℘ \u2118 ℑ \u2111 ℜ \u211C
™ \u2122 ℵ \u2135 ← \u2190 ↑ \u2191
→ \u2192 ↓ \u2193 ↔ \u2194 ↵ \u21B5
⇐ \u21D0 ⇑ \u21D1 ⇒ \u21D2 ⇓ \u21D3 ⇔ \u21D4
∀ \u2200 ∂ \u2202 ∃ \u2203 ∅ \u2205
∇ \u2207 ∈ \u2208 ∉ \u2209 ∋ \u220B ∏ \u220F
∑ \u2211 − \u2212 ∗ \u2217 √ \u221A
∝ \u221D ∞ \u221E ∠ \u2220 ∧ \u2227 ∨ \u2228
∩ \u2229 ∪ \u222A ∫ \u222B ∴ \u2234 ∼ \u223C
≅ \u2245 ≈ \u2248 ≠ \u2260 ≡ \u2261 ≤ \u2264
≥ \u2265 ⊂ \u2282 ⊃ \u2283 ⊄ \u2284 ⊆ \u2286
⊇ \u2287 ⊕ \u2295 ⊗ \u2297 ⊥ \u22A5
⋅ \u22C5 ⌈ \u2308 ⌉ \u2309 ⌊ \u230A
⌋ \u230B ⟨ \u2329 ⟩ \u232A ◊ \u25CA
♠ \u2660 ♣ \u2663 ♥ \u2665 ♦ \u2666
" \x22 & \x26 < \x3C > \x3E O&Elig; \u152 œ \u153
Š \u160 š \u161 Ÿ \u178 ˆ \u2C6
˜ \u2DC   \u2002   \u2003   \u2009
‌ \u200C ‍ \u200D ‎ \u200E ‏ \u200F – \u2013
— \u2014 ‘ \u2018 ’ \u2019 ‚ \u201A
“ \u201C ” \u201D „ \u201E † \u2020
‡ \u2021 ‰ \u2030 ‹ \u2039 › \u203A
€ \u20AC ' \u0027 ‎ "" ‏ "" ‬ "" ‭ ""
‮ "" — \u2014
};
set data [string map [list "\]" "\\\]" "\[" "\\\[" "\$" "\\\$" "\\" "\\\\"] [string map $escapes $data]]
regsub -all -- {&#([[:digit:]]{1,5});} $data {[format %c [string trimleft "\1" "0"]]} data
regsub -all -- {&#x([[:xdigit:]]{1,4});} $data {[format %c [scan "\1" %x]]} data
regsub -all -- {\\x([[:xdigit:]]{1,2})} $data {[format %c [scan "\1" %x]]} data
set data [subst "$data"]
return $data
}
}
putlog "Headlines $::headlines::ver loaded"