Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

expose bots hash from config

  • Loading branch information...
commit efe533167324a1d100e93925212612fd361edc31 1 parent b95e0b5
@themgt themgt authored
Showing with 56 additions and 49 deletions.
  1. +56 −49 lib/split/configuration.rb
View
105 lib/split/configuration.rb
@@ -1,52 +1,6 @@
module Split
class Configuration
- BOTS = {
- # Indexers
- "AdsBot-Google" => 'Google Adwords',
- 'Baidu' => 'Chinese search engine',
- 'Gigabot' => 'Gigabot spider',
- 'Googlebot' => 'Google spider',
- 'msnbot' => 'Microsoft bot',
- 'bingbot' => 'Microsoft bing bot',
- 'rogerbot' => 'SeoMoz spider',
- 'Slurp' => 'Yahoo spider',
- 'Sogou' => 'Chinese search engine',
- "spider" => 'generic web spider',
- 'WordPress' => 'WordPress spider',
- 'ZIBB' => 'ZIBB spider',
- 'YandexBot' => 'Yandex spider',
- # HTTP libraries
- 'Apache-HttpClient' => 'Java http library',
- 'AppEngine-Google' => 'Google App Engine',
- "curl" => 'curl unix CLI http client',
- 'ColdFusion' => 'ColdFusion http library',
- "EventMachine HttpClient" => 'Ruby http library',
- "Go http package" => 'Go http library',
- 'Java' => 'Generic Java http library',
- 'libwww-perl' => 'Perl client-server library loved by script kids',
- 'lwp-trivial' => 'Another Perl library loved by script kids',
- "Python-urllib" => 'Python http library',
- "PycURL" => 'Python http library',
- "Test Certificate Info" => 'C http library?',
- "Wget" => 'wget unix CLI http client',
- # URL expanders / previewers
- 'awe.sm' => 'Awe.sm URL expander',
- "bitlybot" => 'bit.ly bot',
- "facebookexternalhit" => 'facebook bot',
- 'LongURL' => 'URL expander service',
- 'Twitterbot' => 'Twitter URL expander',
- 'UnwindFetch' => 'Gnip URL expander',
- # Uptime monitoring
- 'check_http' => 'Nagios monitor',
- 'NewRelicPinger' => 'NewRelic monitor',
- 'Panopta' => 'Monitoring service',
- "Pingdom" => 'Pingdom monitoring',
- 'SiteUptime' => 'Site monitoring services',
- # ???
- "DigitalPersona Fingerprint Software" => 'HP Fingerprint scanner',
- "ShowyouBot" => 'Showyou iOS app spider',
- 'ZyBorg' => 'Zyborg? Hmmm....',
- }
+ attr_accessor :bots
attr_accessor :robot_regex
attr_accessor :ignore_ip_addresses
attr_accessor :db_failover
@@ -58,6 +12,56 @@ class Configuration
attr_accessor :persistence
attr_accessor :algorithm
+ def bots # Reader for the bot table: returns @bots, or builds and memoizes the default hash below when @bots is nil/false. Defined explicitly, so it replaces the reader generated by `attr_accessor :bots`; the generated writer (`bots=`) remains available.
+ @bots ||= { # keys are the names that escaped_bots escapes for use in robot_regex; values are human-readable descriptions
+ # Indexers
+ "AdsBot-Google" => 'Google Adwords',
+ 'Baidu' => 'Chinese search engine',
+ 'Gigabot' => 'Gigabot spider',
+ 'Googlebot' => 'Google spider',
+ 'msnbot' => 'Microsoft bot',
+ 'bingbot' => 'Microsoft bing bot',
+ 'rogerbot' => 'SeoMoz spider',
+ 'Slurp' => 'Yahoo spider',
+ 'Sogou' => 'Chinese search engine',
+ "spider" => 'generic web spider',
+ 'WordPress' => 'WordPress spider',
+ 'ZIBB' => 'ZIBB spider',
+ 'YandexBot' => 'Yandex spider',
+ # HTTP libraries
+ 'Apache-HttpClient' => 'Java http library',
+ 'AppEngine-Google' => 'Google App Engine',
+ "curl" => 'curl unix CLI http client',
+ 'ColdFusion' => 'ColdFusion http library',
+ "EventMachine HttpClient" => 'Ruby http library',
+ "Go http package" => 'Go http library',
+ 'Java' => 'Generic Java http library',
+ 'libwww-perl' => 'Perl client-server library loved by script kids',
+ 'lwp-trivial' => 'Another Perl library loved by script kids',
+ "Python-urllib" => 'Python http library',
+ "PycURL" => 'Python http library',
+ "Test Certificate Info" => 'C http library?',
+ "Wget" => 'wget unix CLI http client',
+ # URL expanders / previewers
+ 'awe.sm' => 'Awe.sm URL expander',
+ "bitlybot" => 'bit.ly bot',
+ "facebookexternalhit" => 'facebook bot',
+ 'LongURL' => 'URL expander service',
+ 'Twitterbot' => 'Twitter URL expander',
+ 'UnwindFetch' => 'Gnip URL expander',
+ # Uptime monitoring
+ 'check_http' => 'Nagios monitor',
+ 'NewRelicPinger' => 'NewRelic monitor',
+ 'Panopta' => 'Monitoring service',
+ "Pingdom" => 'Pingdom monitoring',
+ 'SiteUptime' => 'Site monitoring services',
+ # ???
+ "DigitalPersona Fingerprint Software" => 'HP Fingerprint scanner',
+ "ShowyouBot" => 'Showyou iOS app spider',
+ 'ZyBorg' => 'Zyborg? Hmmm....',
+ } # the hash is stored in @bots, so the same object is returned on subsequent calls
+ end
+
def disabled? # Predicate: the logical negation of `enabled` (true when `enabled` is nil or false).
!enabled
end
@@ -138,8 +142,11 @@ def normalize_alternatives(alternatives)
end
end
+ def robot_regex # Memoized reader: returns @robot_regex if already set (e.g. via the attr_accessor writer), otherwise builds it on first call from escaped_bots. Once stored it is not rebuilt, so later changes to bots do not alter it.
+ @robot_regex ||= /\b(?:#{escaped_bots.join('|')})\b|\A\W*\z/i # case-insensitive; matches any escaped bot key between word boundaries, or a string consisting solely of non-word characters (including the empty string)
+ end
+
def initialize
- @robot_regex = /\b(?:#{escaped_bots.join('|')})\b|\A\W*\z/i
@ignore_ip_addresses = []
@db_failover = false
@db_failover_on_db_error = proc{|error|} # e.g. use Rails logger here
@@ -160,7 +167,7 @@ def value_for(hash, key)
end
def escaped_bots # Returns an array of the bot-table keys, each passed through Regexp.escape so it can be embedded literally in a regex.
- BOTS.map { |key, _| Regexp.escape(key) }
+ bots.map { |key, _| Regexp.escape(key) } # the values (descriptions) are ignored
end
end
end

0 comments on commit efe5331

Please sign in to comment.
Something went wrong with that request. Please try again.