Skip to content

Commit

Permalink
[ci skip] Refactor proxy validation
Browse files Browse the repository at this point in the history
  • Loading branch information
nbulaj committed Aug 23, 2017
1 parent 069d088 commit 0d4460b
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 41 deletions.
9 changes: 8 additions & 1 deletion bin/proxy_fetcher
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ require 'optparse'
require 'proxy_fetcher'

options = {
filters: {},
validate: true,
json: false
}
Expand Down Expand Up @@ -33,6 +34,12 @@ OptionParser.new do |opts|
options[:validate] = false
end

opts.on('-f', '--filters={}', String, '# Filters for proxy provider in JSON format') do |filters|
require 'json'

options[:filters] = JSON.parse(filters)
end

opts.on('-t', '--timeout=SECONDS', Integer, '# Connection timeout in seconds') do |value|
options[:timeout] = value
end
Expand All @@ -45,7 +52,7 @@ end.parse!
ProxyFetcher.config.provider = options[:provider] if options[:provider]
ProxyFetcher.config.connection_timeout = options[:timeout] if options[:timeout]

manager = ProxyFetcher::Manager.new
manager = ProxyFetcher::Manager.new(filters: options[:filters])
manager.validate! if options[:validate]

if options[:json]
Expand Down
34 changes: 18 additions & 16 deletions lib/proxy_fetcher.rb
Original file line number Diff line number Diff line change
@@ -1,25 +1,27 @@
require 'uri'
require 'net/http'
require 'openssl'
require 'net/https'
require 'nokogiri'
require 'ostruct'
require 'thread'

require 'proxy_fetcher/configuration'
require 'proxy_fetcher/proxy'
require 'proxy_fetcher/manager'
require File.dirname(__FILE__) + '/proxy_fetcher/configuration'
require File.dirname(__FILE__) + '/proxy_fetcher/proxy'
require File.dirname(__FILE__) + '/proxy_fetcher/manager'

require 'proxy_fetcher/utils/http_client'
require 'proxy_fetcher/utils/html'

require 'proxy_fetcher/providers/base'
require 'proxy_fetcher/providers/free_proxy_list'
require 'proxy_fetcher/providers/free_proxy_list_ssl'
require 'proxy_fetcher/providers/hide_my_name'
require 'proxy_fetcher/providers/proxy_docker'
require 'proxy_fetcher/providers/proxy_list'
require 'proxy_fetcher/providers/xroxy'
require File.dirname(__FILE__) + '/proxy_fetcher/utils/http_client'
require File.dirname(__FILE__) + '/proxy_fetcher/utils/html'
require File.dirname(__FILE__) + '/proxy_fetcher/utils/proxy_validator'

module ProxyFetcher
module Providers
require File.dirname(__FILE__) + '/proxy_fetcher/providers/base'
require File.dirname(__FILE__) + '/proxy_fetcher/providers/free_proxy_list'
require File.dirname(__FILE__) + '/proxy_fetcher/providers/free_proxy_list_ssl'
require File.dirname(__FILE__) + '/proxy_fetcher/providers/hide_my_name'
require File.dirname(__FILE__) + '/proxy_fetcher/providers/proxy_docker'
require File.dirname(__FILE__) + '/proxy_fetcher/providers/proxy_list'
require File.dirname(__FILE__) + '/proxy_fetcher/providers/xroxy'
end

class << self
def config
@config ||= ProxyFetcher::Configuration.new
Expand Down
23 changes: 17 additions & 6 deletions lib/proxy_fetcher/configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ module ProxyFetcher
class Configuration
UnknownProvider = Class.new(StandardError)
RegisteredProvider = Class.new(StandardError)
WrongHttpClient = Class.new(StandardError)
WrongCustomClass = Class.new(StandardError)

attr_accessor :http_client, :connection_timeout
attr_accessor :provider
attr_accessor :provider, :connection_timeout
attr_accessor :http_client, :proxy_validator, :logger

class << self
def providers
Expand All @@ -26,6 +26,7 @@ def initialize
# Restores every configuration option to its default value:
# the built-in HTTP client and proxy validator classes, a 3 second
# connection timeout and the :hide_my_name provider.
def reset!
  @http_client = HTTPClient
  @proxy_validator = ProxyValidator
  @connection_timeout = 3

  # currently default one; goes through the provider= writer
  self.provider = :hide_my_name
end
Expand All @@ -37,11 +38,21 @@ def provider=(name)
end

def http_client=(klass)
unless klass.respond_to?(:fetch, :connectable?)
raise WrongHttpClient, "#{klass} must respond to #fetch and #connectable? class methods!"
@http_client = setup_custom_class(klass, required_methods: :fetch)
end

# Installs a custom proxy validator class. The class is first passed through
# setup_custom_class, which requires it to respond to the +connectable?+
# class method.
def proxy_validator=(klass)
  validator = setup_custom_class(klass, required_methods: :connectable?)
  @proxy_validator = validator
end

private

# Validates that a user-supplied class implements the required class-level
# interface and returns it unchanged.
#
# @param klass [Object] candidate (normally a Class) to validate
# @param required_methods [Symbol, Array<Symbol>] class method name(s) that
#   +klass+ must respond to
# @return [Object] +klass+ itself when all required methods are present
# @raise [WrongCustomClass] if +klass+ lacks any of the required methods
def setup_custom_class(klass, required_methods: [])
  # Check each name on its own: Object#respond_to? accepts a single method
  # name (its second argument is the include_all flag), so splatting a list
  # would only ever verify the first entry and would raise ArgumentError
  # for an empty list.
  methods_supported = Array(required_methods).all? { |name| klass.respond_to?(name) }

  unless methods_supported
    raise WrongCustomClass, "#{klass} must respond to [#{Array(required_methods).join(', ')}] class methods!"
  end

  # No instance variable is assigned here: each setter stores the returned
  # class itself, so this helper can be shared by every custom-class option.
  klass
end
end
end
18 changes: 15 additions & 3 deletions lib/proxy_fetcher/manager.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,21 @@ def get!

alias pop! get!

# Clean current proxy list from dead proxies (doesn't respond by timeout)
def cleanup!
proxies.keep_if(&:connectable?)
# Clean the current proxy list of dead proxies (those that don't respond within the timeout)
# Proxies are checked concurrently, +pool_size+ threads at a time; every
# proxy whose +connectable?+ check is falsy is deleted from the list.
#
# @param pool_size [Integer] number of proxies checked in parallel per batch
# @return the @proxies instance variable after the dead entries are removed
def cleanup!(pool_size = 10)
  guard = Mutex.new

  # Iterate over a copy so deletions do not disturb the batching.
  proxies.dup.each_slice(pool_size) do |batch|
    workers = batch.map do |candidate|
      Thread.new(candidate, proxies) do |proxy, list|
        next if proxy.connectable?

        # Deletions from the shared list are serialized through the mutex.
        guard.synchronize { list.delete(proxy) }
      end
    end

    # Finish the whole batch before starting the next one.
    workers.each(&:join)
  end

  @proxies
end

alias validate! cleanup!
Expand Down
15 changes: 0 additions & 15 deletions lib/proxy_fetcher/utils/http_client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,6 @@ def fetch
response.body
end

# Reports whether a HEAD request for '/' can be completed through @http
# within the configured connection timeout.
#
# @return [Boolean] true when the request yields a response; false when it
#   does not or when any StandardError is raised
def connectable?
  timeout = ProxyFetcher.config.connection_timeout
  @http.open_timeout = timeout
  @http.read_timeout = timeout

  @http.start { |connection| connection.request_head('/') } ? true : false
rescue StandardError
  false
end

# Tells whether the scheme of @uri is "https", ignoring letter case.
def https?
  scheme = @uri.scheme
  scheme.casecmp('https').zero?
end
Expand All @@ -37,10 +26,6 @@ class << self
# Class-level shortcut: builds a client for +url+ and returns the result of
# its +fetch+ call.
def fetch(url)
  client = new(url)
  client.fetch
end

# Class-level shortcut: builds a client for +url+ and returns the result of
# its +connectable?+ check.
def connectable?(url)
  client = new(url)
  client.connectable?
end
end
end
end
32 changes: 32 additions & 0 deletions lib/proxy_fetcher/utils/proxy_validator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
module ProxyFetcher
  # Checks whether a proxy server is usable by sending a HEAD request for
  # URL_TO_CHECK through it.
  class ProxyValidator
    URL_TO_CHECK = 'https://google.com'.freeze

    # Convenience wrapper: builds a validator for the given proxy and runs
    # the check.
    #
    # @param proxy_addr [String] proxy host
    # @param proxy_port [#to_i] proxy port
    # @return [Boolean] see #connectable?
    def self.connectable?(proxy_addr, proxy_port)
      new(proxy_addr, proxy_port).connectable?
    end

    # Prepares a Net::HTTP client that targets URL_TO_CHECK via the proxy.
    #
    # @param proxy_addr [String] proxy host
    # @param proxy_port [#to_i] proxy port, converted with +to_i+
    def initialize(proxy_addr, proxy_port)
      target = URI.parse(URL_TO_CHECK)
      @http = Net::HTTP.new(target.host, target.port, proxy_addr, proxy_port.to_i)

      if target.scheme.casecmp('https').zero?
        @http.use_ssl = true
        # Peer certificate verification is switched off for the check.
        @http.verify_mode = OpenSSL::SSL::VERIFY_NONE
      end
    end

    # Performs the check using ProxyFetcher.config.connection_timeout for
    # both the open and the read timeout.
    #
    # @return [Boolean] true when the HEAD request yields a response; false
    #   when it does not or when any StandardError is raised
    def connectable?
      timeout = ProxyFetcher.config.connection_timeout
      @http.open_timeout = timeout
      @http.read_timeout = timeout

      @http.start { |connection| connection.request_head('/') } ? true : false
    rescue StandardError
      false
    end
  end
end

0 comments on commit 0d4460b

Please sign in to comment.