diff --git a/README.rdoc b/README.rdoc index 81da480..9a82cdc 100644 --- a/README.rdoc +++ b/README.rdoc @@ -80,7 +80,7 @@ or in the more classic way in which you can create an Esearchy objetc and work o domain.save_to_file "~/emails.txt" We now also have a LinkedIn search which looks for Names in the site. With those names it -* creates emails based on those emails. +* creates emails based on those names. * searches Google and Yahoo for emails related to those people. ESearchy.create "domain.com" do |d| @@ -116,7 +116,7 @@ Not short of that now, we also have the possibility of choosing between a Librar == INSTALL: * > sudo gem sources -a http://gems.github.com (If you do not have the repository) -* > sudo gem install freedomcoder-esearchy +* > sudo gem install FreedomCoder-esearchy == THANKS: diff --git a/TODO b/TODO index ad80a87..ce3cb9f 100644 --- a/TODO +++ b/TODO @@ -2,8 +2,6 @@ TO DO ----- GLOBAL: -- IMPORTANT: Add UTF-16 support in Ruby 1.9 :) It would be very interesting to search for emails in that way. - - [COMPLETED] Implement a better way to read the AppID keys for yahoo and Bing. - [COMPLETED] Make it possible to suppy the total of desires hits. - [COMPLETED] Implement a more open regex that will cover cases such as @@ -12,19 +10,17 @@ GLOBAL: - user at domain dot com - [BUG] It returns some strings that are not emails. Checking regext to fine-tune it to avoid this false pasitivs - - [COMPLETED] Add LinkedIn Support. - Fetch results into Yahoo People to obtain possible emails accounts. [SOLVED][BUG] - This will return the users' vcard. Need to find some way to fix this. - -- Add other social network sites ( I need to research on this) - [COMPLETED] Added BugMeNot class that allows uses to fetch from bug_me_not user credentials. - [COMPLETED] Add Random User-Agent support for Search Engines to prevent from being banned. -- Add Random sleep times to avoid being banned. - [FIXED] ESearchy crashed when an URI for a PDF contained a white space. - [FIXED] ESearchy::BUGMENOT was not handling correctly on Backtrack live CD. - [COMPLETED] Add an ESearchy::DELAY Constant. This could be overwritten and setup a new delay. - +- Add Random sleep times to avoid being banned. +- IMPORTANT: Add UTF-16 support in Ruby 1.9 :) It would be very interesting to search for emails in that way. +- Add other social network sites ( I need to research on this) LINUX/UNIX/OSX: - [COMPLETED] Add support for .doc in unix platforms. (Through antiword) It would be nice to take a look at the implementation and see if we can create a method to read files, independently @@ -38,3 +34,5 @@ WINDOWS: - [FIXED][BUG] Windows Vista with Ruby One click installer 3.0 does not support String.first so now we use String[range] (name[0,1]) +FEATURE REQUEST: +- Add session handling. ( recover last scan, etc ...) \ No newline at end of file diff --git a/bin/esearchy b/bin/esearchy old mode 100644 new mode 100755 index 305d51b..8038b94 --- a/bin/esearchy +++ b/bin/esearchy @@ -2,7 +2,8 @@ require 'rubygems' require 'getoptlong' -require 'esearchy' +require '../lib/esearchy.rb' + ESearchy::LOG.level = ESearchy::APP @yahoo_key = nil @@ -123,12 +124,12 @@ opts.each do |opt, arg| end end -require 'esearchy' puts "DISCLOSURE: This is just an example tool ESearchy is more and more a piece of code intended to work as a Library and you should create your own little.rb file :)" puts "------------------------------------------------------------------------" puts "REMINDER: if you want to use GoogleProfiles, LinkedIn or Naymz, you will need to use the --company (-c) option" + @domains.each_with_index do |domain, idx| ESearchy.create domain do |d| @no_eng.each do |eng,val| @@ -143,7 +144,7 @@ need to use the --company (-c) option" d.linkedin_credentials = ESearchy::BUGMENOT end d.company_name = @company[idx] unless @company.empty? - d.search d.save_to_file @output if @output + d.search end end diff --git a/esearch.gemspec b/esearch.gemspec index 20b0569..34fb624 100644 --- a/esearch.gemspec +++ b/esearch.gemspec @@ -1,6 +1,6 @@ SPEC = Gem::Specification.new do |s| s.name = "esearchy" - s.version = "0.1.2.2" + s.version = "0.1.2.3" s.author = "Matias P. Brutti" s.email = "matiasbrutti@gmail.com" s.homepage = "http://freedomcoder.com.ar/esearchy" diff --git a/lib/esearchy.rb b/lib/esearchy.rb index 13ce757..b74abf9 100644 --- a/lib/esearchy.rb +++ b/lib/esearchy.rb @@ -30,6 +30,7 @@ def log_type=(value) def log_file=(value) ESearchy::LOG.file = value end + # Need to find another way of fixing this. #def delay=(value) # ESearch::DELAY = value @@ -71,6 +72,7 @@ def search(query=nil) e.search(query || @query) e.search_depth if depth_search? LOG.puts "+--- Finishing Search for #{n} ---+\n" + write_to_file if @file end end # retrieve emails @@ -173,10 +175,17 @@ def search_#{engine}=(value) end" end ## Saving methods - def save_to_file(file, list=nil) - open(file,"a") do |f| - list ? list.each { |e| f << e + "\n" } : emails.each { |e| f << e + "\n" } - end + def save_to_file(file) + @file = File.new(file,"a") + @file.sync = true + return 0 + end + + def write_to_file(list=nil) + #open(@file,"a") do |f| + # list ? list.each { |e| f << e + "\n" } : emails.each { |e| f << e + "\n" } + #end + list ? list.each { |e| @file << e + "\n" } : emails.each { |e| @file << e + "\n" } end def save_to_sqlite(file) @@ -209,6 +218,7 @@ def calculate_score(email) score = score + 0.2 if email =~ /#{@query}/ score = score + 0.3 if verify_domain!(email) score = 1.0 if verify_email!(email) + return score end def depth_search? diff --git a/lib/esearchy/OtherEngines/googlegroups.rb b/lib/esearchy/OtherEngines/googlegroups.rb index 4c6aba5..f09b38e 100644 --- a/lib/esearchy/OtherEngines/googlegroups.rb +++ b/lib/esearchy/OtherEngines/googlegroups.rb @@ -17,7 +17,14 @@ def initialize(maxhits = nil, start = nil) @lock = Mutex.new @threads = [] end - attr_accessor :emails + + def emails + @emails.uniq! + end + + def emails=(value) + @emails=value + end def search(query) @query = query diff --git a/lib/esearchy/OtherEngines/pgp.rb b/lib/esearchy/OtherEngines/pgp.rb index 1e76fdd..b311f74 100644 --- a/lib/esearchy/OtherEngines/pgp.rb +++ b/lib/esearchy/OtherEngines/pgp.rb @@ -10,7 +10,14 @@ def initialize(maxhits=0) @emails = [] @lock = Mutex.new end - attr_accessor :emails + + def emails + @emails.uniq! + end + + def emails=(value) + @emails=value + end def search(query) @query = query diff --git a/lib/esearchy/OtherEngines/usenet.rb b/lib/esearchy/OtherEngines/usenet.rb index 288b056..028ebd0 100644 --- a/lib/esearchy/OtherEngines/usenet.rb +++ b/lib/esearchy/OtherEngines/usenet.rb @@ -10,7 +10,14 @@ def initialize(maxhits=0) @emails = [] @lock = Mutex.new end - attr_accessor :emails + + def emails + @emails.uniq! + end + + def emails=(value) + @emails=value + end def search(query) @query = query diff --git a/lib/esearchy/SearchEngines/altavista.rb b/lib/esearchy/SearchEngines/altavista.rb index 2af3eaa..81a029c 100644 --- a/lib/esearchy/SearchEngines/altavista.rb +++ b/lib/esearchy/SearchEngines/altavista.rb @@ -17,7 +17,14 @@ def initialize(maxhits = 0, start = 0) @lock = Mutex.new @threads = [] end - attr_accessor :emails + + def emails + @emails.uniq! + end + + def emails=(value) + @emails=value + end def search(query) @query = query diff --git a/lib/esearchy/SearchEngines/bing.rb b/lib/esearchy/SearchEngines/bing.rb index 2af6409..64107f2 100644 --- a/lib/esearchy/SearchEngines/bing.rb +++ b/lib/esearchy/SearchEngines/bing.rb @@ -18,7 +18,15 @@ def initialize(maxhits=0, appid=nil, start=0) @r_txts = Queue.new @lock = Mutex.new end - attr_accessor :emails, :appid + attr_accessor :appid + + def emails + @emails.uniq! + end + + def emails=(value) + @emails=value + end def search(query) @query = query diff --git a/lib/esearchy/SearchEngines/google.rb b/lib/esearchy/SearchEngines/google.rb index 58aa38c..3b06e96 100644 --- a/lib/esearchy/SearchEngines/google.rb +++ b/lib/esearchy/SearchEngines/google.rb @@ -17,7 +17,14 @@ def initialize(maxhits = 0, start = 0) @lock = Mutex.new @threads = [] end - attr_accessor :emails + + def emails + @emails.uniq! + end + + def emails=(value) + @emails=value + end def search(query) @query = query diff --git a/lib/esearchy/SearchEngines/yahoo.rb b/lib/esearchy/SearchEngines/yahoo.rb index e463c58..634eb65 100644 --- a/lib/esearchy/SearchEngines/yahoo.rb +++ b/lib/esearchy/SearchEngines/yahoo.rb @@ -18,7 +18,15 @@ def initialize(maxhits=0, appid = nil, start=0) @threads = [] @lock = Mutex.new end - attr_accessor :emails, :appid + attr_accessor :appid + + def emails + @emails.uniq! + end + + def emails=(value) + @emails=value + end def search(query) @query = query diff --git a/lib/esearchy/SocialNetworks/googleprofiles.rb b/lib/esearchy/SocialNetworks/googleprofiles.rb index 1f2af3f..d332577 100644 --- a/lib/esearchy/SocialNetworks/googleprofiles.rb +++ b/lib/esearchy/SocialNetworks/googleprofiles.rb @@ -22,7 +22,23 @@ def initialize(maxhits = 0, start = 0) @lock = Mutex.new @threads = [] end - attr_accessor :emails, :company_name, :people + attr_accessor :company_name + + def emails + @emails.uniq! + end + + def emails=(value) + @emails=value + end + + def people + @people.uniq! + end + + def people=(value) + @people=value + end def search(query) @query = query diff --git a/lib/esearchy/SocialNetworks/linkedin.rb b/lib/esearchy/SocialNetworks/linkedin.rb index ba8b519..2588fff 100644 --- a/lib/esearchy/SocialNetworks/linkedin.rb +++ b/lib/esearchy/SocialNetworks/linkedin.rb @@ -18,7 +18,23 @@ def initialize(maxhits = 0) @company_name = nil @cookie = nil end - attr_accessor :emails, :username, :password, :company_name, :people + attr_accessor :username, :password, :company_name, :people + + def emails + @emails.uniq! + end + + def emails=(value) + @emails=value + end + + def people + @people.uniq! + end + + def people=(value) + @people=value + end def login begin diff --git a/lib/esearchy/SocialNetworks/naymz.rb b/lib/esearchy/SocialNetworks/naymz.rb index e410926..9abad62 100644 --- a/lib/esearchy/SocialNetworks/naymz.rb +++ b/lib/esearchy/SocialNetworks/naymz.rb @@ -20,7 +20,23 @@ def initialize(maxhits = 0, start = 0) @lock = Mutex.new @threads = [] end - attr_accessor :emails, :company_name, :people + attr_accessor :company_name + + def emails + @emails.uniq! + end + + def emails=(value) + @emails=value + end + + def people + @people.uniq! + end + + def people=(value) + @people=value + end def search(query) @query = query @@ -86,8 +102,9 @@ def parse(html) end name,last = person.size > 2 ? [person[0],person[-1]] : person @people << person - @emails << "#{name.split(' ')[0]}.#{last.split(' ')[0]}#{@domain}" - @emails << "#{name[0,1]}#{last.split(' ')[0]}#{@domain}" + @emails << "#{name.split(' ')[0] unless name.nil?}" + + ".#{last.split(' ')[0] unless last.nil?}#{@domain}" + @emails << "#{name[0,1] unless name.nil?}#{last.split(' ')[0] unless last.nil?}#{@domain}" #@emails.concat(fix(search_person(name,last))) @emails.uniq! print_emails(@emails) diff --git a/lib/esearchy/searchy.rb b/lib/esearchy/searchy.rb index 8064b0f..c32e7c2 100644 --- a/lib/esearchy/searchy.rb +++ b/lib/esearchy/searchy.rb @@ -17,8 +17,8 @@ def search_emails(string) [a-z0-9!#$&'*+=?^_`{|}~-]+(?:\.[a-z0-9!#$&'*+=?^_`{|}~-]+)*\sat\s(?:[a-z0-9](?:[a-z0-9-]\ *[a-z0-9])?\.)+[a-z](?:[a-z-]*[a-z])?|[a-z0-9!#$&'*+=?^_`{|}~-]+\ (?:\.[a-z0-9!#$&'*+=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z](?:[a-z-]*[a-z])?|\ -[a-z0-9!#$&'*+=?^_`{|}~-]+(?:\.[a-z0-9!#$&'*+=?^_`{|}~-]+)*\s@\s(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+\ -[a-z](?:[a-z-]*[a-z])?|[a-z0-9!#$&'*+=?^_`{|}~-]+(?:\sdot\s[a-z0-9!#$&'*+=?^_`\ +[a-z0-9!#$&'*+=?^_`{|}~-]+(?:\.[a-z0-9!#$&'*+=?^_`{|}~-]+)*\s@\s(?:[a-z0-9](?:[a-z0-9-]*\ +[a-z0-9])?\.)+[a-z](?:[a-z-]*[a-z])?|[a-z0-9!#$&'*+=?^_`{|}~-]+(?:\sdot\s[a-z0-9!#$&'*+=?^_`\ {|}~-]+)*\sat\s(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\sdot\s)+[a-z](?:[a-z-]*[a-z])??/i) @lock.synchronize do print_emails(list) @@ -30,30 +30,36 @@ def search_pdfs(urls) while urls.size >= 1 @threads << Thread.new do web = URI.parse(urls.pop.gsub(' ','+')) - ESearchy::LOG.puts "Searching in PDF: #{web.to_s}\n" begin http = Net::HTTP.new(web.host,80) http.start do |http| - request = Net::HTTP::Get.new("#{web.path}#{web.query}") + request = Net::HTTP::Head.new("#{web.path}#{web.query}") response = http.request(request) - case response - when Net::HTTPSuccess, Net::HTTPRedirection - name = ESearchy::TEMP + "#{hash_url(web.to_s)}.pdf" - open(name, "wb") do |file| - file.write(response.body) - end - begin - receiver = PageTextReceiver.new - pdf = PDF::Reader.file(name, receiver) - search_emails(receiver.content.inspect) - rescue PDF::Reader::UnsupportedFeatureError - ESearchy::LOG.puts "Encrypted PDF: Unable to parse it.\n" - rescue PDF::Reader::MalformedPDFError - ESearchy::LOG.puts "Malformed PDF: Unable to parse it.\n" + if response.content_length < 10485760 + ESearchy::LOG.puts "Searching in PDF: #{web.to_s}\n" + request = Net::HTTP::Get.new("#{web.path}#{web.query}") + response = http.request(request) + case response + when Net::HTTPSuccess, Net::HTTPRedirection + name = ESearchy::TEMP + "#{hash_url(web.to_s)}.pdf" + open(name, "wb") do |file| + file.write(response.body) + end + begin + receiver = PageTextReceiver.new + pdf = PDF::Reader.file(name, receiver) + search_emails(receiver.content.inspect) + rescue PDF::Reader::UnsupportedFeatureError + ESearchy::LOG.puts "Encrypted PDF: Unable to parse it.\n" + rescue PDF::Reader::MalformedPDFError + ESearchy::LOG.puts "Malformed PDF: Unable to parse it.\n" + end + `rm "#{name}"` + else + return response.error! end - `rm "#{name}"` else - return response.error! + ESearchy::LOG.puts "Skipping PDF #{web.to_s}, bigger than 10MB." end end rescue Net::HTTPFatalError @@ -141,7 +147,6 @@ def search_office_xml(urls) while urls.size >= 1 @threads << Thread.new do web = URI.parse(urls.pop.gsub(' ','+')) - #format = web.scan(/docx|xlsx|pptx/i)[0] format = web.scan(/docx|xlsx|pptx|odt|odp|ods|odb/i)[0] ESearchy::LOG.puts "Searching in #{format.upcase}: #{web.to_s}\n" begin @@ -244,6 +249,8 @@ def fix(list) e.gsub!(" at ","@") e.gsub!("_at_","@") e.gsub!(" dot ",".") + e.gsub!(/[+0-9]{0,3}[0-9()]{3,5}[-]{0,1}[0-9]{3,4}[-]{0,1}[0-9]{3,5}/,"") + # e = e[/[a-z0-9!#$&'*+=?^_`{|}~-]+(?:\.[a-z0-9!#$&'*+=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)(?:arpa|com|edu|firm|gov|int|mil|mobi|nato|net|nom|org|store|web|co|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg.eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|fx|ga|gb|gd|ge|gf|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|in|io|iq|ir|is|it|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nt|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|pt|pw|py|qa|re|ro|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tf|tg|th|tj|tk|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zr|zw)*(?:\.ac|\.ad|\.ae|\.af|\.ag|\.ai|\.al|\.am|\.an|\.ao|\.aq|\.ar|\.as|\.at|\.au|\.aw|\.az|\.ba|\.bb|\.bd|\.be|\.bf|\.bg|\.bh|\.bi|\.bj|\.bm|\.bn|\.bo|\.br|\.bs|\.bt|\.bv|\.bw|\.by|\.bz|\.ca|\.cc|\.cf|\.cg|\.ch|\.ci|\.ck|\.cl|\.cm|\.cn|\.co|\.cr|\.cs|\.cu|\.cv|\.cx|\.cy|\.cz|\.de|\.dj|\.dk|\.dm|\.do|\.dz|\.ec|\.ee|\.eg.eh|\.er|\.es|\.et|\.eu|\.fi|\.fj|\.fk|\.fm|\.fo|\.fr|\.fx|\.ga|\.gb|\.gd|\.ge|\.gf|\.gh|\.gi|\.gl|\.gm|\.gn|\.gp|\.gq|\.gr|\.gs|\.gt|\.gu|\.gw|\.gy|\.hk|\.hm|\.hn|\.hr|\.ht|\.hu|\.id|\.ie|\.il|\.in|\.io|\.iq|\.ir|\.is|\.it|\.jm|\.jo|\.jp|\.ke|\.kg|\.kh|\.ki|\.km|\.kn|\.kp|\.kr|\.kw|\.ky|\.kz|\.la|\.lb|\.lc|\.li|\.lk|\.lr|\.ls|\.lt|\.lu|\.lv|\.ly|\.ma|\.mc|\.md|\.mg|\.mh|\.mk|\.ml|\.mm|\.mn|\.mo|\.mp|\.mq|\.mr|\.ms|\.mt|\.mu|\.mv|\.mw|\.mx|\.my|\.mz|\.na|\.nc|\.ne|\.nf|\.ng|\.ni|\.nl|\.no|\.np|\.nr|\.nt|\.nu|\.nz|\.om|\.pa|\.pe|\.pf|\.pg|\.ph|\.pk|\.pl|\.pm|\.pn|\.pr|\.pt|\.pw|\.py|\.qa|\.re|\.ro|\.ru|\.rw|\.sa|\.sb|\.sc|\.sd|\.se|\.sg|\.sh|\.si|\.sj|\.sk|\.sl|\.sm|\.sn|\.so|\.sr|\.st|\.su|\.sv|\.sy|\.sz|\.tc|\.td|\.tf|\.tg|\.th|\.tj|\.tk|\.tm|\.tn|\.to|\.tp|\.tr|\.tt|\.tv|\.tw|\.tz|\.ua|\.ug|\.uk|\.um|\.us|\.uy|\.uz|\.va|\.vc|\.ve|\.vg|\.vi|\.vn|\.vu|\.wf|\.ws|\.ye|\.yt|\.yu|\.za|\.zm|\.zr|\.zw)?/i] end end @@ -259,8 +266,6 @@ def search_depth search_pdfs @r_pdfs if @r_pdfs search_txts @r_txts if @r_txts search_office_xml @r_officexs if @r_officexs - if RUBY_PLATFORM =~ /mingw|mswin/ - search_docs @r_docs if @r_docs - end + search_docs @r_docs if @r_docs end end \ No newline at end of file