Skip to content
Browse files

Merge branch 'strings'

  • Loading branch information...
2 parents 5a94335 + 2efa21b commit 25258d36371ce5a4d5dd746992817600638500d8 @meeiw meeiw committed Mar 21, 2012
View
2 colander.gemspec
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
s.require_paths = ["lib"]
- s.add_dependency "roo", "~> 1.10.1"
s.add_dependency "zip", "~> 2.0.2"
s.add_development_dependency "rspec"
+ s.add_development_dependency "pry"
end
View
25 lib/colander/parser/base.rb
@@ -8,8 +8,31 @@ def initialize(file_path)
end
def parse
+ @emails = collect_emails
+ rescue Exception => e
+ raise InvalidFile.new e
+ end
+
+ def payload
raise "plz implement me in"
end
+
+ protected
+
+ def collect_emails
+ parse_file.scan(/\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/).flatten.uniq
+ end
+
+ def parse_file
+ ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
+ std_out, std_err, exit_status = Open3.capture3("strings", :stdin_data => payload)
+ if exit_status == 0
+ ic.iconv(std_out)
+ else
+ raise RuntimeError.new(std_err)
+ end
+ end
+
end
end
-end
+end
View
24 lib/colander/parser/xls.rb
@@ -1,31 +1,15 @@
require 'colander/invalid_file'
require 'colander/parser/base'
-require 'roo'
-require 'iconv'
-
+require 'open3'
module Colander
module Parser
class Xls < Base
- def parse
- spreadsheet = parse_file
- @emails = collect_emails spreadsheet
- rescue Exception => e
- raise InvalidFile.new e
- end
protected
- def parse_file
- Excel.new(@file_path,nil,:ignore)
+ def payload
+ File.read(@file_path)
end
-
- def collect_emails(spreadsheet)
- spreadsheet.sheets.map do |sheet|
- spreadsheet.default_sheet = sheet
- spreadsheet.to_yaml.scan(/\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/)
- end.flatten
- end
-
end
end
-end
+end
View
18 lib/colander/parser/xlsx.rb
@@ -1,13 +1,21 @@
require 'colander/parser/base'
-require 'roo'
-
+require 'zip'
+require 'iconv'
module Colander
module Parser
class Xlsx < Xls
+
protected
- def parse_file
- Excelx.new(@file_path,nil,:ignore)
+
+ def payload
+ ''.tap do |string|
+ Zip::ZipInputStream::open(@file_path) do |io|
+ while (entry = io.get_next_entry)
+ string << io.read
+ end
+ end
+ end
end
end
end
-end
+end
View
2 lib/colander/version.rb
@@ -1,3 +1,3 @@
module Colander
- VERSION = "0.0.3"
+ VERSION = "0.1.0"
end
View
3 spec/spec_helper.rb
@@ -1,3 +1,4 @@
$:.push File.expand_path("../lib", __FILE__)
-require 'colander'
+require 'colander'
+require 'pry'
View
11 spec/xls_spec.rb
@@ -3,9 +3,8 @@
describe Colander::Parser::Xls do
describe "#parse" do
it "stores found emails" do
- Excel.stub(:new)
parser = Colander::Parser::Xls.new("file/path")
- parser.should_receive(:collect_emails).and_return(["bruce@wayne.com"])
+ parser.should_receive(:parse_file).and_return("bruce@wayne.com")
parser.parse
parser.emails.sort.should eql(["bruce@wayne.com"])
end
@@ -26,21 +25,19 @@
end
it "retreives emails from an 95-excel spreadsheet" do
- pending "handle encoding error"
parser = Colander::Parser::Xls.new("spec/fixtures/excel95.xls")
parser.parse
- parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
+ parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
end
it "retreives emails from an xls spreadsheet" do
parser = Colander::Parser::Xls.new("spec/fixtures/old-format.xls")
parser.parse
- parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
+ parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
end
it "retreives emails from an 95-excel spreadsheet without file suffix" do
- pending "handle encoding error"
parser = Colander::Parser::Xls.new("spec/fixtures/excel95-without-file-suffix")
parser.parse
- parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
+ parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
end
end
end
View
2 spec/xlsx_spec.rb
@@ -8,7 +8,7 @@
it "retreives emails from an xlsx spreadsheet" do
parser = Colander::Parser::Xlsx.new("spec/fixtures/new-format.xlsx")
parser.parse
- parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
+ parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
end
end
end

0 comments on commit 25258d3

Please sign in to comment.
Something went wrong with that request. Please try again.