Skip to content

Commit

Permalink
Merge branch 'strings'
Browse files Browse the repository at this point in the history
  • Loading branch information
Kristian Hellquist committed Mar 21, 2012
2 parents 5a94335 + 2efa21b commit 25258d3
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 37 deletions.
2 changes: 1 addition & 1 deletion colander.gemspec
Expand Up @@ -19,7 +19,7 @@ Gem::Specification.new do |s|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
s.require_paths = ["lib"]

s.add_dependency "roo", "~> 1.10.1"
s.add_dependency "zip", "~> 2.0.2"
s.add_development_dependency "rspec"
s.add_development_dependency "pry"
end
25 changes: 24 additions & 1 deletion lib/colander/parser/base.rb
Expand Up @@ -8,8 +8,31 @@ def initialize(file_path)
end

# Collects the e-mail addresses via collect_emails and stores them in @emails.
#
# Raises InvalidFile (wrapping the original error) when collect_emails
# raises any StandardError.
def parse
  @emails = collect_emails
rescue StandardError => e
  # Deliberately not `rescue Exception`: interrupts, SystemExit, NoMemoryError
  # and similar must propagate instead of being reported as an invalid file.
  raise InvalidFile.new(e)
end

# Hook for subclasses: returns the raw file content handed to parse_file.
#
# This default implementation always raises RuntimeError, naming the class
# that failed to provide its own #payload.
def payload
  raise "#{self.class} must implement #payload"
end

protected

# Returns the distinct e-mail-like substrings found in the text returned by
# parse_file, in order of first appearance.
def collect_emails
  email_pattern = /\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/
  matches = parse_file.scan(email_pattern)
  matches.flatten.uniq
end

# Feeds payload to the external `strings` command on stdin and returns its
# stdout after passing it through an Iconv UTF-8 -> UTF-8//IGNORE conversion
# (presumably to drop byte sequences that are not valid UTF-8 -- confirm).
#
# Raises RuntimeError carrying the command's stderr when `strings` does not
# exit successfully.
def parse_file
  converter = Iconv.new("UTF-8//IGNORE", "UTF-8")
  std_out, std_err, exit_status = Open3.capture3("strings", :stdin_data => payload)
  raise RuntimeError, std_err unless exit_status.success?

  converter.iconv(std_out)
end

end
end
end
end
24 changes: 4 additions & 20 deletions lib/colander/parser/xls.rb
@@ -1,31 +1,15 @@
require 'colander/invalid_file'
require 'colander/parser/base'
require 'roo'
require 'iconv'

require 'open3'
module Colander
module Parser
class Xls < Base
# Loads the spreadsheet via parse_file, extracts e-mail addresses from it
# with collect_emails and stores them in @emails.
#
# Raises InvalidFile (wrapping the original error) when either step raises
# a StandardError.
def parse
  spreadsheet = parse_file
  @emails = collect_emails(spreadsheet)
rescue StandardError => e
  # Deliberately not `rescue Exception`: interrupts, SystemExit, NoMemoryError
  # and similar must propagate instead of being reported as an invalid file.
  raise InvalidFile.new(e)
end

protected

def parse_file
Excel.new(@file_path,nil,:ignore)
# Returns the raw bytes of the file at @file_path.
#
# Read in binary mode: the content is handed on as opaque bytes, so no
# newline translation or text-encoding interpretation should be applied.
def payload
  File.binread(@file_path)
end

# Returns every e-mail-like substring found in the YAML dump of each sheet,
# in sheet order (duplicates are kept).
#
# spreadsheet must respond to #sheets, #default_sheet= and #to_yaml; each
# sheet is selected as the default sheet before the dump is scanned.
def collect_emails(spreadsheet)
  email_pattern = /\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/
  spreadsheet.sheets.flat_map do |sheet_name|
    spreadsheet.default_sheet = sheet_name
    spreadsheet.to_yaml.scan(email_pattern)
  end
end

end
end
end
end
18 changes: 13 additions & 5 deletions lib/colander/parser/xlsx.rb
@@ -1,13 +1,21 @@
require 'colander/parser/base'
require 'roo'

require 'zip'
require 'iconv'
module Colander
module Parser
class Xlsx < Xls

protected
def parse_file
Excelx.new(@file_path,nil,:ignore)

# Returns the contents of every entry in the zip archive at @file_path,
# concatenated in archive order into a single string.
def payload
  contents = ''
  Zip::ZipInputStream.open(@file_path) do |stream|
    # get_next_entry advances the stream; read then returns that entry's data.
    contents << stream.read while stream.get_next_entry
  end
  contents
end
end
end
end
end
2 changes: 1 addition & 1 deletion lib/colander/version.rb
@@ -1,3 +1,3 @@
module Colander
VERSION = "0.0.3"
VERSION = "0.1.0"
end
3 changes: 2 additions & 1 deletion spec/spec_helper.rb
@@ -1,3 +1,4 @@
$:.push File.expand_path("../lib", __FILE__)

require 'colander'
require 'colander'
require 'pry'
11 changes: 4 additions & 7 deletions spec/xls_spec.rb
Expand Up @@ -3,9 +3,8 @@
describe Colander::Parser::Xls do
describe "#parse" do
it "stores found emails" do
Excel.stub(:new)
parser = Colander::Parser::Xls.new("file/path")
parser.should_receive(:collect_emails).and_return(["bruce@wayne.com"])
parser.should_receive(:parse_file).and_return("bruce@wayne.com")
parser.parse
parser.emails.sort.should eql(["bruce@wayne.com"])
end
Expand All @@ -26,21 +25,19 @@
end

it "retreives emails from an 95-excel spreadsheet" do
pending "handle encoding error"
parser = Colander::Parser::Xls.new("spec/fixtures/excel95.xls")
parser.parse
parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
end
it "retreives emails from an xls spreadsheet" do
parser = Colander::Parser::Xls.new("spec/fixtures/old-format.xls")
parser.parse
parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
end
it "retreives emails from an 95-excel spreadsheet without file suffix" do
pending "handle encoding error"
parser = Colander::Parser::Xls.new("spec/fixtures/excel95-without-file-suffix")
parser.parse
parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
end
end
end
2 changes: 1 addition & 1 deletion spec/xlsx_spec.rb
Expand Up @@ -8,7 +8,7 @@
it "retreives emails from an xlsx spreadsheet" do
parser = Colander::Parser::Xlsx.new("spec/fixtures/new-format.xlsx")
parser.parse
parser.emails.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"])
parser.emails.sort.should eql(["markus.nordin@mynewsdesk.com", "markus@hej.se", "sven@bertil.se", "Adam.A@hotmail.com", "apa@elabs.se", "liam@neeson.net", "david@mynewsdesk.com"].sort)
end
end
end

0 comments on commit 25258d3

Please sign in to comment.