Permalink
Switch branches/tags
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
97 lines (77 sloc) 2.26 KB
# encoding: utf-8
require 'pdf-reader'
require 'pp'
# Signal raised by PDFStringReader#add_text once the end-of-interest marker
# (a run of underscores) has been seen, so that reading can stop early.
# It is a RuntimeError subclass, so a bare `rescue` also catches it.
class PDFStringReaderDone < RuntimeError
end
# Callback receiver handed to PDF::Reader that concatenates the text shown by
# a PDF's content stream into one string (+text+).
#
# Reading is cut short by raising PDFStringReaderDone from #add_text as soon
# as a chunk's last eight characters contain "____".
#
# With +debug+ set, every handled callback is echoed with +p+, and the object
# claims to respond to any message so that callbacks without a handler are
# routed to #method_missing and printed as well.
class PDFStringReader
  # The text accumulated so far.
  attr_accessor :text
  # Truthy to trace callbacks on stdout (unset, hence nil, by default).
  attr_accessor :debug

  def initialize
    @text = ''
  end

  # Appends +chr+ (a space by default) unless the accumulated text already
  # ends in whitespace. On empty text the separator is always appended.
  def add_space(chr = ' ')
    @text << chr unless text[-1] =~ /\s/
  end

  # Appends +text+ to the accumulated text.
  #
  # A space is inserted first when the chunk (or a line inside it) starts
  # with "et" -- presumably to keep the conjunction apart from the preceding
  # name; confirm against real documents.
  #
  # Raises PDFStringReaderDone, after appending, when the last eight
  # characters of the chunk contain "____". Chunks shorter than eight
  # characters never trigger the stop because the slice is nil for them.
  def add_text(text)
    @text << ' ' if text =~ /^et/
    @text.concat(text)
    raise PDFStringReaderDone if text[-8, 8] =~ /____/
  end

  # Callback: a plain string is shown.
  def show_text(text)
    p [:show_text, text] if debug
    add_text(text)
  end

  # Callback: +data+ mixes strings with non-string entries; only the strings
  # are kept and they are joined without a separator.
  def show_text_with_positioning(data)
    p [:show_text_with_positioning, data] if debug
    add_text data.select { |d| String === d }.join
  end

  # Callback: the text position moves by (+w+, +h+). A separating space is
  # added unless +w+ exceeds 10.
  # NOTE(review): the reason for the threshold of 10 is not visible here.
  def move_text_position(w, h)
    p [:move_text_position, w, h] if debug
    return if w > 10
    add_space
  end

  # Callback: start a new line, then show +text+.
  def move_to_next_line_and_show_text(text)
    p [:move_to_next_line_and_show_text, text] if debug
    add_space "\n"
    add_text(text)
  end

  # Callback: set spacing, start a new line, then show text.
  #
  # pdf-reader documents this callback as (aw, ac, string), so any number of
  # leading operands is accepted and ignored; the last argument is the text.
  # A single-argument call keeps working. The text is now appended after the
  # line break, matching #move_to_next_line_and_show_text (it used to be
  # dropped).
  def set_spacing_next_line_show_text(*args)
    p [:set_spacing_next_line_and_show_text, *args] if debug
    text = args.last
    add_space "\n"
    add_text(text) if String === text
  end

  # Callback: a font change is treated as a word boundary.
  def set_text_font_and_size(*args)
    p [:set_text_font_and_size, *args] if debug
    add_space
  end

  # In debug mode claim to handle every message so that #method_missing gets
  # the chance to log it. Implemented through respond_to_missing? so that
  # respond_to? keeps its standard (name, include_all) signature.
  def respond_to_missing?(meth, include_private = false)
    debug ? true : super
  end

  # In debug mode print the unhandled message name and its arguments;
  # otherwise fall back to the default NoMethodError.
  def method_missing(*args)
    debug ? p(args) : super
  end
end
# Pulls the lists of present, absent and excused people out of a PDF whose
# text contains "PRÉSENCE ... :" and "ABSENCE ... :" sections.
class Attendance
  # Zero-width marker for the end of a list: the start of the next section.
  END_RE = /(?=\.?\s*PRÉSENCE|ABSENCE|AUTRE)/
  # Separators between names: a comma, or the word "et" between whitespace.
  SPLITTER = /\s*,\s*|\set\s/
  # Collective salutation that may open a list.
  LIST_PREFIX = /^\s*Messieurs(\s*et mesdames)?/
  # Courtesy title in front of an individual name.
  NAME_PREFIX = /^(?:M\.|Mme|(?i:mesdame|messieur|madame|monsieur)s?)\s*/

  # Reads +pdf+ through PDF::Reader with a PDFStringReader receiver and
  # returns a hash with the keys :present, :absent and :excused, each an
  # array of cleaned-up names.
  #
  # Raises RuntimeError ("No present members!") when no present names
  # were found.
  def extract(pdf)
    collector = PDFStringReader.new
    begin
      PDF::Reader.file(pdf, collector)
    rescue PDFStringReaderDone
      # The receiver asked to stop early; the text gathered so far is used.
    end
    flat = collector.text.gsub(/\n/, '')

    present = []
    flat.scan(/PRÉSENCE.*?:\s*(.+?)#{END_RE}/m) do |captures|
      # Sections introduced by "AUTRE(S) " right before the keyword are skipped.
      preceding = Regexp.last_match.pre_match
      unless preceding =~ /AUTRES?\s+\Z/
        present.concat(clean_list(captures.first))
      end
    end

    absent = []
    excused = []
    flat.scan(/ABSENCE(.*?):\s*(.+?)#{END_RE}/m) do |qualifier, names|
      bucket = if qualifier =~ /AVEC MOTIF/
                 excused
               else
                 absent
               end
      bucket.concat(clean_list(names))
    end

    raise "No present members!" if present.empty?

    {present: present, absent: absent, excused: excused}
  end

  private

  # Turns one raw list into an array of bare names: the collective
  # salutation and the final period are removed, the list is split on
  # SPLITTER, individual titles are stripped and blank entries are dropped.
  def clean_list(list)
    body = list.sub(LIST_PREFIX, '').sub(/\.\s*\Z/, '')
    names = body.split(SPLITTER).map do |entry|
      entry.strip.gsub(NAME_PREFIX, '')
    end
    names.reject(&:empty?)
  end
end