Skip to content

Commit

Permalink
add block reader
Browse files Browse the repository at this point in the history
  • Loading branch information
geraldb committed Jan 10, 2015
1 parent 79ca4c9 commit d7c90fa
Show file tree
Hide file tree
Showing 6 changed files with 120 additions and 2 deletions.
3 changes: 3 additions & 0 deletions Manifest.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ lib/textutils/helper/xml_helper.rb
lib/textutils/page.rb
lib/textutils/parser/name_parser.rb
lib/textutils/patterns.rb
lib/textutils/reader/block_reader.rb
lib/textutils/reader/code_reader.rb
lib/textutils/reader/fixture_reader.rb
lib/textutils/reader/hash_reader.rb
Expand All @@ -33,9 +34,11 @@ lib/textutils/title_mapper.rb
lib/textutils/utils.rb
lib/textutils/version.rb
test/data/cl_all.txt
test/data/feedburner.txt
test/helper.rb
test/test_address_helper.rb
test/test_asciify.rb
test/test_block_reader.rb
test/test_fixture_reader.rb
test/test_hypertext_helper.rb
test/test_slugify.rb
Expand Down
1 change: 1 addition & 0 deletions lib/textutils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
require 'textutils/reader/line_reader'
require 'textutils/reader/values_reader'
require 'textutils/reader/fixture_reader'
require 'textutils/reader/block_reader'

require 'textutils/classifier'
require 'textutils/title' # title table/mapper/finder utils
Expand Down
67 changes: 67 additions & 0 deletions lib/textutils/reader/block_reader.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# encoding: utf-8


# fix: move into TextUtils namespace/module!!

# Splits a text into blocks separated by lines made up of three or more
# dashes (e.g. "---"). Comment lines (first non-whitespace character is "#")
# are dropped and never reach the result.
class BlockReader

  include LogUtils::Logging

  # Builds a reader from the file at +path+.
  # note: assumes/enforces utf-8 encoding (with or without BOM - byte order
  # mark) - see File.read_utf8 in textutils/utils.rb
  def self.from_file( path )
    from_string( File.read_utf8( path ) )
  end

  # Builds a reader for the given +text+ string.
  def self.from_string( text )
    new( text )
  end

  def initialize( text )
    @text = text
  end

  # Returns an array of strings, one per block; lines inside a block are
  # joined with "\n", e.g.
  #   [
  #     "line1\nline2",          ## -- block1
  #     "line1\nline2\nline3"    ## -- block2
  #   ]
  #
  # Every line is stripped, and so is every finished block. Blank lines are
  # NOT skipped, so blank lines inside a block survive as empty lines.
  # The text after the last separator is always emitted as a block, so an
  # empty text (or a trailing separator) produces an empty string entry.
  def read
    finished = []
    current  = ""

    @text.each_line do |raw_line|
      # shell/ruby style comments (#####): skip and do NOT copy to the result
      if raw_line =~ /^\s*#/
        logger.debug 'skipping comment line'
        next
      end

      # remove leading and trailing whitespace (including the newline)
      content = raw_line.strip

      case content
      when /^-{3,}$/   ## three or more dashes close the current block
        logger.debug 'block separator'
        finished.push( current.strip )
        current = ""
      else
        current << "#{content}\n"
      end
    end

    # whatever follows the last separator is the final block
    finished.push( current.strip )
    finished
  end

end # class BlockReader

4 changes: 2 additions & 2 deletions lib/textutils/version.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

module TextUtils

MAJOR = 0 ## todo: namespace inside version or something - why? why not??
MINOR = 10
MAJOR = 1 ## todo: namespace inside version or something - why? why not??
MINOR = 0
PATCH = 0
VERSION = [MAJOR,MINOR,PATCH].join('.')

Expand Down
21 changes: 21 additions & 0 deletions test/data/feedburner.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
####################################
# feedburner text pattern (regex)
#
# pattern (regex)
# ---
# test1
# ---
# test2
# ---
# etc.


<img[^>]*?
src=("|')(:?http:)?//feeds\.feedburner\.com/~r/[^>]+?\1
.*?>

---

<img src="//feeds.feedburner.com/~r/Rubyflow/~4/1wUDnBztAJY" height="1" width="1" alt=""/>


26 changes: 26 additions & 0 deletions test/test_block_reader.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
###
# to run use
# ruby -I ./lib -I ./test test/test_block_reader.rb
# or better
# rake test

require 'helper'


# Checks that BlockReader splits the feedburner fixture into two blocks:
# the regex pattern and one sample line.
class TestBlockReader < MiniTest::Test

  def test_feedburner
    fixture_path = "#{TextUtils.root}/test/data/feedburner.txt"
    reader       = BlockReader.from_file( fixture_path )
    blocks       = reader.read

    ## note: regex source - %q keeps \. and \1 etc. as-is (no escaping needed)
    expected_pattern = %q{<img[^>]*?src=("|')(:?http:)?//feeds\.feedburner\.com/~r/[^>]+?\1.*?>}

    expected_sample = %q{<img src="//feeds.feedburner.com/~r/Rubyflow/~4/1wUDnBztAJY" height="1" width="1" alt=""/>}

    assert_equal 2, blocks.size

    ## the pattern spans several lines in the fixture;
    ## remove newlines and spaces before comparing
    actual_pattern = blocks[0].gsub( /[\n ]/, '' )
    assert_equal expected_pattern, actual_pattern

    assert_equal expected_sample, blocks[1]
  end

end # class TestBlockReader

0 comments on commit d7c90fa

Please sign in to comment.