diff --git a/lib/markup_validity.rb b/lib/markup_validity.rb
index 593a89f..7666177 100644
--- a/lib/markup_validity.rb
+++ b/lib/markup_validity.rb
@@ -9,6 +9,7 @@ class Validator # :nodoc:
XHTML1_TRANSITIONAL = Nokogiri::XML::Schema(
File.read('xhtml1-transitional.xsd')
)
+ XHTML1_STRICT = Nokogiri::XML::Schema(File.read('xhtml1-strict.xsd'))
end
attr_reader :errors
@@ -40,4 +41,10 @@ def assert_xhtml_transitional xhtml
validator = Validator.new xhtml
assert validator.valid?, validator.inspect
end
+ alias :assert_xhtml :assert_xhtml_transitional # assert_xhtml is a second name for the transitional assertion
+
+ def assert_xhtml_strict xhtml # asserts that +xhtml+ validates against the XHTML 1.0 Strict schema
+   validator = Validator.new xhtml, Validator::XHTML1_STRICT # name the schema explicitly: the one-argument form is the check assert_xhtml_transitional performs
+   assert validator.valid?, validator.inspect # validator.inspect is passed as the failure message
+ end
end
diff --git a/lib/markup_validity/rspec.rb b/lib/markup_validity/rspec.rb
index e9f944c..3df28b0 100644
--- a/lib/markup_validity/rspec.rb
+++ b/lib/markup_validity/rspec.rb
@@ -13,5 +13,22 @@ def be_xhtml_transitional
end
end
end
+
+ # Matcher for `actual.should be_xhtml_strict`: matches when the markup
+ # validates against MarkupValidity::Validator::XHTML1_STRICT.
+ def be_xhtml_strict
+   Matcher.new :be_xhtml_strict do
+     # Declared outside both blocks so the failure message can inspect the
+     # validator created by the match block.
+     checker = nil
+     match do |markup|
+       strict_schema = MarkupValidity::Validator::XHTML1_STRICT
+       checker = MarkupValidity::Validator.new(markup, strict_schema)
+       checker.valid?
+     end
+
+     failure_message_for_should { |_actual| checker.inspect }
+   end
+ end
end
end
diff --git a/lib/xhtml1-strict.xsd b/lib/xhtml1-strict.xsd
new file mode 100644
index 0000000..206ab49
--- /dev/null
+++ b/lib/xhtml1-strict.xsd
@@ -0,0 +1,2211 @@
+
+
+
+
+
+ XHTML 1.0 (Second Edition) Strict in XML Schema
+
+ This is the same as HTML 4 Strict except for
+ changes due to the differences between XML and SGML.
+
+ Namespace = http://www.w3.org/1999/xhtml
+
+ For further information, see: http://www.w3.org/TR/xhtml1
+
+ Copyright (c) 1998-2002 W3C (MIT, INRIA, Keio),
+ All Rights Reserved.
+
+ The DTD version is identified by the PUBLIC and SYSTEM identifiers:
+
+ PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ SYSTEM "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
+
+ $Id: xhtml1-strict.xsd,v 1.2 2002/08/28 08:05:44 mimasa Exp $
+
+
+
+
+
+
+
+ ================ Character mnemonic entities =========================
+
+ XHTML entity sets are identified by the PUBLIC and SYSTEM identifiers:
+
+ PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
+ SYSTEM "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent"
+
+ PUBLIC "-//W3C//ENTITIES Special for XHTML//EN"
+ SYSTEM "http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent"
+
+ PUBLIC "-//W3C//ENTITIES Symbols for XHTML//EN"
+ SYSTEM "http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent"
+
+
+
+
+
+ ================== Imported Names ====================================
+
+
+
+
+
+
+ media type, as per [RFC2045]
+
+
+
+
+
+
+
+
+ comma-separated list of media types, as per [RFC2045]
+
+
+
+
+
+
+
+
+ a character encoding, as per [RFC2045]
+
+
+
+
+
+
+
+
+ a space separated list of character encodings, as per [RFC2045]
+
+
+
+
+
+
+
+
+ a language code, as per [RFC3066]
+
+
+
+
+
+
+
+
+ a single character, as per section 2.2 of [XML]
+
+
+
+
+
+
+
+
+
+
+ one or more digits
+
+
+
+
+
+
+
+
+
+
+ tabindex attribute specifies the position of the current element
+ in the tabbing order for the current document. This value must be
+ a number between 0 and 32767. User agents should ignore leading zeros.
+
+
+
+
+
+
+
+
+
+
+
+ space-separated list of link types
+
+
+
+
+
+
+
+
+ single or comma-separated list of media descriptors
+
+
+
+
+
+
+
+
+
+
+ a Uniform Resource Identifier, see [RFC2396]
+
+
+
+
+
+
+
+
+ a space separated list of Uniform Resource Identifiers
+
+
+
+
+
+
+
+
+ date and time information. ISO date format
+
+
+
+
+
+
+
+
+ script expression
+
+
+
+
+
+
+
+
+ style sheet data
+
+
+
+
+
+
+
+
+ used for titles etc.
+
+
+
+
+
+
+
+
+ nn for pixels or nn% for percentage length
+
+
+
+
+
+
+
+
+
+
+ pixel, percentage, or relative
+
+
+
+
+
+
+
+
+
+
+ integer representing length in pixels
+
+
+
+
+
+
+
+ these are used for image maps
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ comma separated list of lengths
+
+
+
+
+
+
+
+
+
+ =================== Generic Attributes ===============================
+
+
+
+
+
+
+ core attributes common to most elements
+ id document-wide unique id
+ class space separated list of classes
+ style associated style info
+ title advisory title/amplification
+
+
+
+
+
+
+
+
+
+
+
+ internationalization attributes
+ lang language code (backwards compatible)
+ xml:lang language code (as per XML 1.0 spec)
+ dir direction for weak/neutral text
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ attributes for common UI events
+ onclick a pointer button was clicked
+ ondblclick a pointer button was double clicked
+ onmousedown a pointer button was pressed down
+ onmouseup a pointer button was released
+ onmousemove a pointer was moved onto the element
+ onmouseout a pointer was moved away from the element
+ onkeypress a key was pressed and released
+ onkeydown a key was pressed down
+ onkeyup a key was released
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ attributes for elements that can get the focus
+ accesskey accessibility key character
+ tabindex position in tabbing order
+ onfocus the element got the focus
+ onblur the element lost the focus
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ =================== Text Elements ====================================
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ these can only occur at block level
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ "Inline" covers inline or "text-level" elements
+
+
+
+
+
+
+
+
+
+
+ ================== Block level elements ==============================
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ "Flow" mixes block and inline and is used for list items etc.
+
+
+
+
+
+
+
+
+
+
+
+
+ ================== Content models for exclusions =====================
+
+
+
+
+
+
+ a elements use "Inline" excluding a
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ pre uses "Inline" excluding big, small, sub or sup
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ form uses "Block" excluding form
+
+
+
+
+
+
+
+
+
+
+
+ button uses "Flow" but excludes a, form and form controls
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ================ Document Structure ==================================
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ================ Document Head =======================================
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ content model is "head.misc" combined with a single
+ title and an optional base element in any order
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The title element is not considered part of the flow of text.
+ It should be displayed, for example as the page header or
+ window title. Exactly one title is required per document.
+
+
+
+
+
+
+
+
+
+
+
+ document base URI
+
+
+
+
+
+
+
+
+
+
+
+ generic metainformation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Relationship values can be used in principle:
+
+ a) for document specific toolbars/menus when used
+ with the link element in document head e.g.
+ start, contents, previous, next, index, end, help
+ b) to link to a separate style sheet (rel="stylesheet")
+ c) to make a link to a script (rel="script")
+ d) by stylesheets to control how collections of
+ html nodes are rendered into printed documents
+ e) to make a link to a printable version of this document
+ e.g. a PostScript or PDF version (rel="alternate" media="print")
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ style info, which may include CDATA sections
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ script statements, which may include CDATA sections
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ alternate content container for non script-based rendering
+
+
+
+
+
+
+
+
+
+
+
+
+
+ =================== Document Body ====================================
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ generic language/style container
+
+
+
+
+
+
+
+
+
+
+
+
+
+ =================== Paragraphs =======================================
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ =================== Headings =========================================
+
+ There are six levels of headings from h1 (the most important)
+ to h6 (the least important).
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ =================== Lists ============================================
+
+
+
+
+
+
+ Unordered list
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Ordered (numbered) list
+
+
+
+
+
+
+
+
+
+
+
+
+
+ list item
+
+
+
+
+
+
+
+
+
+
+
+
+
+ definition lists - dt for term, dd for its definition
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ =================== Address ==========================================
+
+
+
+
+
+
+ information on author
+
+
+
+
+
+
+
+
+
+
+
+
+
+ =================== Horizontal Rule ==================================
+
+
+
+
+
+
+
+
+
+
+
+ =================== Preformatted Text ================================
+
+
+
+
+
+
+ content is "Inline" excluding "img|object|big|small|sub|sup"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ =================== Block-like Quotes ================================
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ =================== Inserted/Deleted Text ============================
+
+ ins/del are allowed in block and inline content, but it's
+ inappropriate to include block content within an ins element
+ occurring in inline content.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ================== The Anchor Element ================================
+
+
+
+
+
+
+ content is "Inline" except that anchors shouldn't be nested
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ===================== Inline Elements ================================
+
+
+
+
+
+
+ generic language/style container
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ I18N BiDi over-ride
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ forced line break
+
+
+
+
+
+
+
+
+
+
+ emphasis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ strong emphasis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ definitional
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ program code
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ sample
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ something user would type
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ variable
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ citation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ abbreviation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ acronym
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ inlined quote
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ subscript
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ superscript
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ fixed pitch font
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ italic font
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ bold font
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ bigger font
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ smaller font
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ==================== Object ======================================
+
+ object is used to embed objects as part of HTML pages.
+ param elements should precede other content. Parameters
+ can also be expressed as attribute/value pairs on the
+ object element itself when brevity is desired.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ param is used to supply a named property value.
+ In XML it would seem natural to follow RDF and support an
+ abbreviated syntax where the param elements are replaced
+ by attribute value pairs on the object start tag.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ =================== Images ===========================================
+
+ To avoid accessibility problems for people who aren't
+ able to see the image, you should provide a text
+ description using the alt and longdesc attributes.
+ In addition, avoid the use of server-side image maps.
+ Note that in this DTD there is no name attribute. That
+ is only available in the transitional and frameset DTD.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ usemap points to a map element which may be in this document
+ or an external document, although the latter is not widely supported
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ================== Client-side image maps ============================
+
+ These can be placed in the same document or grouped in a
+ separate document although this isn't yet widely supported
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ================ Forms ===============================================
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Each label must not contain more than ONE field
+ Label elements shouldn't be nested.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ form control
+
+
+
+
+
+
+
+
+
+ the name attribute is required for all but submit & reset
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ option selector
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ option group
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ selectable choice
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ multi-line text field
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The fieldset element is used to group form fields.
+ Only one legend element should occur in the content
+ and if present should only be preceded by whitespace.
+
+ NOTE: this content model is different from the XHTML 1.0 DTD,
+ closer to the intended content model in HTML4 DTD
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ fieldset label
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Content is "Flow" excluding a, form and form controls
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ======================= Tables =======================================
+
+ Derived from IETF HTML table standard, see [RFC1942]
+
+
+
+
+
+
+ The border attribute sets the thickness of the frame around the
+ table. The default units are screen pixels.
+
+ The frame attribute specifies which parts of the frame around
+ the table should be rendered. The values are not the same as
+ CALS to avoid a name clash with the valign attribute.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The rules attribute defines which rules to draw between cells:
+
+ If rules is absent then assume:
+ "none" if border is absent or border="0" otherwise "all"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ horizontal alignment attributes for cell contents
+
+ char alignment char, e.g. char=':'
+ charoff offset for alignment char
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ vertical alignment attributes for cell contents
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Use thead to duplicate headers when breaking table
+ across page boundaries, or for static headers when
+ tbody sections are rendered in scrolling panel.
+
+ Use tfoot to duplicate footers when breaking table
+ across page boundaries, or for static footers when
+ tbody sections are rendered in scrolling panel.
+
+ Use multiple tbody sections when rules are needed
+ between groups of table rows.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ colgroup groups a set of col elements. It allows you to group
+ several semantically related columns together.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ col elements define the alignment properties for cells in
+ one or more columns.
+
+ The width attribute specifies the width of the columns, e.g.
+
+ width=64 width in screen pixels
+ width=0.5* relative width of 0.5
+
+ The span attribute causes the attributes of one
+ col element to apply to more than one column.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Scope is simpler than headers attribute for common tables
+
+
+
+
+
+
+
+
+
+
+
+
+ th is for headers, td for data and for cells acting as both
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/spec/matcher_spec.rb b/spec/matcher_spec.rb
index a944e68..c19ac52 100644
--- a/spec/matcher_spec.rb
+++ b/spec/matcher_spec.rb
@@ -1,33 +1,25 @@
-$LOAD_PATH << File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib'))
require 'rubygems'
require 'markup_validity'
+require 'test/helper'
module Spec
module Matchers
+ extend MarkupValidity::TestHelper
describe "[actual.should] be_xhtml_transitional" do
- it "is valid xhtml" do
- xhtml = <<-eoxhtml
-
-
- hello world
-
-
-
-
- eoxhtml
- xhtml.should be_xhtml_transitional
+ it "is transitional xhtml" do
+ Matchers.valid_document.should be_xhtml_transitional
end
- it "is not valid xhtml" do
- xhtml = <<-eoxhtml
-
-
-
-
-
-
- eoxhtml
- xhtml.should_not be_xhtml_transitional
+ it "is strict xhtml" do
+ Matchers.valid_document.should be_xhtml_strict
+ end
+
+ it "is not transitional xhtml" do
+ Matchers.invalid_document.should_not be_xhtml_transitional
+ end
+
+ it "is not strict xhtml" do
+ Matchers.invalid_document.should_not be_xhtml_strict
end
end
end
diff --git a/test/helper.rb b/test/helper.rb
new file mode 100644
index 0000000..e6e18ea
--- /dev/null
+++ b/test/helper.rb
@@ -0,0 +1,26 @@
+module MarkupValidity
+ module TestHelper # fixture documents shared by the Test::Unit suite (via include) and the matcher spec (via extend)
+ def valid_document # returns the markup the tests expect to pass both the transitional and the strict checks
+ <<-eoxhtml
+
+
+ hello world
+
+
+
+
+ eoxhtml
+ end
+
+ def invalid_document # returns the markup the tests expect to fail validation with a 'Missing child element' message
+ <<-eoxhtml
+
+
+
+
+
+
+ eoxhtml
+ end
+ end
+end
diff --git a/test/test_markup_validity.rb b/test/test_markup_validity.rb
index d62f74b..16bfcd6 100644
--- a/test/test_markup_validity.rb
+++ b/test/test_markup_validity.rb
@@ -1,7 +1,10 @@
require "test/unit"
+require 'helper'
require "markup_validity"
class TestMarkupValidity < Test::Unit::TestCase
+ include MarkupValidity::TestHelper
+
class FakeUnit
include MarkupValidity
@@ -16,33 +19,29 @@ def assert *args
end
end
+ def setup # builds the FakeUnit whose recorded assertions each test inspects through @fu
+ @fu = FakeUnit.new
+ end
+
def test_valid_xhtml
- fu = FakeUnit.new
- fu.assert_xhtml_transitional <<-eoxhtml
-
-
- hello world
-
-
-
-
- eoxhtml
-
- assert_equal [true, ''], fu.assertions.first
+ @fu.assert_xhtml_transitional valid_document
+ assert_equal [true, ''], @fu.assertions.first
+ end
+
+ def test_valid_xhtml_strict # the valid fixture must record a passing assertion with an empty message
+   @fu.assert_xhtml_strict(valid_document)
+   assert_equal([true, ''], @fu.assertions.first)
end
def test_invalid_xhtml
- fu = FakeUnit.new
- fu.assert_xhtml_transitional <<-eoxhtml
-
-
-
-
-
-
- eoxhtml
-
- assert_equal false, fu.assertions.first.first
- assert_match('Missing child element', fu.assertions.first.last)
+ @fu.assert_xhtml_transitional invalid_document
+ assert_equal false, @fu.assertions.first.first
+ assert_match('Missing child element', @fu.assertions.first.last)
+ end
+
+ def test_invalid_xhtml_strict # the invalid fixture must record a failed assertion whose message names the schema error
+   @fu.assert_xhtml_strict(invalid_document)
+   assert_equal(false, @fu.assertions.first.first)
+   assert_match 'Missing child element', @fu.assertions.first.last
end
end