diff --git a/lib/markup_validity.rb b/lib/markup_validity.rb index 593a89f..7666177 100644 --- a/lib/markup_validity.rb +++ b/lib/markup_validity.rb @@ -9,6 +9,7 @@ class Validator # :nodoc: XHTML1_TRANSITIONAL = Nokogiri::XML::Schema( File.read('xhtml1-transitional.xsd') ) + XHTML1_STRICT = Nokogiri::XML::Schema(File.read('xhtml1-strict.xsd')) end attr_reader :errors @@ -40,4 +41,11 @@ def assert_xhtml_transitional xhtml validator = Validator.new xhtml assert validator.valid?, validator.inspect end + alias :assert_xhtml :assert_xhtml_transitional + + # Validate against the strict schema explicitly, as be_xhtml_strict does. + def assert_xhtml_strict xhtml + validator = Validator.new xhtml, Validator::XHTML1_STRICT + assert validator.valid?, validator.inspect + end end diff --git a/lib/markup_validity/rspec.rb b/lib/markup_validity/rspec.rb index e9f944c..3df28b0 100644 --- a/lib/markup_validity/rspec.rb +++ b/lib/markup_validity/rspec.rb @@ -13,5 +13,22 @@ def be_xhtml_transitional end end end + + def be_xhtml_strict + Matcher.new :be_xhtml_strict do + validator = nil + match do |xhtml| + validator = MarkupValidity::Validator.new( + xhtml, + MarkupValidity::Validator::XHTML1_STRICT + ) + validator.valid? + end + + failure_message_for_should do |actual| + validator.inspect + end + end + end end end diff --git a/lib/xhtml1-strict.xsd b/lib/xhtml1-strict.xsd new file mode 100644 index 0000000..206ab49 --- /dev/null +++ b/lib/xhtml1-strict.xsd @@ -0,0 +1,2211 @@ + + + + + + XHTML 1.0 (Second Edition) Strict in XML Schema + + This is the same as HTML 4 Strict except for + changes due to the differences between XML and SGML. + + Namespace = http://www.w3.org/1999/xhtml + + For further information, see: http://www.w3.org/TR/xhtml1 + + Copyright (c) 1998-2002 W3C (MIT, INRIA, Keio), + All Rights Reserved. 
+ + The DTD version is identified by the PUBLIC and SYSTEM identifiers: + + PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" + SYSTEM "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" + + $Id: xhtml1-strict.xsd,v 1.2 2002/08/28 08:05:44 mimasa Exp $ + + + + + + + + ================ Character mnemonic entities ========================= + + XHTML entity sets are identified by the PUBLIC and SYSTEM identifiers: + + PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN" + SYSTEM "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent" + + PUBLIC "-//W3C//ENTITIES Special for XHTML//EN" + SYSTEM "http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent" + + PUBLIC "-//W3C//ENTITIES Symbols for XHTML//EN" + SYSTEM "http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent" + + + + + + ================== Imported Names ==================================== + + + + + + + media type, as per [RFC2045] + + + + + + + + + comma-separated list of media types, as per [RFC2045] + + + + + + + + + a character encoding, as per [RFC2045] + + + + + + + + + a space separated list of character encodings, as per [RFC2045] + + + + + + + + + a language code, as per [RFC3066] + + + + + + + + + a single character, as per section 2.2 of [XML] + + + + + + + + + + + one or more digits + + + + + + + + + + + tabindex attribute specifies the position of the current element + in the tabbing order for the current document. This value must be + a number between 0 and 32767. User agents should ignore leading zeros. + + + + + + + + + + + + space-separated list of link types + + + + + + + + + single or comma-separated list of media descriptors + + + + + + + + + + + a Uniform Resource Identifier, see [RFC2396] + + + + + + + + + a space separated list of Uniform Resource Identifiers + + + + + + + + + date and time information. ISO date format + + + + + + + + + script expression + + + + + + + + + style sheet data + + + + + + + + + used for titles etc. 
+ + + + + + + + + nn for pixels or nn% for percentage length + + + + + + + + + + + pixel, percentage, or relative + + + + + + + + + + + integer representing length in pixels + + + + + + + + these are used for image maps + + + + + + + + + + + + + + + + comma separated list of lengths + + + + + + + + + + =================== Generic Attributes =============================== + + + + + + + core attributes common to most elements + id document-wide unique id + class space separated list of classes + style associated style info + title advisory title/amplification + + + + + + + + + + + + internationalization attributes + lang language code (backwards compatible) + xml:lang language code (as per XML 1.0 spec) + dir direction for weak/neutral text + + + + + + + + + + + + + + + + + + attributes for common UI events + onclick a pointer button was clicked + ondblclick a pointer button was double clicked + onmousedown a pointer button was pressed down + onmouseup a pointer button was released + onmousemove a pointer was moved onto the element + onmouseout a pointer was moved away from the element + onkeypress a key was pressed and released + onkeydown a key was pressed down + onkeyup a key was released + + + + + + + + + + + + + + + + + + attributes for elements that can get the focus + accesskey accessibility key character + tabindex position in tabbing order + onfocus the element got the focus + onblur the element lost the focus + + + + + + + + + + + + + + + + + =================== Text Elements ==================================== + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + these can only occur at block level + + + + + + + + + + + + + + + + + + + + + + "Inline" covers inline or "text-level" elements + + + + + + + + + + + ================== Block level elements ============================== + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + "Flow" mixes block and inline and is used for list items etc. + + + + + + + + + + + + + ================== Content models for exclusions ===================== + + + + + + + a elements use "Inline" excluding a + + + + + + + + + + + + + + + pre uses "Inline" excluding big, small, sub or sup + + + + + + + + + + + + + + + + form uses "Block" excluding form + + + + + + + + + + + + button uses "Flow" but excludes a, form and form controls + + + + + + + + + + + + + + + + + + + ================ Document Structure ================================== + + + + + + + + + + + + + + + + + ================ Document Head ======================================= + + + + + + + + + + + + + + + + + + + content model is "head.misc" combined with a single + title and an optional base element in any order + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The title element is not considered part of the flow of text. + It should be displayed, for example as the page header or + window title. Exactly one title is required per document. + + + + + + + + + + + + document base URI + + + + + + + + + + + + generic metainformation + + + + + + + + + + + + + + + + Relationship values can be used in principle: + + a) for document specific toolbars/menus when used + with the link element in document head e.g. + start, contents, previous, next, index, end, help + b) to link to a separate style sheet (rel="stylesheet") + c) to make a link to a script (rel="script") + d) by stylesheets to control how collections of + html nodes are rendered into printed documents + e) to make a link to a printable version of this document + e.g. 
a PostScript or PDF version (rel="alternate" media="print") + + + + + + + + + + + + + + + + + + style info, which may include CDATA sections + + + + + + + + + + + + + + + + script statements, which may include CDATA sections + + + + + + + + + + + + + + + + + + + + + + alternate content container for non script-based rendering + + + + + + + + + + + + + + =================== Document Body ==================================== + + + + + + + + + + + + + + + + + + + generic language/style container + + + + + + + + + + + + + + =================== Paragraphs ======================================= + + + + + + + + + + + + + + + + =================== Headings ========================================= + + There are six levels of headings from h1 (the most important) + to h6 (the least important). + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + =================== Lists ============================================ + + + + + + + Unordered list + + + + + + + + + + + + + + Ordered (numbered) list + + + + + + + + + + + + + + list item + + + + + + + + + + + + + + definition lists - dt for term, dd for its definition + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + =================== Address ========================================== + + + + + + + information on author + + + + + + + + + + + + + + =================== Horizontal Rule ================================== + + + + + + + + + + + + =================== Preformatted Text ================================ + + + + + + + content is "Inline" excluding "img|object|big|small|sub|sup" + + + + + + + + + + + + + + + =================== Block-like Quotes ================================ + + + + + + + + + + + + + + + + + =================== Inserted/Deleted Text ============================ + + ins/del are allowed in block and inline content, but its + inappropriate to include block content within an ins element + occurring in 
inline content. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ================== The Anchor Element ================================ + + + + + + + content is "Inline" except that anchors shouldn't be nested + + + + + + + + + + + + + + + + + + + + + + + + ===================== Inline Elements ================================ + + + + + + + generic language/style container + + + + + + + + + + + + + + + I18N BiDi over-ride + + + + + + + + + + + + + + + + + + + + + + + + + + forced line break + + + + + + + + + + + emphasis + + + + + + + + + + + + + + + strong emphasis + + + + + + + + + + + + + + + definitional + + + + + + + + + + + + + + + program code + + + + + + + + + + + + + + + sample + + + + + + + + + + + + + + + something user would type + + + + + + + + + + + + + + + variable + + + + + + + + + + + + + + + citation + + + + + + + + + + + + + + + abbreviation + + + + + + + + + + + + + + + acronym + + + + + + + + + + + + + + + inlined quote + + + + + + + + + + + + + + + + subscript + + + + + + + + + + + + + + + superscript + + + + + + + + + + + + + + + fixed pitch font + + + + + + + + + + + + + + + italic font + + + + + + + + + + + + + + + bold font + + + + + + + + + + + + + + + bigger font + + + + + + + + + + + + + + + smaller font + + + + + + + + + + + + + + ==================== Object ====================================== + + object is used to embed objects as part of HTML pages. + param elements should precede other content. Parameters + can also be expressed as attribute/value pairs on the + object element itself when brevity is desired. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + param is used to supply a named property value. + In XML it would seem natural to follow RDF and support an + abbreviated syntax where the param elements are replaced + by attribute value pairs on the object start tag. 
+ + + + + + + + + + + + + + + + + + + + + + =================== Images =========================================== + + To avoid accessibility problems for people who aren't + able to see the image, you should provide a text + description using the alt and longdesc attributes. + In addition, avoid the use of server-side image maps. + Note that in this DTD there is no name attribute. That + is only available in the transitional and frameset DTD. + + + + + + + + + + + + + + + usemap points to a map element which may be in this document + or an external document, although the latter is not widely supported + + + + + + + + + + + + + + + + ================== Client-side image maps ============================ + + These can be placed in the same document or grouped in a + separate document although this isn't yet widely supported + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ================ Forms =============================================== + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Each label must not contain more than ONE field + Label elements shouldn't be nested. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + form control + + + + + + + + + + the name attribute is required for all but submit & reset + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + option selector + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + option group + + + + + + + + + + + + + + + + + + + + + + selectable choice + + + + + + + + + + + + + + + + + + + + + + + + + + + multi-line text field + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The fieldset element is used to group form fields. + Only one legend element should occur in the content + and if present should only be preceded by whitespace. 
+ + NOTE: this content model is different from the XHTML 1.0 DTD, + closer to the intended content model in HTML4 DTD + + + + + + + + + + + + + + + + + + + + fieldset label + + + + + + + + + + + + + + + + Content is "Flow" excluding a, form and form controls + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ======================= Tables ======================================= + + Derived from IETF HTML table standard, see [RFC1942] + + + + + + + The border attribute sets the thickness of the frame around the + table. The default units are screen pixels. + + The frame attribute specifies which parts of the frame around + the table should be rendered. The values are not the same as + CALS to avoid a name clash with the valign attribute. + + + + + + + + + + + + + + + + + + + The rules attribute defines which rules to draw between cells: + + If rules is absent then assume: + "none" if border is absent or border="0" otherwise "all" + + + + + + + + + + + + + + + horizontal alignment attributes for cell contents + + char alignment char, e.g. char=':' + charoff offset for alignment char + + + + + + + + + + + + + + + + + + + + + vertical alignment attributes for cell contents + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Use thead to duplicate headers when breaking table + across page boundaries, or for static headers when + tbody sections are rendered in scrolling panel. + + Use tfoot to duplicate footers when breaking table + across page boundaries, or for static footers when + tbody sections are rendered in scrolling panel. + + Use multiple tbody sections when rules are needed + between groups of table rows. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + colgroup groups a set of col elements. It allows you to group + several semantically related columns together. 
+ + + + + + + + + + + + + + + + + + col elements define the alignment properties for cells in + one or more columns. + + The width attribute specifies the width of the columns, e.g. + + width=64 width in screen pixels + width=0.5* relative width of 0.5 + + The span attribute causes the attributes of one + col element to apply to more than one column. + + + + + + + + + + + + + + + + + + + + + + + + + + + Scope is simpler than headers attribute for common tables + + + + + + + + + + + + + th is for headers, td for data and for cells acting as both + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/spec/matcher_spec.rb b/spec/matcher_spec.rb index a944e68..c19ac52 100644 --- a/spec/matcher_spec.rb +++ b/spec/matcher_spec.rb @@ -1,33 +1,25 @@ -$LOAD_PATH << File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib')) require 'rubygems' require 'markup_validity' +require 'test/helper' module Spec module Matchers + extend MarkupValidity::TestHelper describe "[actual.should] be_xhtml_transitional" do - it "is valid xhtml" do - xhtml = <<-eoxhtml - - - hello world - - - - - eoxhtml - xhtml.should be_xhtml_transitional + it "is transitional xhtml" do + Matchers.valid_document.should be_xhtml_transitional end - it "is not valid xhtml" do - xhtml = <<-eoxhtml - - - - - - - eoxhtml - xhtml.should_not be_xhtml_transitional + it "is strict xhtml" do + Matchers.valid_document.should be_xhtml_strict + end + + it "is not transitional xhtml" do + Matchers.invalid_document.should_not be_xhtml_transitional + end + + it "is not strict xhtml" do + Matchers.invalid_document.should_not be_xhtml_strict end end end diff --git a/test/helper.rb b/test/helper.rb new file mode 100644 index 0000000..e6e18ea --- /dev/null +++ b/test/helper.rb @@ -0,0 +1,26 @@ +module MarkupValidity + module TestHelper + def valid_document + <<-eoxhtml + + + hello world + + + + + eoxhtml + end + + def invalid_document + <<-eoxhtml + + + + + + + eoxhtml + end + end +end 
diff --git a/test/test_markup_validity.rb b/test/test_markup_validity.rb index d62f74b..16bfcd6 100644 --- a/test/test_markup_validity.rb +++ b/test/test_markup_validity.rb @@ -1,7 +1,10 @@ require "test/unit" +require 'helper' require "markup_validity" class TestMarkupValidity < Test::Unit::TestCase + include MarkupValidity::TestHelper + class FakeUnit include MarkupValidity @@ -16,33 +19,29 @@ def assert *args end end + def setup + @fu = FakeUnit.new + end + def test_valid_xhtml - fu = FakeUnit.new - fu.assert_xhtml_transitional <<-eoxhtml - - - hello world - - - - - eoxhtml - - assert_equal [true, ''], fu.assertions.first + @fu.assert_xhtml_transitional valid_document + assert_equal [true, ''], @fu.assertions.first + end + + def test_valid_xhtml_strict + @fu.assert_xhtml_strict valid_document + assert_equal [true, ''], @fu.assertions.first end def test_invalid_xhtml - fu = FakeUnit.new - fu.assert_xhtml_transitional <<-eoxhtml - - - - - - - eoxhtml - - assert_equal false, fu.assertions.first.first - assert_match('Missing child element', fu.assertions.first.last) + @fu.assert_xhtml_transitional invalid_document + assert_equal false, @fu.assertions.first.first + assert_match('Missing child element', @fu.assertions.first.last) + end + + def test_invalid_xhtml_strict + @fu.assert_xhtml_strict invalid_document + assert_equal false, @fu.assertions.first.first + assert_match('Missing child element', @fu.assertions.first.last) end end