require 'nokogiri'
# Rewrites selected HTML5 elements as generic <div>/<span> elements, adding the
# original tag name to the element's class list, then prints the document.
# Input is read from ARGF (files named on the command line, or stdin).
doc = Nokogiri::HTML(ARGF)

# NOTE(review): the `doc.css(...)` call and both uses of `elem.name` were
# restored from a damaged copy of this script -- confirm against the original.
# The matched elements are visited in reverse order, as in the original.
doc.css('article, section, figure, figcaption, hgroup, mark').reverse.each do |elem|
  type = elem.name
  # <figcaption> is recorded under the shorter class name "caption".
  type = 'caption' if type == 'figcaption'
  # <mark> becomes <span>; every other matched element becomes <div>.
  elem.name = type == 'mark' ? 'span' : 'div'

  # to_s covers a missing class attribute (nil); lstrip prevents a leading
  # empty entry when the attribute value starts with whitespace.
  classnames = elem['class'].to_s.lstrip.split(/\s+/)
  next if classnames.include?(type)

  classnames << type
  elem['class'] = classnames.join(' ')
end

puts doc
