tools/docs/mk_html_docs.pl

#! perl
# Copyright (C) 2010-2011, Parrot Foundation.

=head1 NAME

tools/docs/mk_html_docs.pl - Write HTML documentation

=head1 SYNOPSIS

    % perl tools/docs/mk_html_docs.pl [--version=VERSION]

=head1 DESCRIPTION

This script writes the HTML documentation for Parrot by transforming existing POD
files into pretty HTML output. Index pages are also generated.

This script will look in the F<docs/index> directory for JSON files.  Each JSON
file will generate a separate "table of contents" page, and each referenced POD
file will generate an HTML page. Referenced files must reside within the Parrot
repo, but they may be located outside of the F</docs> directory.

=head2 JSON Syntax

Each JSON file should look similar to the following:

    {
       "page" : "index",
       "content" : [
          {
             "title" : "Introduction",
             "source" : [
                "docs/intro.pod",
                "docs/project/*.pod"
             ]
          },
          {
             "title" : "Working with Parrot",
             "source" : [
                "docs/running.pod",
                "docs/tests.pod",
                "docs/gettingstarted.pod",
                "docs/submissions.pod"
             ],
             "resource" : "docs/*.png"
          }
       ]
    }

=head3 Elements

=over

=item C<page>

The name of this page. It must be unique for all JSON pages; the generated HTML
will reside at "$target_dir/$page.html", where C<$target_dir> is the
destination for all generated content.

Each page must contain a C<content> element.

=item C<content>

An array of sections shown for this page.

Each section must be a hash that contains a C<title> and C<source> entry.

=item C<title>

The title of each section. It need not be unique.

=item C<source>

A listing of POD files. This may be an array or a string; a string will behave as
an array of one element.

Each element in the array must be a path relative to the Parrot repo, such as
"docs/pmc/default.pmc". Globbing is supported, so paths like "docs/pmc/*.pmc" are
also allowed.

=item C<resource>

An optional listing of files that should be copied directly to C<$target_dir>. This
is useful for things like images that require no transformation, but should be accessible
to generated output.

This element behaves similarly to C<source>: a string or array may be passed, and globbing
is performed for each element.

=back

=cut

use strict;
use warnings;

# as long as we have a separate docs/ makefile, this will be run from
# inside it. So jump up a dir to catch all the files.
# Every path used later (docs/index/*.json, docs/html, ...) is relative, so a
# failed chdir must stop the script instead of letting it run elsewhere.
# NOTE(review): this chdir happens at run time, whereas the 'use' lines that
# follow are processed at compile time, i.e. before the directory changes.
chdir '..' or die "Unable to chdir to parent directory: $!\n";

use lib 'lib';
use Fatal qw/open close/;

use File::Basename qw/basename dirname/;
use File::Path;
use File::Copy;
use File::Spec;
use Getopt::Long;
use JSON;
use Parrot::Docs::PodToHtml;
use Parrot::Docs::HTMLPage;

# Version string handed to the page header and footer; stays empty unless
# --version=VERSION is given on the command line.
my $version = q{};
GetOptions( 'version=s' => \$version );

# Root directory for all generated output, and the resources path that is
# passed through to the page header and footer.
my ($target_dir, $resource_dir) = ('docs/html', '../resources');

# Decoder used for the index description files.
my $json = JSON->new;

# Transform the json
#
# Every docs/index/*.json file is decoded and stored in %pages under its
# lower-cased "page" name.  Each entry of its "content" array additionally
# receives:
#   input_files - the unique sources that remain after exclusions
#   sorted_list - the sources in listing order, with exclusions removed
#   resources   - the expanded list of files to copy verbatim
my %pages;
my @json_index_files = glob 'docs/index/*.json';
foreach my $index_file (@json_index_files) {
    # open and close are wrapped by Fatal, so failures die on their own.
    open my $fh, '<', $index_file;
    my $contents = do { local $/; <$fh> };
    close $fh;

    my $section;
    if ( !eval { $section = $json->decode($contents); 1 } ) {
        print STDERR "Error in $index_file:\n";
        die $@;
    }

    # The page name becomes both the %pages key and the output file name, so
    # refuse to continue without one.
    if (   ref $section ne 'HASH'
        || !defined $section->{page}
        || $section->{page} eq '' )
    {
        die "$index_file: top-level JSON object must contain a \"page\" entry\n";
    }

    foreach my $chunk (@{$section->{content}}) {
        my @sources_list   = canonicalize_files($chunk->{source});
        my @resources_list = canonicalize_files($chunk->{resource});

        # These are only literals, no globs (for now?).  Like "source", the
        # "exclude" entry may be a single string or an array of strings.
        if (defined $chunk->{exclude}) {
            my $exclude     = $chunk->{exclude};
            my @exclusions  = ref $exclude eq 'ARRAY' ? @{$exclude} : ($exclude);
            my %is_excluded = map { ( $_ => 1 ) } @exclusions;

            @sources_list = grep { !$is_excluded{$_} } @sources_list;
        }

        # Collapse duplicates: each source only needs to be transformed once.
        my %sources = map { ( $_ => 1 ) } @sources_list;

        # Sorted so that the processing order is reproducible between runs.
        $chunk->{input_files} = [sort keys %sources];
        $chunk->{sorted_list} = \@sources_list;
        $chunk->{resources}   = \@resources_list;
    }

    # Page names are expected to be unique; make a clash visible instead of
    # silently dropping the earlier definition.
    my $page_key = lc $section->{page};
    if (exists $pages{$page_key}) {
        warn "$index_file: page \"$section->{page}\" is already defined; "
           . "replacing the earlier definition\n";
    }
    $pages{$page_key} = $section;
}

# Titles of the transformed POD files, keyed by source path.  transform_input()
# fills this in during the first pass and the index pages read it during the
# second pass, so it is declared ahead of both.
my %file_titles;

#
# transform referenced POD files and copy resources
#
foreach my $page_name (keys %pages) {
    my $page = $pages{$page_name};
    foreach my $section (@{$page->{content}}) {
        foreach my $source (@{$section->{input_files}}) {
            if ($source =~ /^:(.*)/) {
                # Indicates link to other page.
                my $link = lc $1;
                if (! exists $pages{$link}) {
                    die "invalid link $source specified.\n";
                }
                # assuming a link only in one page
                $pages{$link}->{parent} = $page;
            }
            else {
                transform_input($source, $page->{page}, $page->{title});
            }
        }
        foreach my $resource (@{$section->{resources}}) {
            my $outfile = File::Spec->catfile($target_dir, $resource);
            File::Path::mkpath(File::Basename::dirname($outfile));

            # A failed copy leaves the generated pages without this file, so
            # report it rather than ignoring the result.
            File::Copy::copy($resource, $outfile)
                or warn "Unable to copy $resource to $outfile: $!\n";
        }
    }
}

#
# generate index/header pages
#
foreach my $outfilename (keys %pages) {
    my $page  = $pages{$outfilename};
    my $title = $page->{title};

    my $outfile = File::Spec->catfile($target_dir, $outfilename) . '.html';

    # legacy functional mkpath is used instead of make_path to accommodate
    # older versions of Perl
    File::Path::mkpath(File::Basename::dirname($outfile));

    open my $out_fh, '>', $outfile;

    # set up and output header
    my $nav_HTML = qq{<a href="index.html">Home</a>};
    print $out_fh Parrot::Docs::HTMLPage->header($title, $nav_HTML, $resource_dir, $version);

    foreach my $section (@{$page->{content}}) {
        # output Section title
        print $out_fh "<h2>$section->{title}</h2>\n\n<ul>";

        foreach my $source (@{$section->{sorted_list}}) {
            if ($source =~ /^:(.*)/) {
                my $link = lc $1;
                # output link with title
                print $out_fh qq(<li><a href="$link.html">$pages{$link}->{title}</a></li>\n);
            }
            else {
                # output source and name (from title); fall back to the source
                # path when no title was recorded, so the link stays visible
                my $link_text = $file_titles{$source};
                if (!defined $link_text || $link_text eq '') {
                    $link_text = $source;
                }
                print $out_fh qq(<li><a href="$source.html">$link_text</a></li>\n);
            }
        }
        print $out_fh "</ul>\n\n";
    }

    # ========================================================================#
    # Note: The below is a bit of a hack to output html links directly into   #
    #       the 'index.html' file.  The reason for this hackery is, this      #
    #       script uses json formatted files to pull in various *.pod files   #
    #       in order to create the 'index.html' (along with other *.html      #
    #       pages).  Unfortunately, there is no simple way to insert html     #
    #       markup directly into the 'index.html' file; hence, the below      #
    #       bits.  -- acy 01/25/12                                            #
    #                                                                         #
    # Note: We only want to do this with the 'index.html' page, and NOT any   #
    #       of the other pages (e.g., 'ops.html' ... 'pct_tutorial.html').    #
    # ========================================================================#
    if ($outfilename eq "index") {
      my $lang_heading = "<h2>Development Languages</h2>\n\n";
      my $lang1 = "<ul><li><a href=\"http://whiteknight.github.com/Rosella/winxed/index.html\">The Winxed Programming Language</a></li>\n";
      my $lang2 = "<li><a href=\"https://github.com/perl6/nqp\">The NQP Programming Language</a></li>\n</ul>\n\n";
      print $out_fh Parrot::Docs::HTMLPage->body($lang_heading, $lang1, $lang2);
    }

    # output footer
    print $out_fh Parrot::Docs::HTMLPage->footer('', $resource_dir, $version);

    # Close explicitly so that buffered write errors surface here; close is
    # wrapped by Fatal and dies on failure.
    close $out_fh;
}

exit 0;

=head2 Utility Methods

=over

=item C<canonicalize_files($json_chunk)>

Process a given JSON chunk to retrieve a list of input files. Currently, this
is used to retrieve input POD files and static images.

The JSON chunk may be an array or a string (which is transformed into a
single-element array).

Each array element should be a file path relative to the parrot repo; it is not
necessary for resources to live under docs/. Globs are also allowed, so you can
include many files at once.

=cut

sub canonicalize_files {
    my ($file_chunk) = @_;

    # Normalise the argument into a list of patterns: an array reference is
    # flattened, any other true value counts as a single pattern, and a false
    # value (for instance a missing JSON entry) yields no patterns at all.
    my @patterns = ref $file_chunk eq "ARRAY" ? @{$file_chunk}
                 : $file_chunk                ? ($file_chunk)
                 :                              ();

    # Patterns are expanded in the order given; only the matches of each
    # individual pattern are sorted.
    my @expanded;
    for my $pattern (@patterns) {
        push @expanded, sort glob($pattern);
    }

    return @expanded;
}


=item C<transform_input($input, $parent, $parent_title)>

Transform the specified POD file into HTML. C<$input> should be a path to the
POD file, relative to the Parrot repo (e.g., "src/pmc/default.pmc"). C<$parent>
and C<$parent_title> both refer to the parent's "page" JSON chunk and "title"
JSON chunk, respectively. Parent information will be used to create breadcrumb
links.

The resulting HTML will be written beneath C<$target_dir>, preserving the relative
location of the input file and appending C<.html> to its name, for example:

    "src/pmc/default.pmc" => "$target_dir/src/pmc/default.pmc.html"

=cut

sub transform_input {
    my ($input, $parent, $parent_title) = @_;

    # Globs can match directories or other non-files; refuse those up front.
    if (! -f $input) {
        die "$input not found or not a regular file\n" .
            "You might need to restrict your glob specification.";
    }

    my $formatter = Parrot::Docs::PodToHtml->new();
    $formatter->set_parent($parent, $parent_title);

    # Errata is currently noisy; e.g. complains about U<> even after
    # formatting it as expected. skip it until we can properly quiet it down.
    $formatter->no_errata_section(1);

    # The output mirrors the input path below $target_dir, with '.html' added.
    my $outfile = File::Spec->catfile($target_dir, $input) . '.html';

    # legacy functional mkpath is used instead of make_path to accommodate
    # older versions of Perl
    File::Path::mkpath(File::Basename::dirname($outfile));

    open my $out_fh, '>', $outfile;

    $formatter->output_fh($out_fh);
    $formatter->parse_file($input);
    warn "$input generated no HTML output\n"
        unless $formatter->content_seen;

    # Remember the title so the index pages can use it as link text.
    my $page_title = $formatter->return_page_title();
    $file_titles{$input} = $page_title;

    # Close explicitly so that buffered write errors surface here; close is
    # wrapped by Fatal and dies on failure.
    close $out_fh;

    return $page_title;
}

=back

=cut

# Local Variables:
#   mode: cperl
#   cperl-indent-level: 4
#   fill-column: 100
# End:
# vim: expandtab shiftwidth=4: