Browse files

Initial commit

  • Loading branch information...
0 parents commit 1434a9cbc2b071392164781aac65184755336bdc @mizzy committed Jan 10, 2012
Showing with 352 additions and 0 deletions.
  1. +2 −0 .gitignore
  2. +27 −0 index.tx
  3. +52 −0 misc/getting_real_to_json.pl
  4. +69 −0 misc/sicp_to_json.pl
  5. +25 −0 ncx.tx
  6. +48 −0 opf.tx
  7. +129 −0 webiblo.pl
2 .gitignore
@@ -0,0 +1,2 @@
+out
+tmp
27 index.tx
@@ -0,0 +1,27 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<title>[% title %]</title>
+</head>
+<body>
+<h1>[% title %]</h1>
+
+<ul>
+ [% FOREACH chapter IN chapters -%]
+ <li><a href="[% chapter.href %]">[% chapter.title %]</a></li>
+ <ul>
+ [% FOREACH section IN chapter.sections -%]
+ <li><a href="[% section.href %]">[% section.title %]</a></li>
+ <ul>
+ [% FOREACH subsection IN section.subsections -%]
+ <li><a href="[% subsection.href %]">[% subsection.title %]</a></li>
+ [% END %]
+ </ul>
+ [% END -%]
+ </ul>
+ [% END -%]
+</ul>
+
+</body>
+</html>
52 misc/getting_real_to_json.pl
@@ -0,0 +1,52 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+use JSON::XS;
+use LWP::Simple;
+use HTML::TreeBuilder::XPath;
+use YAML;
+
+
+# Build the JSON book description for "Getting Real" from its online
+# table of contents and print it to STDOUT.
+
+my $base = 'http://gettingreal.37signals.com';
+
+# toc.php is cached in the current directory; it is only downloaded once.
+mirror("$base/toc.php", 'toc.php') unless -f 'toc.php';
+die "Could not download $base/toc.php\n" unless -f 'toc.php';
+
+my $tree = HTML::TreeBuilder::XPath->new;
+$tree->parse_file('toc.php')
+    or die "Could not parse toc.php: $!\n";
+
+my $chapters = [];
+for my $chapter ( $tree->findnodes('//h2') ) {
+    # A chapter heading is expected to be an anchor followed by the title.
+    my ( $anchor, $title ) = $chapter->content_list;
+
+    # content_list returns plain strings for text nodes; only elements
+    # can carry a name attribute.
+    next unless ref $anchor;
+    my $name = $anchor->attr('name');
+    next if !$name or $name !~ /^ch\d+/;
+
+    # Use the text of the title when it is wrapped in an element.
+    $title = $title->as_text if ref $title;
+    $title = '' unless defined $title;
+    $title =~ s/\s+$//;
+
+    # The section links live in the element that follows the heading.
+    my $list  = $chapter->right;
+    my @links = ref $list ? $list->findnodes('li/a') : ();
+
+    my $sections = [];
+    for my $section (@links) {
+        push @$sections, {
+            uri   => $base . '/' . $section->attr('href'),
+            title => $section->as_text,
+        };
+    }
+
+    push @$chapters, {
+        title    => $title,
+        sections => $sections,
+    };
+}
+
+my $json = JSON::XS->new;
+$json->indent(1);
+
+print $json->encode({
+    title         => 'Getting Real',
+    authors       => ['37signals'],
+    date          => '2012/1/9',
+    chapters      => $chapters,
+    content_xpath => q{//div[@class="content"]},
+    exclude_xpath => q{//div[@class="next"]},
+    cover_image   => 'http://ec2.images-amazon.com/images/I/31jvYr2h6GL._SS500_.jpg',
+});
+
+exit;
69 misc/sicp_to_json.pl
@@ -0,0 +1,69 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use LWP::Simple;
+use HTML::TreeBuilder::XPath;
+use JSON;
+
+# Build the JSON book description for SICP from the anchors of its online
+# table of contents and print it to STDOUT.
+
+my $base = 'http://mitpress.mit.edu/sicp/full-text/book';
+
+# Static metadata; the chapter tree is attached after the scan below.
+my $book = {
+    title         => 'Structure and Interpretation of Computer Programs',
+    authors       => [ 'Harold Abelson', 'Gerald Jay Sussman', 'Julie Sussman' ],
+    cover_image   => "$base/cover.jpg",
+    exclude_xpath => q{//div[@class="navigation"]},
+};
+
+my $chapters = [];
+
+# Page whose %_toc_* anchors are scanned below.
+my $toc_uri  = "$base/book-Z-H-4.html";
+my $contents = get($toc_uri);
+
+# LWP::Simple::get returns undef on failure; stop instead of parsing nothing.
+die "Could not fetch $toc_uri\n" unless defined $contents;
+
+my $tree = HTML::TreeBuilder::XPath->new;
+$tree->parse($contents);
+$tree->eof;
+
+# The loop variable is deliberately not called $a, which is reserved for sort.
+for my $anchor ( $tree->findnodes('//a') ) {
+    my $name = $anchor->attr('name');
+    next if !$name or $name !~ /\%_toc/ or $name eq '%_toc_start';
+
+    my $text = $anchor->as_text;
+    $text =~ s/\240/ /g;    # \240 is a non-breaking space in Latin-1
+    my $href = "$base/" . $anchor->attr('href');
+
+    # unnumbered ("Temp") chapter: no sections are collected for it
+    if ( $name =~ /^\%_toc_\%_chap_Temp/ ) {
+        push @$chapters, {
+            title => $text,
+            uri   => $href,
+        };
+    }
+    # entering the chapter
+    elsif ( $name =~ /^\%_toc_\%_chap_\d$/ ) {
+        push @$chapters, {
+            title    => $text,
+            uri      => $href,
+            sections => [],
+        };
+    }
+    # entering the section
+    elsif ( $name =~ /\%_toc_\%_sec_\d\.\d$/ ) {
+        push @{ $chapters->[-1]->{sections} }, {
+            title       => $text,
+            uri         => $href,
+            subsections => [],
+        };
+    }
+    # entering the subsection
+    elsif ( $name =~ /\%_toc_\%_sec_\d\.\d\.\d$/ ) {
+        push @{ $chapters->[-1]->{sections}->[-1]->{subsections} }, {
+            title => $text,
+            uri   => $href,
+        };
+    }
+}
+
+$book->{chapters} = $chapters;
+
+print to_json($book);
+
25 ncx.tx
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">
+<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
+ <docTitle><text>[% title %]</text></docTitle>
+ <navMap>
+ <navPoint id="index" playOrder="0">
+ <navLabel><text>[% title %] - Table of Contents</text></navLabel><content src="index.html"/>
+ </navPoint>
+[% num = 1 -%]
+[% FOREACH chapter IN chapters -%]
+ [% IF chapter.href -%]
+ <navPoint id="item[% num %]" playOrder="[% num %]">
+ <navLabel><text>[% chapter.title %]</text></navLabel><content src="[% chapter.href %]"/>
+ </navPoint>
+ [% num = num + 1 -%]
+ [% END -%]
+ [% FOREACH section IN chapter.sections -%]
+ <navPoint id="item[% num %]" playOrder="[% num %]">
+ <navLabel><text>[% section.title %]</text></navLabel><content src="[% section.href %]"/>
+ </navPoint>
+ [% num = num + 1 -%]
+ [% END -%]
+[% END -%]
+ </navMap>
+</ncx>
48 opf.tx
@@ -0,0 +1,48 @@
+<?xml version="1.0" encoding="utf-8"?>
+<package unique-identifier="uid">
+ <metadata>
+ <dc-metadata xmlns:dc="http://purl.org/metadata/dublin_core"
+ xmlns:oebpackage="http://openebook.org/namespaces/oeb-package/1.0/">
+ <dc:Title>[% title %]</dc:Title>
+ <dc:Language>en-us</dc:Language>
+ [% FOREACH author IN authors -%]
+ <dc:Creator>[% author %]</dc:Creator>
+ [% END -%]
+ <dc:Description>[% description %]</dc:Description>
+ <dc:Date>[% date %]</dc:Date>
+ </dc-metadata>
+ <x-metadata>
+ <output encoding="utf-8" content-type="text/x-oeb1-document"></output>
+ <EmbeddedCover>[% cover_file %]</EmbeddedCover>
+ </x-metadata>
+ </metadata>
+ <manifest>
+ <item id="index" media-type="text/x-oeb1-document" href="index.html"></item>
+ <item id="toc" media-type="application/x-dtbncx+xml" href="toc.ncx"></item>
+[% num = 1 -%]
+[% FOREACH chapter IN chapters -%]
+ [% IF chapter.file -%]
+ <item id="item[% num %]" media-type="text/x-oeb1-document" href="[% chapter.file %]"></item>
+ [% num = num + 1 -%]
+ [% END -%]
+ [% FOREACH section IN chapter.sections -%]
+ <item id="item[% num %]" media-type="text/x-oeb1-document" href="[% section.file %]"></item>
+ [% num = num + 1 -%]
+ [% END -%]
+[% END -%]
+ </manifest>
+ <spine toc="toc">
+[% num = 1 -%]
+[% FOREACH chapter IN chapters -%]
+ [% IF chapter.file -%]
+ <itemref idref="item[% num %]" />
+ [% num = num + 1 -%]
+ [% END -%]
+ [% FOREACH section IN chapter.sections -%]
+ <itemref idref="item[% num %]" />
+ [% num = num + 1 -%]
+ [% END -%]
+[% END -%]
+ </spine>
+ <tours></tours>
+ <guide>
+ <reference type="toc" title="Table of Contents" href="index.html"></reference>
+ <reference type="start" title="Startup Page" href="[% chapters.0.sections.0.href %]"></reference>
+ </guide>
+</package>
129 webiblo.pl
@@ -0,0 +1,129 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use JSON::Syck;
+use LWP::Simple;
+use URI;
+use HTML::TreeBuilder::XPath;
+use Text::Xslate;
+use Image::Resize;
+
+# Shared <style> element; get_content() appends it to the <head> of every
+# page it writes.
+my $style = HTML::Element->new('style');
+$style->attr('type', 'text/css');
+$style->push_content(<<STYLE);
+h1, h2, h3, h4, h5, h6, p, ul, ol, dl, pre, blockquote, table
+{margin-top:0.6em; text-indent:0em;}
+.font_size
+{font-size:x-large;}
+STYLE
+
+# The book description is read as JSON from STDIN.
+my $book = JSON::Syck::Load(do { local $/; <STDIN>});
+
+# tmp/ caches downloaded pages, out/ receives everything kindlegen needs.
+mkdir 'tmp' unless -d 'tmp';
+mkdir 'out' unless -d 'out';
+
+# Get cover image
+if ( $book->{cover_image} ) {
+    my $uri  = URI->new($book->{cover_image});
+    my $file = ($uri->path_segments)[-1];
+    mirror($uri, "out/$file") unless -f "out/$file";
+
+    if ( -f "out/$file" ) {
+        $book->{cover_file} = $file;
+
+        # Resize to 600x800 and overwrite the downloaded file in place.
+        my $image = Image::Resize->new("out/$file");
+        my $gd    = $image->resize(600, 800);
+        open my $cover, '>', "out/$file" or die "Cannot write out/$file: $!";
+        binmode $cover;    # JPEG data must not go through newline translation
+        print $cover $gd->jpeg;
+        close $cover or die "Cannot close out/$file: $!";
+    }
+    else {
+        warn "Could not fetch cover image $uri; continuing without a cover\n";
+    }
+}
+
+# Download and clean every page; get_content() skips entries without a uri
+# and records file/href on each entry for the templates.
+for my $chapter ( @{ $book->{chapters} } ) {
+    get_content($chapter);
+    for my $section ( @{ $chapter->{sections} } ) {
+        get_content($section);
+        for my $subsection ( @{ $section->{subsections} } ) {
+            get_content($subsection);
+        }
+    }
+}
+
+my $tx = Text::Xslate->new( syntax => 'TTerse' );
+
+# Whitespace and "/" are replaced so the title is usable as a file name.
+my $book_title = $book->{title};
+$book_title =~ s{[\s/]}{_}g;
+
+# Render each template into out/ (output file name => template).
+for my $job (
+    [ 'index.html',         'index.tx' ],
+    [ 'toc.ncx',            'ncx.tx' ],
+    [ "${book_title}.opf",  'opf.tx' ],
+) {
+    my ( $target, $template ) = @$job;
+    warn "Writing $target ...\n";
+    open my $out, '>', "out/$target" or die "Cannot write out/$target: $!";
+    print $out $tx->render($template, $book);
+    close $out or die "Cannot close out/$target: $!";
+}
+
+warn "Executing kindlegen ...\n";
+
+# List-form system() bypasses the shell, so a title containing quotes,
+# ampersands, parentheses etc. cannot break or inject into the command.
+# Unlike backticks, kindlegen's own output is now shown rather than discarded.
+my $status = system 'kindlegen', "out/${book_title}.opf";
+if ( $status == -1 ) {
+    warn "Could not run kindlegen: $!\n";
+}
+elsif ( $status != 0 ) {
+    warn 'kindlegen exited with status ' . ( $status >> 8 ) . "\n";
+}
+
+exit;
+
+# Download one chapter, section or subsection page and write a cleaned-up
+# copy of it to out/.
+#
+# Argument: a hashref from the book description. Entries without a `uri`
+# are skipped. On return the hashref has two extra keys:
+#   file - name of the generated file inside out/
+#   href - the same name, plus "#fragment" when the source URI had one
+#
+# Reads the file-level $book (content_xpath / exclude_xpath) and $style.
+# Dies when no file name can be derived from the URI or when the output
+# file cannot be written.
+sub get_content {
+    my $object = shift;
+    return if !$object->{uri};
+
+    warn "Getting $object->{title} ...\n";
+
+    my $uri      = URI->new($object->{uri});
+    my $file     = ($uri->path_segments)[-1];
+    my $fragment = $uri->fragment;
+
+    # A URI ending in "/" has an empty last segment, which cannot be used
+    # as a cache or output file name.
+    die "Cannot derive a file name from $uri\n"
+        if !defined $file or $file eq '';
+
+    # tmp/ is a download cache: a page is fetched only once.
+    unless ( -f "tmp/$file" ) {
+        my $status = mirror($uri, "tmp/$file");
+        warn "Failed to get $uri (HTTP status $status)\n" if is_error($status);
+    }
+
+    my $tree = HTML::TreeBuilder::XPath->new;
+    $tree->parse_file("tmp/$file")
+        or warn "Cannot read tmp/$file: $!\n";
+
+    # Keep only the first node matched by content_xpath, re-parsed as a
+    # document of its own.
+    if ( $book->{content_xpath} ) {
+        my $content = ($tree->findnodes($book->{content_xpath}))[0];
+        if ($content) {
+            $tree = HTML::TreeBuilder::XPath->new;
+            $tree->parse($content->as_XML);
+            $tree->eof;
+        }
+        else {
+            warn "content_xpath matched nothing in $uri; keeping the whole page\n";
+        }
+    }
+
+    # Drop everything matched by exclude_xpath.
+    if ( $book->{exclude_xpath} ) {
+        $_->detach for $tree->findnodes($book->{exclude_xpath});
+    }
+
+    my $head = ($tree->findnodes('/html/head'))[0];
+    $head->push_content($style);
+
+    # Fetch every image next to the page. get_image() stores images flat in
+    # out/ under their last path segment, so src is rewritten to that name.
+    for my $image ( $tree->findnodes('//img') ) {
+        my $src = $image->attr('src');
+        next if !defined $src or $src eq '';
+
+        # new_abs resolves relative, root-relative and absolute src alike.
+        my $image_uri = URI->new_abs($src, $uri);
+        next unless $image_uri->can('path_segments');    # e.g. data: URIs
+
+        get_image($image_uri);
+
+        my $local = ($image_uri->path_segments)[-1];
+        $image->attr('src', $local) if defined $local and $local ne '';
+    }
+
+    # Every generated page gets an .html name: replace an existing
+    # extension, or append one when there is none.
+    unless ( $file =~ /\.html$/ ) {
+        $file =~ s/\..+/.html/ or $file .= '.html';
+    }
+
+    open my $out, '>', "out/$file" or die "Cannot write out/$file: $!";
+    print $out $tree->as_XML;
+    close $out or die "Cannot close out/$file: $!";
+
+    $object->{file} = $file;
+    $file .= "#$fragment" if defined $fragment and length $fragment;
+    $object->{href} = $file;
+}
+
+# Download one image into out/, named after the last path segment of its URI.
+#
+# Argument: a URI object. A file that already exists in out/ is not fetched
+# again. Returns 1 when the file was already present, otherwise the HTTP
+# status code returned by mirror(); a failed download is reported on STDERR.
+sub get_image {
+    my $uri = shift;
+    warn "Getting $uri ...\n";
+    my $file = ($uri->path_segments)[-1];
+    return 1 if -f "out/$file";
+
+    my $status = mirror($uri, "out/$file");
+    warn "Failed to get $uri (HTTP status $status)\n" if is_error($status);
+    return $status;
+}

0 comments on commit 1434a9c

Please sign in to comment.