Permalink
Browse files

various updates

  • Loading branch information...
masak committed Jan 2, 2011
1 parent 89a5e29 commit 42120bd9bdeffc32e44d32414bf078a823f124bf
Showing with 176 additions and 53 deletions.
  1. +162 −39 create-new-structure
  2. +1 −1 posts/37976
  3. +1 −1 posts/38212
  4. +6 −6 posts/39716
  5. +1 −1 posts/39872
  6. +1 −1 posts/40107
  7. +1 −1 posts/40361
  8. +3 −3 posts/40516
View
@@ -2,17 +2,19 @@
use 5.010;
use strict;
use YAML;
+use Encode;
-sub titlify {
+my %old2new;
+
+sub urlify { # Turn a post title into a lowercase, hyphen-separated slug; dies if the result is not slug-safe.
my ($old_title) = @_;
my $title = $old_title; # work on a copy so the untouched title is still available for the die message
$title = lc($title);
- $title =~ s/[,:'\-!"?#;]//g;
- $title =~ s/\.(?!\w)//g;
+ $title =~ s/[,:'\-!"?#;.]//g; # strip the listed punctuation outright, including every period and hyphen
$title =~ s/ +/-/g; # each run of spaces becomes a single hyphen
$title =~ s/\[|\]|\(|\)//g; # drop square brackets and parentheses
die $old_title # abort, reporting the original title, ...
- if $title !~ /^[\w\-.]+$/;
+ if $title !~ /^[\w\-]+$/; # ... unless only word characters and hyphens remain
return $title;
}
@@ -39,6 +41,7 @@ sub markdownify {
if ($stack[-1]->{'children'}[-1]{'name'} eq 'nobr') {
# kill with fire
pop @{$stack[-1]->{'children'}};
+ push @{$stack[-1]->{'children'}}, ' ';
}
}
else {
@@ -65,12 +68,12 @@ sub markdownify {
if ($content ne '') {
push @{$dom->{'children'}}, $content;
}
- my $markdown = handle_divs($dom->{'children'});
+ my $markdown = handle_divs($dom->{'children'}, $filename);
return $markdown;
}
sub handle_divs {
- my ($childrenref) = @_;
+ my ($childrenref, $filename) = @_;
my @children = @{$childrenref};
my $result = '';
if (@children && !ref $children[0]) {
@@ -84,6 +87,19 @@ sub handle_divs {
&& $children[0]->{'name'} ne 'blockquote';
unshift @children, { name => 'p', children => [@really_p] };
}
+ for my $i (1..@children-1) {
+ my ($prevnode, $node) = @children[$i - 1, $i];
+ if (ref($prevnode) && $prevnode->{'name'} eq 'div'
+ && ref($node) && $node->{'name'} eq 'div'
+ && $prevnode->{'attrs'}{'class'} eq 'quote'
+ && $node->{'attrs'}{'class'} eq 'quote') {
+
+ unshift @{$children[$i]->{'children'}},
+ @{$children[$i-1]->{'children'}};
+ $children[$i-1] = '[[GONE]]';
+ }
+ }
+ @children = grep { $_ ne '[[GONE]]' } @children;
for my $node (@children) {
if (ref $node eq 'HASH') {
given ($node->{'name'}) {
@@ -94,7 +110,9 @@ sub handle_divs {
if (@p_children == 1
&& ref($p_children[0]) eq 'HASH'
&& ($p_children[0]->{'name'} eq 'strong'
- || $p_children[0]->{'name'} eq 'b')) {
+ || $p_children[0]->{'name'} eq 'b')
+ && $p_children[0]->{'children'}[0] !~ /Update/
+ && $filename ne 'posts/40167') {
$result .= '## '
. handle_spans($p_children[0]->{'children'})
. "\n\n";
@@ -103,32 +121,51 @@ sub handle_divs {
&& ref($p_children[0]) eq 'HASH'
&& ($p_children[0]->{'name'} eq 'code'
|| $p_children[0]->{'name'} eq 'tt')) {
- my @fragments = @{$p_children[0]->{'children'}};
- my $line = '';
- my @lines;
- for my $fragment (@fragments) {
- if (!ref $fragment) {
- $fragment =~ s/ / /g;
- $fragment =~ s/&lt;/</g;
- $fragment =~ s/&gt;/>/g;
- $line .= $fragment;
+ if (grep { ref($_) && ($_->{'name'} eq 'a'
+ || $_->{'name'} eq 'strong'
+ || $_->{'name'} eq 'b')}
+ @{$p_children[0]->{'children'}}) {
+ $p_children[0]->{'name'} = 'code';
+ $p_children[0]->{'children'}
+ = [grep { !ref($_) || $_->{'name'} ne 'br' }
+ @{$p_children[0]->{'children'}}];
+ $p_children[0]->{'children'}[0]
+ =~ s/^\n//;
+ $result .= '<pre>';
+ $result .= handle_verbatim($p_children[0]);
+ $result .= '</pre>';
+ $result .= "\n\n";
+ }
+ else {
+ my @fragments = @{$p_children[0]->{'children'}};
+ my $line = '';
+ my @lines;
+ for my $fragment (@fragments) {
+ if (!ref $fragment) {
+ $fragment =~ s/&nbsp;/ /g;
+ $fragment =~ s/&lt;/</g;
+ $fragment =~ s/&gt;/>/g;
+ $fragment =~ s/&amp;/&/g;
+ $fragment =~ s/&#(\d+);/chr($1)/ge;
+ $line .= $fragment;
+ }
+ elsif ($fragment->{'name'} eq 'br') {
+ $line =~ s/^\s*\n//;
+ push @lines, $line;
+ $line = '';
+ }
+ elsif ($fragment->{'name'} eq 'a') {
+ $line .= handle_spans([$fragment]);
+ }
}
- elsif ($fragment->{'name'} eq 'br') {
+ if ($line ne '') {
$line =~ s/^\s*\n//;
push @lines, $line;
- $line = '';
- }
- elsif ($fragment->{'name'} eq 'a') {
- $line .= handle_spans([$fragment]);
}
+ $result .= " $_\n"
+ for @lines;
+ $result .= "\n";
}
- if ($line ne '') {
- $line =~ s/^\s*\n//;
- push @lines, $line;
- }
- $result .= " $_\n"
- for @lines;
- $result .= "\n";
}
elsif (@p_children == 1
&& ref($p_children[0]) eq 'HASH'
@@ -140,6 +177,12 @@ sub handle_divs {
$result .= handle_spans($p_children[0]->{'children'});
}
}
+ elsif (@p_children == 1
+ && $p_children[0] eq '&#10086;') {
+ $node->{'attrs'}{'class'} = 'separator';
+ $result .= handle_verbatim($node)
+ . "\n\n";
+ }
else {
$result .= handle_spans(\@p_children)
. "\n\n";
@@ -205,10 +248,10 @@ sub handle_divs {
}
}
elsif ($elem->{'name'} eq 'dt') {
- $result .= "<dt>" . handle_spans($elem->{'children'}) . "</dt>";
+ $result .= handle_verbatim($elem);
}
elsif ($elem->{'name'} eq 'dd') {
- $result .= "<dd>" . handle_spans($elem->{'children'}) . "</dd>";
+ $result .= handle_verbatim($elem);
}
else {
die "Unknown ", $elem->{'name'}, " in dl";
@@ -218,10 +261,29 @@ sub handle_divs {
$result .= "</dl>\n\n";
}
when ('div') {
- $result .= handle_divs($node->{'children'});
+ if (exists $node->{'attrs'}{'class'}
+ && $node->{'attrs'}{'class'} eq 'quote') {
+
+ $result .= handle_verbatim($node);
+ $result .= "\n\n";
+ }
+ else {
+ $result .= handle_divs($node->{'children'});
+ }
}
when ('blockquote') {
- $result .= handle_divs($node->{'children'});
+ if (@{$node->{'children'}} > 1
+ && $node->{'children'}[0]{'name'} eq 'div'
+ && @{$node->{'children'}[0]{'children'}} == 1
+ && $node->{'children'}[0]{'children'}[0]{'name'} eq 'p'
+ && @{$node->{'children'}[0]{'children'}[0]{'children'}} > 1
+ && $node->{'children'}[0]{'children'}[0]{'children'}[1]{'name'} eq 'tt') {
+ $result .= handle_divs($node->{'children'}[0]{'children'});
+ }
+ else {
+ $result .= handle_verbatim($node);
+ $result .= "\n\n";
+ }
}
default {
die "Encountered a $_, don't know how to handle", Dump($childrenref);
@@ -245,7 +307,12 @@ sub handle_spans {
next if $node->{'name'} eq 'nobr';
given ($node->{'name'}) {
when ('a') {
- $result .= '[' . handle_spans($node->{'children'}) . '](' . $node->{'attrs'}{'href'} . ')';
+ my $href = $node->{'attrs'}{'href'};
+ if ($href =~ m[http://use.perl.org/~masak/journal/(\d+)]
+ && exists $old2new{$1}) {
+ $href = "http://strangelyconsistent.org/blog/" . $old2new{$1};
+ }
+ $result .= '[' . handle_spans($node->{'children'}) . '](' . $href . ')';
}
when (['em', 'i']) {
$result .= '*' . handle_spans($node->{'children'}) . '*';
@@ -254,9 +321,23 @@ sub handle_spans {
$result .= '**' . handle_spans($node->{'children'}) . '**';
}
when (['code', 'tt']) {
- $result .= '`'
- . handle_spans($node->{'children'})
- . '`';
+ if (@{$node->{'children'}} == 3
+ && ref $node->{'children'}[1] eq 'HASH'
+ && $node->{'children'}[1]{'name'} eq 'a') {
+ # Special case for inside-out <a>/<code>
+ $result .= handle_spans([
+ {
+ name => 'a',
+ attrs => { href => $node->{'children'}[1]{'attrs'}{'href'} },
+ children => $node->{'children'}[1]{'children'}
+ }
+ ]);
+ }
+ else {
+ $result .= '`'
+ . handle_spans($node->{'children'})
+ . '`';
+ }
}
when ('br') {
$result .= "<br>\n";
@@ -274,13 +355,48 @@ sub handle_spans {
$text =~ s/&nbsp;/ /g;
$text =~ s/&lt;/</g;
$text =~ s/&gt;/>/g;
+ $text =~ s/&amp;/&/g;
+ $text =~ s/&#(\d+);/chr($1)/ge;
+ $text =~ s/fix:<([^>]+)>/fix:&lt;$1&gt;/g;
$result .= $text;
}
}
$result =~ s/\n+$//;
return $result;
}
+sub handle_verbatim { # Serialize $node (and, recursively, its element children) back into an HTML string.
+ my ($node) = @_; # $node: hashref read via its 'name', 'attrs' and 'children' entries
+ my $name = $node->{'name'};
+ my %attrs = %{$node->{'attrs'}}; # assumes 'attrs' is always a hashref -- TODO confirm against the parser
+ my $result = "<$name";
+ for my $attr (keys %attrs) { # keys() order is unspecified, so attribute order in the output can vary
+ next if $attr eq 'rel'; # 'rel' attributes are dropped from the output
+ my $value = $attrs{$attr};
+ if (-1 == index($value, "'")) { # no single quote in the value: wrap it in single quotes
+ $result .= " $attr='$value'";
+ }
+ else { # value contains a single quote: wrap it in double quotes (the value itself is not escaped)
+ $result .= qq[ $attr="$value"];
+ }
+ }
+ $result .= ">";
+ my @children = @{$node->{'children'}};
+ for my $child (@children) {
+ if (ref $child eq 'HASH') { # element child: recurse
+ $result .= handle_verbatim($child);
+ }
+ else { # anything that is not a hashref is treated as text
+ my $text = $child;
+ $text =~ s/_/\\_/g; # backslash-escape underscores (presumably so Markdown does not read them as emphasis)
+ $text =~ s/\&nbsp;/ /g; # replace the &nbsp; entity with a plain space
+ $result .= $text;
+ }
+ }
+ $result .= "</$name>"; # a closing tag is always emitted, whatever the element
+ return $result; # the assembled markup string
+}
+
my @months = qw<January February March April May June July
August September October November December>;
my %month_number_of = map { $months[$_] => $_ + 1 } 0..@months-1;
@@ -304,11 +420,15 @@ for my $filename (@filenames) {
my $minute = $2;
my $date = "$year-$month-$day_of_month";
- my $timestamp = $date . "T$hour:$minute CEST";
+ my $offset = $month > 3 && $month < 11 ? "+02:00" : "+01:00";
+ my $timestamp = $date . "T$hour:$minute:00$offset";
1 while shift(@lines) !~ /<h3>([^<]+)<\/h3>/;
my $title = $1;
- my $new_filename = 'new-structure/' . titlify($title) . '.markdown';
+ my $url = urlify($title);
+ $title =~ s/ -- / &#8212; /;
+ $old2new{substr($filename, index($filename, '/') + 1)} = $url;
+ my $new_filename = 'new-structure/' . $url . '.markdown';
1 while shift(@lines) !~ /<div class="intro">(.*)/;
my $content = $1;
@@ -326,11 +446,14 @@ for my $filename (@filenames) {
die "$new_filename already exists"
if !@ARGV && -e $new_filename;
+ my $author = decode("utf-8", "Carl Mäsak");
+
open my $OUTFILE, '>', $new_filename or die $!;
+ binmode $OUTFILE, ":utf8";
print $OUTFILE <<"EOF";
---
title: $title
-author: Carl Mäsak
+author: $author
created: $timestamp
---
$content
View
@@ -274,7 +274,7 @@
]
</div>
- <div class="intro"><p>131 years ago today, Thomas Edison demonstrated for the first time a device he called a "<a href="http://en.wikipedia.org/wiki/Phonograph" rel="nofollow">phonograph</a>", which plays back recorded sound. <a href="http://en.wikipedia.org/wiki/Phonograph#First_phonograph" rel="nofollow">Wikipedia</a>:</p><blockquote><div><p>Edison's early phonographs recorded onto a tinfoil sheet phonograph cylinder using an up-down ("hill-and-dale") motion of the stylus. The tinfoil sheet was wrapped around a grooved cylinder, and the sound was recorded as indentations into the foil. Edison's early patents show that he also considered the idea that sound could be recorded as a spiral onto a disc, but Edison concentrated his efforts on cylinders, since the groove on the outside of a rotating cylinder provides a constant velocity to the stylus in the groove, which Edison considered more "scientifically correct". Edison's patent specified that the audio recording be embossed, and it was not until 1886 that vertically modulated engraved recordings using wax coated cylinders were patented by Chichester Bell and Charles Sumner Tainter. They named their version the Graphophone. Emile Berliner patented his Gramophone in 1887.</p></div></blockquote><p>Dang, I kinda liked the sound of "graphophone". &#21704;&#21704;</p><p>&#10086;</p><p>Today, in order to reduce some of the <a href="http://github.com/viklund/november/tree/d58040420ba34561cf8213dfa96455cb5e7b5c7c/p6w/t/markup/mediawiki/07-italic-and-bold.t" rel="nofollow">repetitiveness</a> of the MediaWiki markup parser test suite (set input, set expected output, calculate actual output, test, rinse, repeat), I created the module <code> <a href="http://github.com/viklund/november/tree/mediawiki-markup/p6w/Test/InputOutput.pm" rel="nofollow">Test::InputOutput</a> </code>, which resembles CPAN's <code>Test::Base</code> a bit, minus the syntactic relief. 
Some of the MediaWiki tests now look <a href="http://github.com/viklund/november/tree/f882a653455e6eb3370f4c997fac6b52d02a2726/p6w/t/markup/mediawiki/07-italic-and-bold.t" rel="nofollow">like this</a> instead. Typical example of separating the algorithm from the specifics.</p><p>Since that wasn't what I set out to do today, I'm going to stop here, and do that instead. I want to continue passing tests concerning italic and bold.</p><p>Also, ihrd++ merged his dispatch branch today into master, so I'll review that merge.</p><p>In short, if you haven't downloaded November yet, you should. If nothing else, there's a lot of working Perl 6 code to look at. It's not even hard: look, <a href="http://github.com/viklund/november/" rel="nofollow">a link</a>! The kind folks over at github even provide a <a href="http://github.com/viklund/november/zipball/master" rel="nofollow">zipball</a> and a <a href="http://github.com/viklund/november/tarball/master" rel="nofollow">tarball</a> for those who don't have git yet. That's what I call service.</p><p>Enjoy!</p></div>
+ <div class="intro"><p>131 years ago today, Thomas Edison demonstrated for the first time a device he called a "<a href="http://en.wikipedia.org/wiki/Phonograph" rel="nofollow">phonograph</a>", which plays back recorded sound. <a href="http://en.wikipedia.org/wiki/Phonograph#First_phonograph" rel="nofollow">Wikipedia</a>:</p><blockquote><div><p>Edison's early phonographs recorded onto a tinfoil sheet phonograph cylinder using an up-down ("hill-and-dale") motion of the stylus. The tinfoil sheet was wrapped around a grooved cylinder, and the sound was recorded as indentations into the foil. Edison's early patents show that he also considered the idea that sound could be recorded as a spiral onto a disc, but Edison concentrated his efforts on cylinders, since the groove on the outside of a rotating cylinder provides a constant velocity to the stylus in the groove, which Edison considered more "scientifically correct". Edison's patent specified that the audio recording be embossed, and it was not until 1886 that vertically modulated engraved recordings using wax coated cylinders were patented by Chichester Bell and Charles Sumner Tainter. They named their version the Graphophone. Emile Berliner patented his Gramophone in 1887.</p></div></blockquote><p>Dang, I kinda liked the sound of "graphophone". &#21704;&#21704;</p><p>&#10086;</p><p>Today, in order to reduce some of the <a href="http://github.com/viklund/november/tree/d58040420ba34561cf8213dfa96455cb5e7b5c7c/p6w/t/markup/mediawiki/07-italic-and-bold.t" rel="nofollow">repetitiveness</a> of the MediaWiki markup parser test suite (set input, set expected output, calculate actual output, test, rinse, repeat), I created the module <a href="http://github.com/viklund/november/tree/mediawiki-markup/p6w/Test/InputOutput.pm" rel="nofollow"><code>Test::InputOutput</code></a>, which resembles CPAN's <code>Test::Base</code> a bit, minus the syntactic relief. 
Some of the MediaWiki tests now look <a href="http://github.com/viklund/november/tree/f882a653455e6eb3370f4c997fac6b52d02a2726/p6w/t/markup/mediawiki/07-italic-and-bold.t" rel="nofollow">like this</a> instead. Typical example of separating the algorithm from the specifics.</p><p>Since that wasn't what I set out to do today, I'm going to stop here, and do that instead. I want to continue passing tests concerning italic and bold.</p><p>Also, ihrd++ merged his dispatch branch today into master, so I'll review that merge.</p><p>In short, if you haven't downloaded November yet, you should. If nothing else, there's a lot of working Perl 6 code to look at. It's not even hard: look, <a href="http://github.com/viklund/november/" rel="nofollow">a link</a>! The kind folks over at github even provide a <a href="http://github.com/viklund/november/zipball/master" rel="nofollow">zipball</a> and a <a href="http://github.com/viklund/november/tarball/master" rel="nofollow">tarball</a> for those who don't have git yet. That's what I call service.</p><p>Enjoy!</p></div>
Oops, something went wrong.

0 comments on commit 42120bd

Please sign in to comment.