Permalink
Browse files

initial digpicz import

  • Loading branch information...
0 parents commit 467231d5030787a154fc7259540042b8c87b2ea2 @pkrumins committed Nov 29, 2009
Showing with 6,938 additions and 0 deletions.
  1. +33 −0 bin/readme.txt
  2. +16 −0 compiled.entries/readme.txt
  3. +16 −0 compiled.pages/readme.txt
  4. BIN db/media.db
  5. +23 −0 db/readme.txt
  6. +17 −0 locks/readme.txt
  7. +46 −0 readme.txt
  8. +175 −0 scripts/ImageFinder.pm
  9. +339 −0 scripts/NetPbm.pm
  10. +349 −0 scripts/ThumbExtractor.pm
  11. +206 −0 scripts/ThumbMaker.pm
  12. +146 −0 scripts/db_inserter.pl
  13. +228 −0 scripts/digg_extractor.pl
  14. +961 −0 scripts/page_gen.pl
  15. +302 −0 scripts/reddit_extractor.pl
  16. +61 −0 scripts/test_insert_entries.pl
  17. +35 −0 templates/footer.html
  18. +42 −0 templates/header.html
  19. +15 −0 templates/index.html
  20. +25 −0 templates/index_entry.html
  21. +9 −0 templates/link.html
  22. +25 −0 templates/link_entry.html
  23. +5 −0 templates/navigation.html
  24. +38 −0 templates/sidebar.html
  25. +17 −0 tmp.www/readme.txt
  26. BIN www/allpics.gif
  27. BIN www/bookmark.gif
  28. BIN www/catonmat.gif
  29. BIN www/comment.gif
  30. BIN www/email.gif
  31. BIN www/favicon.ico
  32. BIN www/icons/delicious.gif
  33. BIN www/icons/feed.gif
  34. BIN www/icons/local.gif
  35. +33 −0 www/icons/master/bin/readme.txt
  36. +16 −0 www/icons/master/compiled.entries/readme.txt
  37. +16 −0 www/icons/master/compiled.pages/readme.txt
  38. BIN www/icons/master/db/media.db
  39. +23 −0 www/icons/master/db/readme.txt
  40. +17 −0 www/icons/master/locks/readme.txt
  41. +46 −0 www/icons/master/readme.txt
  42. +175 −0 www/icons/master/scripts/ImageFinder.pm
  43. +339 −0 www/icons/master/scripts/NetPbm.pm
  44. +349 −0 www/icons/master/scripts/ThumbExtractor.pm
  45. +206 −0 www/icons/master/scripts/ThumbMaker.pm
  46. +146 −0 www/icons/master/scripts/db_inserter.pl
  47. +228 −0 www/icons/master/scripts/digg_extractor.pl
  48. +961 −0 www/icons/master/scripts/page_gen.pl
  49. +302 −0 www/icons/master/scripts/reddit_extractor.pl
  50. +61 −0 www/icons/master/scripts/test_insert_entries.pl
  51. +35 −0 www/icons/master/templates/footer.html
  52. +42 −0 www/icons/master/templates/header.html
  53. +15 −0 www/icons/master/templates/index.html
  54. +25 −0 www/icons/master/templates/index_entry.html
  55. +9 −0 www/icons/master/templates/link.html
  56. +25 −0 www/icons/master/templates/link_entry.html
  57. +5 −0 www/icons/master/templates/navigation.html
  58. +38 −0 www/icons/master/templates/sidebar.html
  59. +17 −0 www/icons/master/tmp.www/readme.txt
  60. BIN www/icons/master/www/allpics.gif
  61. BIN www/icons/master/www/bookmark.gif
  62. BIN www/icons/master/www/catonmat.gif
  63. BIN www/icons/master/www/comment.gif
  64. BIN www/icons/master/www/email.gif
  65. BIN www/icons/master/www/favicon.ico
  66. BIN www/icons/master/www/icons/delicious.gif
  67. BIN www/icons/master/www/icons/feed.gif
  68. BIN www/icons/master/www/icons/local.gif
  69. BIN www/icons/master/www/icons/picture-big.gif
  70. BIN www/icons/master/www/icons/picture.gif
  71. BIN www/icons/master/www/icons/pictures-big.gif
  72. BIN www/icons/master/www/icons/pictures.gif
  73. BIN www/icons/master/www/icons/video-big.gif
  74. BIN www/icons/master/www/icons/video.gif
  75. BIN www/icons/master/www/icons/videos-big.gif
  76. BIN www/icons/master/www/icons/videos.gif
  77. +17 −0 www/icons/master/www/image.cache/readme.txt
  78. BIN www/icons/master/www/link-line.gif
  79. BIN www/icons/master/www/logo-cat.gif
  80. BIN www/icons/master/www/logo.gif
  81. +20 −0 www/icons/master/www/readme.txt
  82. +303 −0 www/icons/master/www/style.css
  83. BIN www/icons/master/www/subscribe.gif
  84. BIN www/icons/master/www/time.gif
  85. BIN www/icons/picture-big.gif
  86. BIN www/icons/picture.gif
  87. BIN www/icons/pictures-big.gif
  88. BIN www/icons/pictures.gif
  89. BIN www/icons/video-big.gif
  90. BIN www/icons/video.gif
  91. BIN www/icons/videos-big.gif
  92. BIN www/icons/videos.gif
  93. +17 −0 www/image.cache/readme.txt
  94. BIN www/link-line.gif
  95. BIN www/logo-cat.gif
  96. BIN www/logo.gif
  97. +20 −0 www/readme.txt
  98. +303 −0 www/style.css
  99. BIN www/subscribe.gif
  100. BIN www/time.gif
@@ -0,0 +1,33 @@
+(c) Peteris Krumins (peter@catonmat.net), 2007.
+http://www.catonmat.net - good coders code, great reuse
+
+digg's missing picture section website generator:
+http://digpicz.com
+-----------------------------------------------------------------
+/bin
+
+This directory should contain the following executables from
+Netpbm tools:
+
+pamfile
+ppmmake
+pamflip
+pnmcat
+pamscale
+jpegtopnm
+giftopnm
+pngtopnm
+pnmtojpeg
+pamcut
+
+These tools are used by the /scripts/NetPbm.pm module, which in
+turn is used by the /scripts/ThumbMaker.pm module, which creates
+small thumbnails for media links on the digpicz website.
+
+Compile these tools yourself.
+
+They can be downloaded at:
+http://netpbm.sourceforge.net/
+
+-----------------------------------------------------------------
+http://www.catonmat.net/blog/designing-digg-picture-website
@@ -0,0 +1,16 @@
+(c) Peteris Krumins (peter@catonmat.net), 2007.
+http://www.catonmat.net - good coders code, great reuse
+
+digg's missing picture section website generator:
+http://digpicz.com
+-----------------------------------------------------------------
+/compiled.entries
+
+This directory should contain the cached (compiled) HTML
+templates for each reddit link.
+
+The entries get cached to minimize page regeneration.
+
+-----------------------------------------------------------------
+http://www.catonmat.net/blog/designing-digg-picture-website
+
@@ -0,0 +1,16 @@
+(c) Peteris Krumins (peter@catonmat.net), 2007.
+http://www.catonmat.net - good coders code, great reuse
+
+digg's missing picture section website generator:
+http://digpicz.com
+-----------------------------------------------------------------
+/compiled.pages
+
+This directory should contain the cached (compiled) HTML
+templates for each reddit link.
+
+The entries get cached to minimize page regeneration.
+
+-----------------------------------------------------------------
+http://www.catonmat.net/blog/designing-digg-picture-website
+
Binary file not shown.
@@ -0,0 +1,23 @@
+(c) Peteris Krumins (peter@catonmat.net), 2007.
+http://www.catonmat.net - good coders code, great reuse
+
+digg's missing picture section website generator
+http://digpicz.com
+-----------------------------------------------------------------
+/db
+
+This directory contains the sqlite database with digg's stories
+linking to pictures
+
+I use SQLite Database Browser GUI tool for quickly viewing and
+editing the database.
+
+It is available at:
+http://sqlitebrowser.sourceforge.net/
+
+I included a database with 12 entries so you can get an idea of how
+it looked.
+
+-----------------------------------------------------------------
+http://www.catonmat.net/blog/designing-digg-picture-website
+
@@ -0,0 +1,17 @@
+(c) Peteris Krumins (peter@catonmat.net), 2007.
+http://www.catonmat.net - good coders code, great reuse
+
+digg's missing picture section website generator:
+http://digpicz.com
+-----------------------------------------------------------------
+/locks
+
+This directory contains exclusive locks created by the scripts to
+make sure two copies of the same script do not get executed at
+the same time.
+
+Actually the only script creating a lock file is page_gen.pl ;)
+
+-----------------------------------------------------------------
+http://www.catonmat.net/blog/designing-digg-picture-website
+
@@ -0,0 +1,46 @@
+This is the digpicz.com website that I created back in 2007. It got massive
+attention back then because Digg didn't have a picture section at the time.
+
+One day I had to close it because I got an email from Digg's lawyers, which
+said that I was abusing Digg's trademarks.
+
+The site was completely static and was generated by a bunch of Perl scripts.
+
+Please visit http://digg.picurls.com/ to see how it looked. I saved a
+screenshot for history.
+
+Also see these two articles on how it was created and how it received 100,000
+visitors in the first day (it got massively popular!):
+
+ http://www.catonmat.net/blog/designing-digg-picture-website/
+ http://www.catonmat.net/blog/few-words-about-digpicz-dot-com/
+
+Digpicz.com was created by Peteris Krumins (peter@catonmat.net).
+His blog is at http://www.catonmat.net - good coders code, great reuse
+
+------------------------------------------------------------------------------
+
+/
+
+This is a complete package which generates digpicz.com website.
+
+Each directory except /templates and /scripts contains a readme
+with basic description of what the dir was made for.
+
+/templates directory contains HTML template fragments which
+Perl's Template::Toolkit puts together to create pages seen on
+http://digpicz.com
+
+/scripts directory contains all the Perl scripts and modules
+which do all the generation and data mining together.
+
+See the article on how it was designed for much more detailed information:
+
+ http://www.catonmat.net/blog/designing-digg-picture-website/
+
+------------------------------------------------------------------------------
+
+Sincerely,
+Peteris Krumins
+http://www.catonmat.net
+
@@ -0,0 +1,175 @@
+#!/usr/bin/perl
+#
+# Copyright (C) 2007 Peteris Krumins (peter@catonmat.net)
+# http://www.catonmat.net - good coders code, great reuse
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+package ImageFinder;
+
+#
+# This package was written as a part of "reddit media: intelligent fun online"
+# website generator.
+# This website can be viewed here: http://redditmedia.com
+#
+# See http://www.catonmat.net/designing-reddit-media-website for more info.
+#
+
+use warnings;
+use strict;
+
+#
+# This module finds the "best" image on a web page.
+# Since this package was written for the purpose of the redditmedia.com website,
+# the "best" means a picture which is most likely to be posted on the site
+# for others to enjoy.
+#
+# People enjoy big pictures, so this package finds the image on a webpage which
+# has the biggest area (width * height).
+#
+
+use File::Temp 'mktemp';
+use LWP::UserAgent;
+use HTML::TreeBuilder;
+use URI;
+
+use NetPbm;
+
+#
+# new
+#
+# Constructor. Takes named arguments:
+#   netpbm => passed through unchanged to NetPbm->new(netpbm => ...)
+#             (presumably where the Netpbm tools live -- confirm in NetPbm.pm)
+#
+# Returns a blessed object holding a NetPbm helper ({netpbm}) and an
+# LWP::UserAgent ({ua}) configured with a browser-like agent string and a
+# 5 second timeout.
+#
+sub new {
+    my $this  = shift;
+    my $class = ref($this) || $this;
+    my %args  = @_;
+
+    my $self = {};
+    $self->{netpbm} = NetPbm->new(netpbm => $args{'netpbm'});
+    $self->{ua} = LWP::UserAgent->new(
+        agent => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) Gecko/20070515 Firefox/2.0.0.4',
+        timeout => 5
+    );
+
+    # Bless into the resolved class name, not $this: when new() is invoked
+    # on an existing object, $this is a reference and cannot be blessed into.
+    return bless $self, $class;
+}
+
+#
+# find_best_image
+#
+# Given a URL address to a website, the function gets all the images on the page
+# and figures out which one is the best.
+#
+sub find_best_image {
+    my ($self, $url) = @_;
+
+    # Returns whatever _biggest_image returns for the downloaded images
+    # (a file path), or undef when the page cannot be fetched or no image
+    # could be downloaded.
+    my $content = $self->_get_page($url);
+    return undef unless defined $content;
+
+    my $tree = HTML::TreeBuilder->new;
+    $tree->parse($content);
+    $tree->eof;    # tell the parser the document is complete
+
+    # collect the src attribute of every img tag, then release the tree
+    # on every path (it was previously freed only when no img was found)
+    my @srcs;
+    foreach my $img ($tree->look_down(_tag => 'img')) {
+        my $src = $img->attr('src');
+        push @srcs, $src if $src;
+    }
+    $tree->delete;
+
+    # download all images
+    my @downloaded_images;
+    foreach my $src (@srcs) {
+        # src could be a relative path, resolve it against the page URL
+        my $abs_src = URI->new_abs($src, $url)->as_string;
+
+        my $tmp_file = $self->_get_temp_file;
+        my $resp = $self->{ua}->get($abs_src, ":content_file" => $tmp_file);
+
+        # keep only successful, non-empty downloads; remove whatever a
+        # failed or empty download may have left on disk
+        unless ($resp->is_success && -s $tmp_file) {
+            unlink $tmp_file if -e $tmp_file;
+            next;
+        }
+        push @downloaded_images, $tmp_file;
+    }
+
+    return undef unless @downloaded_images; # huh, no images?
+
+    return $self->_biggest_image(@downloaded_images);
+}
+
+#
+# _biggest_image
+#
+# Given a list of images, finds the biggest image (width * height maximum)
+#
+sub _biggest_image {
+    my ($self, @images) = @_;
+
+    # @images are paths of image files; each one is deleted after the
+    # conversion attempt. Returns the path of the converted (PNM) image
+    # with the largest width * height, or undef if none could be
+    # converted and measured. All other converted files are removed.
+    my $netpbm = $self->{netpbm};
+
+    # convert all images to PNM format
+    my @pnms;
+    foreach my $image (@images) {
+        my $pnm_file = $netpbm->img2pnm($image);
+        unlink $image;
+        if ($netpbm->is_error) {
+            print STDERR $netpbm->get_error, "\n";
+            $netpbm->clear_error;
+            next;
+        }
+        push @pnms, $pnm_file;
+    }
+
+    # read the dimensions of every converted image
+    my @img_infos;
+    foreach my $pnm (@pnms) {
+        my %info = $netpbm->get_img_info($pnm);
+        if ($netpbm->is_error) {
+            print STDERR $netpbm->get_error, "\n";
+            $netpbm->clear_error;
+            unlink $pnm;    # cannot be ranked, so do not leave it behind
+            next;
+        }
+        push @img_infos, {
+            info => \%info,
+            path => $pnm
+        };
+    }
+
+    # nothing survived conversion and measuring
+    return undef unless @img_infos;
+
+    # largest area first
+    my ($biggest, @rest) = sort {
+        $b->{info}{width} * $b->{info}{height} <=> $a->{info}{width} * $a->{info}{height}
+    } @img_infos;
+
+    unlink $_->{path} foreach @rest;
+
+    return $biggest->{path};
+}
+
+#
+# _get_page
+#
+# Requests $url with the object's user agent and returns the content of
+# the response, or undef when the request was not successful.
+#
+sub _get_page {
+    my ($self, $url) = @_;
+
+    my $response = $self->{ua}->get($url);
+    return undef unless $response->is_success;
+
+    return $response->content;
+}
+
+#
+# _get_temp_file
+#
+# Creates and returns the path to a new temporary file
+#
+sub _get_temp_file {
+    # File::Temp's mktemp fills in the trailing X's of the template
+    my $template = "/tmp/imageIFXXXXXXXX";
+    return mktemp($template);
+}
+
+1;
Oops, something went wrong.

0 comments on commit 467231d

Please sign in to comment.