Permalink
Browse files

Seesaw search support

Not yet part of the main search; it needs an explicit query starting
with 'seesaw'. It still needs some improvement: Seesaw seems to OR the
search terms together, so it often returns too many results. (Arguably
Seesaw doesn't really have a search feature at all; this just uses their
lame JavaScript 'suggestion' thing.)
  • Loading branch information...
1 parent b64fd29 commit 4754d90687522bc07ccd2226a42fd45a23c7d80d @monsieurvideo monsieurvideo committed Apr 5, 2010
Showing with 117 additions and 13 deletions.
  1. +105 −0 FlashVideo/Site/Seesaw.pm
  2. +2 −12 FlashVideo/Site/Wat.pm
  3. +10 −1 FlashVideo/Utils.pm
View
@@ -4,6 +4,7 @@ package FlashVideo::Site::Seesaw;
use strict;
use FlashVideo::Utils;
use HTML::Entities qw(decode_entities);
+use URI::Escape qw(uri_escape);
my @res = (
{ name => "lowResUrl", resolution => [ 512, 288 ] },
@@ -54,4 +55,108 @@ sub find_video {
}
}
+# Search Seesaw for a programme and list its episodes.
+#
+# $search is the query text. An optional "series N" and/or "episode N"
+# (any letter case) is stripped from it and used to select the series and
+# episode; the remainder is sent to Seesaw's search-suggest URL.
+# $type is accepted but not used by this implementation.
+#
+# Returns a list of { name => ..., url => ... } hash references:
+#  - the matching programmes when the query does not identify exactly one;
+#  - otherwise the episodes of the selected (or currently shown) series;
+#  - only the matching episode when "episode N" was given and found.
+# Returns an empty list if the requested series number is not listed.
+sub search {
+  my($self, $search, $type) = @_;
+
+  # Pull the optional episode / series numbers out of the query text.
+  my $episode = $search =~ s/\bepisode\s+(\d+)//i ? $1 : "";
+  my $series  = $search =~ s/\bseries\s+(\d+)//i  ? $1 : "";
+
+  # Removing those words can leave stray whitespace behind; do not send it.
+  $search =~ s/\s{2,}/ /g;
+  $search =~ s/^\s+|\s+$//g;
+
+  my $browser = FlashVideo::Mechanize->new;
+
+  _update_with_content($browser,
+    "http://www.seesaw.com/start.layout.searchsuggest:inputtextevent?search="
+    . uri_escape($search));
+
+  # Find links to programmes
+  my @urls = map {
+    chomp(my $name = $_->text);
+    +{ name => $name, url => $_->url_abs->as_string }
+  } $browser->find_all_links(text_regex => qr/.+/);
+
+  # Zero or several programmes: hand the candidates back to the caller.
+  if(@urls != 1) {
+    info "Please specify a more specific title to download a particular programme." if @urls > 1;
+    return @urls;
+  }
+
+  $browser->get($urls[0]->{url});
+  # We are now at the episode page.
+  my $main_title = ($browser->content =~ /<h1>\s*(?:<!--.*?-->\s*)?(.*?)\n/)[0];
+  $main_title = "" unless defined $main_title;
+
+  # Parse the list of series
+  my $cur_series = ($browser->content =~ /<li class="current">.*?>\w+ (\d+)/i)[0];
+  $cur_series = "" unless defined $cur_series;
+
+  # The match yields (href, label, href, label, ...); reversing the list
+  # turns it into label => href pairs for the hash.
+  my $series_html = ($browser->content =~ m{<ul class="seriesList">(.*?)</ul>}i)[0];
+  $series_html = "" unless defined $series_html;
+  my %series = reverse(
+    $series_html =~ /<li.*?href="\?([^"]+)".*?>\s*(?:series\s*)?([^<]+)/gi);
+
+  # Go to the correct series
+  my $episode_list;
+  if($series && $cur_series ne $series) {
+    if(!$series{$series}) {
+      error "No such series number ($series).";
+      return;
+    }
+    _update_with_content($browser, $series{$series});
+    $episode_list = $browser->content;
+    $cur_series = $series;
+
+  } elsif(!$series) {
+    # Tell the user which *other* series exist, in a stable order.
+    my @others = sort {
+      ($a =~ /^\d+$/ && $b =~ /^\d+$/) ? $a <=> $b : $a cmp $b
+    } grep { $_ ne $cur_series } map {
+      (my $label = $_) =~ s/series\s+//i;
+      $label =~ s/^\s+|\s+$//g;
+      $label
+    } keys %series;
+
+    if(@others) {
+      info "Viewing series $cur_series; series " . join(", ", @others) . " also available.";
+      info "Search for 'seesaw $main_title series $others[0]' to view a specific series.";
+    }
+  }
+
+  if(!$episode_list) {
+    # Grab the episodes for the current series from the page
+    $episode_list = ($browser->content
+      =~ m{<table id="episodeListTble">(.*?)</table>}is)[0];
+  }
+  $episode_list = "" unless defined $episode_list;
+
+  # Parse list of episodes
+  @urls = ();
+  for my $episode_html($episode_list =~ m{<tr.*?</tr>}gis) {
+    # Each table row here
+    my %info;
+    for my $field (qw(number title action)) {
+      my $class = "episode" . ucfirst $field;
+      # Scalar-context /g resumes after the previous cell, so the cells
+      # are expected to appear in the row in the order listed above.
+      $episode_html =~ m{<td class=['"]$class['"]>(.*?)</td>}gis
+        && ($info{$field} = $1);
+    }
+
+    # A row without an action link is not a downloadable episode
+    # (presumably a header or spacer row), so skip it rather than
+    # returning a result that points back at the programme page.
+    next unless defined $info{action};
+    my $url = ($info{action} =~ /href=['"]([^'"]+)/)[0];
+    next unless defined $url;
+
+    my $number = defined $info{number}
+      ? ($info{number} =~ /ep\.?\w*\s*(\d+)/i)[0] : undef;
+    my $episode_title = defined $info{title}
+      ? ($info{title} =~ />\s*([^<].*?)\s*</s)[0] : undef;
+    $episode_title = "" unless defined $episode_title;
+
+    my $title = join " - ", $main_title,
+      sprintf("S%02dE%02d", $cur_series || 0, defined $number ? $number : 0),
+      $episode_title;
+
+    my $result = {
+      name => $title,
+      url  => URI->new_abs($url, $browser->uri)
+    };
+
+    if($episode && defined $number && $number == $episode) {
+      # Exact match
+      return $result;
+    }
+
+    push @urls, $result;
+  }
+
+  return @urls;
+}
+
+# Fetch $url with XMLHttpRequest / Prototype request headers, extract the
+# "content" string from the JSON-style response and install it (after
+# JSON unescaping) as $browser's HTML, so the link helpers can be used on
+# it. If the response has no "content" field the HTML is set to the empty
+# string and the miss is logged at debug level.
+sub _update_with_content {
+  my($browser, $url) = @_;
+
+  $browser->get($url,
+    X_Requested_With => 'XMLHttpRequest',
+    X_Prototype_Version => '1.6.0.3');
+
+  my($content) = $browser->content =~ /content":\s*"(.*?)"\s*}/;
+  if(!defined $content) {
+    # Do not pass undef through json_unescape/debug/update_html.
+    debug "No content field found in response from $url";
+    $content = "";
+  }
+
+  $content = json_unescape($content);
+  debug "Content is '$content'";
+  return $browser->update_html($content);
+}
+
1;
View
@@ -15,8 +15,8 @@ sub find_video {
$browser->get("http://www.wat.tv/interface/contentv2/$video_id");
- my $title = json_escape(($browser->content =~ /title":"(.*?)",/)[0]);
- my $url = json_escape(($browser->content =~ /files.*?url":"(.*?)",/)[0]);
+ my $title = json_unescape(($browser->content =~ /title":"(.*?)",/)[0]);
+ my $url = json_unescape(($browser->content =~ /files.*?url":"(.*?)",/)[0]);
my $filename = title_to_filename($title);
@@ -25,14 +25,4 @@ sub find_video {
return $url, $filename;
}
-# Maybe should use a proper JSON parser, but want to avoid the dependency for now..
-sub json_escape {
- my($s) = @_;
-
- $s =~ s/\\u([0-9a-f]{1,4})/chr hex $1/eg;
- $s =~ s/\\//g;
-
- return $s;
-}
-
1;
View
@@ -14,7 +14,7 @@ use constant MAX_REDIRECTS => 5;
our @EXPORT = qw(debug info error
extract_title extract_info title_to_filename get_video_filename url_exists
swfhash swfhash_data EXTENSIONS get_user_config_dir get_win_codepage
- is_program_on_path get_terminal_width);
+ is_program_on_path get_terminal_width json_unescape);
my $HAS_READKEY = eval { require Term::ReadKey };
@@ -264,4 +264,13 @@ sub get_terminal_width {
}
}
+# Maybe should use a proper JSON parser, but want to avoid the dependency for now..
+#
+# Decode the backslash escapes of a JSON string body (the text between the
+# quotes) and return the result. Handles \" \\ \/ \b \f \n \r \t and \uXXXX
+# (hex digits in either case), combining a UTF-16 surrogate pair into one
+# character. Unknown escapes are left untouched. Returns undef for undef.
+sub json_unescape {
+  my($s) = @_;
+  return $s unless defined $s;
+
+  my %simple = (
+    '"' => '"',  '\\' => '\\', '/' => '/',
+    b   => "\b", f    => "\f", n   => "\n", r => "\r", t => "\t",
+  );
+
+  # Decode in a single left-to-right pass so that an escaped backslash
+  # followed by "u" (\\u0041) is not mistaken for a \u escape, and use a
+  # lookup table instead of string eval.
+  $s =~ s{
+    \\
+    (?:
+        u([Dd][89ABab][0-9A-Fa-f]{2}) \\u ([Dd][C-Fc-f][0-9A-Fa-f]{2})
+      | u([0-9A-Fa-f]{1,4})
+      | (["\\/bfnrt])
+    )
+  }{
+      defined $1 ? chr(0x10000 + ((hex($1) - 0xD800) << 10) + (hex($2) - 0xDC00))
+    : defined $3 ? chr(hex $3)
+    :              $simple{$4}
+  }gex;
+
+  return $s;
+}
+
1;

0 comments on commit 4754d90

Please sign in to comment.