diff --git a/Changes b/Changes new file mode 100644 index 0000000..6ed1b43 --- /dev/null +++ b/Changes @@ -0,0 +1,3 @@ +0.9.001 Feb 18, 2005 +---------------------- +Yahoo!-Internal release diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..10b3336 --- /dev/null +++ b/MANIFEST @@ -0,0 +1,11 @@ +Yahoo/Search.pm +Yahoo/Search/Request.pm +Yahoo/Search/Response.pm +Yahoo/Search/Result.pm +Yahoo/Search/XML.pm +Changes +Makefile.PL +MANIFEST +README +test.pl +META.yml diff --git a/META.yml b/META.yml new file mode 100644 index 0000000..2b24940 --- /dev/null +++ b/META.yml @@ -0,0 +1,13 @@ +# http://module-build.sourceforge.net/META-spec.html +#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX# +name: Yahoo-Search +version: 1.0.0 +version_from: Yahoo/Search.pm +installdirs: site +requires: + HTTP::Request: 0 + LWP::UserAgent: 0 + URI: 0 + +distribution_type: module +generated_by: ExtUtils::MakeMaker version 6.17 diff --git a/Makefile.PL b/Makefile.PL new file mode 100755 index 0000000..9f0ee87 --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,16 @@ +#!/usr/local/bin/perl -w +use ExtUtils::MakeMaker; + +# See lib/ExtUtils/MakeMaker.pm for details of how to influence +# the contents of the Makefile that is written. +WriteMakefile( + NAME => 'Yahoo::Search', + ABSTRACT_FROM => 'Yahoo/Search.pm', + VERSION_FROM => 'Yahoo/Search.pm', + PREREQ_PM => { + 'LWP::UserAgent' => 0, + 'HTTP::Request' => 0, + 'URI' => 0, + }, + AUTHOR => 'Jeffrey Friedl ', +); diff --git a/README b/README new file mode 100644 index 0000000..ed4eec5 --- /dev/null +++ b/README @@ -0,0 +1,17 @@ +This is the Perl interface to Yahoo! Search's web services. + +This package provides a simple but complete programmatic interface to +Yahoo's document search, image search, video search, news search, and +"Local" search, as well as its "related terms" and "did you misspell?" +features. 
+ +It uses their XML web services behind the scenes (no screen scraping), so +is fast and consistent. + +For more info about the underlying service this package uses, see +http://developer.yahoo.net/ + + +Written by Jeffrey Friedl +The entire work Copyright (C) 2005 Yahoo! Inc. + diff --git a/Yahoo/Search.pm b/Yahoo/Search.pm new file mode 100644 index 0000000..23c5097 --- /dev/null +++ b/Yahoo/Search.pm @@ -0,0 +1,1626 @@ +package Yahoo::Search; +use strict; +use Carp; +use Yahoo::Search::Request; + +## +## This is the interface to Yahoo!'s web search API. +## Written by Jeffrey Friedl +## Copyright (C) 2005 Yahoo! Inc. +## + +our $VERSION = '1.0.0'; # last num increases monotonically across all versions + +## +## CLASS OVERVIEW +## +## The main class +## Yahoo::Search +## begets +## Yahoo::Search::Request +## which begets +## Yahoo::Search::Response +## which begets a bunch of +## Yahoo::Search::Result +## which beget urls, summaries, etc. +## +## There are plenty of "convenience functions" which appear to bypass some +## of these steps as far as the user's view is concerned. +## + +## +## Configuration details for each search space (Doc, Images, Video, ## etc.)... +## +my %Config = +( + ###################################################################### + # Normal web search + # + Doc => + { + Url => 'http://api.search.yahoo.com/WebSearchService/V1/webSearch', + + MaxCount => 50, + + ## The 'Defaults' keys indicate the universe of allowable arguments, + ## while the values are the defaults for those arguments. 
+ Defaults => { + Mode => undef, + Count => 10, + Start => 0, + Type => 'all', + AllowAdult => 0, + AllowSimilar => 0, + Language => undef, + }, + + AllowedMode => { + all => 1, + any => 1, + phrase => 1, + }, + AllowedType => { + all => 1, + + html => 1, + msword => 1, + pdf => 1, + ppt => 1, + rss => 1, + txt => 1, + xls => 1, + }, + }, + + + ###################################################################### + # Image search + # + Image => + { + Url => 'http://api.search.yahoo.com/ImageSearchService/V1/imageSearch', + MaxCount => 50, + + Defaults => { + Mode => undef, + Count => 10, + Start => 0, + Type => 'all', + AllowAdult => 0, + }, + + AllowedMode => { + all => 1, + any => 1, + phrase => 1, + }, + + AllowedType => { + all => 1, + + bmp => 1, + gif => 1, + jpeg => 1, + png => 1, + }, + }, + + ###################################################################### + # Video file search + # + Video => + { + Url => 'http://api.search.yahoo.com/VideoSearchService/V1/videoSearch', + + MaxCount => 50, + + Defaults => { + Mode => undef, + Count => 10, + Start => 0, + Type => 'all', + AllowAdult => 0, + }, + + AllowedMode => { + all => 1, + any => 1, + phrase => 1, + }, + + AllowedType => { + all => 1, + + avi => 1, + flash => 1, + mpeg => 1, + msmedia => 1, + quicktime => 1, + realmedia => 1, + }, + }, + + + ###################################################################### + # "Y! 
Local" (like Yellow Pages) search + # + Local => + { + Url => 'http://api.local.yahoo.com/LocalSearchService/V1/localSearch', + + MaxCount => 20, + + Defaults => { + Count => 10, + Start => 0, + Mode => undef, + Radius => undef, + Street => undef, + City => undef, + State => undef, + PostalCode => undef, + Location => undef, + }, + }, + + + ###################################################################### + # News search + # + News => + { + Url => 'http://api.search.yahoo.com/NewsSearchService/V1/newsSearch', + + MaxCount => 50, + + Defaults => { + Mode => undef, + Count => 10, + Start => 0, + Sort => undef, + Language => undef, + }, + + AllowedMode => { + all => 1, + any => 1, + phrase => 1, + }, + + AllowedSort => { + rank => 1, + date => 1, + }, + }, + + Spell => + { + Url => 'http://api.search.yahoo.com/WebSearchService/V1/spellingSuggestion', + }, + + Related => + { + Url => 'http://api.search.yahoo.com/WebSearchService/V1/relatedSuggestion', + }, +); + +## +## These args are allowed for any Query() +## +my @ExtraQueryArgs = qw[AutoContinue Debug AppId]; + +## +## Global defaults -- this list may be modified via import() +## and Default(); +## +my %GlobalDefault = +( + ## + ## Debug is a string with any of: url (show the url as fetched) + ## xml (show the resulting xml) + ## hash (show the resulting hash) + ## stdout (show to stdout instead of stderr) + ## e.g. "url hash stdout" + Debug => "", + + ## + ## if AutoCarp is true (as it is by default), carp on programming errors + ## (but not 404s, etc.) + ## + AutoCarp => 1, + + AutoContinue => 0, + + PreRequestCallback => undef, +); + +## +## Helper function to set $@ and, if needed, carp. +## +sub _carp_on_error($) +{ + $@ = shift; + if ($GlobalDefault{AutoCarp}) { + carp $@; + } + + return (); +} + + +## +## The following private subs are used to validate arguments. They are +## generally called with two args: the search space (Doc, Image, etc.), and +## the text to validate. 
##
## If called without the "text to validate" arg, they return a description
## of what args are allowed (tailored to the search space, if appropriate).
##
## Otherwise, they return ($valid, $value).
##

## Accepts a string of digits whose numeric value is not zero.
my $allow_positive_integer = sub
{
    my $space = shift; # unused
    if (not @_) {
        return "positive integer";
    }
    my $val = shift;

    if (not $val =~ m/^\d+$/) {
        return (0); # invalid: not a number
    } elsif ($val == 0) {
        return (0); # invalid: not positive
    } else {
        return (1, $val);
    }
};

## Accepts any string of digits (zero included).
my $allow_nonnegative_integer = sub
{
    my $space = shift; # unused
    if (not @_) {
        return "non-negative integer";
    }
    my $val = shift;

    if (not $val =~ m/^\d+$/) {
        return (0); # invalid: not a number
    } else {
        return (1, $val);
    }
};

## Accepts "12", "12.", "12.5" and ".5" style numbers, but not zero.
my $allow_positive_float = sub
{
    my $space = shift; # unused
    if (not @_) {
        return "positive number";
    }
    my $val = shift;

    if (not $val =~ m/^(?: \d+(?: \.\d* )?$ | \.\d+$ )/x) {
        return (0); # invalid: not a number
    } elsif ($val == 0) {
        return (0); # invalid: not positive
    } else {
        return (1, $val);
    }
};

## This has different args than the others -- has $hashref prepended
my $allow_from_hash = sub
{
    my $hashref = shift; #hash in which to check
    my $space   = shift; #unused

    if (not @_) {
        return join '|', sort keys %$hashref;
    }
    my $val = shift;

    if (not $hashref) {
        return (1, $val); # can't tell, so say it's valid
    } elsif ($hashref->{$val}) {
        return (1, $val); # is specifically valid
    } else {
        return (0);       # not valid
    }
};

## Accepts anything, normalizing the value to 1 or 0.
my $allow_boolean = sub
{
    my $space = shift; #unused
    if (not @_) {
        return "true or false";
    }
    my $val = shift;
    return (1, $val ? 1 : 0);
};

## Accepts anything, returning the value untouched.
my $allow_any = sub
{
    my $space = shift; #unused
    if (not @_) {
        return "any value";
    }
    my $val = shift;
    return (1, $val);
};

## Accepts 5-digit and 9-digit ZIP codes (the hyphen is optional).
my $allow_postal_code = sub
{
    my $space = shift; #unused
    ## only U.S. Zone Improvement Program codes allowed
    if (not @_) {
        return "a US ZIP code";
    }

    my $val = shift;
    if ($val =~ m/^\d\d\d\d\d(?:-?\d\d\d\d)?$/) {
        return (1, $val);
    } else {
        return (0);
    }
};

## Accepts only a CODE reference.
my $allow_coderef = sub
{
    my $space = shift; #unused

    ##
    ## Like every other validator, describe what is expected when called
    ## without a value. (Without this branch the "description" was the
    ## failure flag 0, which ended up in the error text as "expected: 0".)
    ##
    if (not @_) {
        return "a code reference";
    }

    my $val = shift;
    if (ref($val) eq 'CODE') {
        return (1, $val);
    } else {
        return (0);
    }
};

## Accepts 8 to 40 characters drawn from the set shown in the description.
my $allow_appid = sub
{
    my $space = shift; #unused

    if (not @_) {
        return "something which matches /^[- A-Za-z0-9_()[\\]*+=,.:\@\\\\]{8,40}\$/";
    }

    my $val = shift;
    if ($val =~ m/^[- A-Za-z0-9_()\[\]*+=,.:\@\\]{8,40}$/) {
        return (1, $val);
    } else {
        return (0);
    }
};

##
## Language codes accepted for the Language argument (the keys), with a
## human-readable name for each (the values).
##
our %KnownLanguage =
(
    default => 'any/all languages',

    ar  => 'Arabic',
    bg  => 'Bulgarian',
    ca  => 'Catalan',
    szh => 'Chinese (simplified)',
    tzh => 'Chinese (traditional)',
    hr  => 'Croatian',
    cs  => 'Czech',
    da  => 'Danish',
    nl  => 'Dutch',
    en  => 'English',
    et  => 'Estonian',
    fi  => 'Finnish',
    fr  => 'French',
    de  => 'German',
    el  => 'Greek',
    he  => 'Hebrew',
    hu  => 'Hungarian',
    is  => 'Icelandic',
#   id  => 'Indonesian',
    it  => 'Italian',
    ja  => 'Japanese',
    ko  => 'Korean',
    lv  => 'Latvian',
    lt  => 'Lithuanian',
    no  => 'Norwegian',
    fa  => 'Persian',
    pl  => 'Polish',
    pt  => 'Portuguese',
    ro  => 'Romanian',
    ru  => 'Russian',
#   sr  => 'Serbian',
    sk  => 'Slovak',
    sl  => 'Slovenian',
    es  => 'Spanish',
    sv  => 'Swedish',
    th  => 'Thai',
    tr  => 'Turkish',
);

##
## Mapping from arg name to value validation routine.
##

##
## Look up one of a search space's "Allowed..." tables (AllowedMode,
## AllowedSort, AllowedType) without touching %Config.
##
## Writing $Config{$space}->{AllowedMode} directly autovivifies
## $Config{$space} when the space is undef (as it is for global/engine
## defaults) or unknown, and that junk entry then shows up as a "valid"
## search space. Hence the explicit existence test.
##
## Always returns exactly one value (possibly undef), because the result is
## used as the first element of an argument list.
##
my $allowed_for = sub
{
    my ($space, $table) = @_;

    if (not defined $space or not exists $Config{$space}) {
        return undef; # deliberately not a bare "return" -- see above
    }
    return $Config{$space}->{$table};
};

my %ValidateRoutine =
(
    Count        => $allow_positive_integer,
    Start        => $allow_nonnegative_integer,

    Radius       => $allow_positive_float,

    AllowAdult   => $allow_boolean,
    AllowSimilar => $allow_boolean,

    Street       => $allow_any,
    City         => $allow_any,
    State        => $allow_any,
    Location     => $allow_any,

    PostalCode   => $allow_postal_code,

    Mode         => sub { $allow_from_hash->($allowed_for->($_[0], 'AllowedMode'), @_) },
    Sort         => sub { $allow_from_hash->($allowed_for->($_[0], 'AllowedSort'), @_) },
    Type         => sub { $allow_from_hash->($allowed_for->($_[0], 'AllowedType'), @_) },
    Language     => sub { $allow_from_hash->(\%KnownLanguage, @_) },

    Debug        => $allow_any,
    AutoContinue => $allow_boolean,
    AutoCarp     => $allow_boolean,

    AppId        => $allow_appid,

    PreRequestCallback => $allow_coderef,
);

##
## Validate one argument.
##
## Args: $global -- true if for a global setting
##       $space  -- Doc, Image, etc. (undef when not tied to a space)
##       $key    -- "Count", "State", etc.
##       $val    -- optional; if omitted, only the key itself is checked
##
## returns ($newvalue, $error);
## (with no $val and a valid $key, returns just (1))
##
sub _validate($$$;$)
{
    my $global   = shift; # true if for a global setting
    my $space    = shift; # Doc, Image, etc.
    my $key      = shift; # "Count", "State", etc.
    my $have_val = @_ ? 1 : 0;
    my $val      = shift;

    if (not $ValidateRoutine{$key}) {
        return (undef, "unknown argument '$key'");
    }

    if (not $global and $key eq 'AutoCarp') {
        return (undef, "AutoCarp is a global setting which can not be used in this context");
    }

    if (not $have_val) {
        return (1);
    }

    my ($valid, $newval) = $ValidateRoutine{$key}->($space, $val);

    if ($valid) {
        return ($newval, undef);
    }

    ## calling the validator without a value yields its description
    my $expected = $ValidateRoutine{$key}->($space);
    if ($space) {
        return (undef, "invalid value \"$val\" for $space\'s \"$key\" argument, expected: $expected");
    } else {
        return (undef, "invalid value \"$val\" for \"$key\" argument, expected: $expected");
    }
}


##
## 'import' accepts key/value pairs:
## each is validated as a global setting and stored in %GlobalDefault.
## Dies (with a stack trace) on an odd arg count or an invalid pair.
##
sub import
{
    my $class = shift;

    if (@_ % 2 != 0) {
        Carp::confess("bad number of args to 'use $class'");
    }
    my %Args = @_;

    while (my ($key, $val) = each %Args)
    {
        my ($newval, $error) = _validate(1, undef, $key, $val);
        if ($error) {
            Carp::confess("$error, in 'use $class'");
        } else {
            $GlobalDefault{$key} = $newval;
        }
    }
}


##
## Get (or set) one of the default global values. They can be set this way
## (either as Yahoo::Search->Default or $SearchEngine->Default), or via
## Yahoo::Search->new(), or on the 'use' line.
##
## When used with a $SearchEngine object, the value returned is the value
## in effect, which is the global one if the $SearchEngine does not have
## one itself.
##
## Returns the value that was in effect before the call. On a bad key or
## value, reports via _carp_on_error() and returns that call's result.
##
sub Default
{
    my $class_or_obj = shift; # Yahoo::Search->Default or $SearchEngine->Default
    my $key          = shift;
    my $have_val     = @_ ? 1 : 0;
    my $val          = shift;

    my $global = not ref $class_or_obj;

    my $old;
    if ($global or not exists $class_or_obj->{$key}) {
        $old = $GlobalDefault{$key};
    } else {
        $old = $class_or_obj->{$key};
    }

    if ($have_val)
    {
        my ($newval, $error) = _validate($global, undef, $key, $val);
        if ($error) {
            return _carp_on_error($error);
        }

        if (ref $class_or_obj) {
            $class_or_obj->{$key} = $newval;
        } else {
            $GlobalDefault{$key} = $newval;
        }
    }
    else
    {
        ## a pure "get" still checks that the key is a known one
        my ($okay, $error) = _validate($global, undef, $key);
        if ($error) {
            return _carp_on_error($error);
        }
    }

    return $old;
}



##
## Maps Yahoo::Search->Query arguments to Y! API parameters.
##
my %ArgToParam =
(
    Mode         => 'type',
    Count        => 'results',
    Start        => 'start',
    Type         => 'format',
    AllowAdult   => 'adult_ok',
    AllowSimilar => 'similar_ok',
    Language     => 'language',
    Sort         => 'sort',
    Radius       => 'radius',
    Street       => 'street',
    City         => 'city',
    State        => 'state',
    PostalCode   => 'zip',
    Location     => 'location',
    AppId        => 'appid',
);


##
## The search-engine constructor.
##
## No args are needed, but any of %ValidateRoutine keys except AutoCarp are
## allowed (they'll be used as the defaults when queries are later
## constructed via this object).
##
sub new
{
    my $class = shift;

    if (@_ % 2 != 0) {
        return _carp_on_error("wrong arg count to $class->new");
    }

    my $SearchEngine = { @_ };

    for my $key (keys %$SearchEngine)
    {
        my ($newval, $error) = _validate(0, undef, $key, $SearchEngine->{$key});
        if ($error) {
            return _carp_on_error("$error, in call to $class->new");
        }
        $SearchEngine->{$key} = $newval;
    }

    return bless $SearchEngine, $class;
}

##
## Request method (can also be called like a constructor).
## Specs to a specific query are provided, and a Request object is returned.
##
## Args: $SearchSpace, $QueryText, then optional key/value pairs.
##
## Returns whatever Yahoo::Search::Request->new() returns. On any argument
## problem, reports via _carp_on_error() and returns that call's result.
##
sub Request
{
    my $SearchEngine = shift; # self
    my $SearchSpace  = shift; # "Doc", "Image", "News", etc..
    my $QueryText    = shift; # "Briteny", "egregious compensation semel", etc.

    if (@_ % 2 != 0) {
        return _carp_on_error("wrong arg count");
    }

    my %Args = @_;

    if (not defined $SearchSpace or not $Config{$SearchSpace}) {
        my $list = join '|', sort keys %Config;
        return _carp_on_error("bad search-space identifier, expecting one of: $list");
    }

    if (not defined($QueryText) or length($QueryText) == 0) {
        return _carp_on_error("missing query");
    }

    ##
    ## %Param holds the key/vals we'll send in the request to Yahoo!
    ##
    my %Param = ( query => $QueryText );

    ##
    ## This can be called as a constructor -- if so, $SearchEngine will be
    ## the class name, and we'll want to turn into an object.
    ##
    if (not ref $SearchEngine) {
        $SearchEngine = $SearchEngine->new();
    }

    my %OtherRequestArgs;

    ##
    ## Go through most allowed args, taking the value from this call's arg
    ## list if provided, from the defaults that were registered with the
    ## SearchEngine, or failing those, the defaults for this type of query.
    ##
    for my $key (keys %{ $Config{$SearchSpace}->{Defaults} }, @ExtraQueryArgs)
    {
        ##
        ## Isolate the value we'll use for this request: from our args,
        ## from the defaults registered with the search-engine, from the
        ## global defaults, or from the search-space defaults.
        ##
        my $val;
        if (exists $Args{$key}) {
            $val = delete $Args{$key};
        } elsif (exists $SearchEngine->{$key}) {
            $val = $SearchEngine->{$key};
        } elsif (exists $GlobalDefault{$key}) {
            $val = $GlobalDefault{$key};
        } elsif (exists $Config{$SearchSpace}->{Defaults}->{$key}) {
            $val = $Config{$SearchSpace}->{Defaults}->{$key};
        } else {
            $val = undef;
        }

        if (defined $val)
        {
            my ($newval, $error) = _validate(0, $SearchSpace, $key, $val);

            if ($error) {
                return _carp_on_error($error);
            }

            ## args with an API equivalent go on the wire; the rest are
            ## handed to the Request object
            if (my $param = $ArgToParam{$key}) {
                $Param{$param} = $newval;
            } else {
                $OtherRequestArgs{$key} = $newval;
            }
        }
    }

    ##
    ## Any leftover args are bad
    ## (sorted so that the message does not depend on hash order)
    ##
    if (%Args) {
        my $list = join(', ', sort keys %Args);
        return _carp_on_error("unknown args for '$SearchSpace' query: $list");
    }

    ##
    ## An AppId is required for all calls
    ##
    if (not $Param{'appid'})
    {
        return _carp_on_error("an AppId is required -- please make one up");
    }

    ##
    ## Do some special per-arg-type processing
    ##

    ## ensure that the Count, if given, is not over max.
    ## Count is stored under its API name, 'results' (see %ArgToParam);
    ## looking under 'count' would never find it.
    if (defined $Param{results} and $Param{results} > $Config{$SearchSpace}->{MaxCount}) {
        return _carp_on_error("maximum allowed Count for a $SearchSpace search is $Config{$SearchSpace}->{MaxCount}");
    }

    ## In Perl universe, Start is 0-based, but the Y! API's "start" is 1-based.
    $Param{start}++;

    # 'Local' has special required parameters
    if ($SearchSpace eq 'Local')
    {
        if (not $Param{location}
            and
            not $Param{'zip'}
            and
            not $Param{'state'}
            and
            not $Param{'city'})
        {
            ##
            ## The diff between $Param{} references in the if() above, and
            ## the arg names in the error below, is the %ArgToParam mapping
            ##
            return _carp_on_error("a 'Local' query must have Location, PostalCode, or City+State");
        }
    }

    ##
    ## Okay, we have everything we need to make a specific request object.
    ## Make it and return.
    ##
    return Yahoo::Search::Request->new(
        SearchEngine => $SearchEngine,
        Space        => $SearchSpace,
        Action       => $Config{$SearchSpace}->{Url},
        Params       => \%Param,
        %OtherRequestArgs,
    );
}

##
## A way to bypass an explicit Request object, jumping from a SearchEngine
## (or nothing) directly to a Response object.
##
sub Query
{
    my $SearchEngine = shift;
    ##
    ## Can be called as a constructor -- if so, $SearchEngine will be the
    ## class name
    ##
    if (not ref $SearchEngine) {
        $SearchEngine = $SearchEngine->new();
    }

    if (my $Request = $SearchEngine->Request(@_)) {
        return $Request->Fetch();
    } else {
        # $@ already set
        return ();
    }
}


##
## A way to bypass explicit Request and Response objects, jumping from a
## SearchEngine (or nothing) directly to a list of Result objects.
##
sub Results
{
    my $Response = Query(@_);

    if (not $Response) {
        # $@ already set
        return ();
    }
    return $Response->Results;
}

##
## A way to bypass explicit Request and Response objects, jumping from a
## SearchEngine (or nothing) directly to a list of links.
##
sub Links
{
    return map { $_->Link } Results(@_);
}


##
## A way to bypass explicit Request and Response objects, jumping from a
## SearchEngine (or nothing) directly to a bunch of html results.
##
sub HtmlResults
{
    return map { $_->as_html } Results(@_);
}

##
## A way to bypass explicit Request and Response objects, jumping from a
## SearchEngine (or nothing) directly to a list of terms
## (For Spell and Related searches)
##
sub Terms
{
    return map { $_->Term } Results(@_);
}


##
## Returns the maximum Count allowed for the given search space, or
## nothing if the space is missing, unknown, or has no such limit.
##
sub MaxCount
{
    if (@_) {
        ##
        ## We'll use only the last arg -- it can be called as either
        ## Yahoo::Search::MaxCount($SearchSpace) or
        ## Yahoo::Search->MaxCount($SearchSpace) and we don't care which.
        ## In either case, the final arg is the search space.
        ##
        my $SearchSpace = $_[-1];
        if ($Config{$SearchSpace} and $Config{$SearchSpace}->{MaxCount}) {
            return $Config{$SearchSpace}->{MaxCount};
        }
    }
    return (); # bad/missing arg
}



1;
__END__

=head1 NAME

Yahoo::Search - Perl interface to Yahoo! Search's public API.

The following search spaces are supported:

=over 3

=item Doc

Common web search for documents (html, pdf, doc, ...)

=item Image

Image search (jpeg, png, gif, ...)

=item Video

Video file search (avi, mpeg, realmedia, ...)

=item News

News article search

=item Local

Yahoo! Local area (ZIP-code-based Yellow-Page like search)

=item Spell

A pseudo-search to fetch a "did you mean?" spelling suggestion for a search term.

=item Related

A pseudo-search to fetch "also try" related-searches for a search term.

=back

(Note: what this Perl API calls "Doc" Search is what Yahoo! calls "Web"
Search. But gee, aren't all web searches "Web" search, including
Image/News/Video/etc?)

Yahoo!'s raw API, which this package uses, is described at:

  http://developer.yahoo.net/

=head1 DOCS

The full documentation for this suite of classes is spread among these packages:

   Yahoo::Search
   Yahoo::Search::Request
   Yahoo::Search::Response
   Yahoo::Search::Result

However, you need C only B, which brings in the others as needed.

=head1 SYNOPSIS

Yahoo::Search provides a rich and full-featured set of classes for
accessing the various features of Yahoo! Search, and also offers a variety
of shortcuts to allow simple access, such as the following I
search:

 use Yahoo::Search;
 my @Results = Yahoo::Search->Results(Doc => "Britney latest marriage",
                                      AppId => "YahooDemo",
                                      # The following args are optional.
                                      # (Values shown are package defaults).
+ Mode => 'all', + Count => 10, + Start => 0, + Type => 'all', + AllowAdult => 0, + AllowSimilar => 0, + Language => undef, + ); + warn $@ if $@; # report any errors + + for my $Result (@Results) + { + printf "Result: #%d\n", $Result->I + 1; + printf "Url:%s\n", $Result->Url; + printf "%s\n", $Result->ClickUrl; + printf "Summary: %s\n", $Result->Summary; + printf "Title: %s\n", $Result->Title; + printf "In Cache: %s\n", $Result->CacheUrl; + print "\n"; + } + +The first argument to C indicates which search space is to be +queried (in this case, I). The second argument is the search term or +phrase (described in detail in the next section). Subsequent arguments are +optional key/value pairs (described in detail in the section after that) -- +the ones shown in the example are those allowed for a I query, with +the values shown being the defaults. + +C returns a list of Yahoo::Search::Result objects, one per item +(in the case of a I search, an item is a web page, I document, +I document, etc.). The methods available to a C object are +dependent upon the search space of the original query -- see +Yahoo::Search::Result documentation for the complete list. + +=head1 Search term / phrase + +Within a search phrase ("C" in the example +above), words that you wish to be included even if they would otherwise be +eliminated as "too common" should be preceded with a "C<+>". Words that you +wish to exclude should be preceded with a "C<->". Words can be separated +with "C" (the default for the C Mode, described below), and can be +wrapped in double quotes to identify an exact phrase (the default with the +C Mode, also described below). 
+ +There are also a number of "Search Meta Words", as described at +http://help.yahoo.com/help/us/ysearch/basics/basics-04.html and +http://help.yahoo.com/help/us/ysearch/tips/tips-03.html , which can stand +alone or be combined with C searches (and, to some extent, some of the +others -- YMMV): + +=over 4 + +=item B + +allows one to find all documents within a particular domain and all its +subdomains. Example: B + +=item B + +allows one to find all documents from a particular host only. +Example: B + +=item B + +allows one to find documents that link to a particular url. +Example: B + +=item B + +allows one to find a specific document in Yahoo!'s index. +Example: B + +=item B + +allows one to find a specific keyword as part of indexed urls. +Example: B + +=item B + +allows one to find a specific keyword as part of the indexed titles. +Example: B + +=back + +As an example combining a number of different search styles, consider + + my @Results = Yahoo::Search->Results(Doc => 'site:TheSmokingGun.com "Michael Jackson" -arrest', + AppId => "YahooDemo"); + +This returns data about pages at TheSmokingGun.com about Michael Jackson +that don't contain the word "arrest" (yes, there are actually a few such +pages). + +=head1 Query arguments + +As mentioned above, the arguments allowed in a C call depend upon +the search space of the query. Here is a table of the possible arguments, +showing which apply to queries of which search space: + + Doc Image Video News Local Spell Related + ----- ----- ----- ----- ----- ----- ------- + AppId [X] [X] [X] [X] [X] [X] [X] + Mode [X] [X] [X] [X] [X] . . + Start [X] [X] [X] [X] [X] . . + Count [X] [X] [X] [X] [X] . . + + AllowSimilar [X] . . . . . . + AllowAdult [X] [X] [X] . . . . + Type [X] [X] [X] . . . . + Sort . . . [X] . . . + Language [X] . . [X] . . . + + Street . . . . [X] . . + City . . . . [X] . . + State . . . . [X] . . + PostalCode . . . . [X] . . + Location . . . . [X] . . + Radius . . . . [X] . . 
+ + AutoContinue [X] [X] [X] [X] [X] [X] [X] + Debug [X] [X] [X] [X] [X] [X] [X] + PreRequestCallback [X] [X] [X] [X] [X] [X] [X] + +Here are details of each: + +=over 4 + +=item AppId + +An 8-40 character string which identifies the application making use of the +Yahoo! Search API. (Think of it along the lines of an HTTP User-Agent +string.) + +The characters allowed are space, plus C + +This argument is required of all searches (sorry). You can make up whatever +AppId you'd like, but you are encouraged to register it via the link on + + http://developer.yahoo.net/ + +especially if you are creating something that will be widely distributed. + +As mentioned below in I, it's particularly +convenient to get the C out of the way by putting it on the C +line, e.g. + + use Yahoo::Search AppId => 'just testing'; + +It then applies to all queries unless explicitly overridden. + +=item Mode + +Must be one of: C (the default), C, or C. Indicates how +multiple words in the search term are used: search for documents with +I words, documents with I words, or documents that contain the +search term as an exact I. + +=item Start + +Indicates the ordinal of the first result to be returned, e.g. the "30" of +"showing results 30-40" (except that C is zero-based, not +one-based). The default is zero, meaning that the primary results will be +returned. + +=item Count + +Indicates how many items should be returned. The default is 10. The maximum +allowed depends on the search space being queried: B<20> for C +searches, and B<50> for others which support the C argument. + +Note that + + Yahoo::Search::MaxCount($SearchSpace) + +and + + $SearchEngine->MaxCount($SearchSpace) + +return the maximum count allowed for the given C<$SearchSpace>. + +=item AllowSimilar + +If this boolean is true (the default is false), similar results which would +otherwise not be returned are included in the result set. 
+ +=item AllowAdult + +If this boolean is false (the default), results considered to be "adult" +(i.e. porn) are not included in the result set. Set to true to allow +unfiltered results. + +Standard precautions apply about how the "is adult?" determination is not +perfect. + +=item Type + +This argument can be used to restrict the results to only a specific file +type. The default value, C, allows any type (associated with the +search space) to be returned. Otherwise, the values allowed depend on the +search space: + + Search space Allowed Type values + ============ ======================================================== + Doc all html msword pdf ppt rss txt xls + Image all bmp gif jpeg png + Video all avi flash mpeg msmedia quicktime realmedia + News N/A + Local N/A + Spell N/A + Related N/A + +=item Sort + +For I searches, the sort may be C (the default) or +C. + +=item Language + +If provided, restricts the results to documents in the given language. The +value is a language code such as C (English), C (Japanese), etc +(mostly ISO 639-1 codes). These are the codes supported: + + code language + ---- --------- + sq Albanian + ar Arabic + bg Bulgarian + ca Catalan + szh Chinese (simplified) + tzh Chinese (traditional) + hr Croatian + cs Czech + da Danish + nl Dutch + en English + et Estonian + fi Finnish + fr French + de German + el Greek + he Hebrew + hu Hungarian + is Icelandic + it Italian + ja Japanese + ko Korean + lv Latvian + lt Lithuanian + no Norwegian + fa Persian + pl Polish + pt Portuguese + ro Romanian + ru Russian + sk Slovak + sl Slovenian + es Spanish + sv Swedish + th Thai + tr Turkish + +In addition, the code "default" is the same as the lack of a language +specifier, and seems to mean a mix of major world languages, skewed toward +English. + +=item Street + +=item City + +=item State + +=item PostalCode + +=item Location + +These items are for a I query, and specify the epicenter of the +search. 
The epicenter must be provided in one of a variety of ways: via the +free-text C, via C + C, via C + +C + C, via C alone, or via C + C +alone. + +C is the street address, e.g. "701 First Ave". C is a +US 5-digit or 9-digit ZIP code (e.g. "94089" or "94089-1234"). + +If C is provided, it supersedes the others. It should be a string +along the lines of "701 First Ave, Sunnyvale CA, 94089". The following forms +are recognized: + + city state + city state zip + zip + street, city state + street, city state zip + street, zip + +Searches that include a street address (either in the C, or if +C is empty, in C) provide for a more detailed epicenter +specification. + +=item Radius + +For I searches, indicates how wide an area around the epicenter to +search. The value is the radius of the search area, in miles. The default +radius depends on the search location (urban areas tend to have a smaller +default radius). + +=item Debug + +C is a string (defaults to an empty string). If the substring +"C" is found anywhere in the string, the url of the Yahoo! request is +printed on stderr. If "C", the raw xml received is printed to stderr. +If "C", the raw Perl hash, as converted from the XML, is Data::Dump'd +to stderr. + +Thus, to print all debugging, you'd set C to a value such as "C". + +=item AutoContinue + +A boolean (default off). If true, turns on the B +auto-continuation, as described in the docs for C in +Yahoo::Search::Response. + +=back + +=head1 Class Hierarchy Details + +The Y! Search API class system supports the following objects (all loaded +as needed via Yahoo::Search): + + Yahoo::Search + Yahoo::Search::Request + Yahoo::Search::Response + Yahoo::Search::Result + +Here is a summary of them: + +=over 10 + +=item Yahoo::Search + +A "search engine" object which can hold user-specified default values for +search-query arguments. Often not used explicitly. + +=item Yahoo::Search::Request + +An object which holds the information needed to make one search-query +request. 
Often not used explicitly. + +=item Yahoo::Search::Response + +An object which holds the results of a query (including a bunch of +C objects). + +=item Yahoo::Search::Result + +An object representing one query result (one image, web page, etc., as +appropriate to the original search space). + +=back + +=head1 "The Long Way", and Common Practice + +The explicit way to perform a query and access the results is to first +create a "Search Engine" object: + + my $SearchEngine = Yahoo::Search->new(); + +Optionally, you can provide C with key/value pairs as described in the +I section above. Those values will then be available as +default values during subsequent request creation. (More on this later.) + +You then use the search-engine object to create a request: + + my $Request = $SearchEngine->Request(Doc => "Britney"); + +You then actually make the request, getting a response: + + my $Response = $Request->Fetch(); + +You can then access the set of C objects in a number of ways, +either all at once + + my @Results = $Response->Results(); + +or iteratively: + + + while (my $Result = $Response->NextResult) { + : + : + } + +B + +In practice, one often does not need to go through all these steps +explicitly. The only reason to create a search-engine object, for example, +is to hold default overrides (to be made available to subsequent requests +made via the search-engine object). For example: + + use Yahoo::Search; + my $SearchEngine = Yahoo::Search->new(AppId => "Bobs Fish Mart", + Count => 25, + AllowAdult => 1, + PostalCode => 95014); + +Now, calls to the various query functions (C, C) via this +C<$SearchEngine> will use these defaults (I searches, for example, +will be with C set to true, and I searches will be +centered at ZIP code 95014.) All will return up to 25 results. 
+ +In this example: + + my @Results = $SearchEngine->Results(Image => "Britney", + Count => 20); + +The query is made with C<AppId> as 'C<Bobs Fish Mart>' and C<AllowAdult> +true (both via C<$SearchEngine>), but C<Count> is 20 because explicit args +override the default in C<$SearchEngine>. The C<PostalCode> arg does not +apply to an I<Image> search, so the default provided from C +is not needed with this particular query. + +B + +You can also provide the same defaults on the C<use> line. The following +example has the same result as the previous one: + + use Yahoo::Search AppId => 'Bobs Fish Mart', + Count => 25, + AllowAdult => 1, + PostalCode => 95014; + + my @Results = Yahoo::Search->Results(Image => "Britney", + Count => 20); + +=head1 Functions and Methods + +Here, finally, are the functions and methods provided by Yahoo::Search. +In all cases, "...args..." are any of the key/value pairs listed in the +I section of this document (e.g. "Count => 20") + + +=over 4 + +=item $SearchEngine = Yahoo::Search->new(...args...) + +Creates a search-engine object (a container for defaults). +On error, sets C<$@> and returns nothing. + + + +=item $Request = $SearchEngine->Request($space => $query, ...args...) + +=item $Request = Yahoo::Search->Request($space => $query, ...args...) + +Creates a C<Request> object representing a search of the named search space +(I<Doc>, I<Image>, etc.) of the given query string. + +On error, sets C<$@> and returns nothing. + +B<Note>: all arguments are in key/value pairs, but the C<$space>/C<$query> +pair (which is required) must appear first. + + + + +=item $Response = $SearchEngine->Query($space => $query, ...args...) + +=item $Response = Yahoo::Search->Query($space => $query, ...args...) + +Creates an implicit C<Request> object, and fetches it, returning the +resulting C<Response>. + +On error, sets C<$@> and returns nothing. + +B<Note>: all arguments are in key/value pairs, but the C<$space>/C<$query> +pair (which is required) must appear first. + + + + + +=item @Results = $SearchEngine->Results($space => $query, ...args...) 
+ +=item @Results = Yahoo::Search->Results($space => $query, ...args...) + +Creates an implicit C object, then C object, +in the end returning a list of C objects. + +On error, sets C<$@> and returns nothing. + +B: all arguments are in key/value pairs, but the C<$space>/C<$query> +pair (which is required) is required to appear first. + + + + +=item @links = $SearchEngine->Links($space => $query, ...args...) + +=item @links = Yahoo::Search->Links($space => $query, ...args...) + +A super shortcut which goes directly from the query args to a list of + + ... + +links. Essentially, + + map { $_->Link } Yahoo::Search->Results($space => $query, ...args...); + +or, more explicitly: + + map { $_->Link } Yahoo::Search->new()->Request($space => $query, ...args...)->Fetch->Results(@_); + +See C in the documentation for Yahoo::Search::Result. + +B: all arguments are in key/value pairs, but the C<$space>/C<$query> +pair (which is required) is required to appear first. + + + + + +=item @links = $SearchEngine->Terms($space => $query, ...args...) + +=item @links = Yahoo::Search->Terms($space => $query, ...args...) + +A super shortcut for I and I search spaces, returns the +list of spelling-or related-search suggestions, respectively. + +B: all arguments are in key/value pairs, but the C<$space>/C<$query> +pair (which is required) is required to appear first. + + + + + +=item @html = $SearchEngine->HtmlResults($space => $query, ...args...) + +=item @html = Yahoo::Search->HtmlResults($space => $query, ...args...) + +Like C, but returns a list of html strings (one representing each +result). See C in the documentation for Yahoo::Search::Result. + +A simple result display might look like + + print join "

", Yahoo::Search->HtmlResults(....); + +or, perhaps + + if (my @HTML = Yahoo::Search->HtmlResults(....)) + { + print "

    "; + for my $html (@HTML) { + print "
  • ", $html; + } + print "
"; + } + +As an example, here's a complete CGI which shows results from an +image-search, where the search term is in the 'C' query string: + + #!/usr/local/bin/perl -w + use CGI; + my $cgi = new CGI; + print $cgi->header(); + + use Yahoo::Search AppId => 'my-search-app'; + if (my $term = $cgi->param('s')) { + print join "

", Yahoo::Search->HtmlResults(Img => $term); + } + +The results, however, do look better with some style-sheet attention, such +as: + + + + + +B: all arguments are in key/value pairs, but the C<$space>/C<$query> +pair (which is required) is required to appear first. + + + +=item @html = $SearchEngine->MaxCount($space) + +=item @html = Yahoo::Search->MaxCount($space) + +Returns the maximum allowed C query-argument for the given search space. + + + +=item $SearchEngine->Default($key [ => $val ]); + +If a new value is given, update the <$SearchEngine>'s value for the named +C<$key>. + +In either case, the old value for C<$key> in effect is returned. If the +C<$SearchEngine> had a previous value, it is returned. Otherwise, the +global value in effect is returned. + +As always, the key is from among those mentioned in the I +section above. + +The old value is returned. + + +=item Yahoo::Search->Default($key [ => $val ]); + +Update or, if no new value is given, check the global default value for the +named argument. The key is from among those mentioned in the I section above, as well as C (discussed below). + +=back + + +=head1 Defaults and Default Overrides + +All key/value pairs mentioned in the I section may appear +on the C line, in the call to the C constructor, or in requests +that create a query explicitly or implicitly (C, C, +C, C, or C). + +Each argument's value takes the first of the following which applies +(listed in order of precedence): + +=over 6 + +=item 4) + +The actual arguments to a function which creates (explicitly or implicitly) +a request. + +=item 3) + +Search-engine default overrides, set when the Yahoo::Search C +constructor is used to create a search-engine object, or when that object's +C method is called. + +=item 2) + +Global default overrides, set on the C line or via + + Yahoo::Search->Default() + +=item 1) + +Defaults hard-coded into these packages (e.g. C defaults to 10). 

=back

It's particularly convenient to put the C<AppId> on the C<use> line,
e.g.

    use Yahoo::Search AppId => 'just testing';

=head1 AutoCarp

By default, detected errors that would be classified as programming errors
(e.g. use of incorrect args) are automatically spit out to stderr besides
being returned via C<$@>. This can be turned off via

    use Yahoo::Search AutoCarp => 0;

or

    Yahoo::Search->Default(AutoCarp => 0);

The default of true is somewhat obnoxious, but hopefully helps create
better programs by forcing the programmer to actively think about error
checking (if even long enough to turn off error reporting).

=head1 Copyright

Copyright (C) 2005 Yahoo! Inc.

=head1 Author

Jeffrey Friedl (jfriedl@yahoo.com)

$Id: Search.pm 2 2005-01-28 04:27:46Z jfriedl $

=cut
diff --git a/Yahoo/Search/Request.pm b/Yahoo/Search/Request.pm new file mode 100644 index 0000000..a4319e8 --- /dev/null +++ b/Yahoo/Search/Request.pm @@ -0,0 +1,368 @@
package Yahoo::Search::Request;
use strict;

use Yahoo::Search::Response;
use Yahoo::Search::XML;
use LWP::UserAgent;
use HTTP::Request;
use URI;

=head1 NAME

Yahoo::Search::Request -- container object for a Yahoo! Search request.

=head1 Package Use

You never need to C<use> this package directly -- it is loaded
automatically by Yahoo::Search.

=head1 Object Creation

In practice, this class is generally not dealt with explicitly, but rather
implicitly via functions in Yahoo::Search such as C and C,
which build and use a C<Request> object under the hood.

You also have access to the C<Request> object via C<Request> method of
resulting C<Response> and C<Result> objects.

To be clear, C<Request> objects are created by the C<Request> method of a
Search Engine object (Yahoo::Search).

=cut

##
## Constructor: takes the class followed by key/value pairs and returns
## them as a blessed hash.
##
sub new
{
    my ($class, %Args) = @_;

    ##
    ## Underscore-prefixed entries (e.g. _Url) are derived, per-object
    ## state, so none of them are carried into the new object.
    ##
    delete @Args{ grep { /^_/ } keys %Args };

    return bless \%Args, $class;
}


=head1 Methods

A C<Request> object provides the following methods:

=over 4

=cut

###########################################################################

=item $Request->Uri

Returns the URI::http object representing the url fetched (or to be
fetched) from Yahoo's Search servers. The url is actually fetched when the
C<Request> object's C<Fetch> method is called.

=cut

sub Uri
{
    my ($Request) = @_;

    ##
    ## Build the URI (action + query string) on first use only; later
    ## calls hand back the cached object.
    ##
    unless ($Request->{_Uri})
    {
        my $Uri = $Request->{_Uri} = URI->new($Request->{Action}, "http");
        $Uri->query_form(%{$Request->{Params}});
    }

    return $Request->{_Uri};
}



###########################################################################

=item $Request->Url

Like the C<Uri> method, but returns a string with the full url
fetched (or to be fetched).

=cut

sub Url
{
    my ($Request) = @_;
    my $Uri = $Request->Uri;
    return $Uri->as_string;
}



###########################################################################

=item $Request->SearchSpace

Returns the search space the request represents (I<Doc>, I<Image>, etc.)

=cut


sub SearchSpace
{
    my ($Request) = @_;
    return $Request->{Space};
}



###########################################################################

=item $Request->SearchEngine

Returns the Yahoo::Search "search engine" object used in creating this
request.

=cut

sub SearchEngine
{
    my ($Request) = @_;
    return $Request->{SearchEngine};
}



##
## The Spell and Related search spaces have very simple data --
## they are simple text phrases, and not further nested xml.
##
my %SimpleResultSpace =
(
    Spell   => 1,
    Related => 1,
);


###########################################################################

=item $Request->Fetch

Actually contact the Yahoo Search servers, returning a C<Response>
(Yahoo::Search::Response) object.

Returns nothing if the search engine's C<PreRequestCallback> (when one is
set) returns false. Also returns nothing, with C<$@> set to a description
of the problem, if the HTTP request fails, the reply is empty, or the xml
cannot be processed.

=cut



sub Fetch
{
    my $Request = shift; # self
    ## no other args

    ##
    ## Normalize the Debug setting once, so that the pattern matches below
    ## never operate on an undefined value.
    ##
    my $Debug = defined $Request->{Debug} ? $Request->{Debug} : '';

    ##
    ## Fetch -- get the response (which contains xml, hopefully)
    ##
    my $url = $Request->Url;

    ##
    ## Give the PreRequestCallback (if any) the chance to veto the fetch.
    ## A request that carries no search engine has no callback to consult.
    ##
    if (my $SearchEngine = $Request->SearchEngine)
    {
        if (my $callback = $SearchEngine->Default('PreRequestCallback'))
        {
            if (not $callback->($Request)) {
                return ();
            }
        }
    }

    warn "Fetching url: $url\n" if $Debug =~ m/url/x;
    my $response = LWP::UserAgent->new(agent => "Yahoo::Search ($Yahoo::Search::VERSION)")->request(HTTP::Request->new(GET => $url));

    ##
    ## Ensure we have a good result
    ##
    if (not $response) {
        $@ = "couldn't make request";
        return ();
    }

    if (not $response->is_success) {
        $@ = "ERROR: " . $response->status_line;
        return ();
    }

    ##
    ## Nab (and if debugging, report) the xml
    ##
    my $xml = $response->content;
    warn $xml, "\n" if $Debug =~ m/xml/x;

    if (not $xml) {
        $@ = "empty response from Yahoo server";
        return ();
    }

    ##
    ## Turn the XML into a Perl hash...
    ##
    my $ResultHash;
    if ($Yahoo::Search::UseXmlSimple) {
        require XML::Simple;
        $ResultHash = eval { XML::Simple::XMLin($xml) };
    } else {
        $ResultHash = eval { Yahoo::Search::XML::Parse($xml) };
    }

    if (not $ResultHash) {
        $@ = "error processing xml: $@";
        return ();
    }

    ##
    ## If there is only one result, $ResultHash->{Result} will be a hash
    ## ref rather than the ref to an array of hash refs that we would
    ## otherwise expect, so we'll fix that here.
    ##
    if (not exists $ResultHash->{Result}) {
        $ResultHash->{Result} = [ ];
    } elsif (ref($ResultHash->{Result}) ne "ARRAY") {
        $ResultHash->{Result} = [ $ResultHash->{Result} ];
    }

    ##
    ## The mention of "hash ref" in the previous comment doesn't apply
    ## to Spell and Related spaces -- let's fix that.
    ##
    if ($SimpleResultSpace{$Request->SearchSpace})
    {
        my @Results;
        for my $item (@{ $ResultHash->{Result}}) {
            push @Results, { Term => $item };
        }
        $ResultHash->{Result} = \@Results;


        ##
        ## These are not part of what's returned, but it makes it easier
        ## for us if they're there, so fake'em.
        ##
        $ResultHash->{firstResultPosition}   = @Results ? 1 : 0;
        $ResultHash->{totalResultsAvailable} = scalar @Results;

        ##
        ## Add this hint to the rest of the code to not allow
        ## further requests (e.g. via AutoContinue).
        ##
        $ResultHash->{_NoFurtherRequests} = 1;
    }

    ##
    ## Report if needed.
    ##
    if ($Debug =~ m/hash/x) {
        require Data::Dumper;
        local($Data::Dumper::Terse) = 1;
        warn "Grokked Hash: ", Data::Dumper::Dumper($ResultHash), "\n";
    }

    $ResultHash->{_Request} = $Request;
    $ResultHash->{_XML}     = $xml;

    ##
    ## Create (and return) a new Response object from the request and the
    ## returned hash.
    ##
    return Yahoo::Search::Response->new($ResultHash);
}



###########################################################################

=item $Request->RelatedRequest

=item $Request->RelatedResponse

Perform a I<Related> request for search terms related to the query phrase
of the current request, returning the new C<Request> or C<Response> object,
respectively.

Both return nothing if the current request is already for a I<Related>
search.

=cut


sub RelatedRequest
{
    my ($Request) = @_;

    ## A Related search has no "related" follow-up of its own.
    return () if $Request->SearchSpace eq "Related";

    return $Request->SearchEngine->Request(Related => $Request->{Params}->{query});
}

sub RelatedResponse
{
    my ($Request) = @_;

    my $Related = $Request->RelatedRequest
        or return ();

    return $Related->Fetch();
}


###########################################################################

=item $Request->SpellRequest

=item $Request->SpellResponse

Perform a I<Spell> request for a search term that may reflect proper
spelling of the query phrase of the current request, returning the new
C<Request> or C<Response> object, respectively.

Both return nothing if the current request is already for a I<Spell>
search.

=cut


sub SpellRequest
{
    my ($Request) = @_;

    ## A Spell search has no "spelling" follow-up of its own.
    return () if $Request->SearchSpace eq "Spell";

    return $Request->SearchEngine->Request(Spell => $Request->{Params}->{query});
}

sub SpellResponse
{
    my ($Request) = @_;

    my $Spell = $Request->SpellRequest
        or return ();

    return $Spell->Fetch();
}


=pod

=back

=head1 Copyright

Copyright (C) 2005 Yahoo! Inc.

=head1 Author

Jeffrey Friedl (jfriedl@yahoo.com)

$Id: Request.pm 3 2005-01-28 04:29:54Z jfriedl $

=cut

1;
diff --git a/Yahoo/Search/Response.pm b/Yahoo/Search/Response.pm new file mode 100644 index 0000000..ba70aa5 --- /dev/null +++ b/Yahoo/Search/Response.pm @@ -0,0 +1,699 @@
package Yahoo::Search::Response;
use strict;
use Yahoo::Search::Result;

=head1 NAME

Yahoo::Search::Response -- container object for the result set of one query
to the Yahoo! Search API.

=head1 Package Use

You never need to C<use> this package directly -- it is loaded
automatically by Yahoo::Search.

=head1 Object Creation

C<Response> objects are created by the C<Fetch> method of a C<Request>
(Yahoo::Search::Request) object, e.g.
by

    my $Response = Yahoo::Search->new(...)->Request()->Fetch();

or by shortcuts to the same, such as:

    my $Response = Yahoo::Search->Query(...);

=cut

##
## Constructor, called from Request.pm once the xml returned for a specific
## Request has been turned into a hash. Takes the class and that hashref;
## the hashref itself becomes the Response object.
##
sub new
{
    my ($class, $Response) = @_;

    bless $Response, $class;

    ## NextResult() iterates from the first result
    $Response->{_NextIterator} = 0;

    ## Y! server bug -- firstResultPosition is sometimes empty
    $Response->{firstResultPosition} ||= 1;

    ##
    ## Turn each raw "Result" item into a Result object that knows its
    ## position (to support the i() and I() methods) and its parent.
    ##
    my $ResultCount = @{$Response->{Result}};
    for my $i (0 .. $ResultCount - 1)
    {
        my $Result = $Response->{Result}->[$i];

        $Result->{_ResponseOrdinal} = $i;
        $Result->{_Response}        = $Response;

        ##
        ## An element that was empty in the xml ends up being a ref to an
        ## empty hash. We'll remove those.
        ##
        my @EmptyKeys = grep {
            ref($Result->{$_}) eq "HASH" and not keys %{$Result->{$_}}
        } keys %$Result;
        delete @$Result{@EmptyKeys};

        bless $Result, "Yahoo::Search::Result";
    }

    return $Response;
}


=head1 Methods

A C<Response> object has the following methods:

=over 4

=cut


###########################################################################

=item $Response->Count()

Returns the number of C<Result> objects available in this C<Response>. See
Yahoo::Search::Result for details on C<Result> objects.

=cut

sub Count
{
    my ($Response) = @_;
    my $Results = $Response->{Result};
    return scalar @$Results;
}




###########################################################################
##
## Helper: returns $num with $comma inserted between each group of three
## digits (counted from the right). If $comma is false -- undef, "", 0 --
## $num is returned untouched.
##
sub _commaize($$)
{
    my ($num, $comma) = @_; # $comma: "," (English), "." (European), undef.....

    $num =~ s/(?<=\d)(?=(?:\d\d\d)+$)/$comma/g if $comma;

    return $num;
}
###########################################################################

=item $Response->FirstOrdinal([ I<separator> ])

Returns the index of the first C<Result> object (e.g. the "30" of I<results 30 through 40 out of 5329>). This is the same as the C<Start> arg of the
C<Request> that generated this C<Response>.

If an optional argument is given and is true, it is used as a separator
every three digits. In the US, one would use

    $Response->FirstOrdinal(',')

to return, say, "1,230" instead of the "1230" that

    $Response->FirstOrdinal()

might return.

=cut

sub FirstOrdinal
{
    my ($Response, $Comma) = @_; # $Comma is optional

    ## Y! counts from 1; we count from 0 -- hence the '- 1'
    my $Position = $Response->{firstResultPosition} || 0;

    return _commaize($Position - 1, $Comma);
}



###########################################################################

=item $Response->CountAvail([ I<separator> ])

Returns an approximate number of total search results available, were you
to ask for them all (e.g. the "5329" of the I<results 30 through 40 out of 5329>).

If an optional argument is given and is true, it is used as a separator
every three digits. In the US, one would use

    $Response->CountAvail(',')

to return, say, "5,329" instead of the "5329" that

    $Response->CountAvail()

might return.

=cut

sub CountAvail
{
    my ($Response, $Comma) = @_; # $Comma is optional

    my $Total = $Response->{totalResultsAvailable} || 0;

    return _commaize($Total, $Comma);
}



###########################################################################

=item $Response->Links()

Returns a list of links from the response (one link per result):

    use Yahoo::Search;
    if (my $Response = Yahoo::Search->Query(Doc => 'Britney'))
    {
        for my $link ($Response->Links) {
            print "
$link\n"; + } + } + +This prints one + +
title of the link

line per result returned from the query.

(Not valid for I<Spell> and I<Related> search results.)

=cut

sub Links
{
    my $Response = shift; #self;
    return map { $_->Link } $Response->Results;
}




###########################################################################

=item $Response->Terms()

(Valid only for I<Spell> and I<Related> search results.)

Returns a list of text terms, one per result.

=cut

sub Terms
{
    my $Response = shift; #self;

    ##
    ## Each Result holds a single term, exposed through its Term method;
    ## the Methods Overview in Yahoo::Search::Result lists no "Terms"
    ## method for Result objects.
    ##
    return map { $_->Term } $Response->Results;
}




###########################################################################

=item $Response->Results()

Returns a list of Yahoo::Search::Result C<Result> objects representing
all the results held in this C<Response>. For example:

    use Yahoo::Search;
    if (my $Response = Yahoo::Search->Query(Doc => 'Britney'))
    {
        for my $Result ($Response->Results) {
            printf "%d: %s\n", $Result->I, $Result->Url;
        }
    }

This is not valid for I<Spell> and I<Related> searches.

=cut

sub Results
{
    my $Response = shift; #self;
    return @{$Response->{Result}};
}




###########################################################################

=item $Response->NextResult(options)

Returns a C<Result> object, or nothing. (On error, returns nothing and sets
C<$@>.)

The first time C<NextResult> is called for a given C<Response> object, it
returns the C<Result> object for the first result in the set. Returns
subsequent C<Result> objects for subsequent calls, until there are none
left, at which point what is returned depends upon whether the
auto-continuation feature is turned on (more on that in a moment).

The following produces the same results as the C<Results> example above:

    use Yahoo::Search;
    if (my $Response = Yahoo::Search->Query(Doc => 'Britney')) {
        while (my $Result = $Response->NextResult) {
            printf "%d: %s\n", $Result->I, $Result->Url;
        }
    }

B<Auto-Continuation>

If auto-continuation is turned on, then upon reaching the end of the result
set, C<NextResult> automatically fetches the next set of results and
returns I<its> first result.
+ +This can be convenient, but B, as it means that a +loop which calls C, unless otherwise exited, will fetch results +from Yahoo! until there are no more results for the query, or until you +have exhausted your access limits. + +Auto-continuation can be turned on in several ways: + +=over 3 + +=item * + +On a per C basis by calling as + + $Response->NextResult(AutoContinue => 1) + +as with this example + + use Yahoo::Search; + ## + ## WARNING: DANGEROUS DANGEROUS DANGEROUS + ## + if (my $Response = Yahoo::Search->Query(Doc => 'Britney')) { + while (my $Result = $Response->NextResult(AutoContinue => 1)) { + printf "%d: %s\n", $Result->I, $Result->Url; + } + } + + +=item * + +By using + + AutoContinue => 1 + +when creating the request (e.g. in a Yahoo::Search->Query call), as +with this example: + + use Yahoo::Search; + ## + ## WARNING: DANGEROUS DANGEROUS DANGEROUS + ## + if (my $Response = Yahoo::Search->Query(Doc => 'Britney', + AutoContinue => 1)) + { + while (my $Result = $Response->NextResult) { + printf "%d: %s\n", $Result->I, $Result->Url; + } + } + +=item * + +By creating a query via a search-engine object created with + + AutoContinue => 1 + +as with this example: + + use Yahoo::Search; + ## + ## WARNING: DANGEROUS DANGEROUS DANGEROUS + ## + my $SearchEngine = Yahoo::Search->new(AutoContinue => 1); + + if (my $Response = $SearchEngine->Query(Doc => 'Britney')) { + while (my $Result = $Response->NextResult) { + printf "%d: %s\n", $Result->I, $Result->Url; + } + } + + +=item * + +By creating a query when Yahoo::Search had been loaded via: + + use Yahoo::Search AutoContinue => 1; + +as with this example: + + use Yahoo::Search AutoContinue => 1; + ## + ## WARNING: DANGEROUS DANGEROUS DANGEROUS + ## + if (my $Response = Yahoo::Search->Query(Doc => 'Britney')) { + while (my $Result = $Response->NextResult) { + printf "%d: %s\n", $Result->I, $Result->Url; + } + } + + +=back + + +All these examples are dangerous because they loop through results, +fetching more 
and more, until either all results that Yahoo! has for the
query at hand have been fetched, or the Yahoo! Search server access limits
have been reached and further access is denied. So, be sure to rate-limit
the accesses, or explicitly break out of the loop at some appropriate
point.

=cut

sub NextResult
{
    my ($Response, @Options) = @_;

    if (@Options % 2 != 0) {
        return Yahoo::Search::_carp_on_error("wrong number of args to NextResult");
    }
    my %Args = @Options;

    ## The request's own setting applies unless this call overrides it.
    my $AutoContinue = $Response->{_Request}->{AutoContinue};
    $AutoContinue = delete $Args{AutoContinue} if exists $Args{AutoContinue};

    ## AutoContinue is the only option understood here
    if (my @Unexpected = keys %Args) {
        my $list = join ', ', @Unexpected;
        return Yahoo::Search::_carp_on_error("unexpected args to NextResult: $list");
    }

    ##
    ## Hand out the result at the iterator position (if there is one) and
    ## step the iterator past it.
    ##
    my $Results = $Response->{Result};
    if ($Response->{_NextIterator} < @$Results) {
        my $Index = $Response->{_NextIterator}++;
        return $Results->[$Index];
    }

    ##
    ## This set is exhausted. When auto-continuing, try for the next set.
    ##
    if ($AutoContinue)
    {
        if (my $next = $Response->NextResponse)
        {
            ##
            ## Overwrite this object's contents, in place, with the new
            ## response's (the old contents are gone after this).
            ## NOTE(review): the Response constructor points each Result's
            ## _Response at the object it was built in, so these Results
            ## appear to keep referring to $next rather than to this
            ## object -- confirm that is intended.
            ##
            %$Response = %$next;

            ## ...and start handing out results from the new set
            return $Response->NextResult;
        }
    }

    ##
    ## Nothing further -- rewind the iterator and return nothing.
    ##
    $Response->{_NextIterator} = 0;
    return ();
}


###########################################################################

=item $Response->Reset()

Resets the iterator so that the next C<NextResult> returns the first of the
C<Response> object's C<Result> objects.

=cut '

sub Reset
{
    my ($Response) = @_;
    $Response->{_NextIterator} = 0;
}



###########################################################################

=item $Response->Request()

Returns the C<Request> object from which this C<Response> object was
derived.

=cut

sub Request
{
    my ($Response) = @_;
    return $Response->{_Request};
}


###########################################################################

=item $Response->NextRequest()

Returns a C<Request> object which will fetch the subsequent set of results
(e.g. if the current C<Response> object represents the first 10 query
results, C<NextRequest> returns a C<Request> object that represents a
query for the I<next> 10 results.)

Returns nothing if there were no results in the current C<Response> object
(thereby eliminating the possibility of there being a I<next> result set).
On error, sets C<$@> and returns nothing.

=cut

sub NextRequest
{
    my ($Response) = @_;

    ## No results last time, so can't expect any next time
    return () unless $Response->Count;

    ## We already have them all, so no reason to get more
    return () if $Response->FirstOrdinal + $Response->Count >= $Response->CountAvail;

    ## The response is flagged as not allowing follow-up requests
    return () if $Response->{_NoFurtherRequests};

    ##
    ## Clone the original request's settings. Params gets a copy of its
    ## own so that moving 'start' forward leaves the original untouched.
    ##
    my $Previous = $Response->{_Request};
    my %Params   = %{ $Previous->{Params} };
    $Params{start} += $Response->Count;

    return Yahoo::Search::Request->new(%$Previous, Params => \%Params);
}



###########################################################################

=item $Response->NextResponse()

Like C<NextRequest>, but goes ahead and calls the C<Request> object's
C<Fetch> method to return the C<Response> object for the next set of results.

=cut '

sub NextResponse
{
    my ($Response) = @_;

    ## when there is no next request, $@ must already be set
    my $Request = $Response->NextRequest
        or return ();

    return $Request->Fetch();
}

###########################################################################

=item $Response->Uri()

Returns the C<URI::http> object that was fetched to create this response.
It is the same as:

    $Response->Request->Uri()

=cut

sub Uri
{
    my ($Response) = @_;
    my $Request = $Response->{_Request};
    return $Request->Uri;
}




###########################################################################

=item $Response->Url()

Returns the url that was fetched to create this response.
It is the same as:

    $Response->Request->Url()

=cut

sub Url
{
    my ($Response) = @_;
    my $Request = $Response->Request;
    return $Request->Url;
}



###########################################################################

=item $Response->RawXml()

Returns a string holding the raw xml returned from the Yahoo! Search
servers.

=cut

sub RawXml
{
    my ($Response) = @_;
    return $Response->{_XML};
}

##############################################################################

=item $Response->MapUrl()

Valid only for a I<Local> search, returns a url to a map showing all
results. (This is the same as each C<Result> object's C<AllMapUrl> method.)

=cut

sub MapUrl
{
    my ($Response) = @_;
    return $Response->{ResultSetMapUrl};
}




##############################################################################

=item $Response->RelatedRequest

=item $Response->RelatedResponse

Perform a I<Related> request for search terms related to the query phrase
of the current request, returning the new C<Request> or C<Response> object,
respectively.

Both return nothing if the current request is already for a I<Related>
search.

For example:

    print "Did you mean ", join(" or ", $Response->RelatedResponse->Terms()), "?";

=cut

## Both simply delegate to the Request this Response was derived from.
sub RelatedRequest
{
    my ($Response) = @_;
    my $Request = $Response->Request;
    return $Request->RelatedRequest;
}

sub RelatedResponse
{
    my ($Response) = @_;
    my $Request = $Response->Request;
    return $Request->RelatedResponse;
}


##############################################################################

=item $Response->SpellRequest

=item $Response->SpellResponse

Perform a I<Spell> request for a search term that may reflect proper
spelling of the query phrase of the current request, returning the new
C<Request> or C<Response> object, respectively.

Both return nothing if the current request is already for a I<Spell>
search.

=cut


## Both simply delegate to the Request this Response was derived from.
sub SpellRequest
{
    my ($Response) = @_;
    my $Request = $Response->Request;
    return $Request->SpellRequest;
}

sub SpellResponse
{
    my ($Response) = @_;
    my $Request = $Response->Request;
    return $Request->SpellResponse;
}



##############################################################################



=pod

=back

=head1 Copyright

Copyright (C) 2005 Yahoo! Inc.

=head1 Author

Jeffrey Friedl (jfriedl@yahoo.com)

$Id: Response.pm 3 2005-01-28 04:29:54Z jfriedl $

=cut


1;
diff --git a/Yahoo/Search/Result.pm b/Yahoo/Search/Result.pm new file mode 100644 index 0000000..3cd11a2 --- /dev/null +++ b/Yahoo/Search/Result.pm @@ -0,0 +1,1538 @@
package Yahoo::Search::Result;
use strict;

=head1 NAME

Yahoo::Search::Result -- class representing a single result (single web
page, image, video file, etc) from a Yahoo! search-engine query.

=head1 Package Use

You never need to C<use> this package directly -- it is loaded
automatically by Yahoo::Search.

=head1 Object Creation

C<Result> objects are created automatically when a C<Response> object is
created (when a C<Request> object's C<Fetch> method is called, either
directly, or indirectly via a shortcut such as
C<Yahoo::Search-E<gt>Query()>).

=head1 Methods Overview

This table shows the methods available on a per-search-space basis:

                       Doc  Image Video  News Local Spell Related
                      ----- ----- ----- ----- ----- ----- -------
      Next             [X]   [X]   [X]   [X]   [X]   [X]   [X]
      Prev             [X]   [X]   [X]   [X]   [X]   [X]   [X]
      Response         [X]   [X]   [X]   [X]   [X]   [X]   [X]
      Request          [X]   [X]   [X]   [X]   [X]   [X]   [X]
      SearchSpace      [X]   [X]   [X]   [X]   [X]   [X]   [X]

    * I                [X]   [X]   [X]   [X]   [X]   [X]   [X]
    * i                [X]   [X]   [X]   [X]   [X]   [X]   [X]
      as_html          [X]   [X]   [X]   [X]   [X]   [X]   [X]
      as_string        [X]   [X]   [X]   [X]   [X]   [X]   [X]
      Data             [X]   [X]   [X]   [X]   [X]   [X]   [X]

    * Url              [X]   [X]   [X]   [X]   [X]    .     .
    * ClickUrl         [X]   [X]   [X]   [X]   [X]    .     .
    * Title            [X]   [X]   [X]   [X]   [X]    .     .
      TitleAsHtml      [X]   [X]   [X]   [X]   [X]    .     .
      Link             [X]   [X]   [X]   [X]   [X]    .     .
    * Summary          [X]   [X]   [X]   [X]    .     .     .
      SummaryAsHtml    [X]   [X]   [X]   [X]    .     .     .

    * CacheUrl         [X]    .     .     .     .     .     .
    * CacheSize        [X]    .     .     .     .     .     .
    * ModTimestamp     [X]    .     .    [X]    .     .     .

    * Width             .    [X]   [X]    .     .     .     .
    * Height            .    [X]   [X]    .     .     .     .

    * ThumbUrl          .    [X]   [X]   [X]    .     .     .
    * ThumbWidth        .    [X]   [X]   [X]    .     .     .
    * ThumbHeight       .    [X]   [X]   [X]    .     .     .
      ThumbImg          .    [X]   [X]   [X]    .     .     .
      ThumbLink         .    [X]   [X]   [X]    .     .     .

    * HostUrl           .    [X]   [X]    .     .     .     .
    * Copyright         .    [X]   [X]    .     .     .     .
    * Publisher         .    [X]   [X]    .     .     .     .
    * Restrictions      .    [X]   [X]    .     .     .     .

    * Type             [X]   [X]   [X]    .     .     .     .
    * Bytes             .    [X]   [X]    .     .     .     .
    * Channels          .     .    [X]    .     .     .     .
    * Seconds           .     .    [X]    .     .     .     .
    * Duration          .     .    [X]    .     .     .     .
    * Streaming         .     .    [X]    .     .     .     .

    * SourceName        .     .     .    [X]    .     .     .
      SourceNameAsHtml  .     .     .    [X]    .     .     .
    * SourceUrl         .     .     .    [X]    .     .     .
    * Language          .     .     .    [X]    .     .     .
    * PublishTime       .     .     .    [X]    .     .     .
    * PublishWhen       .     .     .    [X]    .     .     .

    * Address           .     .     .     .    [X]    .     .
    * City              .     .     .     .    [X]    .     .
    * State             .     .     .     .    [X]    .     .
    * Phone             .     .     .     .    [X]    .     .
    * Miles             .     .     .     .    [X]    .     .
    * Kilometers        .     .     .     .    [X]    .     .
    * Rating            .     .     .     .    [X]    .     .
    * MapUrl            .     .     .     .    [X]    .     .
    * BusinessUrl       .     .     .     .    [X]    .     .
    * BusinessClickUrl  .     .     .     .    [X]    .     .
    * AllMapUrl         .     .     .     .    [X]    .     .

    * Term              .     .     .     .     .    [X]   [X]
      TermAsHtml        .     .     .     .     .    [X]   [X]

Those items marked with a '*' are also available via the C<Data> method

=cut '



my @DOW = qw[x Sun Mon Tue Wed Thu Fri Sat];
my @MON = qw[x Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec];

##
## helper function -- returns the given text cooked for html: the three
## characters that are special in html text (&, <, >) are replaced by
## their entities. The ampersand must be handled first, so that the
## ampersands introduced by the other two replacements are left alone.
##
sub _cook_for_html($)
{
    my $text = shift;

    #die join(',', caller) if not defined $text;

    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

##
## helper function -- given a key in a result object, a result object (the
## "self" from a method), and an indication of whether we want text or
## html, return the appropriate text or html.
##
## Returns nothing if the result object has no defined value for the key.
##
sub _text_or_html($@)
{
    my $Key    = shift;
    my $Result = shift;
    my $AsHtml = shift; #optional

    my $Text = $Result->{$Key};

    if (not defined $Text) {
        return ();
    } elsif ($AsHtml) {
        return _cook_for_html($Text);
    } else {
        return $Text;
    }
}


##
## helper function -- if passed one arg, it's a url, and simply return it.
##
## If passed multiple args, the 2nd is an attribute (e.g. "href", "src"),
## which causes the return of a string like
##     href="$url"
## where we're sure the quoting of the url is safe.
##
## Returns nothing if the url is false (undefined or empty).
##
sub _url($@)
{
    my $Url    = shift;
    my $Attrib = shift;

    if (not $Url)
    {
        return ();
    }
    elsif (not $Attrib)
    {
        return $Url;
    }
    elsif (not $Url =~ m/\x22/) {
        ## no double quote in the url, so double quotes can delimit it
        return qq/$Attrib="$Url"/;
    } elsif (not $Url =~ m/\x27/) {
        ## no single quote in the url, so single quotes can delimit it
        return qq/$Attrib='$Url'/;
    } else {
        ## both kinds of quote are present, so percent-encode them
        $Url =~ s/\x22/%22/g; # double quote
        $Url =~ s/\x27/%27/g; # single quote
        return qq/$Attrib="$Url"/;
    }
}


##
## Want to be able to dump a hash of most data, so note which items are
## available and interesting on a per-search-space basis.
+## +my @CommonItems = qw[Url ClickUrl Summary Title i I]; + +my %ItemsBySpace = +( + Video => [@CommonItems, qw"Type Bytes HostUrl Copyright Publisher Restrictions Channels Seconds Duration Streaming Width Height ThumbUrl ThumbWidth ThumbHeight"], + Image => [@CommonItems, qw"Type Bytes HostUrl Copyright Publisher Restrictions Width Height ThumbUrl ThumbWidth ThumbHeight"], + Doc => [@CommonItems, qw"Type CacheUrl CacheSize ModTimestamp"], + Local => [@CommonItems, qw"Address City State Phone Miles Kilometers Rating MapUrl AllMapUrl"], + News => [@CommonItems, qw"SourceName SourceUrl Language ModTimestamp PublishTime ThumbUrl ThumbWidth ThumbHeight"], + Spell => [@CommonItems, "Term"], + Related => [@CommonItems, "Term"], +); + + + + +=head1 METHODS + +=over 4 + +=cut + +############################################################################## + +=item $Result->Next([I]) + +Returns the next C object from among the list of result objects +that are part of one C object. + +Returns nothing when called on the last result in a response, unless +auto-continuation is turned on, in which case the next set is automatically +fetched and the first C from that set's C is returned. + +An optional defined boolean argument turns auto-continuation on (true) or +off (false). If the argument is not defined, or not provided, the value for +the original request's C option (default off) is used. + +Note that using auto-continuation can be dangerous. See the docs for +C in Yahoo::Search::Response. 
+ +=cut + +sub Next +{ + my $Result = shift; # self + my $AutoContinue = shift; + + if ($Result->{_ResponseOrdinal} < $#{ $Result->{_Response}->{Result} }) + { + return $Result->{_Response}->{Result}->[$Result->{_ResponseOrdinal} + 1]; + } + else + { + if (not defined $AutoContinue) { + $AutoContinue = $Result->{_Response}->{_Request}->{AutoContinue}; + } + + if ($AutoContinue + and + my $NextResponse = $Result->{_Response}->NextSet) + { + return $NextResponse->NextResult(); + } + else + { + return () + } + } +} + + + + +############################################################################## + +=item $Result->Prev + +The opposite of C. No auto-continuation feature. + +=cut + +## does not auto-fetch when fetching result[-1] +sub Prev +{ + my $Result = shift; # self + + if ($Result->{_ResponseOrdinal} == 0) { + return (); + } else { + return $Result->{_Response}->{Result}->[$Result->{_ResponseOrdinal} - 1]; + } +} + + + +############################################################################## + +=item $Result->Response + +Returns the C object of which this C object is a part. + +=cut + +sub Response +{ + my $Result = shift; # self + return $Result->{_Response}; +} + + + +############################################################################## + +=item $Result->Request + +Returns the original C object from which this C object's +C was derived. + +=cut ' + +sub Request +{ + my $Result = shift; # self + return $Result->{_Response}->{_Request}; +} + + + + +############################################################################## + +=item $Result->SearchSpace + +Returns a string which indicates the search space of the original query +that this result was part of. (That is, it returns C, C, +C, C, or C


", map { $_->as_html } Yahoo::Search->Results(@_); + } + +(Also see CHtmlResults>) + +The HTML returned by C contains class references, thereby allowing +the look-and-feel to be easily adjusted. Here's a style sheet that makes +Image search results look palatable. + + + +B: English-centric + +=cut ' + +sub as_html +{ + my $Result = shift; # self + my $SearchSpace = $Result->SearchSpace; + my $summary = $Result->Summary(1); + + if ($SearchSpace eq 'Doc') + { + my $link = $Result->Link; + my $url = $Result->Url; + + my $html = "$link
$url
"; + if ($summary) { + $html .= "
$summary
"; + } + return "$html"; + } + + if ($SearchSpace eq 'Video') + { + my $HREF = $Result->ClickUrl('HREF'); + my $title = $Result->Title(1); + my $html; + if (my $img = $Result->ThumbImg) { + $html = "$img $title"; + } else { + $html = "$title"; + } + + $html .= "
" . $Result->Url . "
"; + + my @extra; + if (my $duration = $Result->Duration) { + push @extra, "Duration: $duration"; + } + + if (my $width = $Result->Width and my $height = $Result->Height) { + push @extra, "Video resolution $width x $height"; + } + + if (my $size = $Result->Bytes) { + push @extra, "File size: $size"; + } + + if (my $chan = $Result->Channels) { + push @extra, "$chan-channel audio"; + } + + if (my $HREF = $Result->HostUrl('href')) { + push @extra, "Source page"; + } + if (@extra) { + $html .= "
" . join(" | ", @extra) . "
"; + } + + if ($summary) { + $html .= "
$summary
"; + } + return "$html"; + } + + if ($SearchSpace eq 'Image') + { + my $HREF = $Result->ClickUrl('href'); + my $title = $Result->Title(1); + my $html; + if (my $img = $Result->ThumbImg) { + $html = "$img $title"; + } else { + $html = "$title"; + } + + $html .= "
" . $Result->Url . "
"; + + my @extra; + if (my $size = $Result->Bytes) { + push @extra, "File size: $size"; + } + if (my $width = $Result->Width and my $height = $Result->Height) { + push @extra, "Image size: $width x $height"; + } + if (my $HREF = $Result->HostUrl('HREF')) { + push @extra, "Source page"; + } + if (@extra) { + $html .= "
" . join(" | ", @extra) . "
"; + } + + if ($summary) { + $html .= "
$summary
"; + } + return "$html"; + } + + if ($SearchSpace eq "News") + { + my $HREF = $Result->ClickUrl('HREF'); + my $title = $Result->Title(1); + my $html = ""; + if (my $img = $Result->ThumbImg) { + $html .= "$img $title"; + } else { + $html .= "$title"; + } + my $src_name = $Result->SourceNameAsHtml; + my $src_href = $Result->SourceUrl('HREF'); + if ($src_name and $src_href) { + $html .= "
" . _cook_for_html($src_name) . "
"; + } + if (my $when = $Result->PublishWhen) { + $html .= " ($when)"; + } + + if ($summary) { + $html .= "
$summary
"; + } + return "$html"; + } + + if ($SearchSpace eq "Local") + { + my $html = $Result->Link; + + if (my $addr = join(', ', grep { $_ } $Result->Address, $Result->City . " " . $Result->State)) { + $html .= "
$addr
"; + } + + my @extra; + if (my $phone = $Result->Phone) { + push @extra, "$phone"; + } + if (my $HREF = $Result->MapUrl('href')) { + push @extra, "Map"; + } + if (@extra) { + $html .= "
" . join(" | ",@extra) . "
"; + } + + if ($summary) { + $html .= "
$summary
"; + } + return "$html"; + } + + if ($SearchSpace eq "Spell") + { + my $item = $Result->TermAsHtml; + return "Did you mean $item?"; + } + + if ($SearchSpace eq "Related") + { + my $item = $Result->TermAsHtml; + return "Also try: $item"; + } + + return "???"; +} + + +############################################################################## + +=item $Result->as_string + +Returns a textual representation of the C, which may be useful for +quick prototyping or debugging. + +=cut + + +## must create, for all spaces +sub as_string +{ + my $Result = shift; # self + my $ref = $Result->Data; + + my $txt = ""; + + for my $item (@{$ItemsBySpace{$Result->SearchSpace}}) + { + if (defined(my $val = $Result->$item)) { + $txt .= "$item: $val\n"; + } + } + return $txt; +} + +############################################################################## + +=item $Result->Data + +Returns a list of key/value pairs containing the fundamental data for the +result (those items marked with '*' in the table at the start of this +document). + + my %Data = $Result->Data; + +=cut + + +sub Data +{ + my $Result = shift; # self + my %Data; + + for my $item (@{$ItemsBySpace{$Result->SearchSpace}}) + { + $Data{$item} = $Result->$item; + } + return %Data; +} + + + +############################################################################## + +=item $Result->Url + +=item $Result->ClickUrl + +C returns the raw url of the item (web page, image, etc.), appropriate +for display to the user. + +C returns a url appropriate for the href attribute of a link. + +In some cases, the two return the same url. + +As with all Result-object methods which return a url of some sort, you can +provide a single argument such as C and receive a string such as + href="..." +appropriate to be used directly in html. 
For example, + + my $HREF = $Result->ClickUrl('href'); + print "click"; + +is preferable to + + my $url = $Result->ClickUrl; + print "click"; + +since the latter would break if C<$url> contains a singlequote. + +=cut + +sub Url +{ + my $Result = shift; # self + return _url($Result->{Url} || $Result->{ClickUrl}, @_); +} + +sub ClickUrl +{ + my $Result = shift; # self + return _url($Result->{ClickUrl} || $Result->{Url}, @_); +} + + + + + +############################################################################## + +=item $Result->Title([ I ]) + +=item $Result->TitleAsHtml + +C returns the raw title text associated with the result. If an +optional argument is provided and is true, the title text is returned as +html. + +C<TitleAsHtml> is the same as + + $Result->Title(1) + +=cut + +sub Title +{ + return _text_or_html(Title => @_); +} + +sub TitleAsHtml +{ + my $Result = shift; #self + return $Result->Title(1); +} + + + + +############################################################################## + +=item $Result->Link + +Returns a link made from the C<ClickUrl> and the C<Title>, with class +"yLink", e.g. + + <a class=yLink href='$URL'>$TITLE</a> + +=cut + +sub Link +{ + my $Result = shift; # self + + if (my $HREF = $Result->ClickUrl('href') + and + my $title = $Result->Title(1)) + { + return "<a class=yLink $HREF>$title</a>"; + } + else + { + return (); + } +} + + + +############################################################################## + +=item $Result->Summary([ I<as_html> ]) + +=item $Result->SummaryAsHtml + +Like C<Title> and C<TitleAsHtml>, but for the summary associated with the +result. + +=cut + +sub Summary +{ + return _text_or_html(Summary => @_); +} + +sub SummaryAsHtml +{ + my $Result = shift; #self + return $Result->Summary(1); +} + + +=item $Result->CacheUrl + +=item $Result->CacheSize + +(I<Appropriate for B<Doc> search results>) + +C<CacheUrl> returns the url of the document in the Yahoo! cache. 
+See the documentation for the C<Url> method for information on the +one-argument version of this method. + +C<CacheSize> returns the size (as a string like "22k"). + +=cut + +sub CacheUrl +{ + my $Result = shift; # self + return _url($Result->{Cache} ? $Result->{Cache}->{Url} : (), @_) +} + +sub CacheSize +{ + my $Result = shift; # self + return $Result->{Cache} ? $Result->{Cache}->{Size} : (); +} + + + +############################################################################## + +=item $Result->ModTimestamp + +(I<Appropriate for B<Doc> and B<News> search results>) + +The Unix timestamp of the Last-Modified time associated with the the url +when it was last checked by Yahoo!'s backend crawlers. + +=cut + +sub ModTimestamp +{ + my $Result = shift; # self + return defined($Result->{ModificationDate}) ? $Result->{ModificationDate}: (); +} + + +############################################################################## + +=item $Result->Width + +=item $Result->Height + +(I<Appropriate for B<Image> and B<Video> search results>) + +The width and height (in pixels) of the image or video. + +=cut + +## for image, video +sub Width +{ + my $Result = shift; # self + return defined($Result->{Width}) ? $Result->{Width} : (); +} + +sub Height +{ + my $Result = shift; # self + return defined($Result->{Height}) ? $Result->{Height} : (); +} + + + +############################################################################## + +=item $Result->ThumbUrl + +=item $Result->ThumbWidth + +=item $Result->ThumbHeight + +(I<Appropriate for B<Image>, B<Video>, and B<News> search results>) + +The url of a thumbnail image, and its width and height. + +(Note: few I<News> results have a thumbnail, but some do.) + +See the documentation for the C<Url> method for information on the +one-argument version of C<ThumbUrl>. + +=cut + +sub ThumbUrl +{ + my $Result = shift; # self + return _url($Result->{Thumbnail} ? 
$Result->{Thumbnail}->{Url} : (), @_); +} + +sub ThumbWidth +{ + my $Result = shift; # self + return $Result->{Thumbnail} ? $Result->{Thumbnail}->{Width} : (); +} + +sub ThumbHeight +{ + my $Result = shift; # self + return $Result->{Thumbnail} ? $Result->{Thumbnail}->{Height} : (); +} + + +############################################################################## + +=item $Result->ThumbImg + +(I<Appropriate for B<Image>, B<Video>, and B<News> search results>) + +Returns a C<E<lt>imgE<gt>> tag representing the thumbnail image, e.g. + + <img class=yImg src='$IMGURL' width=$WIDTH height=$HEIGHT> + +=cut + + +sub ThumbImg +{ + my $Result = shift; # self + + my $SRC = $Result->ThumbUrl('src'); + my $Width = $Result->ThumbWidth; + my $Height = $Result->ThumbHeight; + + if ($SRC) { + return "<img class=yImg $SRC width=$Width height=$Height>"; + } else { + return (); + } +} + + +############################################################################## + +=item $Result->ThumbLink + +(I<Appropriate for B<Image>, B<Video>, and B<News> search results>) + +Returns a link from the thumbnail to the C<ClickUrl> of the result, +e.g. + + <a class=yLink href='$CLICKURL'> + <img class=yImg src='$IMGURL' width=$WIDTH height=$HEIGHT> + </a> + +=cut + + +sub ThumbLink +{ + my $Result = shift; # self + my $HREF = $Result->ClickUrl('href'); + my $img = $Result->ThumbImg; + if ($HREF and $img) { + return "<a class=yLink $HREF>$img</a>"; + } else { + return (); + } +} + + + +############################################################################## + +=item $Result->HostUrl + +(I<Appropriate for B<Image> and B<Video> search results>) + +Returns the url of the web page containing a link to the image/video +item that the C<Result> represents. + +See the documentation for the C<Url> method for information on the +one-argument version of this method. 
+ +=cut + +sub HostUrl +{ + my $Result = shift; # self + return _url($Result->{RefererUrl}, @_); +} + +=cut + + + +########################################################################### + +=item $Result->Type + +(<Appropriate for B<Doc>, B<Image>, and B<Video> search results>) + +Returns a string representing the file type of the item to which +C<$Result-E<gt>Url> points. For C<Doc> searches, the MIME type (e.g. +"text/html") is returned. + +For other search spaces, here are the possible return values: + + Video: avi flash mpeg msmedia quicktime realmedia + Image: bmp gif jpg png. + +Yahoo! Search derives these Video/Image C<Type> value by actually +inspecting the file contents, and as such it is more reliable than looking +at the file extension. + +=cut + +sub Type +{ + my $Result = shift; #self + if (defined $Result->{MimeType}) { + return $Result->{MimeType}; + } elsif (defined $Result->{FileFormat}) { + return $Result->{FileFormat}; + } else { + return (); + } +} + + + +########################################################################### + +=item $Result->Copyright([ I<as_html> ]) + +(<Appropriate for B<Image> and B<Video> search results>) + +Returns any copyright notice associated with the result. If an optional +argument is provided and is true, the copyright text is returned as html. + +=cut + +sub Copyright +{ + return _text_or_html(Copyright => @_); +} + + + +########################################################################### + +=item $Result->Publisher([ I<as_html> ]) + +(<Appropriate for B<Image>, and B<Video> search results>) + +Returns any publisher information (as a string) associated with the result. +If an optional argument is provided and is true, the publisher information +is returned as html. 
+ +=cut + +sub Publisher +{ + return _text_or_html(Publisher => @_); +} + + + +########################################################################### + +=item $Result->Restrictions + +(<Appropriate for B<Image>, and B<Video> search results>) + +A (possibly zero-length) string containing zero or more of the following +space-separated words: + + noframe + noinline + +See Yahoo!'s web site (http://developer.yahoo.net/) for information on them. + +=cut + +sub Restrictions +{ + my $Result = shift; #self + if (not defined $Result->{Restrictions}) { + return ""; + } else { + return $Result->{Restrictions}; + } +} + + + +############################################################################## + +=item $Result->Bytes + +(I<Appropriate for B<Image>, and B<Video> search results>) + +The size of the image/video item, in bytes. + +=cut + +sub Bytes +{ + my $Result = shift; #self + + if ($Result->{FileSize}) { + return $Result->{FileSize}; + } else { + return (); + } +} + + + + +############################################################################## + +=item $Result->Channels + +(I<Appropriate for B<Video> search results>) + +Returns the number of channels in the audio, if known. +Examples are "1", "2", "4.1", "5.1", etc.... + +=cut + +sub Channels +{ + my $Result = shift; # self + if ($Result->{Channels}) { + return $Result->{Channels}; + } else { + return (); + } +} + + + +############################################################################## + +=item $Result->Seconds + +(I<Appropriate for B<Video> search results>) + +Returns the duration of the video clip, if known, in (possibly fractional) +seconds. 
+ +=cut + +sub Seconds +{ + my $Result = shift; #self + + if ($Result->{Duration}) { + return $Result->{Duration}; + } + return (); +} + + + +############################################################################## + +=item $Result->Duration + +(I<Appropriate for B<Video> search results>) + +Returns a string representing the duration of the video clip, if known, in +the form of "37 sec", "1:23", or "4:56:23", as appropriate. + +B<Bugs>: English-centric + +=cut + +sub Duration +{ + my $Result = shift; #self + + if (my $sec = $Result->Seconds) + { + if ($sec < 60) { + return sprintf "%d sec", $sec; + } + if ($sec < 3600) { + return sprintf "%d:%02d", int($sec/60), $sec%60; + } + my $hours = int($sec/3600); + $sec = $sec % 3600; + return sprintf "%d:%02d:%02d", $hours, int($sec/60), $sec%60; + } + + return (); +} + + + +############################################################################## + +=item $Result->Streaming + +(I<Appropriate for B<Video> search results>) + +Returns "1" if the multimedia is streaming, "0" if not. +If not known, an empty list is returned. + +=cut + +sub Streaming +{ + my $Result = shift; #self + + my $Stream = $Result->{Streaming} || ''; + if ($Stream eq 'true') { + return 1; + } elsif ($Stream eq 'false') { + return 0; + } else { + return (); + } +} + + + +############################################################################## + +=item $Result->SourceUrl + +(I<Appropriate for B<News> search results>) + +The main url of the news provider hosting the article that the C<Result> +refers to. + +See the documentation for the C<Url> method for information on the +one-argument version of this method. 
+ +=cut + +sub SourceUrl +{ + my $Result = shift; # self + return _url($Result->{NewsSourceUrl}, @_); +} + + + + +############################################################################## + +=item $Result->SourceName([ I<as_html> ]) + +=item $Result->SourceNameAsHtml + +(I<Appropriate for B<News> search results>) + +Similar to C<Title> and C<TitleAsHtml>, but the name of the organization +associated with the news article (and, by extension, with C<SourceUrl>). + +=cut + +sub SourceName +{ + return _text_or_html(NewsSource => @_); +} + +sub SourceNameAsHtml +{ + my $Result = shift; # self + return $Result->SourceName(1); +} + + + +############################################################################## + +=item $Result->Language + +(I<Appropriate for B<News> search results>) + +A code representing the language in which the article is written (e.g. "en" +for English, "ja" for Japanese, etc.). See the list of language codes at +C<perldoc> Yahoo::Search. + +=cut + +sub Language +{ + my $Result = shift; # self + return $Result->{Language}; +} + + +############################################################################## + +=item $Result->PublishTime + +=item $Result->PublishWhen + +(I<Appropriate for B<News> search results>) + +C<PublishTime> is the Unix time associated with the article, e.g. + + print "Published ", scalar(localtime $Result->PublishTime), "\n"; + +C<PublishWhen> gives a string along the lines of + + 3h 25m ago (if less than 12 hours ago) + Tue 9:47am (if less than 5 days ago) + Sat, Dec 25 (if less than 100 days ago) + Sat, Dec 25, 2004 (if >= 100 days ago) + +B<Bug>: C<PublishWhen> is English-centric. 

=pod

=cut

##
## Returns the raw publish timestamp stored with the result (the
## 'PublishDate' field), or an empty list if there is none.
##
sub PublishTime
{
    my $Result = shift; # self
    if (defined $Result->{PublishDate}) {
        return $Result->{PublishDate};
    } else {
        return ();
    }
}

##
## Returns a short English description of when the article was published,
## relative to now:
##     less than 12 hours ago  ->  "3h 25m ago"
##     less than 5 days ago    ->  "Tue 9:47am"
##     less than 100 days ago  ->  "Sat Dec 25"
##     otherwise               ->  "Sat Dec 25, 2004"
## Returns an empty list if the result has no publish time.
##
sub PublishWhen
{
    my $Result = shift; #self

    my $time = $Result->PublishTime;
    if (not $time) {
        return ();
    }

    ## localtime() reports the weekday as 0..6 (0 is Sunday) and the month
    ## as 0..11 (0 is January), so these tables are indexed from zero.
    my @DayName   = qw[Sun Mon Tue Wed Thu Fri Sat];
    my @MonthName = qw[Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec];

    my $delta = time - $time;
    if ($delta < 0) {
        ## a timestamp slightly in the future (clock skew) is treated as "now"
        $delta = 0;
    }

    if ($delta < 3600 * 12)
    {
        ## round to whole minutes first, so the minute part never shows "60"
        my $minutes = int($delta / 60 + 0.5);
        return sprintf("%dh %dm ago", int($minutes / 60), $minutes % 60);
    }

    if ($delta < 5 * 3600 * 24)
    {
        ## give day and time, on a 12-hour clock
        my ($m, $h, $wday) = (localtime $time)[1,2,6];
        my $ampm = "am";
        if ($h == 0) {
            $h = 12;
        } elsif ($h >= 12) {
            $ampm = "pm";
            if ($h > 12) {
                $h -= 12;
            }
        }
        return sprintf("%s %d:%02d%s", $DayName[$wday], $h, $m, $ampm);
    }

    my ($D, $M, $Y, $wday) = (localtime $time)[3..6];
    if ($delta < 100 * 3600 * 24)
    {
        return sprintf("%s %s %d", $DayName[$wday], $MonthName[$M], $D);
    }
    else
    {
        ## localtime() reports the year as an offset from 1900
        return sprintf("%s %s %d, %04d", $DayName[$wday], $MonthName[$M], $D, $Y+1900);
    }
}


##############################################################################

=item $Result->Address

=item $Result->City

=item $Result->State

=item $Result->Phone

(I<Appropriate for B<Local> search results>)

Location and Phone number for the business that the C<Result> refers to.

=cut

## for local -- each of these returns the stored field as-is (undef if absent)
sub Address
{
    my $Result = shift; # self
    return $Result->{Address};
}

sub City
{
    my $Result = shift; # self
    return $Result->{City};
}

sub State
{
    my $Result = shift; # self
    return $Result->{State};
}

sub Phone
{
    my $Result = shift; # self
    return $Result->{Phone};
}





##############################################################################

=item $Result->Miles

=item $Result->Kilometers

(I<Appropriate for B<Local> search results>)

The distance (in miles and kilometers) from the location used to make the
query to the location of this result.

+ +=cut + +sub Kilometers +{ + my $Result = shift; # self + return defined($Result->{Distance}) ? $Result->{Distance} * 1.609 : (); +} + +sub Miles +{ + my $Result = shift; # self + return defined($Result->{Distance}) ? $Result->{Distance} : (); +} + + + + + +############################################################################## + +=item $Result->Rating + +(I<Appropriate for B<Local> search results>) + +Returns the rating associated with the result, if there is one. If there is +a rating, it is from 1 (lowest) to 5 (highest) in 0.5-sized steps. + +=cut + +sub Rating +{ + my $Result = shift; # self + return defined($Result->{Rating}) ? $Result->{Rating} : (); +} + + + +############################################################################## + +=item $Result->MapUrl + +=item $Result->AllMapUrl + +(I<Appropriate for B<Local> search results>) + +C<MapUrl> is a url to a Yahoo! Maps map showing the business' location. + +C<AllMapUrl> is a url to a Yahoo! Maps map showing all the businesses +found in the same result-set that the current C<Result> was part of. + +See the documentation for the C<Url> method for information on the +one-argument versions of these methods. + +=cut + +sub MapUrl +{ + my $Result = shift; # self + return _url($Result->{MapUrl}, @_); +} + +sub AllMapUrl +{ + my $Result = shift; # self + return _url($Result->Response->MapUrl, @_); +} + + + +############################################################################## + +=item $Result->BusinessUrl + +=item $Result->BusinessClickUrl + +(I<Appropriate for B<Local> search results>) + +The business' home page, if available. C<BusinessUrl> is appropriate for +display, while C<BusinessClickUrl> is appropriate for the href of a link. + +See the documentation for the C<Url> method for information on the +one-argument versions of these methods. 
+ +=cut + +sub BusinessUrl +{ + my $Result = shift; # self + return _url($Result->{BusinessUrl}, @_); +} + +sub BusinessClickUrl +{ + my $Result = shift; # self + return _url($Result->{BusinessClickUrl} || $Result->{BusinessUrl}, @_); +} + + + +############################################################################## + +=item $Result->Term([ I<as_html> ]) + +=item $Result->TermAsHtml + +(I<Appropriate for B<Spell> and B<Related> search results>) + +C<Term> returns the term associated with the result. If an optional +argument is provided and is true, the title text is returned as html. + +C<TermAsHtml> is the same as + + $Result->Term(1) + +=cut + +sub Term +{ + _text_or_html(Term => @_); +} + +sub TermAsHtml +{ + my $Result = shift; #self + return $Result->Term(1); +} + + +############################################################################## + +=pod + +=back + +=head1 Copyright + +Copyright (C) 2005 Yahoo! Inc. + +=head1 Author + +Jeffrey Friedl (jfriedl@yahoo.com) + +$Id: Result.pm 3 2005-01-28 04:29:54Z jfriedl $ + + +=cut + + +1; diff --git a/Yahoo/Search/XML.pm b/Yahoo/Search/XML.pm new file mode 100644 index 0000000..7cfa6ae --- /dev/null +++ b/Yahoo/Search/XML.pm @@ -0,0 +1,198 @@ +package Yahoo::Search::XML; +use strict; + +=head1 NAME + +Yahoo::Search::XML -- simple routines for parsing XML from Yahoo! Search. + +=head1 DESCRIPTION + +The XML sent back from Yahoo! is fairly simple, and is guaranteed to be +well formed, so we really don't need much more than to make the data easily +available. I'd like to use XML::Simple, but it uses XML::Parser, which +suffers from crippling memory leaks (in one test, 36k was lost with each +parsing of a 7k xml file), so I've rolled my own simple version that might +be called, uh, XML::SuperDuperSimple. + +The end result is identical to what XML::Simple would produce, at least for +the XML the Yahoo! sends back. It may well be useful for other things that +use a similarly small subset of XML notation. 
This does not support CDATA, for example, because Yahoo! doesn't send it
back. (Comments are recognized and skipped.)

This package is also much faster than XML::Simple / XML::Parser, producing
the same output 41 times faster, in my tests. That's the benefit of not
having to handle everything, I guess.

=head1 AUTHOR

Jeffrey Friedl <jfriedl@yahoo.com>
Kyoto, Japan
Feb 2005

=cut

## Stack of the nodes currently open while parsing; the bottom element is a
## dummy root under which the document's single top-level element is stored.
my @stack;

##
## Process a start tag: push a fresh node for it, seeded with its
## attributes (if any) as the node's data.
##
sub Start
{
    my ($tag, %attr) = @_;

    my $node = {
                 Tag  => $tag,
                 Char => "",
               };

    if (%attr) {
        $node->{Data} = \%attr;
    }

    push @stack, $node;
}

##
## Process raw text: append it to the text of the innermost open node.
##
sub Char
{
    my ($str) = @_;
    $stack[-1]->{Char} .= $str;
}


##
## Process an end tag: pop the innermost node, reduce it to a single value
## (its attribute/child hash, else its text, else ""), and file that value
## in the parent node under this node's tag name.
##
sub End
{
    my ($tag) = @_;
    my $node = pop @stack;

    my $val;

    if ($node->{Data})
    {
        ## A node with attributes/children must not also carry real text;
        ## whitespace between its child tags is only layout, so is ignored.
        die "oops" if $node->{Char} =~ m/\S/;
        $val = $node->{Data};
    }
    elsif ($node->{Char} ne "")
    {
        $val = $node->{Char};
    }
    else
    {
        $val = "";
    }

    ##
    ## Shove this data ($val) into the previous node, named for this $tag.
    ## A repeated tag name turns the entry into an array of values. Presence
    ## is tested with exists(), so that a first value of "" or "0" is kept
    ## rather than silently overwritten by its sibling.
    ##
    my $Siblings = ($stack[-1]->{Data} ||= {});
    my $Name     = $node->{Tag};

    if (not exists $Siblings->{$Name}) {
        $Siblings->{$Name} = $val;
    } elsif (ref($Siblings->{$Name}) eq "ARRAY") {
        push @{ $Siblings->{$Name} }, $val;
    } else {
        $Siblings->{$Name} = [ $Siblings->{$Name}, $val ];
    }
}

my %EntityDecode =
(
  amp  => '&',
  lt   => '<',
  gt   => '>',
  apos => "'",
  quot => '"', #"
);

##
## Given the name of an entity (the text between '&' and ';'), return the
## character it stands for. Handles the five predefined entities and
## numeric character references (decimal "#65" and hexadecimal "#x41").
## Dies on anything else.
##
sub _entity($)
{
    my $name = shift;
    if (exists $EntityDecode{$name}) {
        return $EntityDecode{$name};
    } elsif ($name =~ m/^#(\d+)$/) {
        return chr($1);
    } elsif ($name =~ m/^#x([0-9a-fA-F]+)$/) {
        return chr(hex($1));
    } else {
        die "unknown entity &$name;";
    }
}

##
## Return the given text with all entity references decoded.
##
sub de_grok($)
{
    my $text = shift;
    $text =~ s/&([^;]+);/_entity($1)/gxe;
    return $text;
}

##
## Parse the given XML text and return the tree for its top-level element:
## a hash reference for an element with attributes or children, or a plain
## string for an element holding only text. Dies if the text can't be parsed.
##
sub Parse($)
{
    my $xml = shift;

    @stack = {};

    ## get rid of leading <?xml> tag
    $xml =~ m/\A <\?xml.*?> /xgc;

    ## if there was no <?xml> tag, start scanning from the beginning
    pos($xml) = 0 if not defined pos($xml);

    while (pos($xml) < length($xml))
    {
        ##
        ## Nab <open>, </close>, and <unary/> tags...
        ##
        if ($xml =~ m{\G
                      <(/?)         # $1 - true if an ending tag
                      (\w+)         # $2 - tag name
                      (\s[^>]*?)?   # $3 - attributes
                      (/?)          # $4 - true if a unary (self-closing) tag
                      >}xgc)
        {
            my ($IsEnd, $TagName, $Attribs, $IsImmediateEnd) = ($1, $2, $3, $4);

            if ($IsEnd) {
                End($TagName);
            } else {
                my %A;
                if ($Attribs)
                {
                    while ($Attribs =~ m/(\w+)=(?: "([^\"]*)" | '([^\']*)' )/xg) {
                        $A{$1} = de_grok(defined($3) ? $3 : $2);
                    }
                }
                Start($TagName, %A);
                if ($IsImmediateEnd) {
                    End($TagName);
                }
            }
        }
        elsif ($xml =~ m/\G<!--.*?-->/xsgc)
        {
            ## comment (possibly spanning lines) -- ignore
        }
        ##
        ## Nab raw text / entities
        ##
        elsif ($xml =~ m/\G ([^<>]+)/xgc)
        {
            Char(de_grok($1));
        }
        else
        {
            ## show a little of the text at the point where parsing stopped
            my ($str) = $xml =~ m/\G(.{1,40})/s;
            $str .= "..." if length($str) == 40;
            die "bad XML parse at \"$str\"";
        }
    }

    #use Data::Dumper; print Data::Dumper::Dumper(\@stack), "\n";

    ## only the dummy root may remain, holding exactly one top-level element
    die "oops" if @stack != 1;
    die "oops" if not $stack[0]->{Data};
    die "oops" if keys(%{ $stack[0]->{Data}} ) != 1;
    my ($tree) = values(%{$stack[0]->{Data}});
    return $tree;
}

1;

diff --git a/test.pl b/test.pl
new file mode 100755
index 0000000..6d45fab
--- /dev/null
+++ b/test.pl
@@ -0,0 +1,15 @@
#!/usr/local/bin/perl -w
use strict;
## By virtue of being named "test.pl", this program is automatically run
## via "make test".

use Yahoo::Search AppId => "Perl API install test",
                  Count => 1;

my @Results = Yahoo::Search->Results(Doc => 'Larry Wall');
if (@Results == 1 and $Results[0]->Url =~ m{^https?://}) {
    print "Yahoo::Search test passes\n";
}
else {
    die "Yahoo::Search test failed: $@\n";
}