Skip to content

Commit

Permalink
add more module
Browse files Browse the repository at this point in the history
  • Loading branch information
xiaonan song committed May 27, 2013
1 parent db93e3a commit 3d2669f
Show file tree
Hide file tree
Showing 4 changed files with 210 additions and 1 deletion.
4 changes: 3 additions & 1 deletion lib/Crawler/Store/101.pm
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ sub parse {
$h{title} = $1;
}

if ( $content =~ m{<span class="yen">&yen;</span>(.+?)</b>} ) {
if ( $content =~ m{id="promo_price">&yen;(.+?)<} ) {
$h{price} = $1;
} elsif ( $content =~ m{<span class="yen">&yen;</span>(.+?)</b>} ) {
$h{price} = $1;
}

Expand Down
68 changes: 68 additions & 0 deletions lib/Crawler/Store/120.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package Crawler::Store::120;
use base qw(Crawler::Store);
use HTML::TreeBuilder;
use Debug;
use Time::HiRes qw(time);

# Constructor: builds the object through the parent class constructor
# (called without arguments) and then runs _init on it before returning it.
sub new {
    my ($class) = @_;

    my $store = $class->SUPER::new();
    $store->_init();

    return $store;
}

# Stores the site id on the object: the digits that follow "Store::" in this
# package's name. site_id is set to undef when the package name has none.
sub _init {
    my ($self) = @_;

    my @captured = __PACKAGE__ =~ m{Store::(\d+)};
    ($self->{site_id}) = @captured;
}

# Parse one fetched page and pass what was found back to the crawler.
#
# Arguments:
#   $url     - address the page came from; decides which branch runs
#   $content - HTML of the page; the sub returns at once when it is false
#
# Branches:
#   * URL contains "keyword=": finds an element with class "store_list",
#     reads its productSaleId attribute and queues the matching product
#     page through $self->add_url.
#   * URL contains "/product/<sku>": collects sku, url, title and price and
#     reports them through $self->add_item. A page without a title or price
#     element still produces an item; the missing field is left unset.
#
# Both branches log their elapsed time with debug(). URLs matching neither
# pattern are ignored.
sub parse {
    my $self    = shift;
    my $url     = shift;
    my $content = shift;
    return unless $content;

    if ( $url =~ m{keyword=} ) {
        # need to find url
        my $time = time;
        my $tree = HTML::TreeBuilder->new_from_content($content);
        if ( my $div = $tree->look_down(class => 'store_list') ) {
            if ( my $sale_id = $div->attr("productSaleId") ) {
                $self->add_url({site_id => $self->{site_id}, url => "http://www.winxuan.com/product/$sale_id"});
            }
        }
        $tree->delete;
        debug("find url cost: " . (time - $time));
    } elsif ( $url =~ m{/product/(.+)} ) {
        # need to find item info
        my $sku  = $1;    # copy immediately so no later match can clobber $1
        my $time = time;
        my %h    = ( sku => $sku, url => $url );

        my $sku_tree = HTML::TreeBuilder->new_from_content($content);

        if ( my $h1 = $sku_tree->look_down(_tag => 'h1') ) {
            $h{title} = $h1->as_trimmed_text;
        }

        if ( my $ul = $sku_tree->look_down(_tag => 'ul', class => 'price_info') ) {
            # The <b class="fb"> element may be absent; calling a method on
            # the undef result would die before the tree is released below.
            if ( my $price_node = $ul->look_down(_tag => 'b', class => 'fb') ) {
                $h{price} = $price_node->as_trimmed_text;
            }
        }
        $sku_tree->delete;

        $h{site_id} = $self->{site_id};
        $h{id}      = $h{sku} . "-" . $h{site_id};

        # Keep only digits, commas and dots; skipped when no price was found.
        $h{price} =~ s{[^\d,.]}{}g if defined $h{price};

        $self->add_item(\%h);
        debug("parse item cost: " . (time - $time));
    }
}

1;



68 changes: 68 additions & 0 deletions lib/Crawler/Store/121.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package Crawler::Store::121;
use base qw(Crawler::Store);
use HTML::TreeBuilder;
use Debug;
use Time::HiRes qw(time);

# Constructor: obtains the object from the parent class constructor (called
# with no arguments), lets _init fill it in, and returns it.
sub new {
    my ($class) = @_;

    my $instance = $class->SUPER::new();
    $instance->_init();

    return $instance;
}

# Derives site_id from the digits after "Store::" in this package's name and
# stores it on the object; site_id becomes undef if the name has no digits.
sub _init {
    my ($self) = @_;

    my @digits = __PACKAGE__ =~ m{Store::(\d+)};
    ($self->{site_id}) = @digits;
}

# Parse one fetched page and pass what was found back to the crawler.
#
# Arguments:
#   $url     - address the page came from; decides which branch runs
#   $content - HTML of the page; the sub returns at once when it is false
#
# Branches:
#   * URL matches "k_....html": finds an element with class "books-list",
#     takes the href of a link inside it and queues that through
#     $self->add_url. Nothing is queued when there is no link or no href.
#   * URL matches "/<digits>.html": collects sku, url, title and price and
#     reports them through $self->add_item. A page without a title or price
#     element still produces an item; the missing field is left unset.
#
# Both branches log their elapsed time with debug(). URLs matching neither
# pattern are ignored.
sub parse {
    my $self    = shift;
    my $url     = shift;
    my $content = shift;
    return unless $content;

    if ( $url =~ m{k_.+?\.html} ) {
        # need to find url
        my $time = time;
        my $tree = HTML::TreeBuilder->new_from_content($content);
        if ( my $div = $tree->look_down(class => 'books-list') ) {
            # The list may contain no <a>; calling attr() on the undef result
            # would die before the tree is released below.
            my $anchor = $div->look_down(_tag => 'a');
            my $href   = $anchor ? $anchor->attr("href") : undef;
            if ($href) {
                $self->add_url({site_id => $self->{site_id}, url => $href});
            }
        }
        $tree->delete;
        debug("find url cost: " . (time - $time));
    } elsif ( $url =~ m{/(\d+)\.html} ) {
        # need to find item info
        my $sku  = $1;    # copy immediately so no later match can clobber $1
        my $time = time;
        my %h    = ( sku => $sku, url => $url );

        my $sku_tree = HTML::TreeBuilder->new_from_content($content);

        if ( my $h1 = $sku_tree->look_down(_tag => 'h1', class => 'detail-title') ) {
            $h{title} = $h1->as_trimmed_text;
        }

        if ( my $price_node = $sku_tree->look_down('id', 'money_xsj') ) {
            $h{price} = $price_node->as_trimmed_text;
        }
        $sku_tree->delete;

        $h{site_id} = $self->{site_id};
        $h{id}      = $h{sku} . "-" . $h{site_id};

        # Keep only digits, commas and dots; skipped when no price was found.
        $h{price} =~ s{[^\d,.]}{}g if defined $h{price};

        $self->add_item(\%h);
        debug("parse item cost: " . (time - $time));
    }
}

1;



71 changes: 71 additions & 0 deletions lib/Crawler/Store/122.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
package Crawler::Store::122;
use base qw(Crawler::Store);
use HTML::TreeBuilder;
use Debug;
use Time::HiRes qw(time);

# Constructor: creates the object with the parent class constructor (no
# arguments are forwarded), applies _init to it, and returns the result.
sub new {
    my ($class) = @_;

    my $object = $class->SUPER::new();
    $object->_init();

    return $object;
}

# Sets site_id on the object to the number embedded after "Store::" in this
# package's name; site_id is undef when the name carries no such number.
sub _init {
    my ($self) = @_;

    my @matched = __PACKAGE__ =~ m{Store::(\d+)};
    ($self->{site_id}) = @matched;
}

# Parse one fetched page and pass what was found back to the crawler.
#
# Arguments:
#   $url     - address the page came from; decides which branch runs
#   $content - HTML of the page; the sub returns at once when it is false
#
# Branches:
#   * URL contains "search.aspx": walks class "mbd" -> ul -> li -> a and
#     queues the link's href through $self->add_url. Nothing is queued when
#     any step of that chain, or the href itself, is missing.
#   * URL matches "wl.cn/<digits>": collects sku, url, title (from an h2)
#     and price (from the element with class "lh wl") and reports them
#     through $self->add_item. A page without a title or price element
#     still produces an item; the missing field is left unset.
#
# Both branches log their elapsed time with debug(). URLs matching neither
# pattern are ignored.
sub parse {
    my $self    = shift;
    my $url     = shift;
    my $content = shift;
    return unless $content;

    if ( $url =~ m{search\.aspx} ) {
        # need to find url
        my $time = time;
        my $tree = HTML::TreeBuilder->new_from_content($content);

        # Each step is attempted only if the previous one found something,
        # so a missing <a> can no longer trigger a method call on undef
        # (which used to die before the tree was released below).
        my $div    = $tree->look_down(class => 'mbd');
        my $ul     = $div    && $div->look_down(_tag => 'ul');
        my $li     = $ul     && $ul->look_down(_tag => 'li');
        my $anchor = $li     && $li->look_down(_tag => 'a');
        my $href   = $anchor && $anchor->attr('href');
        if ($href) {
            $self->add_url({site_id => $self->{site_id}, url => $href});
        }

        $tree->delete;
        debug("find url cost: " . (time - $time));
    } elsif ( $url =~ m{wl\.cn/(\d+)} ) {
        # need to find item info
        my $sku  = $1;    # copy immediately so no later match can clobber $1
        my $time = time;
        my %h    = ( sku => $sku, url => $url );

        my $sku_tree = HTML::TreeBuilder->new_from_content($content);

        if ( my $heading = $sku_tree->look_down(_tag => 'h2') ) {
            $h{title} = $heading->as_trimmed_text;
        }

        if ( my $price_node = $sku_tree->look_down('class', 'lh wl') ) {
            $h{price} = $price_node->as_trimmed_text;
        }
        $sku_tree->delete;

        $h{site_id} = $self->{site_id};
        $h{id}      = $h{sku} . "-" . $h{site_id};

        # Keep only digits, commas and dots; skipped when no price was found.
        $h{price} =~ s{[^\d,.]}{}g if defined $h{price};

        $self->add_item(\%h);
        debug("parse item cost: " . (time - $time));
    }
}

1;


0 comments on commit 3d2669f

Please sign in to comment.