Skip to content

Commit

Permalink
Update to new website structure
Browse files: browse the repository at this point in the history
  • Loading branch information
katkad committed Oct 14, 2015
1 parent 9c4c1a6 commit bd28c70
Showing 1 changed file with 28 additions and 10 deletions.
38 changes: 28 additions & 10 deletions scraper.pl
Original file line number | Original file line | Diff line number | Diff line change
Expand Up @@ -114,23 +114,41 @@ sub process_page {
my $root = get_root($uri); my $root = get_root($uri);
my @items = $root->find_by_attribute('class', 'inc-item'); my @items = $root->find_by_attribute('class', 'inc-item');
foreach my $item (@items) { foreach my $item (@items) {
my ($date_div, $type_div) = $item->find_by_attribute('class', 'inc-info') my $date_div = $item->find_by_attribute('class', 'inc-date');
->content_list;
my $datetime = get_db_datetime($date_div->as_text); my $datetime = get_db_datetime($date_div->as_text);
remove_trailing(\$datetime); remove_trailing(\$datetime);
my $type = $type_div->as_text;
remove_trailing(\$type);
my $link = URI->new($base_uri->scheme.'://'.$base_uri->host. my $link = URI->new($base_uri->scheme.'://'.$base_uri->host.
'/modules/incidents/'. '/modules/incidents/'.
$item->find_by_attribute('class', 'inc-detail-link') $item->find_by_tag_name('a')->attr('href'));
->find_by_tag_name('a')->attr('href'));
my $id = $link->query_param('filter[id]'); my $id = $link->query_param('filter[id]');
my $district = $DISTRICT_IDS_HR->{$link->query_param('district_id')}; my $district = $DISTRICT_IDS_HR->{$link->query_param('district_id')};
my $details = $item->find_by_attribute('class', 'inc-content')->as_text;
remove_trailing(\$details); my @divs = $item->find_by_tag_name('div');
my $summary = $item->find_by_tag_name('h3')->as_text; my $inc_content_div = $divs[3];
my $summary = $inc_content_div->as_text;
remove_trailing(\$summary); remove_trailing(\$summary);


my $date = $date_div->as_text;
remove_trailing(\$date);
$summary =~ s/${date}//ms;

my $detail_root = get_root($link);
my $inc_info = $detail_root->find_by_attribute('class', 'inc-info');
my $type = $inc_info->find_by_tag_name('h2')->as_text;
remove_trailing(\$type);

my $inc_content = $detail_root->find_by_attribute('class', 'inc-content col-md-12 col-sm-12 col-xs-12');
my @ps = $inc_content->find_by_tag_name('p');
my $details = '';
foreach my $p (@ps) {
if ($p->find_by_tag_name('strong')->as_text eq 'Popis') {
$details = $p->as_text;
last;
}
}
$details =~ s/^Popis: //ms;
remove_trailing(\$details);

# Save. # Save.
my $ret_ar = eval { my $ret_ar = eval {
$dt->execute('SELECT COUNT(*) FROM data WHERE ID = ?', $dt->execute('SELECT COUNT(*) FROM data WHERE ID = ?',
Expand Down Expand Up @@ -164,7 +182,7 @@ sub process_page {
# Get next link. # Get next link.
sub next_link { sub next_link {
my ($uri, $root) = @_; my ($uri, $root) = @_;
my @pag_a = $root->find_by_attribute('class', 'paginator') my @pag_a = $root->find_by_attribute('class', 'pager')
->find_by_tag_name('a'); ->find_by_tag_name('a');
my $next_uri; my $next_uri;
foreach my $pag_a (@pag_a) { foreach my $pag_a (@pag_a) {
Expand Down

0 comments on commit bd28c70

Please sign in to comment.