Skip to content

Commit bd28c70

Browse files
committed
Update to new website structure
1 parent 9c4c1a6 commit bd28c70

File tree

1 file changed

+28 -10 lines changed

1 file changed

+28 -10 lines changed

scraper.pl

Lines changed: 28 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -114,23 +114,41 @@ sub process_page {
114114
my $root = get_root($uri);
115115
my @items = $root->find_by_attribute('class', 'inc-item');
116116
foreach my $item (@items) {
117-
my ($date_div, $type_div) = $item->find_by_attribute('class', 'inc-info')
118-
->content_list;
117+
my $date_div = $item->find_by_attribute('class', 'inc-date');
119118
my $datetime = get_db_datetime($date_div->as_text);
120119
remove_trailing(\$datetime);
121-
my $type = $type_div->as_text;
122-
remove_trailing(\$type);
123120
my $link = URI->new($base_uri->scheme.'://'.$base_uri->host.
124121
'/modules/incidents/'.
125-
$item->find_by_attribute('class', 'inc-detail-link')
126-
->find_by_tag_name('a')->attr('href'));
122+
$item->find_by_tag_name('a')->attr('href'));
127123
my $id = $link->query_param('filter[id]');
128124
my $district = $DISTRICT_IDS_HR->{$link->query_param('district_id')};
129-
my $details = $item->find_by_attribute('class', 'inc-content')->as_text;
130-
remove_trailing(\$details);
131-
my $summary = $item->find_by_tag_name('h3')->as_text;
125+
126+
my @divs = $item->find_by_tag_name('div');
127+
my $inc_content_div = $divs[3];
128+
my $summary = $inc_content_div->as_text;
132129
remove_trailing(\$summary);
133130

131+
my $date = $date_div->as_text;
132+
remove_trailing(\$date);
133+
$summary =~ s/${date}//ms;
134+
135+
my $detail_root = get_root($link);
136+
my $inc_info = $detail_root->find_by_attribute('class', 'inc-info');
137+
my $type = $inc_info->find_by_tag_name('h2')->as_text;
138+
remove_trailing(\$type);
139+
140+
my $inc_content = $detail_root->find_by_attribute('class', 'inc-content col-md-12 col-sm-12 col-xs-12');
141+
my @ps = $inc_content->find_by_tag_name('p');
142+
my $details = '';
143+
foreach my $p (@ps) {
144+
if ($p->find_by_tag_name('strong')->as_text eq 'Popis') {
145+
$details = $p->as_text;
146+
last;
147+
}
148+
}
149+
$details =~ s/^Popis: //ms;
150+
remove_trailing(\$details);
151+
134152
# Save.
135153
my $ret_ar = eval {
136154
$dt->execute('SELECT COUNT(*) FROM data WHERE ID = ?',
@@ -164,7 +182,7 @@ sub process_page {
164182
# Get next link.
165183
sub next_link {
166184
my ($uri, $root) = @_;
167-
my @pag_a = $root->find_by_attribute('class', 'paginator')
185+
my @pag_a = $root->find_by_attribute('class', 'pager')
168186
->find_by_tag_name('a');
169187
my $next_uri;
170188
foreach my $pag_a (@pag_a) {

0 commit comments

Comments
 (0)