Find file
Fetching contributors…
Cannot retrieve contributors at this time
executable file 136 lines (121 sloc) 4.99 KB
#!/usr/bin/perl
use strict;
#use warnings;
use Tratten::Monitored;
use Tratten::Throttle;
use Tratten::Committee;
# Output file: the generated wiki table is written to out.txt in the current
# directory.  The package-level bareword handle OUT is kept because the rest
# of the script prints to it by that name.  The open is checked so that an
# unwritable directory is reported immediately instead of silently
# discarding every later print.
open(OUT, '>', 'out.txt') or die "Cannot open out.txt for writing: $!\n";
binmode(OUT, ':utf8') or die "Cannot set :utf8 layer on out.txt: $!\n";

# Wiki-markup table header (two lines: table attributes, then column titles)
# and the matching table footer.
our $table_header = "{| border='1' cellspacing='0' class='wikitable sortable'\n"
    . "! tratten page (oeil) !! forecasts !! stage reached !! legal basis !! stage !! name !! committee dossier !! rapporteur !! COM number !! monitored\n";
our $table_footer = "|}\n";

# stageId values submitted to the search form, one series of requests each.
our @stageId = (100, 210, 235, 290, 300, 400, 500, 600, 610, 640, 700, 710, 720);

# State shared between the main loop and get_page.
our $stageId;           # stage id currently being fetched
our $next_index;        # startIndex to send with the next request
our $number_results;    # result count scraped from the last fetched page
our $stage_reached;     # stage description scraped from the last fetched page
# Fetch one page (pageSize=50) of the OEIL "FindByStage" search with curl.
#
# Inputs (globals):  $stageId, $next_index - sent as stageId / startIndex.
# Outputs (globals): $number_results - number scraped from the
#                        <span class="number_results"> element, if present;
#                    $stage_reached  - the third line after that element,
#                        with surrounding whitespace removed;
#                    $next_index     - always advanced by 50.
# Returns: the response lines following the stage description line, or an
#          empty list when the result-count element was not found.
# Dies:    when curl exits with a non-zero wait status.
sub get_page {
    # Presumably rate-limits requests to this host - see Tratten::Throttle.
    Tratten::Throttle::sync("http://www.europarl.europa.eu/");
    my $form = "-d 'xpath=/oeil/search/procstage/stage' -d 'scope=stage' -d 'countEStat=true' -d 'startIndex=$next_index' -d 'stageId=$stageId' -d 'pageSize=50'";
    my @body = `curl -# $form --url http://www.europarl.europa.eu/oeil/FindByStage.do`;
    if ($?) {
        die sprintf("curl failed (wait status %d) for stageId=%s, startIndex=%s\n",
            $?, $stageId, $next_index);
    }
    while (@body) {
        # A lexical is used so the caller's $_ is not overwritten.
        my $line = shift @body;
        next unless $line =~ /<span class="number_results">(\d+)<\/span>/;
        $number_results = $1;
        # Skip two lines; the one after them carries the stage description.
        shift @body;
        shift @body;
        $stage_reached = shift @body;
        if (defined $stage_reached) {
            # Trim both ends.  The earlier single substitution captured
            # greedily, so trailing blanks or a "\r" survived the trim.
            $stage_reached =~ s/^\s+//;
            $stage_reached =~ s/\s+$//;
        }
        last;
    }
    $next_index += 50;
    return @body;
}
# Parser state shared by next_bare and parse_entries.
our @data;          # response lines of the current page not yet consumed
our @entries;       # one hash ref per procedure row collected so far
our $counter = 0;   # number of rows collected

# Consume lines from @data until one is found whose first non-blank character
# is a word character, and return that line UTF-8 decoded with surrounding
# whitespace removed.  Lines examined before it are discarded.
#
# Returns nothing (undef in scalar context) when @data is exhausted without
# a match; previously the sub fell off the end of the loop, whose value Perl
# leaves unspecified.
#
# NOTE: the consumed line is deliberately left in the global $_, exactly as
# before, because parse_entries keeps matching against $_ after calling this.
sub next_bare {
    while (@data) {
        $_ = shift @data;
        utf8::decode($_);
        return $1 if /^\s*(\w.*?)\s*$/;
    }
    return;
}
# Build the "forecast" cell for a monitored dossier.
# Returns "No forecasts" when the dossier has no forecast list or an empty
# one; otherwise the first forecast's date and activity, plus a note on how
# many further forecasts exist.  A dd/dd/dddd date is reordered to
# dddd-dd-dd (presumably DD/MM/YYYY to ISO - confirm against the data).
sub _forecast_summary {
    my ($doss) = @_;
    my $forecasts = $doss->{forecasts};
    # An empty list used to produce a blank " " cell; treat it as no forecast.
    return "No forecasts" unless $forecasts && @{$forecasts};
    my $first = $forecasts->[0];
    my $date = $first->{date};
    if ($date =~ m#^(\d\d)/(\d\d)/(\d\d\d\d)$#) { $date = "$3-$2-$1"; }
    my $summary = $date . " " . $first->{activity};
    my $num_more = @{$forecasts} - 1;
    if ($num_more == 1) { $summary .= " (+ 1 more forecast)" }
    if ($num_more > 1) { $summary .= " (+ $num_more more forecasts)" }
    return $summary;
}

# Turn "TFEU nnn", "TEU nnn" and "EP nnn" references in a legal-basis string
# into wiki links (leading zeros are dropped from the link target only).
sub _link_legal_basis {
    my ($basis) = @_;
    $basis =~ s/(TFEU\s+0*(\d+)[-0-9a-z]*)/[[TFEU#Article_$2 | $1]]/g;
    $basis =~ s/(TEU\s+0*(\d+)[-0-9a-z]*)/[[TEU#Article_$2 | $1]]/g;
    $basis =~ s/(EP\s+0*(\d+)[-0-9a-z]*)/[[RoP#Rule_$2 | $1]]/g;
    return $basis;
}

# Consume @data and append one hash ref per procedure to @entries.
#
# An entry starts at a line containing a file.jsp link with class
# "com_acronym" and runs until a line containing "procedure number".
# Fields scraped from the page: url, refnum, stage, name, committee,
# rapporteur, com.  stage_reached is copied from the global $stage_reached.
# When the reference number is present in %Tratten::Monitored::refnum, the
# forecast, legal_basis, committee_dossier and monitored cells are filled
# from the monitored dossier.  $counter is incremented per entry.
sub parse_entries {
    while (@data) {
        my $line = shift @data;
        utf8::decode($line);
        next unless $line =~ /(file\.jsp\?id=[0-9]+)" class="com_acronym">([^<]+)<\/a>/;

        my $entry = {
            stage_reached     => $stage_reached,
            url               => "http://www.europarl.europa.eu/oeil/$1",
            refnum            => $2,
            forecast          => "Unknown",
            legal_basis       => "",
            committee_dossier => "",
        };

        while (@data) {
            # $_ is used on purpose here: next_bare() leaves the line it
            # consumed in $_, and the tests that follow such a call have
            # always run against that line.
            # NOTE(review): confirm whether that carry-over is intended.
            $_ = shift @data;
            # Decode like every other scraped line, so captures printed to
            # the :utf8 output handle are not encoded twice.
            utf8::decode($_);
            last if /procedure number/;
            $entry->{stage} = $1 if /">[^<,]+(?:, ([^<]+))?<\/div>/;
            $entry->{name} = next_bare() if /starts with the title/;
            $entry->{committee} = next_bare() if /Display the committee responsible/;
            $entry->{rapporteur} = next_bare() if /Display the reporter or co-reporter/;
            $entry->{com} = $1 if /(COM\(\d+\)\d+)/;
        }

        my $monitor = $Tratten::Monitored::refnum{ $entry->{refnum} };
        if ($monitor) {
            my $doss = Tratten::Monitored::dossier($monitor);
            $entry->{forecast} = _forecast_summary($doss);
            $entry->{legal_basis} = _link_legal_basis($doss->{legal_basis});
            if ($doss->{committee_dossier} =~ /(([A-Z]+)\/\d+\/\d+)/) {
                $entry->{committee_dossier} = "[http://www.europarl.europa.eu/activities/committees/workingDocsCom/comparlDossier.do?dossier=$1&body=$2&language=EN $1]";
            }
            elsif (not $entry->{committee}) {
                # No committee on the page: take it from the dossier's agent
                # marked "(responsible)" (the last such agent wins).
                for my $agent (@{ $doss->{agents} }) {
                    if ($agent->{committee} =~ /^\s*(.+?)\s+\(responsible\)\s*$/) { $entry->{committee} = $1; }
                }
            }
        }

        # Without a dossier link, fall back to the committee abbreviation as
        # a wiki link, or to the plain committee name.
        unless ($entry->{committee_dossier}) {
            my $committee_abbr = Tratten::Committee::abbreviate($entry->{committee});
            $entry->{committee_dossier} = $committee_abbr ? "[[$committee_abbr]]" : $entry->{committee};
        }

        $entry->{com} =~ s#(COM\((\d+)\)(\d+))#[http://eur-lex.europa.eu/LexUriServ/LexUriServ.do?uri=COM:$2:$3:FIN:EN:HTML $1]#;

        # Only dereference $monitor when there is one.
        my $log = $monitor ? $monitor->{log} : undef;
        $entry->{monitored} = (defined $log && $log =~ m{/([^/]+)\.html$}) ? ("[" . $log . " $1]") : "";

        $counter += 1;
        push @entries, $entry;
    }
}
# Sort @entries in place by their forecast string and print each one to OUT
# as a wiki-table row: a "|-" separator line followed by the cells joined
# with " || ".
sub write_entries {
    # Cells that follow the leading reference/link cell, in column order.
    my @columns = qw(
        forecast stage_reached legal_basis stage name
        committee_dossier rapporteur com monitored
    );
    @entries = sort { $a->{forecast} cmp $b->{forecast} } @entries;
    foreach my $row (@entries) {
        my $lead_cell = "[[" . $row->{refnum} . "]] ([" . $row->{url} . " oeil])";
        my $cells = join(" || ", $lead_cell, @{$row}{@columns});
        print OUT "|-\n";
        print OUT "| " . $cells . "\n";
    }
}
# Main script: for every stage id, page through the search results, parse
# each page into @entries, then write the sorted table and a summary line.
use POSIX ();

print "Getting oeil-pages...\n";
print OUT $table_header;
while (@stageId) {
    $stageId = shift @stageId;
    # Paging restarts for each stage.  $number_results starts at 1 so the
    # first request is always made; get_page then replaces it with the count
    # scraped from the page and advances $next_index.
    $next_index = 1;
    $number_results = 1;
    while ($next_index <= $number_results) {
        @data = get_page();
        parse_entries();
    }
}
write_entries();
print OUT $table_footer;

# Same UTC YYYY-MM-DD stamp that `date -u +%F` produced, without spawning an
# external process.
my $date = POSIX::strftime("%Y-%m-%d", gmtime());
my $dossiers_monitored = keys %Tratten::Monitored::refnum;
print OUT "\nRetrieved $counter rows of data at $date. Number of dossiers monitored: $dossiers_monitored.\n";

# Buffered write errors (for example a full disk) only surface at close, so
# check it before reporting success.
close(OUT) or die "Error while writing out.txt: $!\n";
print "Done.\n";