Skip to content
Permalink
Browse files

Add support for parsing zipped reports from PATH

  • Loading branch information...
islander committed May 30, 2019
1 parent b25a093 commit 6797fe30b3fe0b9a7d4c1485814bae116e59f007
Showing with 90 additions and 7 deletions.
  1. +4 −3 README.md
  2. +86 −4 dmarcts-report-parser.pl
@@ -18,19 +18,19 @@ To install dependencies...

### on Debian:
```
apt-get install libmail-imapclient-perl libmime-tools-perl libxml-simple-perl \
apt-get install libfile-mimeinfo-perl libmail-imapclient-perl libmime-tools-perl libxml-simple-perl \
libclass-dbi-mysql-perl libio-socket-inet6-perl libio-socket-ip-perl libperlio-gzip-perl \
libmail-mbox-messageparser-perl unzip
```
### on Fedora (Fedora 23):
```
sudo dnf install perl-Mail-IMAPClient perl-MIME-tools perl-XML-Simple perl-DBI \
sudo dnf install perl-File-MimeInfo perl-Mail-IMAPClient perl-MIME-tools perl-XML-Simple perl-DBI \
perl-Socket6 perl-PerlIO-gzip perl-DBD-MySQL unzip
```
### on CentOS (CentOS 7):
```
yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
yum install perl-Mail-IMAPClient perl-MIME-tools perl-XML-Simple perl-DBI \
yum install perl-File-MimeInfo perl-Mail-IMAPClient perl-MIME-tools perl-XML-Simple perl-DBI \
perl-Socket6 perl-PerlIO-gzip perl-DBD-MySQL unzip perl-Mail-Mbox-MessageParser
```

@@ -104,6 +104,7 @@ One of the following source options must be provided:
# -m : Read reports from mbox file(s) provided in PATH.
# -e : Read reports from MIME email file(s) provided in PATH.
# -x : Read reports from xml file(s) provided in PATH.
# -z : Read reports from zip file(s) provided in PATH.
```

The following options are always allowed:
@@ -72,6 +72,7 @@
use Socket6;
use PerlIO::gzip;
use File::Basename ();
use File::MimeInfo;
use IO::Socket::SSL;
#use IO::Socket::SSL 'debug3';

@@ -153,8 +154,8 @@ sub show_usage {

# Get command line options.
my %options = ();
use constant { TS_IMAP => 0, TS_MESSAGE_FILE => 1, TS_XML_FILE => 2, TS_MBOX_FILE => 3 };
GetOptions( \%options, 'd', 'r', 'x', 'm', 'e', 'i', 'delete' );
use constant { TS_IMAP => 0, TS_MESSAGE_FILE => 1, TS_XML_FILE => 2, TS_MBOX_FILE => 3, TS_ZIP_FILE => 4 };
GetOptions( \%options, 'd', 'r', 'x', 'm', 'e', 'i', 'z', 'delete' );

# Evaluate command line options
my $source_options = 0;
@@ -180,12 +181,17 @@ sub show_usage {
$reports_source = TS_IMAP;
}

if (exists $options{z}) {
$source_options++;
$reports_source = TS_ZIP_FILE;
}

if ($source_options > 1) {
show_usage();
die "Only one source option can be used (-i, -x, -m or -e).\n";
die "Only one source option can be used (-i, -x, -m, -e or -z).\n";
} elsif ($source_options == 0) {
show_usage();
die "Please provide a source option (-i, -x, -m or -e).\n";
die "Please provide a source option (-i, -x, -m, -e or -z).\n";
}

if ($ARGV[0]) {
@@ -335,6 +341,14 @@ sub show_usage {
}
} while(defined($filecontent));

} elsif ($reports_source == TS_ZIP_FILE) {
# filecontent is zip file
$filecontent = getXMLFromZip($f);
if (processXML(TS_ZIP_FILE, $filecontent, "xml file <$f>") & 2) {
# processXML return a value with delete bit enabled
unlink($f);
}
$counts++;
} elsif (open FILE, $f) {

$filecontent = join("", <FILE>);
@@ -390,6 +404,7 @@ sub processXML {

my $xml; #TS_XML_FILE or TS_MESSAGE_FILE
if ($type == TS_MESSAGE_FILE) {$xml = getXMLFromMessage($filecontent);}
elsif ($type == TS_ZIP_FILE) {$xml = $filecontent;}
else {$xml = getXMLFromXMLString($filecontent);}

# If !$xml, the file/mail is probably not a DMARC report.
@@ -568,6 +583,73 @@ sub getXMLFromMessage {
return $xml;
}

################################################################################

# Extract and parse the XML report stored in a ZIP or GZIP archive.
#
# Parameters:
#   $_[0] - path of the archive file to read.
#
# Returns:
#   Whatever getXMLFromXMLString() returns for the extracted content, or
#   undef when the filename is undefined, the archive could not be opened or
#   extracted, or the extracted content is not accepted as XML.
#
# Diagnostics are printed to STDOUT (without a trailing newline, so the
# caller can append its own context). With $debug set, the detected MIME type
# and the archive kind are printed as well.
sub getXMLFromZip {
    my $filename = $_[0];
    my $xml;

    # Validate the argument before it is used. The original code checked
    # definedness only after $filename had already been handed to mimetype()
    # and open(), so the check could never protect anything.
    if (!defined($filename)) {
        print "Could not find an <>! ";
        return $xml;
    }

    # mimetype() may not be able to classify the file; normalise to an empty
    # string so the comparisons below do not warn about undef.
    my $mtype = mimetype($filename);
    $mtype = "" unless defined($mtype);
    my $mtype_lc = lc $mtype;

    if ($debug) {
        # Report only files that can be opened, and release the probe handle
        # immediately (the original leaked a bareword FILE handle here).
        if (open(my $probe, "<", $filename)) {
            close($probe);
            print "Filename: $filename, MimeType: $mtype\n";
        }
    }

    my $isgzip = 0;
    if ($mtype_lc eq "application/zip") {
        print "This is a ZIP file \n" if $debug;
    } elsif ($mtype_lc eq "application/gzip" or $mtype_lc eq "application/x-gzip") {
        print "This is a GZIP file \n" if $debug;
        $isgzip = 1;
    } else {
        # Not recognised as an archive. Extraction with unzip is still
        # attempted below, exactly as for a ZIP file.
        print "This is not an archive file \n" if $debug;
    }

    # $unzip stays empty on success; otherwise it names the step that failed
    # and is interpolated into the error message.
    my $unzip = "";
    my $xml_fh;
    if ($isgzip) {
        open($xml_fh, "<:gzip", $filename)
            or $unzip = "ungzip";
    } else {
        # List-form pipe open: the filename is passed to unzip as a single
        # argument and never goes through a shell, so spaces and shell
        # metacharacters in the path are harmless.
        if (open($xml_fh, "-|", "unzip", "-p", $filename)) {
            # unzip -p produces no output when it cannot extract anything, so
            # an immediately exhausted pipe is treated as a failed extraction.
            if (eof($xml_fh)) {
                $unzip = "unzip";
                close($xml_fh);
            }
        } else {
            $unzip = "unzip";
        }
    }

    # Read XML if possible (if open)
    if ($unzip eq "") {
        $xml = getXMLFromXMLString(join("", <$xml_fh>));
        if (!$xml) {
            print "The XML found in ZIP file (<$filename>) does not seem to be valid XML! ";
        }
        close($xml_fh);
    } else {
        print "Failed to $unzip ZIP file (<$filename>)! ";
    }

    return $xml;
}

################################################################################

0 comments on commit 6797fe3

Please sign in to comment.
You can’t perform that action at this time.