Skip to content

Commit

Permalink
add a script to convert apache combined log files to tsunami XML conf…
Browse files Browse the repository at this point in the history
…ig file

SVN Revision: 424
  • Loading branch information
nniclausse committed Oct 19, 2004
1 parent 097b48e commit 75eb47d
Show file tree
Hide file tree
Showing 2 changed files with 287 additions and 8 deletions.
19 changes: 11 additions & 8 deletions Makefile
Expand Up @@ -12,7 +12,7 @@ else
ifeq ($(TYPE),test)
OPT:=+export_all
else
OPT =
OPT =
endif
endif
INC = ./include
Expand Down Expand Up @@ -56,6 +56,8 @@ DTD = idx-tsunami-1.0.dtd
USERMANUAL = doc/user_manual.html doc/IDXDOC.css
USERMANUAL_IMG = $(wildcard doc/images/*.png)
USERMANUAL_SRC = doc/user_manual.tex
PERL_SCRIPTS_SRC = $(wildcard $(ESRC)/*.pl.src)
PERL_SCRIPTS = $(notdir $(basename $(PERL_SCRIPTS_SRC)))

TARGET = $(addsuffix .beam, $(basename \
$(addprefix $(EBIN)/, $(notdir $(SRC)))))
Expand Down Expand Up @@ -113,11 +115,11 @@ deb:
clean:
-cd priv && rm -f $(shell ls priv | grep -v builder\.erl) && cd ..
-rm -f $(TARGET) $(TMP) $(BUILD_OPTIONS_FILE) builder.beam
-rm -f $(TGT_APPFILES) idx-tsunami.sh analyse_msg.pl
-rm -f $(TGT_APPFILES) idx-tsunami.sh $(PERL_SCRIPTS)
-rm -f ebin/*.beam
# -make -C doc clean

install: doc boot idx-tsunami.sh analyse_msg.pl install_recorder install_controller $(CONFFILE)
install: doc boot idx-tsunami.sh $(PERL_SCRIPTS) install_recorder install_controller $(CONFFILE)
-rm -f $(TMP)

install -d $(TARGETDIR)/priv
Expand All @@ -144,7 +146,7 @@ install: doc boot idx-tsunami.sh analyse_msg.pl install_recorder install_contro

# create startup script
cp idx-tsunami.sh $(SCRIPT)
install analyse_msg.pl $(LIBDIR)/analyse_msg.pl
install $(PERL_SCRIPTS) $(LIBDIR)/
chmod +x $(SCRIPT)

#
Expand Down Expand Up @@ -246,9 +248,9 @@ release:
$(USERMANUAL) $(USERMANUAL_SRC) $(USERMANUAL_IMG) $(DTD) \
COPYING README LISEZMOI TODO $(CONFFILE_SRC) Makefile \
priv/builder.erl idx-tsunami.sh.in vsn.mk \
$(DEBIAN) src/analyse_msg.pl.src CONTRIBUTORS CHANGES \
$(DEBIAN) $(PERL_SCRIPTS_SRC) CONTRIBUTORS CHANGES \
configure configure.in config.guess config.sub include.mk.in \
install-sh idx-tsunami.spec
install-sh idx-tsunami.spec
tar -C $(PACKAGE)-$(VERSION) -zxf tmp.tgz
mkdir $(PACKAGE)-$(VERSION)/ebin
tar zvcf $(PACKAGE)-$(VERSION).tar.gz $(PACKAGE)-$(VERSION)
Expand All @@ -268,8 +270,9 @@ ebin/%.beam: src/$(RECORDER_APPLICATION)/%.erl $(INC_FILES)
ebin/%.beam: src/$(CONTROLLER_APPLICATION)/%.erl $(INC_FILES)
$(CC) $(OPT) -I $(INC) -o ebin $<

analyse_msg.pl: src/analyse_msg.pl.src Makefile
$(SED) -e 's;%VERSION%;$(VERSION);g' < $< > $@
%.pl: src/%.pl.src Makefile
@$(SED) -e 's;%VERSION%;$(VERSION);g' \
-e 's;%DTD%;$(SHARE_DIR)/$(DTD);g' < $< > $@

idx-tsunami.sh: idx-tsunami.sh.in include.mk Makefile
@$(SED) \
Expand Down
276 changes: 276 additions & 0 deletions src/log2tsunami.pl.src
@@ -0,0 +1,276 @@
#!/usr/bin/env perl
# -*- Mode: CPerl -*-
#
# Copyright (C) 2004 Nicolas Niclausse
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

# Author: Nicolas Niclausse (Nicolas.Niclausse@sophia.inria.fr)
# Version: $Id$

# purpose: create a config file for IDX-Tsunami from a Combined Log file

use strict;
use Getopt::Long;
use Time::Local;

# Command-line options; each one is filled in by GetOptions below and stays
# undefined when the option was not given.
my ($help, $verbose, $version, $thinktime_threshold, $visit_timeout,
    $session_threshold, $max_pages, $max_duration);

# Month abbreviation as written in the log timestamp => 0-based month
# number, which is the form Time::Local::timelocal expects.
my %Months = (
    Jan => 0, Feb => 1, Mar => 2, Apr => 3, May => 4,  Jun => 5,
    Jul => 6, Aug => 7, Sep => 8, Oct => 9, Nov => 10, Dec => 11,
);

# Placeholder: the Makefile rule that builds the .pl file from this .pl.src
# template substitutes the real version number for it.
my $tagvsn = '%VERSION%';

# GetOptions returns false when the command line could not be parsed; keep
# the result instead of peeking at Getopt::Long's internal error counter.
my $options_ok = GetOptions(
    "help",            \$help,
    "verbose",         \$verbose,
    "tt=i",            \$thinktime_threshold,
    "st=i",            \$session_threshold,
    "visit_timeout=i", \$visit_timeout,
    "max_pages=i",     \$max_pages,
    "max_duration=i",  \$max_duration,
    "version",         \$version,
);

# Placeholder as well: replaced at build time by the installed DTD path.
my $dtd = "%DTD%";

# Defaults are applied only when an option is absent (undefined), so that an
# explicit 0 given on the command line is honoured.

# thinktimes are written out only when greater than this many seconds
$thinktime_threshold = 1 unless defined $thinktime_threshold;
# sessions are written out only when they have more requests than this
$session_threshold = 2 unless defined $session_threshold;

my $ims = "Fri, 14 Nov 2003 02:43:31 GMT"; # if modified since ... for 304

# if thinktime is more than $visit_timeout, it's a new session
$visit_timeout = 600 unless defined $visit_timeout;

$max_pages    = 100  unless defined $max_pages;    # 100 pages max per session
$max_duration = 3600 unless defined $max_duration; # 1 hour max session duration

# Per-user session data, keyed by "$ip-$user_agent".  Starts as an empty
# hash so that it can be dereferenced safely even when no log line matched.
my $visite = {};
my ($time, $sec, $min, $hour, $mday, $mon, $year);
my $total;          # number of log lines that were parsed successfully
my $bad = 0;        # number of log lines that could not be parsed
my $user;
my $id;
my $visit_tot = 0;  # number of sessions found, over all users

usage()   if $help or not $options_ok;
version() if $version;

# Read the access log (files named on the command line, or standard input)
# and group the requests into sessions stored in $visite.
#
# $visite is keyed by "$ip-$user_agent"; each entry holds:
#   id            number of the user's latest session (sessions count from 1)
#   last_visit    time of the user's most recent request
#   last_referer  referer recorded when the current page was opened
#   <n>           data of session number n: started, last_request, page,
#                 hit, duration and tsunami (the XML built so far)
while (<>) {
    # Combined Log Format:
    #   host ident user [date zone] "method request" status bytes "referer" "agent"
    # The host may be a dotted address, a host name or an IPv6 address.
    my ($ip, $date, $method, $req, $code, $referer, $user_agent) =
        m{^([\w.:-]+) \S+ \S+ \[(\w+/\w+/\w+:\d+:\d+:\d+)[^\]]+\] "(\w+) ([^"]+)" (\d+) \S+ "([^"]*)" "([^"]*)"$};
    unless (defined $ip) {
        # print STDERR "$_\n";
        $bad++;
        next;
    }

    my ($url, $protocole) = split(/\s+/, $req);
    $url = replace_entities($url);

    # Requests logged without a usable protocol are replayed as HTTP/1.0.
    my $http_version = "1.0";
    if (defined $protocole and $protocole =~ m{HTTP/(\d\.\d)}) {
        $http_version = $1;
    }

    # Convert the timestamp to epoch seconds.  timelocal croaks on an
    # impossible date; such a line is counted as bad instead of aborting
    # the whole conversion.
    my ($day, $month_name, $full_year, $h, $m, $s) =
        $date =~ m{(\d+)/(\w+)/(\d+):(\d+):(\d+):(\d+)};
    my $time;
    if (defined $month_name and exists $Months{$month_name}) {
        $time = eval {
            timelocal($s, $m, $h, $day, $Months{$month_name}, $full_year - 1900);
        };
    }
    unless (defined $time) {
        $bad++;
        next;
    }

    my $request = _request_xml($url, $http_version, $method, $code);
    my $user    = "$ip-$user_agent";
    my $state   = $visite->{$user};

    if ($state and $time - $state->{'last_visit'} <= $visit_timeout) {
        # same visit: append to the user's current session
        my $session   = $state->{ $state->{'id'} };
        my $thinktime = $time - $session->{'last_request'};
        $session->{'hit'}++;
        $state->{'last_visit'}     = $time;
        $session->{'last_request'} = $time;
        if ($thinktime > $thinktime_threshold) {
            $session->{'tsunami'} .= "\t".'<thinktime value="'.$thinktime.'"/>'."\n\n";
        }
        $session->{'tsunami'} .= $request;
        # update duration
        $session->{'duration'} = $time - $session->{'started'};
        # A change of referer is counted as a new page/frame.
        if ($state->{'last_referer'} ne $referer) {
            $session->{'page'}++;
            $state->{'last_referer'} = $referer;
        }
    } else {
        # unknown user, or known user coming back after $visit_timeout
        _start_session($user, $ip, $time, $referer, $request);
    }
    $total++;
}

# Build the XML for one request.  A request that was answered with status
# 304 gets an if_modified_since attribute (fixed date $ims), whichever
# position it has in the session.
sub _request_xml {
    my ($url, $http_version, $method, $code) = @_;
    my $xml = "\t".'<request><http url="'.$url.'" version="'.$http_version.'" method="'.$method.'"';
    $xml .= ' if_modified_since="'.$ims.'"' if $code == 304;
    return $xml."></http></request>\n";
}

# Open a new session for $user (creating the user's entry when needed) with
# $first_request as its first request, and count it in $visit_tot.
sub _start_session {
    my ($user, $ip, $time, $referer, $first_request) = @_;
    my $state = $visite->{$user} ||= { 'id' => 0 };
    my $id    = ++$state->{'id'};
    $visit_tot++;
    $state->{'last_visit'}   = $time;
    $state->{'last_referer'} = $referer;
    $state->{$id} = {
        'started'      => $time,
        'last_request' => $time,
        'page'         => 1,
        'hit'          => 1,
        'duration'     => 0,
        'tsunami'      => '<session name="'.$ip."-".$id.'" type="ts_http">'."\n"
                          .$first_request,
    };
    return;
}
# Write the configuration file on standard output: a fixed skeleton
# (clients, server, arrival phase) followed by one <session> element per
# retained session.  Statistics go to standard error in verbose mode.

# Work on an empty hash when no log line matched at all.
my $by_user   = $visite || {};
my $users_tot = scalar(keys %$by_user);
my $page_tot  = 0;
my $hit_tot   = 0;
my $bad_visit = 0;
my $bad_pages = 0;
print STDERR "number of unique users is $users_tot\n" if $verbose;
print '<?xml version="1.0"?>
<!DOCTYPE idx-tsunami SYSTEM "'.$dtd.'" [] >
';
print '<idx-tsunami loglevel="notice" dumptraffic="false" version="1.0">
<clients>
<client host="myhostname" weight="2" maxusers="950">
<ip value="192.168.0.2"></ip>
</client>
</clients>
<server host="myservername" port="80" type="tcp"></server>
<arrivalphase phase="1" duration="10" unit="minute">
<users interarrival="0.1" unit="second"></users>
</arrivalphase>
';

# First pass: count the sessions that will be written out, i.e. those with
# more than $session_threshold requests.
# NOTE(review): the comparison is strict, so a session with exactly
# $session_threshold requests is dropped -- confirm this is intended.
my $real_visit = 0;
foreach my $key (keys %$by_user) {
    foreach my $id (1 .. $by_user->{$key}->{'id'}) {
        $real_visit++ if $by_user->{$key}->{$id}->{'hit'} > $session_threshold;
    }
}

# Every retained session gets the same share of the 100% popularity.
my $pop = $real_visit ? sprintf("%.3f", 100 / $real_visit) : 0;

# Second pass: users ordered by their number of sessions (compared as
# numbers), then by key so that the output order is reproducible.
foreach my $key (sort { $by_user->{$a}->{'id'} <=> $by_user->{$b}->{'id'}
                        or $a cmp $b } keys %$by_user) {
    my $tot_id = $by_user->{$key}->{'id'};
    print STDERR "number of visit for $key is $tot_id\n" if $verbose;
    foreach my $id (1 .. $tot_id) {
        my $session  = $by_user->{$key}->{$id};
        my $page     = $session->{'page'};
        my $hit      = $session->{'hit'};
        my $duration = $session->{'duration'};
        if ($page < $max_pages and $duration < $max_duration) {
            $page_tot += $page;
            $hit_tot  += $hit;
            print STDERR " page=$page hit=$hit duration=$duration\n" if $verbose;
        } else {
            # NOTE(review): a session over the limits is only counted as
            # bad here; it is still written out below -- confirm intended.
            $bad_visit++;
            $bad_pages += $page;
            print STDERR "# page=$page hit=$hit duration=$duration\n" if $verbose;
        }
        next unless $hit > $session_threshold;
        my $tsunami = $session->{'tsunami'};
        $tsunami =~ s/\<session/<session popularity=\"$pop\"/;
        print "$tsunami</session>\n";
    }
}
print "</idx-tsunami>\n";

if ($verbose) {
    my $good_visit = $visit_tot - $bad_visit;
    print STDERR "real_visit = $real_visit\n";
    print STDERR "total_visit = $visit_tot , bad visit = $bad_visit ";
    # The ratios are reported as 0 when there is nothing to divide by.
    printf STDERR "page/visit = %.2f\n", ($good_visit ? $page_tot / $good_visit : 0);
    print STDERR "good_pages = $page_tot , bad pages = $bad_pages ";
    printf STDERR "hit/page = %.2f\n", ($page_tot ? $hit_tot / $page_tot : 0);
    print STDERR "bad = $bad\n";
}

# Return a copy of the given string in which the five characters that are
# special in XML (& ' " > <) are replaced by their predefined entities.
sub replace_entities {
    my ($text) = @_;
    # '&' has to come first: handled later, it would re-escape the
    # ampersands introduced by the other replacements.
    my @escapes = (
        [ qr/\&/, '&amp;'  ],
        [ qr/\'/, '&apos;' ],
        [ qr/\"/, '&quot;' ],
        [ qr/>/,  '&gt;'   ],
        [ qr/</,  '&lt;'   ],
    );
    foreach my $escape (@escapes) {
        my ($char, $entity) = @$escape;
        $text =~ s/$char/$entity/g;
    }
    return $text;
}
# Print the help text (purpose, licence notice and the list of options with
# their defaults) on standard output, then exit.
sub usage {
    print "log2tsunami.pl: create a config file for IDX-Tsunami from a Combined Log file\n\n";
    print "This script is part of IDX-TSUNAMI version $tagvsn,
Copyright (C) 2004 Nicolas Niclausse\n\n";
    print "IDX-TSUNAMI comes with ABSOLUTELY NO WARRANTY; This is free software, and
you are welcome to redistribute it under certain conditions
type `log2tsunami.pl --version` for details.\n\n";

    # The defaults quoted here are the values assigned at the top of the
    # script when an option is not given.
    print "Usage: $0 [<options>] <log file>\n", "Available options:\n",
        "\t[--help] (this help text)\n",
        "\t[--version] (print version)\n",
        "\t[--verbose] (print statistics on stderr)\n",
        "\t[--tt <integer>] (thinktime threshold in sec.: only longer thinktimes are kept (def=1))\n",
        "\t[--st <integer>] (session threshold: only sessions with more requests are kept (def=2))\n",
        "\t[--visit_timeout <integer>] (idle time in sec. after which a new session starts (600))\n",
        "\t[--max_duration <integer>] (maximum session duration in sec. (3600))\n",
        "\t[--max_pages <integer>] (maximum number of pages within a session. (100))\n";
    exit;
}

# Print the version and licence notice on standard output, then exit.
# The text ends with a newline so that the shell prompt starts on its own
# line afterwards.
sub version {
    print "this script is part of IDX-TSUNAMI version $tagvsn
Written by Nicolas Niclausse
Copyright (C) 2004 Nicolas Niclausse
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, write to the
Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.\n";
    exit;
}

0 comments on commit 75eb47d

Please sign in to comment.