Permalink
Browse files

add a script to convert apache combined log files to tsunami XML conf…

…ig file

SVN Revision: 424
  • Loading branch information...
1 parent 097b48e commit 75eb47d55c0c5d9bd78508332d752359db75a131 @nniclausse nniclausse committed Oct 19, 2004
Showing with 287 additions and 8 deletions.
  1. +11 −8 Makefile
  2. +276 −0 src/log2tsunami.pl.src
View
19 Makefile
@@ -12,7 +12,7 @@ else
ifeq ($(TYPE),test)
OPT:=+export_all
else
- OPT =
+ OPT =
endif
endif
INC = ./include
@@ -56,6 +56,8 @@ DTD = idx-tsunami-1.0.dtd
USERMANUAL = doc/user_manual.html doc/IDXDOC.css
USERMANUAL_IMG = $(wildcard doc/images/*.png)
USERMANUAL_SRC = doc/user_manual.tex
+PERL_SCRIPTS_SRC = $(wildcard $(ESRC)/*.pl.src)
+PERL_SCRIPTS = $(notdir $(basename $(PERL_SCRIPTS_SRC)))
TARGET = $(addsuffix .beam, $(basename \
$(addprefix $(EBIN)/, $(notdir $(SRC)))))
@@ -113,11 +115,11 @@ deb:
clean:
-cd priv && rm -f $(shell ls priv | grep -v builder\.erl) && cd ..
-rm -f $(TARGET) $(TMP) $(BUILD_OPTIONS_FILE) builder.beam
- -rm -f $(TGT_APPFILES) idx-tsunami.sh analyse_msg.pl
+ -rm -f $(TGT_APPFILES) idx-tsunami.sh $(PERL_SCRIPTS)
-rm -f ebin/*.beam
# -make -C doc clean
-install: doc boot idx-tsunami.sh analyse_msg.pl install_recorder install_controller $(CONFFILE)
+install: doc boot idx-tsunami.sh $(PERL_SCRIPTS) install_recorder install_controller $(CONFFILE)
-rm -f $(TMP)
install -d $(TARGETDIR)/priv
@@ -144,7 +146,7 @@ install: doc boot idx-tsunami.sh analyse_msg.pl install_recorder install_contro
# create startup script
cp idx-tsunami.sh $(SCRIPT)
- install analyse_msg.pl $(LIBDIR)/analyse_msg.pl
+ install $(PERL_SCRIPTS) $(LIBDIR)/
chmod +x $(SCRIPT)
#
@@ -246,9 +248,9 @@ release:
$(USERMANUAL) $(USERMANUAL_SRC) $(USERMANUAL_IMG) $(DTD) \
COPYING README LISEZMOI TODO $(CONFFILE_SRC) Makefile \
priv/builder.erl idx-tsunami.sh.in vsn.mk \
- $(DEBIAN) src/analyse_msg.pl.src CONTRIBUTORS CHANGES \
+ $(DEBIAN) $(PERL_SCRIPTS_SRC) CONTRIBUTORS CHANGES \
configure configure.in config.guess config.sub include.mk.in \
- install-sh idx-tsunami.spec
+ install-sh idx-tsunami.spec
tar -C $(PACKAGE)-$(VERSION) -zxf tmp.tgz
mkdir $(PACKAGE)-$(VERSION)/ebin
tar zvcf $(PACKAGE)-$(VERSION).tar.gz $(PACKAGE)-$(VERSION)
@@ -268,8 +270,9 @@ ebin/%.beam: src/$(RECORDER_APPLICATION)/%.erl $(INC_FILES)
ebin/%.beam: src/$(CONTROLLER_APPLICATION)/%.erl $(INC_FILES)
$(CC) $(OPT) -I $(INC) -o ebin $<
-analyse_msg.pl: src/analyse_msg.pl.src Makefile
- $(SED) -e 's;%VERSION%;$(VERSION);g' < $< > $@
+%.pl: src/%.pl.src Makefile
+ @$(SED) -e 's;%VERSION%;$(VERSION);g' \
+ -e 's;%DTD%;$(SHARE_DIR)/$(DTD);g' < $< > $@
idx-tsunami.sh: idx-tsunami.sh.in include.mk Makefile
@$(SED) \
View
276 src/log2tsunami.pl.src
@@ -0,0 +1,276 @@
+#!/usr/bin/env perl
+# -*- Mode: CPerl -*-
+#
+# Copyright (C) 2004 Nicolas Niclausse
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+
+# Author: Nicolas Niclausse (Nicolas.Niclausse@sophia.inria.fr)
+# Version: $Id$
+
+# purpose: create a config file for IDX-Tsunami from a Combined Log file
+
+use strict;
+use Getopt::Long;
+use Time::Local;
+
+# Command-line options.  They are package globals so that the rest of the
+# script (including the subs at the end of the file) can read them.
+use vars qw($help $verbose $version $thinktime_threshold $visit_timeout
+            $session_threshold $max_pages $max_duration);
+
+# Month abbreviation, as written in the log, to the 0-based month number
+# expected by Time::Local.
+my %Months = ('Jan', 0, 'Feb', 1, 'Mar', 2, 'Apr', 3, 'May', 4,  'Jun', 5,
+              'Jul', 6, 'Aug', 7, 'Sep', 8, 'Oct', 9, 'Nov', 10, 'Dec', 11);
+
+# The %VERSION% and %DTD% placeholders are substituted by the Makefile rule
+# that generates the .pl file from this .pl.src file.
+my $tagvsn = '%VERSION%';
+
+# GetOptions returns false when the command line could not be parsed.
+my $options_ok = GetOptions(
+    "help"            => \$help,
+    "verbose"         => \$verbose,
+    "tt=i"            => \$thinktime_threshold,
+    "st=i"            => \$session_threshold,
+    "visit_timeout=i" => \$visit_timeout,
+    "max_pages=i"     => \$max_pages,
+    "max_duration=i"  => \$max_duration,
+    "version"         => \$version,
+);
+
+# Single quotes: the substituted path must never be interpolated.
+my $dtd = '%DTD%';
+
+# Defaults are applied only when the option was not given at all, so that an
+# explicit 0 on the command line is honoured.
+
+# only thinktimes greater than this many seconds are written out
+$thinktime_threshold = 1 unless defined $thinktime_threshold;
+# only sessions with more requests than this are written out
+$session_threshold = 2 unless defined $session_threshold;
+
+my $ims = "Fri, 14 Nov 2003 02:43:31 GMT"; # if modified since ... for 304
+
+# if thinktime is more than $visit_timeout, it's a new session
+$visit_timeout = 600 unless defined $visit_timeout;
+
+$max_pages    = 100  unless defined $max_pages;    # 100 pages max per session
+$max_duration = 3600 unless defined $max_duration; # 1 hour max session duration
+
+# State shared by the parsing loop and the report that follows it.
+my $visite = {};    # per-user visit data, filled in while reading the log
+my ($time,$sec,$min,$hour,$mday,$mon,$year);
+my $total = 0;      # number of log lines successfully parsed
+my $bad = 0;        # number of log lines that could not be parsed
+my $user;
+my $id;
+my $visit_tot = 0;  # number of visits found, all users together
+
+usage()   if $help or not $options_ok;
+version() if $version;
+
+# Read the access log(s) named on the command line (or standard input) and
+# group the hits into visits.
+#
+# A user is identified by "<client host>-<user agent>".  A hit that arrives
+# more than $visit_timeout seconds after the previous hit of the same user
+# starts a new visit.  For each user, $visite->{$user} holds:
+#   'id'            number of the current (latest) visit, starting at 1
+#   'last_visit'    time of the latest hit
+#   'last_referer'  referer of the latest page
+#   <n>             data of visit number <n>: 'started', 'last_request',
+#                   'hit', 'page', 'duration' and 'tsunami' (the XML text of
+#                   the session, without its closing tag)
+# Lines that cannot be parsed are only counted, in $bad.
+while (<>) {
+    # Combined Log Format:
+    #   host ident user [date zone] "method url protocol" status bytes "referer" "agent"
+    unless (m@^([\w\.:-]+) \S+ \S+ \[(\w+/\w+/\w+:\d+:\d+:\d+)([^\]]+)\] \"(\w+) ([^\"]+)\" (\d+) (\S+) \"([^\"]*)\" \"([^\"]*)\"$@) {
+        $bad++;
+        next;
+    }
+    my ($ip, $date, $zone, $method, $req, $code, $referer, $user_agent)
+        = ($1, $2, $3, $4, $5, $6, $8, $9);
+
+    my $time = log_time($date, $zone);
+    unless (defined $time) {
+        # impossible date or unknown month: count the line as unparsable
+        # instead of aborting the whole run
+        $bad++;
+        next;
+    }
+
+    my ($url, $protocole) = split(/\s+/, $req);
+    $url = replace_entities($url);
+    my $http_version = "1.0";    # used when the request line has no protocol
+    if (defined $protocole and $protocole =~ /HTTP\/(\d\.\d)/) {
+        $http_version = $1;
+    }
+    my $request = request_xml($url, $http_version, $method, $code);
+
+    my $user    = "$ip-$user_agent";
+    my $visitor = $visite->{$user};
+    if ($visitor and $time - $visitor->{'last_visit'} <= $visit_timeout) {
+        # same visit
+        my $visit     = $visitor->{ $visitor->{'id'} };
+        my $thinktime = $time - $visit->{'last_request'};
+        $visit->{'hit'}++;
+        $visit->{'last_request'} = $time;
+        $visit->{'duration'}     = $time - $visit->{'started'};
+        $visitor->{'last_visit'} = $time;
+        if ($thinktime > $thinktime_threshold) {
+            $visit->{'tsunami'} .= "\t".'<thinktime value="'.$thinktime.'"/>'."\n\n";
+        }
+        $visit->{'tsunami'} .= $request;
+        if ($visitor->{'last_referer'} ne $referer) {
+            # a different referer is counted as a new frame/page
+            $visit->{'page'}++;
+            $visitor->{'last_referer'} = $referer;
+        }
+    } else {
+        # new visitor, or new visit of a known visitor
+        $visit_tot++;
+        $visitor = ($visite->{$user} ||= {});
+        my $id = ++$visitor->{'id'};    # 1 for the first visit of a user
+        $visitor->{'last_visit'}   = $time;
+        $visitor->{'last_referer'} = $referer;
+        $visitor->{$id} = {
+            'started'      => $time,
+            'last_request' => $time,
+            'page'         => 1,
+            'hit'          => 1,
+            'duration'     => 0,
+            'tsunami'      => '<session name="'.$ip."-".$id.'" type="ts_http">'."\n"
+                              .$request,
+        };
+    }
+    $total++;
+}
+
+# Convert the date and zone fields of a log line to seconds since the epoch.
+#   $date  "dd/Mon/yyyy:hh:mm:ss"
+#   $zone  the text that follows the date inside the brackets, e.g. " +0200"
+# The zone offset is applied so that time differences stay correct when the
+# offset changes inside a log; a zone that cannot be read is taken as +0000.
+# Returns undef when the date cannot be converted.
+sub log_time {
+    my ($date, $zone) = @_;
+    return unless $date =~ m'^(\d+)/(\w+)/(\d+):(\d+):(\d+):(\d+)$';
+    my ($mday, $mon_name, $year, $hour, $min, $sec) = ($1, $2, $3, $4, $5, $6);
+    my $mon = $Months{$mon_name};
+    return unless defined $mon;
+    # timegm dies on out-of-range values (e.g. day 32)
+    my $time = eval { timegm($sec, $min, $hour, $mday, $mon, $year) };
+    return unless defined $time;
+    if (defined $zone and $zone =~ /([+-])(\d\d)(\d\d)/) {
+        my $offset = ($2 * 60 + $3) * 60;
+        $time += ($1 eq '-') ? $offset : -$offset;
+    }
+    return $time;
+}
+
+# Build the XML line for one logged hit.
+#   $url           request URL, already XML-escaped
+#   $http_version  HTTP version, e.g. "1.0"
+#   $method        request method as found in the log
+#   $code          status code logged for the hit
+# A hit answered with 304 gets an if_modified_since attribute set to $ims,
+# whatever its position in the visit.
+sub request_xml {
+    my ($url, $http_version, $method, $code) = @_;
+    my $xml = "\t".'<request><http url="'.$url.'" version="'.$http_version
+              .'" method="'.$method.'"';
+    $xml .= ' if_modified_since="'.$ims.'"' if $code == 304;
+    return $xml."></http></request>\n";
+}
+# Write the IDX-Tsunami configuration on standard output and, with --verbose,
+# statistics about the visits on standard error.
+
+# $visite is still undefined when no log line could be parsed
+$visite ||= {};
+my $users_tot = scalar keys %$visite;
+my $page_tot=0;
+my $hit_tot=0;
+my $bad_visit =0;
+my $bad_pages =0;
+print STDERR "number of unique users is $users_tot\n" if $verbose;
+print '<?xml version="1.0"?>
+<!DOCTYPE idx-tsunami SYSTEM "'.$dtd.'" [] >
+';
+print '<idx-tsunami loglevel="notice" dumptraffic="false" version="1.0">
+<clients>
+ <client host="myhostname" weight="2" maxusers="950">
+ <ip value="192.168.0.2"></ip>
+ </client>
+</clients>
+
+<server host="myservername" port="80" type="tcp"></server>
+
+<arrivalphase phase="1" duration="10" unit="minute">
+ <users interarrival="0.1" unit="second"></users>
+</arrivalphase>
+
+';
+
+# First pass: count the sessions that will be written, i.e. those with more
+# than $session_threshold hits.
+my $real_visit = 0;
+foreach my $key (keys %$visite) {
+    foreach my $id (1..$visite->{$key}->{'id'}) {
+        $real_visit++ if $visite->{$key}->{$id}->{'hit'} > $session_threshold;
+    }
+}
+# Every written session gets the same popularity; the value is only used
+# when at least one session is written.
+my $pop = $real_visit ? sprintf("%.3f", 100/$real_visit) : 0;
+
+# Second pass: users ordered by their number of visits (a numeric sort).
+foreach my $key (sort {$visite->{$a}->{'id'} <=> $visite->{$b}->{'id'}} keys %$visite) {
+    my $tot_id = $visite->{$key}->{'id'};
+    print STDERR "number of visit for $key is $tot_id\n" if $verbose;
+    foreach my $id (1..$tot_id) {
+        my $page = $visite->{$key}->{$id}->{'page'};
+        my $hit = $visite->{$key}->{$id}->{'hit'};
+        my $duration = $visite->{$key}->{$id}->{'duration'};
+        # NOTE(review): a visit over the page or duration limit is only
+        # counted as "bad" in the statistics; it is still written below.
+        # Confirm that this is intended.
+        if ($page < $max_pages and $duration < $max_duration) {
+            $page_tot += $page;
+            $hit_tot += $hit;
+            print STDERR " page=$page hit=$hit duration=$duration\n" if $verbose;
+        } else {
+            $bad_visit++;
+            $bad_pages += $page;
+            print STDERR "# page=$page hit=$hit duration=$duration\n" if $verbose;
+        }
+        next unless $hit > $session_threshold;
+        my $tsunami = $visite->{$key}->{$id}->{'tsunami'};
+        $tsunami =~ s/\<session/<session popularity=\"$pop\"/;
+        print "$tsunami</session>\n";
+    }
+}
+print '</idx-tsunami>';
+
+if ($verbose) {
+    # the ratios are reported as 0 when their divisor is 0
+    my $good_visit = $visit_tot - $bad_visit;
+    print STDERR "real_visit = $real_visit\n";
+    print STDERR "total_visit = $visit_tot , bad visit = $bad_visit ";
+    printf STDERR "page/visit = %.2f\n", ($good_visit ? $page_tot/$good_visit : 0);
+    print STDERR "good_pages = $page_tot , bad pages = $bad_pages ";
+    printf STDERR "hit/page = %.2f\n", ($page_tot ? $hit_tot/$page_tot : 0);
+    print STDERR "bad = $bad\n";
+}
+
+
+# Return a copy of the given string in which the characters & ' " > and <
+# are replaced by the corresponding XML entities.
+sub replace_entities {
+    my ($text) = @_;
+    my %entity_for = (
+        '&' => '&amp;',
+        "'" => '&apos;',
+        '"' => '&quot;',
+        '>' => '&gt;',
+        '<' => '&lt;',
+    );
+    # one pass over the string, so an inserted '&' is never escaped again
+    $text =~ s/([&'"<>])/$entity_for{$1}/g;
+    return $text;
+}
+# Print the help text on standard output and terminate the program.
+# Takes no argument and does not return.
+sub usage {
+    print "log2tsunami.pl: create a config file for IDX-Tsunami from a Combined Log file\n\n";
+    print "This script is part of IDX-TSUNAMI version $tagvsn,\n",
+          "Copyright (C) 2004 Nicolas Niclausse\n\n";
+    print "IDX-TSUNAMI comes with ABSOLUTELY NO WARRANTY; This is free software, and\n",
+          "you are welcome to redistribute it under certain conditions\n",
+          "type `log2tsunami.pl --version` for details.\n\n";
+
+    # the defaults quoted here are the ones applied after option parsing
+    print "Usage: $0 [<options>] <log file>\n","Available options:\n\t",
+          "[--help] (this help text)\n\t",
+          "[--version] (print version)\n\t",
+          "[--verbose] (print statistics on standard error)\n\t",
+          "[--tt <integer>] (thinktime threshold: only thinktimes greater than this, in sec., are kept (def=1))\n\t",
+          "[--st <integer>] (session threshold: only sessions with more requests than this are kept (def=2))\n\t",
+          "[--visit_timeout <integer>] (idle time in sec. after which a new session starts (600))\n\t",
+          "[--max_duration <integer>] (maximum session duration in sec. (3600))\n\t",
+          "[--max_pages <integer>] (maximum number of pages within a session. (100))\n";
+    exit;
+}
+
+
+# Print the version of the program and its license notice on standard
+# output, then terminate the program.
+sub version {
+    my @notice = (
+        "this script is part of IDX-TSUNAMI version $tagvsn",
+        '',
+        'Written by Nicolas Niclausse',
+        '',
+        'Copyright (C) 2004 Nicolas Niclausse',
+        '',
+        'This program is free software; you can redistribute it and/or',
+        'modify it under the terms of the GNU General Public License',
+        'as published by the Free Software Foundation; either version 2',
+        'of the License, or (at your option) any later version.',
+        '',
+        'This program is distributed in the hope that it will be useful,',
+        'but WITHOUT ANY WARRANTY; without even the implied warranty of',
+        'MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the',
+        'GNU General Public License for more details.',
+        '',
+        'You should have received a copy of the GNU General Public License',
+        'along with this program (see COPYING); if not, write to the',
+        'Free Software Foundation, Inc., 59 Temple Place - Suite 330,',
+        'Boston, MA 02111-1307, USA.',
+    );
+    # the lines are joined, not terminated: no newline follows the last one
+    print join("\n", @notice);
+    exit;
+}

0 comments on commit 75eb47d

Please sign in to comment.