From ad4bae2855deb4f61a11125d65e86c4adbc6a02b Mon Sep 17 00:00:00 2001 From: Abizer Lokhandwala Date: Mon, 29 May 2017 18:09:20 -0700 Subject: [PATCH 1/6] add mirrors stats script and cronjob --- modules/ocf_mirrors/manifests/init.pp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/modules/ocf_mirrors/manifests/init.pp b/modules/ocf_mirrors/manifests/init.pp index d276d54f9..6427d89d4 100644 --- a/modules/ocf_mirrors/manifests/init.pp +++ b/modules/ocf_mirrors/manifests/init.pp @@ -46,7 +46,7 @@ source => 'puppet:///modules/ocf_mirrors/README.html', owner => mirrors, group => mirrors; - } +} class { '::apache': @@ -166,4 +166,15 @@ command => '/opt/mirrors/bin/report-sizes', special => 'daily', } + + file { '/usr/local/sbin/record-mirrors-stats': + source => 'puppet:///private/stats/record-mirrors-stats.py', + mode => '0640', + } -> + cron { 'mirrors-stats': + command => '/usr/local/sbin/record-mirrors-stats', + minute => 0, + hour => 0, + } + } From 2f31f1c5e1ee9f9c80e449f6806c6cfd44034d0c Mon Sep 17 00:00:00 2001 From: Abizer Lokhandwala Date: Sat, 17 Jun 2017 16:21:26 -0700 Subject: [PATCH 2/6] templatize mirrors stats script --- modules/ocf_mirrors/manifests/init.pp | 4 +- .../templates/record-mirrors-stats.py.erb | 54 +++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 modules/ocf_mirrors/templates/record-mirrors-stats.py.erb diff --git a/modules/ocf_mirrors/manifests/init.pp b/modules/ocf_mirrors/manifests/init.pp index 6427d89d4..e37a1070c 100644 --- a/modules/ocf_mirrors/manifests/init.pp +++ b/modules/ocf_mirrors/manifests/init.pp @@ -34,6 +34,8 @@ password => '*', } + $ocfstats_password = hiera('ocfstats::mysql::password') + file { ['/opt/mirrors', '/opt/mirrors/ftp', '/opt/mirrors/project', '/opt/mirrors/bin']: ensure => directory, @@ -46,7 +48,7 @@ source => 'puppet:///modules/ocf_mirrors/README.html', owner => mirrors, group => mirrors; -} + } class { '::apache': diff --git a/modules/ocf_mirrors/templates/record-mirrors-stats.py.erb b/modules/ocf_mirrors/templates/record-mirrors-stats.py.erb new file mode 100644 index 000000000..e87048f51 --- /dev/null +++ b/modules/ocf_mirrors/templates/record-mirrors-stats.py.erb @@ -0,0 +1,54 @@ +#!/usr/bin/python + +import os +import pymysql +from datetime import date + +OCFSTATS_PWD = "<%= @ocfstats_password %>" +MIRRORS_PATH = '/opt/mirrors/ftp' +LOG_PATH = '/var/log/apache2' +LOG_NAME = 'mirrors.ocf.berkeley.edu_access.log' + +sources = [mirrored for mirrored in os.listdir(MIRRORS_PATH) + if os.path.isdir(os.path.join(MIRRORS_PATH, mirrored)) and not mirrored.startswith('.')] + +sources.append('other') # catchall + +def build_dists(): + return { mirrored: { 'up': 0, 'down': 0 } for mirrored in sources } + +def process_file(fn): + dists = build_dists() + with open(fn, 'r') as f: + for line in f: + stats = line.split() + dist = stats[6] + dist = dist.split('/')[1] if '/' in dist else dist + + if stats[8][0] in ('2', '3') and dist in dists: # http status code is 2xx/3xx + dists[dist]['up'] += int(stats[-2]) + dists[dist]['down'] += int(stats[-1]) + else: + dists['other']['up'] += int(stats[-2]) + dists['other']['down'] += int(stats[-1]) + return dists + +def to_mysql(dists): + dt = date.today() + conn = pymysql.connect( + host = 'mysql.ocf.berkeley.edu', + user = 'ocfstats', + password = OCFSTATS_PWD, + db='ocfstats', + autocommit = True, + cursorclass=pymysql.cursors.DictCursor, + ) + + c = conn.cursor() + + for dist in dists: + c.execute('INSERT INTO `mirrors` (`date`, `dist`, `up`, `down`) VALUES (%s, %s, %s, %s)', + (dt, dist, dists[dist]['up'], dists[dist]['down'])) + +if __name__ == '__main__': + to_mysql(process_file(os.path.join(LOG_PATH, LOG_NAME))) From 9063da62c504452750cadb8ec60d906ed71c07e6 Mon Sep 17 00:00:00 2001 From: Abizer Lokhandwala Date: Mon, 19 Jun 2017 03:21:38 -0700 Subject: [PATCH 3/6] secrets in cron env instead of templates --- .../record-mirrors-stats.py} | 37 ++++++++++++------- modules/ocf_mirrors/manifests/init.pp | 1 + 2 files changed, 24 insertions(+), 14 deletions(-) rename modules/ocf_mirrors/{templates/record-mirrors-stats.py.erb => files/record-mirrors-stats.py} (59%) diff --git a/modules/ocf_mirrors/templates/record-mirrors-stats.py.erb b/modules/ocf_mirrors/files/record-mirrors-stats.py similarity index 59% rename from modules/ocf_mirrors/templates/record-mirrors-stats.py.erb rename to modules/ocf_mirrors/files/record-mirrors-stats.py index e87048f51..01b34fef0 100644 --- a/modules/ocf_mirrors/templates/record-mirrors-stats.py.erb +++ b/modules/ocf_mirrors/files/record-mirrors-stats.py @@ -1,10 +1,10 @@ #!/usr/bin/python - import os -import pymysql from datetime import date -OCFSTATS_PWD = "<%= @ocfstats_password %>" +import pymysql + +OCFSTATS_PWD = os.environ['OCFSTATS_PWD'] MIRRORS_PATH = '/opt/mirrors/ftp' LOG_PATH = '/var/log/apache2' LOG_NAME = 'mirrors.ocf.berkeley.edu_access.log' @@ -12,20 +12,26 @@ sources = [mirrored for mirrored in os.listdir(MIRRORS_PATH) if os.path.isdir(os.path.join(MIRRORS_PATH, mirrored)) and not mirrored.startswith('.')] -sources.append('other') # catchall +sources.append('other') # catchall + def build_dists(): - return { mirrored: { 'up': 0, 'down': 0 } for mirrored in sources } + return {mirrored: {'up': 0, 'down': 0} for mirrored in sources} + def process_file(fn): dists = build_dists() with open(fn, 'r') as f: for line in f: stats = line.split() + + # extract dist name from request url + # '/debian/pool/main/h/hwdata/...' -> 'debian' dist = stats[6] dist = dist.split('/')[1] if '/' in dist else dist - if stats[8][0] in ('2', '3') and dist in dists: # http status code is 2xx/3xx + # record if we returned http 2xx/3xx + if stats[8][0] in ('2', '3') and dist in dists: dists[dist]['up'] += int(stats[-2]) dists[dist]['down'] += int(stats[-1]) else: @@ -33,22 +39,25 @@ def process_file(fn): dists['other']['down'] += int(stats[-1]) return dists + def to_mysql(dists): dt = date.today() conn = pymysql.connect( - host = 'mysql.ocf.berkeley.edu', - user = 'ocfstats', - password = OCFSTATS_PWD, + host='mysql.ocf.berkeley.edu', + user='ocfstats', + password=OCFSTATS_PWD, db='ocfstats', - autocommit = True, + autocommit=True, cursorclass=pymysql.cursors.DictCursor, ) - c = conn.cursor() + with conn as cursor: + for dist in dists: + cursor.execute( + 'INSERT INTO `mirrors` (`date`, `dist`, `up`, `down`) VALUES (%s, %s, %s, %s)', + (dt, dist, dists[dist]['up'], dists[dist]['down']) + ) - for dist in dists: - c.execute('INSERT INTO `mirrors` (`date`, `dist`, `up`, `down`) VALUES (%s, %s, %s, %s)', - (dt, dist, dists[dist]['up'], dists[dist]['down'])) if __name__ == '__main__': to_mysql(process_file(os.path.join(LOG_PATH, LOG_NAME))) diff --git a/modules/ocf_mirrors/manifests/init.pp b/modules/ocf_mirrors/manifests/init.pp index e37a1070c..5bc16487c 100644 --- a/modules/ocf_mirrors/manifests/init.pp +++ b/modules/ocf_mirrors/manifests/init.pp @@ -177,6 +177,7 @@ command => '/usr/local/sbin/record-mirrors-stats', minute => 0, hour => 0, + environment => ["OCFSTATS_PWD=${ocfstats_password}"]; } } From 2846a0430cefcc9ab07b072fa83417de7601bcc2 Mon Sep 17 00:00:00 2001 From: Abizer Lokhandwala Date: Mon, 26 Jun 2017 13:48:04 -0700 Subject: [PATCH 4/6] rename mirrors stats script, add argparse --- ...-mirrors-stats.py => process-mirrors-logs} | 25 ++++++++++++++++--- modules/ocf_mirrors/manifests/init.pp | 4 +-- 2 files changed, 24 insertions(+), 5 deletions(-) rename modules/ocf_mirrors/files/{record-mirrors-stats.py => process-mirrors-logs} (65%) diff --git a/modules/ocf_mirrors/files/record-mirrors-stats.py b/modules/ocf_mirrors/files/process-mirrors-logs similarity index 65% rename from modules/ocf_mirrors/files/record-mirrors-stats.py rename to modules/ocf_mirrors/files/process-mirrors-logs index 01b34fef0..27f80071f 100644 --- a/modules/ocf_mirrors/files/record-mirrors-stats.py +++ b/modules/ocf_mirrors/files/process-mirrors-logs @@ -1,4 +1,5 @@ #!/usr/bin/python +import argparse import os from datetime import date @@ -40,8 +41,8 @@ def process_file(fn): return dists -def to_mysql(dists): - dt = date.today() +def to_mysql(dists, dt=None, quiet=False): + dt = dt or date.today() conn = pymysql.connect( host='mysql.ocf.berkeley.edu', user='ocfstats', @@ -58,6 +59,24 @@ def to_mysql(dists): (dt, dist, dists[dist]['up'], dists[dist]['down']) ) + if not quiet: + print('{:20} {:8} {:8}'.format(dist, dists[dist]['up'], dists[dist]['down'])) + if __name__ == '__main__': - to_mysql(process_file(os.path.join(LOG_PATH, LOG_NAME))) + parser = argparse.ArgumentParser(description='Process mirrors logs to calculate network usage ' + 'and store in ocfstats') + parser.add_argument('-q', '--quiet', action='store_true', + help='do not print stats after collecting them') + parser.add_argument('log_file', nargs='?', default=os.path.join(LOG_PATH, LOG_NAME), + help='log file to parse') + parser.add_argument('date', nargs='?', default=date.today(), + help='date this log file corresponds to') + + args = parser.parse_args() + + to_mysql( + process_file(args.log_file), + args.date, + args.quiet + ) diff --git a/modules/ocf_mirrors/manifests/init.pp b/modules/ocf_mirrors/manifests/init.pp index 5bc16487c..2caf01fcc 100644 --- a/modules/ocf_mirrors/manifests/init.pp +++ b/modules/ocf_mirrors/manifests/init.pp @@ -170,11 +170,11 @@ } file { '/usr/local/sbin/record-mirrors-stats': - source => 'puppet:///private/stats/record-mirrors-stats.py', + source => 'puppet:///modules/ocf_mirrors/record-mirrors-stats', mode => '0640', } -> cron { 'mirrors-stats': - command => '/usr/local/sbin/record-mirrors-stats', + command => '/usr/local/sbin/record-mirrors-stats --quiet', minute => 0, hour => 0, environment => ["OCFSTATS_PWD=${ocfstats_password}"]; From 0fc9eb5d1799b1fb22b561203d727d1fa9b7dc98 Mon Sep 17 00:00:00 2001 From: Abizer Lokhandwala Date: Wed, 2 Aug 2017 02:38:26 -0700 Subject: [PATCH 5/6] messed up the rebase --- .../ocf_mirrors/files/process-mirrors-logs | 73 +++++++++++++------ 1 file changed, 52 insertions(+), 21 deletions(-) diff --git a/modules/ocf_mirrors/files/process-mirrors-logs b/modules/ocf_mirrors/files/process-mirrors-logs index 27f80071f..418c45d45 100644 --- a/modules/ocf_mirrors/files/process-mirrors-logs +++ b/modules/ocf_mirrors/files/process-mirrors-logs @@ -1,7 +1,9 @@ -#!/usr/bin/python +#!/usr/bin/python3 import argparse import os from datetime import date +from datetime import timedelta +from datetime import datetime import pymysql @@ -10,22 +12,21 @@ MIRRORS_PATH = '/opt/mirrors/ftp' LOG_PATH = '/var/log/apache2' LOG_NAME = 'mirrors.ocf.berkeley.edu_access.log' -sources = [mirrored for mirrored in os.listdir(MIRRORS_PATH) - if os.path.isdir(os.path.join(MIRRORS_PATH, mirrored)) and not mirrored.startswith('.')] - -sources.append('other') # catchall - - -def build_dists(): - return {mirrored: {'up': 0, 'down': 0} for mirrored in sources} - - -def process_file(fn): - dists = build_dists() +def process_log(dists, fn, log_date): with open(fn, 'r') as f: + n = 0 for line in f: + n += 1 stats = line.split() + # apache log date format looks like [11/Jul/2017:00:05:16 -0700] + # line.split()[3][1:12] = 11/Jul/2017 + # we need to dump dates that don't match because + # logrotate rotates the logs at 6am + line_date = datetime.strptime(stats[3][1:12], '%d/%b/%Y') + if line_date.date() != log_date: + continue + # extract dist name from request url # '/debian/pool/main/h/hwdata/...' -> 'debian' dist = stats[6] @@ -38,6 +39,10 @@ def process_file(fn): else: dists['other']['up'] += int(stats[-2]) dists['other']['down'] += int(stats[-1]) + + if n % 10000 is 0: + print(n, "lines processed from", fn) + return dists @@ -60,23 +65,49 @@ def to_mysql(dists, dt=None, quiet=False): ) if not quiet: - print('{:20} {:8} {:8}'.format(dist, dists[dist]['up'], dists[dist]['down'])) + print('{:20} {:8} {:8}'.format(dist, + _humanize(dists[dist]['up']), + _humanize(dists[dist]['down']))) +def _humanize(n): + for unit in ['', 'KB', 'MB', 'GB', 'TB', 'PB']: + if n < 1024.0: + return '{:3.2f} {}'.format(n, unit) + n /= 1024.0 if __name__ == '__main__': parser = argparse.ArgumentParser(description='Process mirrors logs to calculate network usage ' 'and store in ocfstats') parser.add_argument('-q', '--quiet', action='store_true', help='do not print stats after collecting them') - parser.add_argument('log_file', nargs='?', default=os.path.join(LOG_PATH, LOG_NAME), - help='log file to parse') - parser.add_argument('date', nargs='?', default=date.today(), - help='date this log file corresponds to') + parser.add_argument('log_files', nargs='*', + help='log file(s) to process') + parser.add_argument('date', nargs='?', default=date.today() - timedelta(1), + help='date for use in filtering log entries') args = parser.parse_args() + if not args.log_files: + log = os.path.join(LOG_PATH, LOG_NAME) + # logrotate rotates the log at 6am, but this script + # runs at midnight. So to capture 24h of data, we need + # to parse the rotated log as well. + log_files = [log, log + '.1'] + else: + log_files = args.log_files + + sources = [mirrored for mirrored in os.listdir(MIRRORS_PATH) + if os.path.isdir(os.path.join(MIRRORS_PATH, mirrored)) and not mirrored.startswith('.')] + + sources.append('other') # catchall + + dists = {mirrored: {'up': 0, 'down': 0} for mirrored in sources} + + for fn in log_files: + dists = process_log(dists, fn, args.date) + to_mysql( - process_file(args.log_file), - args.date, - args.quiet + dists = dists, + dt = args.date, + quiet = args.quiet ) From 1a00549879dc3801c91df16580f40a13abb5efb5 Mon Sep 17 00:00:00 2001 From: Abizer Lokhandwala Date: Wed, 2 Aug 2017 02:43:14 -0700 Subject: [PATCH 6/6] messed up more than the rebase --- modules/ocf_mirrors/files/process-mirrors-logs | 3 --- modules/ocf_mirrors/manifests/init.pp | 13 ++++++------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/modules/ocf_mirrors/files/process-mirrors-logs b/modules/ocf_mirrors/files/process-mirrors-logs index 418c45d45..46f8aae78 100644 --- a/modules/ocf_mirrors/files/process-mirrors-logs +++ b/modules/ocf_mirrors/files/process-mirrors-logs @@ -40,9 +40,6 @@ def process_log(dists, fn, log_date): dists['other']['up'] += int(stats[-2]) dists['other']['down'] += int(stats[-1]) - if n % 10000 is 0: - print(n, "lines processed from", fn) - return dists diff --git a/modules/ocf_mirrors/manifests/init.pp b/modules/ocf_mirrors/manifests/init.pp index 2caf01fcc..6d9ae432f 100644 --- a/modules/ocf_mirrors/manifests/init.pp +++ b/modules/ocf_mirrors/manifests/init.pp @@ -170,14 +170,13 @@ } file { '/usr/local/sbin/record-mirrors-stats': - source => 'puppet:///modules/ocf_mirrors/record-mirrors-stats', - mode => '0640', + source => 'puppet:///modules/ocf_mirrors/record-mirrors-stats', + mode => '0640', } -> cron { 'mirrors-stats': - command => '/usr/local/sbin/record-mirrors-stats --quiet', - minute => 0, - hour => 0, - environment => ["OCFSTATS_PWD=${ocfstats_password}"]; + command => '/usr/local/sbin/record-mirrors-stats --quiet', + minute => 0, + hour => 0, + environment => ["OCFSTATS_PWD=${ocfstats_password}"]; } - }