Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ bootstrap.json
coverage.xml
/htmlcov
node_modules/
/bin/accesslog.csv
1 change: 1 addition & 0 deletions api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def prefix(path, routes):
route('/resolve', ResolveHandler, h='resolve', m=['POST']),
route('/schemas/<schema:{schema}>', SchemaHandler, m=['GET']),
route('/report/<report_type:site|project|accesslog|usage>', ReportHandler, m=['GET']),
route('/report/accesslog/types', ReportHandler, h='get_types', m=['GET']),


# Search
Expand Down
1 change: 1 addition & 0 deletions api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ def initialize_db():

if __config['core']['access_log_enabled']:
log_db.access_log.create_index('context.ticket_id')
log_db.access_log.create_index([('timestamp', pymongo.DESCENDING)])

create_or_recreate_ttl_index('authtokens', 'timestamp', 2592000)
create_or_recreate_ttl_index('uploads', 'timestamp', 60)
Expand Down
97 changes: 94 additions & 3 deletions api/handlers/reporthandler.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,48 @@
import copy
import unicodecsv as csv
import datetime
import os

import bson
import dateutil
import pymongo

from .. import config
from .. import tempdir as tempfile
from .. import util
from ..web import base

from ..web.request import AccessTypeList


EIGHTEEN_YEARS_IN_SEC = 18 * 365.25 * 24 * 60 * 60
BYTES_IN_MEGABYTE = float(1<<20)
ACCESS_LOG_FIELDS = [
"_id",
"access_type",
"context.acquisition.id",
"context.acquisition.label",
"context.analysis.id",
"context.analysis.label",
"context.collection.id",
"context.collection.label",
"context.group.id",
"context.group.label",
"context.project.id",
"context.project.label",
"context.session.id",
"context.session.label",
"context.subject.id",
"context.subject.label",
"context.ticket_id",
"origin.id",
"origin.method",
"origin.name",
"origin.type",
"request_method",
"request_path",
"timestamp"
]

class APIReportException(Exception):
pass
Expand All @@ -25,6 +56,9 @@ class ReportHandler(base.RequestHandler):
def __init__(self, request=None, response=None):
super(ReportHandler, self).__init__(request, response)

def get_types(self):
    """Return the list of valid access log type names (GET /report/accesslog/types)."""
    return AccessTypeList

def get(self, report_type):

report = None
Expand All @@ -39,7 +73,26 @@ def get(self, report_type):
raise NotImplementedError('Report type {} is not supported'.format(report_type))

if self.superuser_request or report.user_can_generate(self.uid):
return report.build()
# If csv is true create a temp file to respond with
if report_type == 'accesslog' and self.request.params.get('csv') == 'true':

tempdir = tempfile.TemporaryDirectory(prefix='.tmp', dir=config.get_item('persistent', 'data_path'))
csv_file = open(os.path.join(tempdir.name, 'accesslog.csv'), 'w+')
writer = csv.DictWriter(csv_file, ACCESS_LOG_FIELDS)
writer.writeheader()
try:
for doc in report.build():
writer.writerow(doc)

except APIReportException as e:
self.abort(404, str(e))
# Need to close and reopen file to flush buffer into file
csv_file.close()
self.response.app_iter = open(os.path.join(tempdir.name, 'accesslog.csv'), 'r')
self.response.headers['Content-Type'] = 'text/csv'
self.response.headers['Content-Disposition'] = 'attachment; filename="accesslog.csv"'
else:
return report.build()
else:
self.abort(403, 'User {} does not have required permissions to generate report'.format(self.uid))

Expand Down Expand Up @@ -437,6 +490,9 @@ def __init__(self, params):
:end_date: ISO formatted timestamp
:uid: user id of the target user
:limit: number of records to return
:subject: subject code of session accessed
:access_types: list of access_types to filter logs
:csv: Boolean if user wants csv file
"""

super(AccessLogReport, self).__init__(params)
Expand All @@ -445,6 +501,12 @@ def __init__(self, params):
end_date = params.get('end_date')
uid = params.get('user')
limit= params.get('limit', 100)
subject = params.get('subject', None)
if params.get('bin') == 'true':
access_types = params.get('access_types', [])
else:
access_types = params.getall('access_types')
csv_bool = (params.get('csv') == 'true')

if start_date:
start_date = dateutil.parser.parse(start_date)
Expand All @@ -460,12 +522,19 @@ def __init__(self, params):
raise APIReportParamsException('Limit must be an integer greater than 0.')
if limit < 1:
raise APIReportParamsException('Limit must be an integer greater than 0.')
elif limit > 10000:
raise APIReportParamsException('Limit exceeds 10,000 entries, please contact admin to run script.')
for access_type in access_types:
if access_type not in AccessTypeList:
raise APIReportParamsException('Not a valid access type')

self.start_date = start_date
self.end_date = end_date
self.uid = uid
self.limit = limit

self.subject = subject
self.access_types = access_types
self.csv_bool = csv_bool

def user_can_generate(self, uid):
"""
Expand All @@ -475,6 +544,20 @@ def user_can_generate(self, uid):
return True
return False

def flatten(self, json_obj, flat, prefix=""):
    """
    Flatten a nested document into dot-separated keys.

    Nested dict values are recursed into with their key appended to the
    running dot-separated prefix; scalar values are written into ``flat``.
    Returns the ``flat`` accumulator (mutated in place).
    """
    for key, value in json_obj.items():
        if isinstance(value, dict):
            flat = self.flatten(value, flat, prefix=prefix + key + ".")
        else:
            flat[prefix + key] = value
    return flat

def make_csv_ready(self, cursor):
    """Flatten every document yielded by ``cursor`` so csv.DictWriter can emit it."""
    rows = []
    for document in cursor:
        rows.append(self.flatten(document, {}))
    return rows

def build(self):
query = {}
Expand All @@ -487,8 +570,16 @@ def build(self):
query['timestamp']['$gte'] = self.start_date
if self.end_date:
query['timestamp']['$lte'] = self.end_date
if self.subject:
query['context.subject.label'] = self.subject
if self.access_types:
query['access_type'] = {'$in': self.access_types}

cursor = config.log_db.access_log.find(query).limit(self.limit).sort('timestamp', pymongo.DESCENDING).batch_size(1000)
if self.csv_bool:
return self.make_csv_ready(cursor)

return config.log_db.access_log.find(query).limit(self.limit).sort('timestamp', pymongo.DESCENDING)
return cursor

class UsageReport(Report):
"""
Expand Down
1 change: 1 addition & 0 deletions api/web/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
'user_login': 'user_login',
'user_logout': 'user_logout'
})
# Names of all access types, in declaration order; used to validate
# access-type filters on access log reports.
# (list(__members__) yields the member names directly — the original
# comprehension iterated .items() and discarded the member values.)
AccessTypeList = list(AccessType.__members__)


class SciTranRequest(Request):
Expand Down
80 changes: 80 additions & 0 deletions bin/log_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# This implementation as of July 19 2017 has these resource utilizations of the mongodb container:
# - 2 million entries: 1.50 Gb
# - 3 million entries: 2.05 Gb
# The entire docker application was given 6 Gb to use, when given the default 2 Gb,
# the process would frequently crash before 1 million entries were downloaded.

import argparse
import unicodecsv as csv
import pymongo
import tarfile
import sys
import logging
import datetime

from api.web.request import AccessTypeList
from api import config
from api.handlers.reporthandler import AccessLogReport, ACCESS_LOG_FIELDS

# Maps single-letter CLI flags to AccessLogReport parameter names.
# NOTE: AccessLogReport.__init__ reads the user filter via
# params.get('user'), so '-u' must map to 'user' ('uid' was silently
# ignored by the report).
ARG_TO_PARAMS = {
    'l': 'limit',
    's': 'start_date',
    'e': 'end_date',
    'u': 'user',
    'j': 'subject',
    't': 'access_types',
}

def download_large_csv(params):
"""
Script to download large csv files to avoid uwsgi worker running out of memory.
"""
entries = int(params['limit'])
params['csv'] = "true"
params['bin'] = "true"
params['limit'] = "100000"

csv_file = open('accesslog.csv', 'w+')
writer = csv.DictWriter(csv_file, ACCESS_LOG_FIELDS)
writer.writeheader()
unicode_err_count = 0
while entries > 0:
print "{} entries left".format(entries)
params['limit'] = str(min(entries, 100000))
report = AccessLogReport(params)
rep = report.build()
end_date = str(rep[-1]['timestamp'])
for doc in rep[:-1]:
entries = entries - 1
writer.writerow(doc)

if len(rep) == 1:
entries = 0
writer.writerow(rep[0])
if len(rep) < int(params['limit']) - 1:
entries = 0
csv_file.flush()
params['end_date'] = end_date


print "Encountered unicode errors and skipped {} entries".format(unicode_err_count)
csv_file.close()

def format_arg(args):
    """Rename parsed CLI flags to AccessLogReport parameter names, dropping unset flags."""
    params = {}
    for flag in args:
        if args[flag] is not None:
            params[ARG_TO_PARAMS[flag]] = args[flag]
    return params

# CLI entry point: parse the short flags, translate them to report
# parameters via format_arg, and stream matching access log entries
# to accesslog.csv.
if __name__ == '__main__':
    try:
        parser = argparse.ArgumentParser()
        parser.add_argument("-s", help="Start date", type=str)
        parser.add_argument("-e", help="End date", type=str)
        parser.add_argument("-u", help="User id", type=str)
        parser.add_argument("-l", help="Limit", type=str)
        parser.add_argument("-j", help="subJect", type=str)
        parser.add_argument("-t", help="list of access Types", type=str, nargs='+')

        args = vars(parser.parse_args())
        download_large_csv(format_arg(args))
    except Exception as e:
        # Top-level boundary: log the full traceback and exit non-zero so
        # callers (cron/ops scripts) can detect failure.
        logging.exception('Unexpected error in log_csv.py')
        sys.exit(1)
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ uwsgi==2.0.13.1
webapp2==2.5.2
WebOb==1.5.1
git+https://github.com/flywheel-io/gears.git@v0.1.1#egg=gears
unicodecsv==0.9.0
61 changes: 59 additions & 2 deletions test/integration_tests/python/test_reports.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import calendar
import copy
import datetime

from api.web.request import AccessTypeList

# create timestamps for report filtering
today = datetime.datetime.today()
Expand Down Expand Up @@ -93,7 +93,7 @@ def test_project_report(data_builder, as_admin, as_user):
assert len(project_report['projects']) == 2


def test_access_log_report(with_user, as_user, as_admin):
def test_access_log_report(data_builder, with_user, as_user, as_admin):
# try to get access log report as user
r = as_user.get('/report/accesslog')
assert r.status_code == 403
Expand All @@ -116,6 +116,12 @@ def test_access_log_report(with_user, as_user, as_admin):
r = as_admin.get('/report/accesslog', params={'limit': 0})
assert r.status_code == 400

# try to get report w/ limit == 1000 and limit > 1000
r = as_admin.get('/report/accesslog', params={'limit': 10000})
assert r.ok
r = as_admin.get('/report/accesslog', params={'limit': 10001})
assert r.status_code == 400

# get access log report for user
r = as_admin.get('/report/accesslog', params={
'start_date': yesterday_ts, 'end_date': tomorrow_ts, 'user': with_user.user
Expand All @@ -135,6 +141,57 @@ def test_access_log_report(with_user, as_user, as_admin):
assert len(accesslog) == 1
assert accesslog[0]['access_type'] == 'user_login'

# get access log report of certain subject
project = data_builder.create_project()
r = as_admin.post('/sessions', json={
'project': project,
'label': 'test-accesslog-session',
'timestamp': '1979-01-01T00:00:00+00:00',
'subject': {'code': 'compliant5'}
})
assert r.ok
session = r.json()['_id']

# In order to have two logs of this subject (POST does not create a log)
r = as_admin.get('/sessions/' + session)
assert r.ok
session = r.json()['_id']
r = as_admin.get('/sessions/' + session)
assert r.ok

r = as_admin.get('/report/accesslog', params={'subject': 'compliant5'})
assert r.ok
for count,log in enumerate(r.json(), start = 1):
assert log.get('context', {}).get('subject', {}).get('label') == 'compliant5'
assert count == 2
r = as_admin.delete('/sessions/' + session)
data_builder.delete_project(project, recursive=True)

# get access log report of certain access types
r = as_admin.get('/report/accesslog', params={'access_types': ['user_login', 'view_container']})
assert r.ok
ul, vc = False, False

# test that each item in log is either view_container or user_login
for log in r.json():
assert log.get('access_type') in ['user_login', 'view_container']
if log.get('access_type') == 'user_login':
ul = True
elif log.get('access_type') == 'view_container':
vc = True
assert ul and vc

# Download .csv file
r = as_admin.get('/report/accesslog', params={'csv': 'true'})
assert r.ok

r.content[0][:3] == '_id'

# get the access types
r = as_admin.get('/report/accesslog/types')
assert r.ok
assert r.json() == AccessTypeList


def test_usage_report(data_builder, file_form, as_user, as_admin):
# try to get usage report as user
Expand Down