
Commit

Merge pull request #31 from mrname/urls_taken_more_time
Urls taken more time
mrname committed Oct 11, 2019
2 parents f537a1b + 518c366 commit 2bcfb77
Showing 3 changed files with 28 additions and 4 deletions.
5 changes: 4 additions & 1 deletion README.rst
@@ -198,7 +198,10 @@ easily produced using the public methods of ``HarParser`` and ``HarPage``::
# * status_code ('200' for example)
# * request_type ('GET' for example)
# * http_version ('HTTP/1.1' for example)
# It will use a regex by default, but you can also force a literal string match by passing regex=False
# * load_time__gt (Takes an int representing load time in milliseconds.
# Entries with a load time greater than this will be included in the
# results.)
# Parameters that accept a string use a regex by default, but you can also force a literal string match by passing regex=False

# Get the size of the collection we just made #
collection_size = har_page.get_total_size(entries)
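For context, a short usage sketch of the filter documented above. The HAR file name and page ID are placeholders, not part of this change; ``filter_entries`` and ``get_total_size`` come from the README example itself:

    import json
    from haralyzer import HarPage

    with open('humanssuck.net.har', 'r') as f:  # placeholder HAR file name
        har_page = HarPage('page_3', har_data=json.loads(f.read()))  # placeholder page ID

    # GET requests that took longer than 100 ms to load
    slow_gets = har_page.filter_entries(request_type='GET', load_time__gt=100)

    # Total size of the matching responses, as in the surrounding README example
    collection_size = har_page.get_total_size(slow_gets)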
13 changes: 10 additions & 3 deletions haralyzer/assets.py
Expand Up @@ -3,6 +3,7 @@
"""

import datetime
import re

import dateutil
from collections import Counter
@@ -12,7 +13,6 @@
from dateutil import parser

assert parser
import re

from .compat import iteritems
from .errors import PageNotFoundError
@@ -306,15 +306,20 @@ def _get_asset_load(self, asset_type):
content_type=self.asset_types[asset_type]
)

def filter_entries(self, request_type=None, content_type=None,
status_code=None, http_version=None, regex=True):
def filter_entries(
self, request_type=None, content_type=None, status_code=None,
http_version=None, load_time__gt=None, regex=True
):
"""
Returns a ``list`` of entry objects based on the filter criteria.
:param request_type: ``str`` of request type (i.e. - GET or POST)
:param content_type: ``str`` of regex to use for finding content type
:param status_code: ``int`` of the desired status code
:param http_version: ``str`` of HTTP version of request
:param load_time__gt: ``int`` of a load time in milliseconds. If
provided, an entry whose load time is less than this value will
be excluded from the results.
:param regex: ``bool`` indicating whether to use regex or exact match.
"""
results = []
@@ -345,6 +350,8 @@ def filter_entries(self, request_type=None, content_type=None,
if http_version is not None and not p.match_http_version(
entry, http_version, regex=regex):
valid_entry = False
if load_time__gt is not None and entry.get('time') < load_time__gt:
valid_entry = False

if valid_entry:
results.append(entry)
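The new check in ``filter_entries`` reduces to a single comparison against the entry's ``'time'`` field (total load time in milliseconds in the HAR format). A minimal stand-alone sketch of that logic, assuming ``entry`` is a plain HAR entry dict:

    def passes_load_time_filter(entry, load_time__gt=None):
        # Mirrors the new check: drop the entry when a threshold is set and
        # its load time falls below it; otherwise keep it.
        if load_time__gt is not None and entry.get('time') < load_time__gt:
            return False
        return True

    assert passes_load_time_filter({'time': 250}, load_time__gt=100)
    assert not passes_load_time_filter({'time': 50}, load_time__gt=100)
    assert passes_load_time_filter({'time': 50})  # no threshold supplied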
14 changes: 14 additions & 0 deletions tests/test_page.py
@@ -89,6 +89,20 @@ def test_filter_entries(har_data):
entries = page.filter_entries(request_type='.*ET', content_type='image.*',
status_code='3.*')

def test_filter_entries_load_time(har_data):
"""
Tests ability to filter entries by load time
"""
init_data = har_data('humanssuck.net_duplicate_url.har')
page = HarPage(PAGE_ID, har_data=init_data)

entries = page.filter_entries(load_time__gt=100)
assert len(entries) == 4
entries = page.filter_entries(load_time__gt=300)
assert len(entries) == 3
entries = page.filter_entries(load_time__gt=500)
assert len(entries) == 0


def test_get_load_time(har_data):
"""
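The thresholds in the new test follow the filter's semantics: an entry survives when its ``'time'`` value is at or above ``load_time__gt``. A rough way to eyeball those counts against the raw fixture; the on-disk path below is an assumption, adjust it to wherever the test HAR files actually live:

    import json

    # Assumed fixture location; adjust to the test data directory.
    with open('tests/data/humanssuck.net_duplicate_url.har') as f:
        har = json.load(f)

    for threshold in (100, 300, 500):
        # Note: this counts every entry in the log, not just the ones tied to
        # PAGE_ID, so the numbers can differ from the test's assertions.
        surviving = [e for e in har['log']['entries'] if e['time'] >= threshold]
        print(threshold, len(surviving))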
