
Commit

Merge pull request #31 from mrname/urls_taken_more_time
Urls taken more time
mrname committed Oct 11, 2019
2 parents f537a1b + 518c366 commit 2bcfb77
Showing 3 changed files with 28 additions and 4 deletions.
5 changes: 4 additions & 1 deletion README.rst
@@ -198,7 +198,10 @@ easily produced using the public methods of ``HarParser`` and ``HarPage``::
# * status_code ('200' for example)
# * request_type ('GET' for example)
# * http_version ('HTTP/1.1' for example)
# It will use a regex by default, but you can also force a literal string match by passing regex=False
# * load_time__gt (Takes an int representing load time in milliseconds.
# Entries with a load time greater than this will be included in the
# results.)
# Parameters that accept a string use a regex by default, but you can also force a literal string match by passing regex=False

# Get the size of the collection we just made #
collection_size = har_page.get_total_size(entries)
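For context, a short usage sketch of the filter documented above. The HAR file name and page ID are placeholders, not part of this change; ``filter_entries`` and ``get_total_size`` come from the README example itself:

    import json
    from haralyzer import HarPage

    with open('humanssuck.net.har', 'r') as f:  # placeholder HAR file name
        har_page = HarPage('page_3', har_data=json.loads(f.read()))  # placeholder page ID

    # GET requests that took longer than 100 ms to load
    slow_gets = har_page.filter_entries(request_type='GET', load_time__gt=100)

    # Total size of the matching responses, as in the surrounding README example
    collection_size = har_page.get_total_size(slow_gets)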
13 changes: 10 additions & 3 deletions haralyzer/assets.py
Expand Up @@ -3,6 +3,7 @@
"""

import datetime
import re

import dateutil
from collections import Counter
@@ -12,7 +13,6 @@
from dateutil import parser

assert parser
import re

from .compat import iteritems
from .errors import PageNotFoundError
@@ -306,15 +306,20 @@ def _get_asset_load(self, asset_type):
content_type=self.asset_types[asset_type]
)

def filter_entries(self, request_type=None, content_type=None,
status_code=None, http_version=None, regex=True):
def filter_entries(
self, request_type=None, content_type=None, status_code=None,
http_version=None, load_time__gt=None, regex=True
):
"""
Returns a ``list`` of entry objects based on the filter criteria.
:param request_type: ``str`` of request type (i.e. - GET or POST)
:param content_type: ``str`` of regex to use for finding content type
:param status_code: ``int`` of the desired status code
:param http_version: ``str`` of HTTP version of request
:param load_time__gt: ``int`` of a load time in milliseconds. If
provided, an entry whose load time is less than this value will
be excluded from the results.
:param regex: ``bool`` indicating whether to use regex or exact match.
"""
results = []
@@ -345,6 +350,8 @@ def filter_entries(self, request_type=None, content_type=None,
if http_version is not None and not p.match_http_version(
entry, http_version, regex=regex):
valid_entry = False
if load_time__gt is not None and entry.get('time') < load_time__gt:
valid_entry = False

if valid_entry:
results.append(entry)
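The new check in ``filter_entries`` reduces to a single comparison against the entry's ``'time'`` field (total load time in milliseconds in the HAR format). A minimal stand-alone sketch of that logic, assuming ``entry`` is a plain HAR entry dict:

    def passes_load_time_filter(entry, load_time__gt=None):
        # Mirrors the new check: drop the entry when a threshold is set and
        # its load time falls below it; otherwise keep it.
        if load_time__gt is not None and entry.get('time') < load_time__gt:
            return False
        return True

    assert passes_load_time_filter({'time': 250}, load_time__gt=100)
    assert not passes_load_time_filter({'time': 50}, load_time__gt=100)
    assert passes_load_time_filter({'time': 50})  # no threshold supplied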
14 changes: 14 additions & 0 deletions tests/test_page.py
@@ -89,6 +89,20 @@ def test_filter_entries(har_data):
entries = page.filter_entries(request_type='.*ET', content_type='image.*',
status_code='3.*')

def test_filter_entries_load_time(har_data):
"""
Tests ability to filter entries by load time
"""
init_data = har_data('humanssuck.net_duplicate_url.har')
page = HarPage(PAGE_ID, har_data=init_data)

entries = page.filter_entries(load_time__gt=100)
assert len(entries) == 4
entries = page.filter_entries(load_time__gt=300)
assert len(entries) == 3
entries = page.filter_entries(load_time__gt=500)
assert len(entries) == 0


def test_get_load_time(har_data):
"""
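The thresholds in the new test follow the filter's semantics: an entry survives when its ``'time'`` value is at or above ``load_time__gt``. A rough way to eyeball those counts against the raw fixture; the on-disk path below is an assumption, adjust it to wherever the test HAR files actually live:

    import json

    # Assumed fixture location; adjust to the test data directory.
    with open('tests/data/humanssuck.net_duplicate_url.har') as f:
        har = json.load(f)

    for threshold in (100, 300, 500):
        # Note: this counts every entry in the log, not just the ones tied to
        # PAGE_ID, so the numbers can differ from the test's assertions.
        surviving = [e for e in har['log']['entries'] if e['time'] >= threshold]
        print(threshold, len(surviving))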
