In [None]:
!pip install haralyzer

import haralyzer
import json
from haralyzer import HarPage
from haralyzer import HarParser

import numpy as np



In [None]:
# MIME types (lower-cased, parameters stripped) that are counted as JavaScript.
# The original check only matched 'application/javascript' and therefore
# missed 'text/javascript', which appears in the analysed captures.
_JAVASCRIPT_MIME_TYPES = frozenset((
    'application/javascript',
    'text/javascript',
    'application/x-javascript',
    'application/ecmascript',
    'text/ecmascript',
))


def _response_content(entry):
    """Return the entry's ``response.content`` dict, or ``{}`` if it is absent."""
    return (entry.get('response') or {}).get('content') or {}


def _mime_type(entry):
    """Return the entry's response MIME type, lower-cased and without parameters.

    For example ``'text/html; charset=utf-8'`` becomes ``'text/html'``.
    A missing or empty value yields ``''``.
    """
    raw = _response_content(entry).get('mimeType') or ''
    return raw.split(';', 1)[0].strip().lower()


def _content_size(entry):
    """Return the entry's response content size, treating missing/negative as 0."""
    size = _response_content(entry).get('size') or 0
    # Defensive clamp: a negative "unknown" marker must not shrink the totals.
    return size if size > 0 else 0


def func(file_name, page_id='page_1'):
    """Print a summary of the page load recorded in a HAR file.

    The report covers, in order: page load time and TTFB (Q1), the number of
    GET requests (Q2), the distinct response MIME types (Q3), the fraction of
    GET requests that are images / JavaScript / HTML+CSS (Q4), and the total
    downloaded size together with image size statistics (Q5).

    Args:
        file_name: Path of the HAR (JSON) file to analyse.
        page_id: Id of the page to report on; matched against each entry's
            ``pageref`` and passed to ``HarPage``. Defaults to ``'page_1'``,
            the value that was previously hard-coded.

    Returns:
        None. All results are written to stdout.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the document has no ``log.entries`` or an entry has no
            ``request.method``.
    """
    # 'utf-8-sig' reads plain UTF-8 and also tolerates a leading BOM, instead
    # of depending on the platform's default text encoding.
    with open(file_name, 'r', encoding='utf-8-sig') as f:
        har_data = json.load(f)

    har_page = HarPage(page_id, har_data=har_data)
    entries = har_data['log']['entries']

    # Q1
    # times are in milliseconds
    # page load time
    print("Page load time: " + str(har_page.page_load_time))

    # TTFB: the 'wait' timing of the first entry that belongs to the page.
    page_load_entry = next(
        (entry for entry in entries if entry.get('pageref') == page_id),
        None,
    )
    if page_load_entry is None:
        print("Page load entry not found")
    else:
        wait_time = (page_load_entry.get('timings') or {}).get('wait')
        if wait_time is None:
            # Previously this case printed nothing (or a misleading
            # "not found" message when 'timings' itself was missing).
            print("TTFB not recorded for the page load entry")
        else:
            print("TTFB:", wait_time)

    # Q2
    get_entries = [
        entry for entry in entries if entry['request']['method'] == 'GET'
    ]
    total_get_requests = len(get_entries)
    print("Total number of HTTP GET requests:", total_get_requests)

    # Q3
    mime_types = {_mime_type(entry) for entry in entries}
    mime_types.discard('')  # entries without a recorded MIME type

    print("Different MIME types observed in the requests:")
    # Sorted so the listing is identical on every run (set order is not).
    for mime_type in sorted(mime_types):
        print(mime_type)

    # Q4
    image_requests = 0
    javascript_requests = 0
    html_css_requests = 0

    for entry in get_entries:
        content_type = _mime_type(entry)
        if content_type.startswith('image/'):
            image_requests += 1
        elif content_type in _JAVASCRIPT_MIME_TYPES:
            javascript_requests += 1
        elif content_type in ('text/html', 'text/css'):
            html_css_requests += 1

    def fraction_of_gets(count):
        # Guard against captures that contain no GET request at all.
        return count / total_get_requests if total_get_requests > 0 else 0

    print("Fraction of GET requests corresponding to images:",
          fraction_of_gets(image_requests))
    print("Fraction of GET requests corresponding to JavaScript:",
          fraction_of_gets(javascript_requests))
    print("Fraction of GET requests corresponding to HTML and CSS:",
          fraction_of_gets(html_css_requests))

    # Q5
    total_size = 0
    image_sizes = []

    for entry in entries:
        content_size = _content_size(entry)
        total_size += content_size
        if _mime_type(entry).startswith('image/'):
            image_sizes.append(content_size)

    image_size = sum(image_sizes)
    image_size_fraction = image_size / total_size if total_size > 0 else 0

    mean_image_size = np.mean(image_sizes) if image_sizes else 0
    median_image_size = np.median(image_sizes) if image_sizes else 0

    # Print the results
    print("Total size of assets downloaded:", total_size)
    print("Fraction of size constituted by images:", image_size_fraction)
    print("Mean size of images:", mean_image_size)
    print("Median size of images:", median_image_size)

In [None]:
# Run the HAR analysis once per captured site. The bare file names are
# resolved relative to the current working directory, and an empty line is
# printed after each report to keep them visually separate.
har_files = [
  'deccan.har',
  'jagaran.har',
  'mit.har',
  'usach.har',
  'sinu.har',
]

for file_name in har_files:
  func(file_name)
  print()

Page load time: 6878.402999998798
TTFB: 60.63600000050337
Total number of HTTP GET requests: 35
Different MIME types observed in the requests:
font/woff2
image/png
image/svg+xml
text/css
image/avif
x-unknown
text/javascript
application/javascript
text/plain
text/html
Fraction of GET requests corresponding to images: 0.2571428571428571
Fraction of GET requests corresponding to JavaScript: 0.3142857142857143
Fraction of GET requests corresponding to HTML and CSS: 0.08571428571428572
Total size of assets downloaded: 3908446
Fraction of size constituted by images: 0.012061059561779797
Mean size of images: 5237.777777777777
Median size of images: 1814.0

Page load time: 1637.8069999991567
TTFB: 50.57699999987892
Total number of HTTP GET requests: 52
Different MIME types observed in the requests:
font/woff2
image/svg+xml
application/json
image/jpeg
text/json
text/css
x-unknown
application/javascript
text/plain
text/html
image/x-icon
Fraction of GET requests corresponding to images: 0.1346153