### Read lines of log file and compute the average runtime for all operations ###

In [1]:
# Import package(s).
import os
import numpy as np
from datetime import datetime

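**Content of Log File `operations.txt`**<br>
Entries are not in chronological order (the `DEF` End entry appears last), so Start and End lines are matched by operation name.<br>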

2017-02-01T10:00 Operation ABC Start<br>
2017-02-01T10:01 Operation ABC End<br>
2017-02-01T10:02 Operation DEF Start<br>
2017-02-01T10:08 Operation XYZ Start<br>
2017-02-01T20:09 Operation WXY Start<br>
2017-02-01T20:10 Operation XYZ End<br>
2017-02-01T20:12 Operation WXY End<br>
2017-02-01T10:05 Operation DEF End

In [2]:
# Declare variable(s).

# Maps Operation Name -> (Start Time, End Time, Duration in minutes).
# Populated by the log-parsing cell.
dict_operations = {}

# Placeholder; rebuilt from dict_operations once the log has been parsed.
lst_durations = []

# Timestamp layout used in the log file. E.g., 2017-02-01T10:00
DateTimeFormat = "%Y-%m-%dT%H:%M"

# Directory holding the log file. Taken from the DOWNLOADS_PATH environment
# variable; falls back to ~/Downloads when the variable is not set, because
# os.path.join() raises a TypeError when handed None.
downloads_dir_path = os.getenv('DOWNLOADS_PATH',
                               os.path.join(os.path.expanduser('~'), 'Downloads'))

# Full path of the log file to be read.
operations_file_path = os.path.join(downloads_dir_path, 'operations.txt')

In [3]:
# Read the log line by line and record, per Operation Name, a tuple of
# (Start Time, End Time, Duration in minutes). Start and End entries may
# appear in any order; Duration stays None until both have been seen.

# Open file in read-only mode.
with open(operations_file_path, 'r') as operations:

    for operation in operations:

        # Print current line.
        print(operation, end='')

        # Split items of line by whitespace.
        operation_items = operation.split()

        # Skip blank or truncated lines (e.g., a trailing empty line), which
        # would otherwise raise an IndexError below.
        if len(operation_items) < 4:
            continue

        # Capture Operation Time. E.g., 2017-02-01T10:00
        operation_time = operation_items[0]

        # Capture Operation Name. E.g., ABC
        operation_name = operation_items[2]

        # Capture Operation Type. E.g., Start
        operation_type = operation_items[3]

        # Ignore lines that are neither a Start nor an End entry; they carry
        # no timing information and, for an unseen Operation, would otherwise
        # raise a KeyError when the entry is looked up.
        if operation_type not in ('Start', 'End'):
            continue

        # Retrieve previously captured Start Time / End Time, if any.
        start_time, end_time = dict_operations.get(operation_name, (None, None, None))[:2]

        # Capture the new Start Time or End Time, retaining the other one.
        if operation_type == 'Start':
            start_time = operation_time
        else:
            end_time = operation_time

        # Duration remains None until both Start Time and End Time exist.
        duration_minutes = None

        # When Start Time and End Time exist, calculate Duration (in minutes).
        if start_time and end_time:

            # Compute Duration: End Time - Start Time
            duration = (datetime.strptime(end_time, DateTimeFormat)
                        - datetime.strptime(start_time, DateTimeFormat))

            # Extract Minutes from Duration.
            duration_minutes = duration.total_seconds() / 60

        # Store Start Time, End Time and Duration.
        dict_operations[operation_name] = (start_time, end_time, duration_minutes)

# Print captured content.
dict_operations

2017-02-01T10:00 Operation ABC Start
2017-02-01T10:01 Operation ABC End
2017-02-01T10:02 Operation DEF Start
2017-02-01T10:08 Operation XYZ Start
2017-02-01T20:09 Operation WXY Start
2017-02-01T20:10 Operation XYZ End
2017-02-01T20:12 Operation WXY End
2017-02-01T10:05 Operation DEF End

{'ABC': ('2017-02-01T10:00', '2017-02-01T10:01', 1.0),
 'DEF': ('2017-02-01T10:02', '2017-02-01T10:05', 3.0),
 'XYZ': ('2017-02-01T10:08', '2017-02-01T20:10', 602.0),
 'WXY': ('2017-02-01T20:09', '2017-02-01T20:12', 3.0)}

In [4]:
# Create list of all Duration entries. Operations whose Duration is still
# None (Start or End entry missing from the log) are left out, because a
# None value cannot be averaged.
lst_durations = [entry[2] for entry in dict_operations.values() if entry[2] is not None]

# Print list of Duration entries.
lst_durations

[1.0, 3.0, 602.0, 3.0]

In [5]:
# Average the collected Durations (values are in minutes).
average_run_time = np.asarray(lst_durations).mean()

# Report the Average Run Time.
print(f'Average Run Time: {average_run_time} Minutes')

Average Run Time: 152.25 Minutes
