In [1]:
def resource(system_level_events):
    """
    Adds resource statistics to the system level events given.

    The events are grouped by ``segment_name``. For every segment the raw values of
    each ratio column are collected in a list, the number of events is stored in
    ``count`` and the mean and standard deviation (numpy default, i.e. population
    standard deviation) of every ratio column are added as ``mean_*`` / ``std_*``.

    :param system_level_events: pandas DataFrame containing all system level events;
        it is expected to provide ``segment_name`` and the ``ratio_*`` columns
        aggregated below
    :return: pandas DataFrame indexed by ``segment_name`` with one line for all system
        level events of one segment (columns: ``count``, the ``mean_*``/``std_*`` pairs,
        then the ``ratio_*`` lists); a copy of the input if the input is empty
    """
    ratio_columns = [
        "ratio_handovers", "ratio_same_user", "ratio_start_end",
        "ratio_user1_start", "ratio_user1_end", "ratio_workload",
    ]
    # Order in which the mean/std pairs appear in the result.
    statistic_names = ["handovers", "same_user", "user1_start", "user1_end", "start_end", "workload"]

    log = system_level_events.copy()

    if log.empty:
        return log

    grouped = log.groupby('segment_name')
    log = grouped[ratio_columns].agg(list).join(grouped.size().rename('count'))

    if log.empty:
        return log

    statistic_columns = []
    for name in statistic_names:
        values_per_segment = log["ratio_" + name]
        # Each segment is evaluated on its own list: segments usually have different
        # numbers of events, so the lists cannot be stacked into one 2-D array.
        log["mean_" + name] = [np.mean(values) for values in values_per_segment]
        log["std_" + name] = [np.std(values) for values in values_per_segment]
        statistic_columns += ["mean_" + name, "std_" + name]

    # Summary columns first, raw per-event lists last.
    return log[["count"] + statistic_columns + ratio_columns]

In [2]:
def combine_resources(event_log, segments, tresholds, event_type="batching"):
    """
    Combines resource rows from different segments in a pandas DataFrame

    For every segment the segment level events are computed, turned into system level
    events according to ``event_type`` and summarised with ``resource``. Segments
    without system level events are left out of the result. Progress (the index of
    the segment being processed) is printed on a single line.

    :param event_log: event log used (Performance_Spectrum.EventLog)
    :param segments: list of segments to compute resource statistics for
    :param tresholds: thresholds for the computation of system level events, indexed
        like ``segments``; not read when ``event_type`` is 'high_workload'
    :param event_type: type of system level event: 'high_load' uses
        ``system_level_events_batching_on_start``, 'high_workload' uses
        ``system_level_events_high_workload``, any other value (default 'batching')
        uses ``system_level_events_batching_on_end``
    :return: pandas DataFrame containing all resource statistics for all segments, with
        a leading ``id`` column holding the index of the segment in ``segments``;
        an empty DataFrame if no segment produced system level events
    """
    frames = []
    segment_ids = []
    print("segment:", end = " ")
    for i, segment in enumerate(segments):
        print(str(i), end = " ")
        segs = segment_level_events(event_log, segment)

        if event_type == "high_load":
            events = system_level_events_batching_on_start(segs, tresholds[i])
        elif event_type == "high_workload":
            events = system_level_events_high_workload(segs)
        else:
            events = system_level_events_batching_on_end(segs, tresholds[i])

        if events.empty:
            continue

        resources = resource(events)
        frames.append(resources)
        # Remember the index of the originating segment for every row, so that the id
        # still identifies the segment when earlier segments were skipped.
        segment_ids.extend([i] * len(resources))

    if not frames:
        return pd.DataFrame()

    df = pd.concat(frames)
    df.insert(0, "id", segment_ids)
    return df
                            

In [3]:
def _summary_events(event_log, segments, tresholds, index, event_type):
    """
    Recomputes the system level events of ``segments[index]``, selecting the
    computation by ``event_type`` with the same rules as ``combine_resources``.
    """
    segs = segment_level_events(event_log, segments[index])
    if event_type == "high_load":
        return system_level_events_batching_on_start(segs, tresholds[index])
    if event_type == "high_workload":
        return system_level_events_high_workload(segs)
    return system_level_events_batching_on_end(segs, tresholds[index])


def _ranked_by_std(frame, std_column):
    """Returns the (id, std) pairs of ``frame``, largest ``std_column`` value first."""
    ranked = list(zip(frame["id"], frame[std_column]))
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked


def _print_ratio_ranges(ranked, ratio_column, event_log, segments, tresholds, event_type):
    """Prints minimum, maximum and std of ``ratio_column`` for every ranked segment."""
    for index, std in ranked:
        events = _summary_events(event_log, segments, tresholds, index, event_type)
        print(segments[index], "min:", events[ratio_column].min(), "max:", events[ratio_column].max(), "std:", std)


def resource_summary(event_log, segments, tresholds, event_type="batching"):
    """
    Prints a summary of a DataFrame containing resource statistics

    The statistics come from ``combine_resources``. Segments are split by whether
    user1 dominates (mean ratio >= 0.9) the start and/or end activity; the segments
    dominated by other users are split into 'same user' (mean_same_user >= 0.70) and
    'handover' segments. For same user segments with std_workload > 2 and handover
    segments with std_start_end > 2 the ids (sorted by descending std) and the
    minimum/maximum of the corresponding ratio are printed. The ``id`` column of the
    combined statistics is used as an index into ``segments`` and ``tresholds``.

    :param event_log: event log used (Performance_Spectrum.EventLog)
    :param segments: list of segments to compute resource statistics for
    :param tresholds: thresholds for the computation of system level events
    :param event_type: type of system level event, interpreted as in combine_resources
    :return: None, the summary is only printed
    """
    resource_sum = combine_resources(event_log, segments, tresholds, event_type)

    # Without any system level events there are no statistic columns to split on.
    if resource_sum.empty:
        print("no system level events found")
        return

    # Split resource summary based on user1 being heavily present in either the start or end activity in segment
    user_11 = resource_sum[(resource_sum["mean_user1_start"] >= 0.9) & (resource_sum["mean_user1_end"] >= 0.9)]
    user_1o = resource_sum[(resource_sum["mean_user1_start"] >= 0.9) & (resource_sum["mean_user1_end"] < 0.9)]
    user_o1 = resource_sum[(resource_sum["mean_user1_start"] < 0.9) & (resource_sum["mean_user1_end"] >= 0.9)]
    user_oo = resource_sum[(resource_sum["mean_user1_start"] < 0.9) & (resource_sum["mean_user1_end"] < 0.9)]
    print("user_11:", len(user_11), "user_1o:", len(user_1o), "user_o1:",  len(user_o1), "user_oo:",  len(user_oo))

    # Split 'other-to-other' segments in 'same users' and 'handovers' segments
    user_oo_same = user_oo[user_oo["mean_same_user"] >= 0.70]
    user_oo_handover = user_oo[user_oo["mean_same_user"] < 0.70]
    print("user_oo same user:", len(user_oo_same),"user_oo handover:", len(user_oo_handover))

    # Extract the 'significantly' varying segments from both groups
    user_oo_same_sign = user_oo_same[user_oo_same["std_workload"] > 2]
    user_oo_handover_sign = user_oo_handover[user_oo_handover["std_start_end"] > 2]

    # Export segment ids for significant same user segments
    same_ranked = _ranked_by_std(user_oo_same_sign, "std_workload")
    print("same user sign ids:", [index for index, _ in same_ranked])

    # Export segment ids for significant handover segments; ranked by the same
    # statistic that selected them (std of the start/end ratio)
    handover_ranked = _ranked_by_std(user_oo_handover_sign, "std_start_end")
    print("handover sign ids:", [index for index, _ in handover_ranked])

    # print minimum and maximum workload for same user segments
    print("Workload for same user segments")
    _print_ratio_ranges(same_ranked, "ratio_workload", event_log, segments, tresholds, event_type)

    # print minimum and maximum start/end ratio for handover segments
    print("start/end ratio for handover segments")
    _print_ratio_ranges(handover_ranked, "ratio_start_end", event_log, segments, tresholds, event_type)
    

In [4]:
def non_handover_segments(event_log, segments, tresholds, event_type="high_workload"):
    """
    Computes all segments that contain mainly same user segment level events, excluding user 1

    A segment is selected when user1 does not dominate its start or end activity
    (both mean ratios < 0.9) and its mean same-user ratio is at least 0.70. The number
    of selected segments is printed.

    :param event_log: event log used (Performance_Spectrum.EventLog)
    :param segments: list of segments to compute resource statistics for
    :param tresholds: thresholds for the computation of system level events, passed on
        to combine_resources (not read there for the default 'high_workload')
    :param event_type: type of system level event, interpreted as in combine_resources
    :return: list with the selected entries of ``segments``
    """
    resource_sum = combine_resources(event_log, segments, tresholds, event_type)

    # No system level events at all: there are no statistic columns to filter on.
    if resource_sum.empty:
        print(0)
        return []

    other_users = resource_sum[(resource_sum["mean_user1_start"] < 0.9) & (resource_sum["mean_user1_end"] < 0.9)]
    same_user = other_users[other_users["mean_same_user"] >= 0.70]
    segment_ids = list(same_user["id"])
    print(len(segment_ids))
    # The ids are used as positions in the segments that were passed in.
    return [segments[i] for i in segment_ids]

In [5]:
def handover_segments(event_log, segments, tresholds, event_type="high_workload"):
    """
    Computes all segments that contain mainly handover segment level events, excluding user 1

    A segment is selected when user1 does not dominate its start or end activity
    (both mean ratios < 0.9) and its mean same-user ratio is below 0.70. The number
    of selected segments is printed.

    :param event_log: event log used (Performance_Spectrum.EventLog)
    :param segments: list of segments to compute resource statistics for
    :param tresholds: thresholds for the computation of system level events, passed on
        to combine_resources (not read there for the default 'high_workload')
    :param event_type: type of system level event, interpreted as in combine_resources
    :return: list with the selected entries of ``segments``
    """
    resource_sum = combine_resources(event_log, segments, tresholds, event_type)

    # No system level events at all: there are no statistic columns to filter on.
    if resource_sum.empty:
        print(0)
        return []

    other_users = resource_sum[(resource_sum["mean_user1_start"] < 0.9) & (resource_sum["mean_user1_end"] < 0.9)]
    handover = other_users[other_users["mean_same_user"] < 0.70]
    segment_ids = list(handover["id"])
    print(len(segment_ids))
    # The ids are used as positions in the segments that were passed in.
    return [segments[i] for i in segment_ids]