In [1]:
def segment_ids(events):
    """
    Returns a sorted, flat list with the segment_id's of the given system level events.

    Each row of 'events' holds an iterable of segment ids in its "segments"
    column; these are flattened into a single list and sorted ascending.

    :param events: DataFrame consisting of all system level events
    :return: sorted list containing all segment_id's in 'events' (empty list if 'events' is empty)
    """
    if events.empty:
        return []

    # Iterate the column directly rather than calling Series.apply only for
    # its side effects (which also builds a throwaway Series of None values).
    return sorted(seg_id for row_segments in events["segments"] for seg_id in row_segments)

In [2]:
def segment_level_event_ids(event_log, segments, tresholds, slow=True, instant=False):
    """
    Collects the ids of all segment level events that satisfy the duration criterion

    Exactly one criterion is applied per call, with 'instant' taking precedence over 'slow':
    duration == 0 (instant), duration >= treshold (slow) or duration <= treshold (otherwise).

    :param event_log: event log passed on to segment_level_events
    :param segments: segments to compute events for; tresholds[i] belongs to the i-th segment
    :param tresholds: per-segment duration tresholds (not read when 'instant' is True)
    :param slow: Boolean that indicates whether we want unusually slow (True) or fast (False) events
    :param instant: Boolean that indicates whether we want events with a duration of exactly 0
    :return: list of ids of all segment level events meeting the criteria
    """
    matching_ids = []

    for index, segment in enumerate(segments):
        events = segment_level_events(event_log, segment)

        # Nothing to select from for this segment.
        if events.empty:
            continue

        name_matches = events['segment_name'] == segment
        if instant:
            duration_matches = events['duration'] == 0
        elif slow:
            duration_matches = events['duration'] >= tresholds[index]
        else:
            duration_matches = events['duration'] <= tresholds[index]

        selected = events.loc[name_matches & duration_matches, 'segment_id']
        matching_ids.extend(selected.to_list())

    return matching_ids

In [3]:
def add_instant(event_log, segments):
    """
    Marks all instant segment level events in the event log's PS

    Looks up the ids of the segment level events with duration 0 and stores the
    result as a boolean "instant" column via add_event_type. Progress is printed.

    :param event_log: tuple (log, name): the event log and the name shown in the progress output
    :param segments: list of segments to compute events for
    """
    log, name = event_log

    print("get instant ids")
    # No tresholds are needed: the instant criterion never reads them.
    instant_ids = segment_level_event_ids(log, segments, None, instant=True)

    print("add event type instant", name, end=" ")
    add_event_type(log, "instant", instant_ids)
    print("Done")

In [4]:
def add_delayed(event_log, segments, duration_tresholds, occurence_tresholds):
    """
    Computes delayed system level events and adds their type columns to the event log's PS

    One boolean column "delayed<oc>" is added for every entry 'oc' of 'occurence_tresholds'.

    :param event_log: tuple (log, name): the event log and the name shown in the progress output
    :param segments: list of segments to compute events for
    :param duration_tresholds: list of duration tresholds, one per segment (same order as 'segments'),
                               handed to system_level_events_delayed
    :param occurence_tresholds: list of tresholds for the minimal number of cases ("nr_cases")
                                that must be in the system level events
    """
    log, name = event_log
    # One id bucket per occurence treshold, filled across all segments.
    ids_per_treshold = [[] for _ in occurence_tresholds]

    print("processing segment ", end=" ")
    for position, segment in enumerate(segments):
        print(str(position), end=" ")
        segment_events = segment_level_events(log, segment)
        delayed_events = system_level_events_delayed(segment_events, 1, duration_tresholds[position])

        if delayed_events.empty:
            continue

        for bucket, min_cases in zip(ids_per_treshold, occurence_tresholds):
            has_enough_cases = delayed_events["nr_cases"] >= min_cases
            bucket.extend(segment_ids(delayed_events[has_enough_cases]))

    print("Done")
    for min_cases, bucket in zip(occurence_tresholds, ids_per_treshold):
        column_name = "delayed" + str(min_cases)
        print("add event type " + column_name, name, end=" ")
        add_event_type(log, column_name, bucket)
        print("Done")

In [5]:
def add_batching_on_end(event_log, segments, batching_tresholds, occurence_classes=None):
    """
    Computes batching on end system level events and adds their type columns to the event log's PS

    Adds a boolean "batching" column and, for every entry 'oc' of 'occurence_classes',
    an additional boolean "batching<oc>" column.

    :param event_log: tuple (log, name): the event log and the name shown in the progress output
    :param segments: list of segments to compute events for
    :param batching_tresholds: list of tresholds, one per segment (same order as 'segments');
                               a system level event counts as batching when its "nr_cases"
                               is strictly greater than the treshold
    :param occurence_classes: optional list of minimal numbers of cases ("nr_cases" >= class);
                              None (the default) means no additional class columns
    """
    log, name = event_log
    # A None default avoids sharing one mutable list object between calls.
    occurence_classes = [] if occurence_classes is None else list(occurence_classes)

    ids = []
    ids_classes = [[] for _ in occurence_classes]

    print("processing segment ", end=" ")
    for i, segment in enumerate(segments):
        print(str(i), end=" ")
        seg_events = segment_level_events(log, segment)
        sys_events = system_level_events_batching_on_end(seg_events, 1)

        if sys_events.empty:
            continue

        nr_cases = sys_events["nr_cases"]
        ids += segment_ids(sys_events[nr_cases > batching_tresholds[i]])

        # With no classes this loop simply does not run, so no extra guard is needed.
        for class_ids, oc in zip(ids_classes, occurence_classes):
            class_ids.extend(segment_ids(sys_events[nr_cases >= oc]))

    print("Done")
    column_name = "batching"
    print("add event type " + column_name, name, end=" ")
    add_event_type(log, column_name, ids)
    print("Done")

    for oc, class_ids in zip(occurence_classes, ids_classes):
        column_name = "batching" + str(oc)
        print("add event type " + column_name, name, end=" ")
        add_event_type(log, column_name, class_ids)
        print("Done")

In [6]:
def add_batching_on_start(event_log, segments, high_load_tresholds, occurence_classes=None):
    """
    Computes batching on start system level events and adds their type columns to the event log's PS

    Adds a boolean "high_load" column and, for every entry 'oc' of 'occurence_classes',
    an additional boolean "high_load<oc>" column.

    :param event_log: tuple (log, name): the event log and the name shown in the progress output
    :param segments: list of segments to compute events for
    :param high_load_tresholds: list of tresholds, one per segment (same order as 'segments');
                                a system level event counts as high load when its "nr_cases"
                                is strictly greater than the treshold
    :param occurence_classes: optional list of minimal numbers of cases ("nr_cases" >= class);
                              None (the default) means no additional class columns
    """
    log, name = event_log
    # A None default avoids sharing one mutable list object between calls.
    occurence_classes = [] if occurence_classes is None else list(occurence_classes)

    ids = []
    ids_classes = [[] for _ in occurence_classes]

    print("processing segment ", end=" ")
    for i, segment in enumerate(segments):
        print(str(i), end=" ")
        seg_events = segment_level_events(log, segment)
        sys_events = system_level_events_batching_on_start(seg_events, 1)

        if sys_events.empty:
            continue

        nr_cases = sys_events["nr_cases"]
        ids += segment_ids(sys_events[nr_cases > high_load_tresholds[i]])

        # With no classes this loop simply does not run, so no extra guard is needed.
        for class_ids, oc in zip(ids_classes, occurence_classes):
            class_ids.extend(segment_ids(sys_events[nr_cases >= oc]))

    print("Done")
    column_name = "high_load"
    print("add event type " + column_name, name, end=" ")
    add_event_type(log, column_name, ids)
    print("Done")

    for oc, class_ids in zip(occurence_classes, ids_classes):
        column_name = "high_load" + str(oc)
        print("add event type " + column_name, name, end=" ")
        add_event_type(log, column_name, class_ids)
        print("Done")

In [7]:
def add_high_workload(event_log, segments, high_workload_tresholds, workload_classes=None, same_user=True):
    """
    Computes high workload system level events and adds their type columns to the event log's PS

    Adds a boolean "workload" column (or "workload_hand" when 'same_user' is False) and,
    for every entry 'wc' of 'workload_classes', an additional column with 'wc' appended
    to that name.

    :param event_log: tuple (log, name): the event log and the name shown in the progress output
    :param segments: list of segments to compute events for
    :param high_workload_tresholds: list of tresholds, one per segment (same order as 'segments');
                                    a system level event is selected when its ratio column is
                                    strictly greater than the treshold
    :param workload_classes: optional list of minimal ratio values (ratio >= class);
                             None (the default) means no additional class columns
    :param same_user: True compares the "ratio_workload" column and writes "workload" columns,
                      False compares "ratio_start_end" and writes "workload_hand" columns
    """
    log, name = event_log
    # A None default avoids sharing one mutable list object between calls.
    workload_classes = [] if workload_classes is None else list(workload_classes)

    # 'same_user' decides both which ratio is compared and how the columns are named.
    ratio_column = "ratio_workload" if same_user else "ratio_start_end"
    column_prefix = "workload" if same_user else "workload_hand"

    ids = []
    ids_classes = [[] for _ in workload_classes]

    print("processing segment ", end=" ")
    for i, segment in enumerate(segments):
        print(str(i), end=" ")
        seg_events = segment_level_events(log, segment)
        sys_events = system_level_events_high_workload(seg_events, 1)

        if sys_events.empty:
            continue

        ratios = sys_events[ratio_column]
        ids += segment_ids(sys_events[ratios > high_workload_tresholds[i]])

        # With no classes this loop simply does not run, so no extra guard is needed.
        for class_ids, wc in zip(ids_classes, workload_classes):
            class_ids.extend(segment_ids(sys_events[ratios >= wc]))

    print("Done")
    column_name = column_prefix
    print("add event type " + column_name, name, end=" ")
    add_event_type(log, column_name, ids)
    print("Done")

    for wc, class_ids in zip(workload_classes, ids_classes):
        column_name = column_prefix + str(wc)
        print("add event type " + column_name, name, end=" ")
        add_event_type(log, column_name, class_ids)
        print("Done")

In [8]:
def add_event_type(event_log, event_type, segment_ids):
    """
    Adds (or overwrites) a boolean event type column in the event log's PS

    The column is True for every row whose "segment_id" occurs in 'segment_ids'.
    The frame held in 'event_log.pf' is modified in place.

    :param event_log: event log whose performance spectrum ('pf' attribute) is altered
    :param event_type: type of system level event, used as the name of the column
    :param segment_ids: list of segment ids corresponding to the system level events
    """
    spectrum = event_log.pf
    belongs_to_event = spectrum["segment_id"].isin(segment_ids)
    spectrum[event_type] = belongs_to_event

In [9]:
def save_performance_spectrum(event_log, percentile):
    """
    Pickles an event log's PS to disk for fast access

    The dump is written to 'output/dumps/<name>_pf<percentile>'.

    :param event_log: tuple (log, name): the event log whose 'pf' is saved and the name used in the filename
    :param percentile: percentile used for duration tresholds, will be printed behind filename
    """
    log, name = event_log
    dump_path = 'output/dumps/' + name + "_pf" + str(percentile)

    with open(dump_path, 'wb+') as output:
        # A negative protocol number selects the highest pickle protocol available.
        pickle.dump(log.pf, output, protocol=-1)

In [10]:
def load_performance_spectrum(event_log, percentile):
    """
    loads a saved event log's PS from disk

    Reads 'output/dumps/<name>_pf<percentile>' and stores the unpickled object in 'log.pf'.

    :param event_log: tuple (log, name): the event log that receives the PS and the name used in the filename
    :param percentile: percentile used for duration tresholds, will be printed behind filename
    """
    log, name = event_log
    dump_path = 'output/dumps/' + name + "_pf" + str(percentile)

    # The context manager closes the file handle even when unpickling fails.
    # NOTE: unpickling can execute arbitrary code; only load dumps written by a trusted source.
    with open(dump_path, 'rb') as dump_file:
        log.pf = pickle.load(dump_file)

In [11]:
def date_formatter(first):
    """
    Builds a FuncFormatter that turns an hour offset into a date string

    A tick value is interpreted as hours since 'first'; it is floored to whole
    days, added to 'first' and rendered as "YYYY-MM-DD".

    :param first: first date in the event log (datetime object)
    :return: FuncFormatter that converts hours to date strings
    """
    def hours_to_date(x_val, tick_pos):
        # 'tick_pos' is part of the formatter callback signature but is not used.
        whole_days = math.floor(x_val/24)
        tick_date = first + datetime.timedelta(days=whole_days)
        return tick_date.strftime("%Y-%m-%d")

    return FuncFormatter(hours_to_date)

In [12]:
def classify(performance_spectrum, classifier, complete):
    """
    Derives the 'class' column of a PS with the classifier function

    The input frame is left untouched; the work is done on a copy.

    :param performance_spectrum: performance spectrum to work with
    :param classifier: function that returns the class given a row of the PS
    :param complete: Boolean; True keeps every row, False keeps only rows with a class >= 0
    :return: copy of 'performance_spectrum' with derived 'class' column
    """
    classified = performance_spectrum.copy()

    # The column exists (as -1) before the classifier runs, so every row
    # handed to the classifier already carries a 'class' entry.
    classified["class"] = -1
    classified["class"] = classified.apply(classifier, axis=1)

    if complete:
        return classified

    is_classified = classified["class"] >= 0
    return classified[is_classified]

In [1]:
def plot_performance_spectrum(performance_spectrum, segments, classifier, height, complete=False):
    """
    Draws the classified performance spectrum with Matplotlib

    :param performance_spectrum: performance spectrum to plot
    :param segments: segments to plot
    :param classifier: dictionary containing classifier function ("classifier") and colors ("colors")
    :param height: height of the figure (the width is fixed at 20)
    :param complete: Boolean that indicates if we want to plot all non-classified segments (True) or not (False)
    """
    classify_row = classifier["classifier"]
    palette = classifier["colors"]

    classified = classify(performance_spectrum, classify_row, complete)
    spectrum = Performance_Spectrum.Spectrum(segments, classified)

    _, axes = plt.subplots(figsize=[20, height])
    hours_to_date = date_formatter(FIRST)

    spectrum.plot_performance_spectrum(palette, axes, label_offset=-50, vis_mask=True)

    # One tick every 168 x units, i.e. weekly if the x axis is in hours
    # (which is what date_formatter assumes).
    right_edge = round(axes.get_xlim()[1])
    axes.set_xticks(np.arange(0, right_edge, 168))
    # Labels are set first to apply the rotation; the formatter then supplies the text.
    axes.set_xticklabels(axes.get_xticks(), rotation = 90)
    axes.xaxis.set_major_formatter(hours_to_date)

In [14]:
def plot_interactive(performance_spectrum, segments, classifier, complete=False):
    """
    Prepares the data for plotting interactively with plotly

    Builds one Scattergl trace per class color and black horizontal lines
    taken from the spectrum's 'y_s' entries.

    :param performance_spectrum: performance spectrum to plot
    :param segments: segments to plot
    :param classifier: dictionary containing classifier function ("classifier") and colors ("colors")
    :param complete: Boolean that indicates if we want to plot all non-classified segments (True) or not (False)
    :return: list of go.Scattergl traces
    """
    cf = classifier["classifier"]
    class_colors = classifier["colors"]

    vis = Performance_Spectrum.Spectrum(segments, performance_spectrum)
    pfs = classify(vis.pf, cf, complete)

    pfs = vis.build_coordinates(pfs, 'start_time', 'end_time')

    # NOTE(review): scaling by 256 maps a channel value of 1.0 to 256, outside the
    # usual 0-255 range; 255 may have been intended. Left unchanged, confirm before altering.
    class_colors = [f'rgb({cc[0]*256},{cc[1]*256},{cc[2]*256})' for cc in class_colors]

    # The all-None 'fill' column puts a None after every (start, end) pair in the
    # flattened coordinate lists; presumably this breaks the line between segments.
    pfs['fill'] = None
    data = []
    for cl, color in enumerate(class_colors):
        # Filter once per class instead of once per coordinate list.
        in_class = pfs[pfs['class'] == float(cl)]
        xlist = list(in_class[['start_time', 'end_time', 'fill']].values.flatten())
        ylist = list(in_class[['start_y', 'end_y', 'fill']].values.flatten())
        data.append(go.Scattergl(x=xlist, y=ylist, line=dict(width=.5, color=color)))

    # Loop-invariant, so computed once instead of once per horizontal line.
    x_max = max(pfs['end_time'])
    for y in vis.y_s:
        data.append(go.Scattergl(mode='lines', x=[0, x_max], y=[y[0], y[0]], line=dict(width=.5, color='black'), showlegend=False))
    # Deliberately after the loop: adds the y[1] line of the LAST entry of vis.y_s only.
    # NOTE(review): raises NameError when vis.y_s is empty.
    data.append(go.Scattergl(mode='lines', x=[0, x_max], y=[y[1], y[1]], line=dict(width=.5, color='black'), showlegend=False))

    return data