# Peer Mobbing

## Description
Peer mobbing occurs when an agent, or a group of agents, chooses to degrade the performance of a colleague or a group of colleagues.

In [1]:
import pandas as pd
import numpy as np
import altair as alt
# Switch Altair to the 'vegafusion' data transformer for every chart in this notebook.
# NOTE(review): presumably needed so that charts accept frames larger than
# Altair's default row limit — confirm.
alt.data_transformers.enable('vegafusion')

DataTransformerRegistry.enable('vegafusion')

In [2]:
# Key of the event log to analyse; has to be one of the keys of `fn_logs`.
log_name = 'propr'

# Preprocessed CSV file for every supported event log, keyed by log name.
fn_logs = dict(
    bpic15='../data/preproc/bpic15.csv',
    bpic17='../data/preproc/bpic17.csv',
    propr='../data/preproc/proprietary.csv',
)

In [3]:
# Look up the CSV path of the selected log and read it into a DataFrame.
log_path = fn_logs[log_name]
log = pd.read_csv(log_path)
log

Unnamed: 0,activity,case_id,start timestamp,complete timestamp,resource,Weekday,"School holidays; 0 = no, 1 = yes",Approval; 1 = low sum to 4 = high sum,Type of loan; 0 = normal; 1 = special,Cost
0,Approval_Branch,005141EEB1240B31C12577DF004F6A77,18/11/2010 15:27,18/11/2010 15:58,004-9-1,Thursday,0.0,2.0,0.0,"0,87 € per minute"
1,Precheck,005141EEB1240B31C12577DF004F6A77,19/11/2010 12:45,19/11/2010 12:46,000-3-01,Friday,0.0,2.0,0.0,"0,87 € per minute"
2,Precheck,005141EEB1240B31C12577DF004F6A77,24/11/2010 8:18,24/11/2010 8:26,000-2-01,Wednesday,0.0,2.0,0.0,"0,87 € per minute"
3,Check_of_Processing_Applications,005141EEB1240B31C12577DF004F6A77,24/11/2010 10:35,24/11/2010 10:35,000-2-01,Wednesday,0.0,2.0,0.0,"0,87 € per minute"
4,Processing_of_Applications,005141EEB1240B31C12577DF004F6A77,2/12/2010 10:46,2/12/2010 10:46,010-23-11,Thursday,0.0,2.0,0.0,"1,02 € per minute"
...,...,...,...,...,...,...,...,...,...,...
18440,Precheck,FFFF329EF772D73CC12577EA00534A1C,13/12/2010 11:38,13/12/2010 11:40,000-2-01,Monday,0.0,2.0,0.0,"0,87 € per minute"
18441,Check_of_Processing_Applications,FFFF329EF772D73CC12577EA00534A1C,13/12/2010 12:06,13/12/2010 12:14,000-2-01,Monday,0.0,2.0,0.0,"0,87 € per minute"
18442,Processing_of_Applications,FFFF329EF772D73CC12577EA00534A1C,22/12/2010 11:48,22/12/2010 11:48,010-23-07,Wednesday,1.0,2.0,0.0,"1,02 € per minute"
18443,Archieving,FFFF329EF772D73CC12577EA00534A1C,22/12/2010 11:49,22/12/2010 13:19,010-23-07,Wednesday,1.0,2.0,0.0,"1,02 € per minute"


In [4]:
# proprietary
# Resource ids are dash-separated (e.g. '010-23-11'): the first part is stored
# as 'department', the first two parts joined by '-' as 'department_role'.
# Split each id once with the vectorised string accessor instead of calling
# str.split three times per row through apply().
resource_parts = log['resource'].str.split('-')
log['department'] = resource_parts.str[0]
log['department_role'] = resource_parts.str[0] + '-' + resource_parts.str[1]
log

Unnamed: 0,activity,case_id,start timestamp,complete timestamp,resource,Weekday,"School holidays; 0 = no, 1 = yes",Approval; 1 = low sum to 4 = high sum,Type of loan; 0 = normal; 1 = special,Cost,department,department_role
0,Approval_Branch,005141EEB1240B31C12577DF004F6A77,18/11/2010 15:27,18/11/2010 15:58,004-9-1,Thursday,0.0,2.0,0.0,"0,87 € per minute",004,004-9
1,Precheck,005141EEB1240B31C12577DF004F6A77,19/11/2010 12:45,19/11/2010 12:46,000-3-01,Friday,0.0,2.0,0.0,"0,87 € per minute",000,000-3
2,Precheck,005141EEB1240B31C12577DF004F6A77,24/11/2010 8:18,24/11/2010 8:26,000-2-01,Wednesday,0.0,2.0,0.0,"0,87 € per minute",000,000-2
3,Check_of_Processing_Applications,005141EEB1240B31C12577DF004F6A77,24/11/2010 10:35,24/11/2010 10:35,000-2-01,Wednesday,0.0,2.0,0.0,"0,87 € per minute",000,000-2
4,Processing_of_Applications,005141EEB1240B31C12577DF004F6A77,2/12/2010 10:46,2/12/2010 10:46,010-23-11,Thursday,0.0,2.0,0.0,"1,02 € per minute",010,010-23
...,...,...,...,...,...,...,...,...,...,...,...,...
18440,Precheck,FFFF329EF772D73CC12577EA00534A1C,13/12/2010 11:38,13/12/2010 11:40,000-2-01,Monday,0.0,2.0,0.0,"0,87 € per minute",000,000-2
18441,Check_of_Processing_Applications,FFFF329EF772D73CC12577EA00534A1C,13/12/2010 12:06,13/12/2010 12:14,000-2-01,Monday,0.0,2.0,0.0,"0,87 € per minute",000,000-2
18442,Processing_of_Applications,FFFF329EF772D73CC12577EA00534A1C,22/12/2010 11:48,22/12/2010 11:48,010-23-07,Wednesday,1.0,2.0,0.0,"1,02 € per minute",010,010-23
18443,Archieving,FFFF329EF772D73CC12577EA00534A1C,22/12/2010 11:49,22/12/2010 13:19,010-23-07,Wednesday,1.0,2.0,0.0,"1,02 € per minute",010,010-23


In [5]:
# Restrict the log to one special kind of case.
# proprietary, option A (disabled): special loans only
# log = log[log['Type of loan; 0 = normal; 1 = special'] == 1]
# proprietary, option B (active): loans with a high approval sum
# (levels >= 3, i.e. 'higher' and 'high'?)
approval_level = log['Approval; 1 = low sum to 4 = high sum']
is_high_sum = approval_level.ge(3)
log = log[is_high_sum]

## Social-PM-1

### Profile work
- Proprietary: case type (special loan or not; high vs. low sum); time type (school holiday or not)

In [6]:
# Number of events per case (one row per case_id).
case_length = (
    log.groupby('case_id')
    .agg(num_events_in_case=pd.NamedAgg('activity', aggfunc=len))
    .reset_index()
)

# Attach the per-case count to every event. A count left over from an earlier
# run of this cell is dropped first; otherwise the merge would create
# 'num_events_in_case_x' / 'num_events_in_case_y' and later lookups of
# 'num_events_in_case' would fail.
log = log.drop(columns='num_events_in_case', errors='ignore').merge(
    case_length, how='left', on='case_id'
)

# Histogram of case lengths, one bin per event count.
alt.Chart(case_length).mark_bar().encode(
    x=alt.X(
        'num_events_in_case:Q',
        bin=alt.Bin(extent=[0, case_length['num_events_in_case'].max()], step=1)
    ).title('Number of events in case'),
    y=alt.Y('count()').title('Count of cases')
).properties(width=1500)

In [7]:
# proprietary
# Cases with at most this many events are labelled 'selected type'.
# NOTE(review): presumably picked from the case-length histogram — confirm.
MAX_EVENTS_SELECTED_CASE = 9

# Vectorised labelling (no row-wise apply): faster, and it still yields a
# plain column when the frame is empty.
log['case_type'] = np.where(
    log['num_events_in_case'] <= MAX_EVENTS_SELECTED_CASE,
    'selected type',
    'other type',
)

# An event counts as 'holiday' when it falls into school holidays or on a weekend.
is_school_holiday = log['School holidays; 0 = no, 1 = yes'] == 1
is_weekend = log['Weekday'].isin(['Saturday', 'Sunday'])
log['time_type'] = np.where(is_school_holiday | is_weekend, 'holiday', 'workday')
log

Unnamed: 0,activity,case_id,start timestamp,complete timestamp,resource,Weekday,"School holidays; 0 = no, 1 = yes",Approval; 1 = low sum to 4 = high sum,Type of loan; 0 = normal; 1 = special,Cost,department,department_role,num_events_in_case,case_type,time_type
0,Approval_Branch,017FA12C13EE5A75C12578730044F7C4,15/04/2011 14:33,15/04/2011 14:57,091-9-1,Friday,0.0,3.0,0.0,"0,87 € per minute",091,091-9,8,selected type,workday
1,Precheck,017FA12C13EE5A75C12578730044F7C4,15/04/2011 16:58,15/04/2011 16:59,000-3-01,Friday,0.0,3.0,0.0,"0,87 € per minute",000,000-3,8,selected type,workday
2,Approval,017FA12C13EE5A75C12578730044F7C4,18/04/2011 11:42,18/04/2011 12:08,010-23-13,Monday,1.0,3.0,0.0,"1,02 € per minute",010,010-23,8,selected type,holiday
3,Check_of_Processing_Applications,017FA12C13EE5A75C12578730044F7C4,18/04/2011 15:33,18/04/2011 15:37,010-30-01,Monday,1.0,3.0,0.0,"1,02 € per minute",010,010-30,8,selected type,holiday
4,Processing_of_Applications,017FA12C13EE5A75C12578730044F7C4,21/04/2011 11:16,21/04/2011 11:16,010-23-02,Thursday,1.0,3.0,0.0,"1,02 € per minute",010,010-23,8,selected type,holiday
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2970,Check_of_Processing_Applications,FFAE4641B2711A82C125779200404B1C,8/09/2010 16:03,8/09/2010 16:06,010-25-01,Wednesday,0.0,3.0,0.0,"1,02 € per minute",010,010-25,10,other type,workday
2971,Processing_of_Applications,FFAE4641B2711A82C125779200404B1C,14/09/2010 10:36,14/09/2010 10:36,010-23-13,Tuesday,0.0,3.0,0.0,"1,02 € per minute",010,010-23,10,other type,workday
2972,Check_of_Documents,FFAE4641B2711A82C125779200404B1C,14/09/2010 10:36,14/09/2010 11:13,010-23-13,Tuesday,0.0,3.0,0.0,"1,02 € per minute",010,010-23,10,other type,workday
2973,Archieving,FFAE4641B2711A82C125779200404B1C,14/09/2010 11:34,14/09/2010 11:38,010-23-02,Tuesday,0.0,3.0,0.0,"1,02 € per minute",010,010-23,10,other type,workday


### Determine groups/teams

In [8]:
# proprietary
# Resource groups are identified by the first two parts of the resource id,
# which the 'department_role' column already holds.
log = log.assign(resource_group=log['department_role'])

### Select event data of good ROI

- Proprietary: case type (special loan and/or of high sum); time type (school holiday)

In [9]:
# Flag the events that belong to the work items under study. The test is a
# plain column comparison, so it is done vectorised rather than row by row.
# Optional extra criterion (currently disabled):
#     & (log['time_type'] == 'holiday')
log['specific_item'] = log['case_type'] == 'selected type'
print(log['specific_item'].unique())
log

[ True False]


Unnamed: 0,activity,case_id,start timestamp,complete timestamp,resource,Weekday,"School holidays; 0 = no, 1 = yes",Approval; 1 = low sum to 4 = high sum,Type of loan; 0 = normal; 1 = special,Cost,department,department_role,num_events_in_case,case_type,time_type,resource_group,specific_item
0,Approval_Branch,017FA12C13EE5A75C12578730044F7C4,15/04/2011 14:33,15/04/2011 14:57,091-9-1,Friday,0.0,3.0,0.0,"0,87 € per minute",091,091-9,8,selected type,workday,091-9,True
1,Precheck,017FA12C13EE5A75C12578730044F7C4,15/04/2011 16:58,15/04/2011 16:59,000-3-01,Friday,0.0,3.0,0.0,"0,87 € per minute",000,000-3,8,selected type,workday,000-3,True
2,Approval,017FA12C13EE5A75C12578730044F7C4,18/04/2011 11:42,18/04/2011 12:08,010-23-13,Monday,1.0,3.0,0.0,"1,02 € per minute",010,010-23,8,selected type,holiday,010-23,True
3,Check_of_Processing_Applications,017FA12C13EE5A75C12578730044F7C4,18/04/2011 15:33,18/04/2011 15:37,010-30-01,Monday,1.0,3.0,0.0,"1,02 € per minute",010,010-30,8,selected type,holiday,010-30,True
4,Processing_of_Applications,017FA12C13EE5A75C12578730044F7C4,21/04/2011 11:16,21/04/2011 11:16,010-23-02,Thursday,1.0,3.0,0.0,"1,02 € per minute",010,010-23,8,selected type,holiday,010-23,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2970,Check_of_Processing_Applications,FFAE4641B2711A82C125779200404B1C,8/09/2010 16:03,8/09/2010 16:06,010-25-01,Wednesday,0.0,3.0,0.0,"1,02 € per minute",010,010-25,10,other type,workday,010-25,False
2971,Processing_of_Applications,FFAE4641B2711A82C125779200404B1C,14/09/2010 10:36,14/09/2010 10:36,010-23-13,Tuesday,0.0,3.0,0.0,"1,02 € per minute",010,010-23,10,other type,workday,010-23,False
2972,Check_of_Documents,FFAE4641B2711A82C125779200404B1C,14/09/2010 10:36,14/09/2010 11:13,010-23-13,Tuesday,0.0,3.0,0.0,"1,02 € per minute",010,010-23,10,other type,workday,010-23,False
2973,Archieving,FFAE4641B2711A82C125779200404B1C,14/09/2010 11:34,14/09/2010 11:38,010-23-02,Tuesday,0.0,3.0,0.0,"1,02 € per minute",010,010-23,10,other type,workday,010-23,False


### Profile member workload

In [10]:
# One row per resource: how many of its events are on specific items and how
# many are not, plus the group the resource belongs to.
res_workload = pd.DataFrame([
    {
        'resource': resource_id,
        'resource_group': resource_events['resource_group'].unique()[0],
        'workload_specific': int(resource_events['specific_item'].eq(True).sum()),
        'workload_nonspecific': int(resource_events['specific_item'].eq(False).sum()),
    }
    for resource_id, resource_events in log.groupby('resource')
])

# Total event count and the share of each kind of work in that total.
res_workload['total_workload'] = res_workload['workload_specific'].add(
    res_workload['workload_nonspecific']
)
res_workload['normalized_workload_specific'] = res_workload['workload_specific'].div(
    res_workload['total_workload']
)
res_workload['normalized_workload_nonspecific'] = res_workload['workload_nonspecific'].div(
    res_workload['total_workload']
)
res_workload

Unnamed: 0,resource,resource_group,workload_specific,workload_nonspecific,total_workload,normalized_workload_specific,normalized_workload_nonspecific
0,000-0,000-0,0,10,10,0.000000,1.000000
1,000-1-01,000-1,3,4,7,0.428571,0.571429
2,000-2-01,000-2,44,202,246,0.178862,0.821138
3,000-3-01,000-3,27,128,155,0.174194,0.825806
4,000-4-01,000-4,1,12,13,0.076923,0.923077
...,...,...,...,...,...,...,...
141,110-8-3,110-8,4,15,19,0.210526,0.789474
142,110-9-1,110-9,0,4,4,0.000000,1.000000
143,111-8-2,111-8,0,2,2,0.000000,1.000000
144,111-9-1,111-9,0,4,4,0.000000,1.000000


### Determine expected level of workload

In [11]:
# Expected workload level: pool the events of all members of a resource group.
rg_workload = pd.DataFrame([
    {
        'resource_group': group_id,
        'workload_specific': int(group_events['specific_item'].eq(True).sum()),
        'workload_nonspecific': int(group_events['specific_item'].eq(False).sum()),
    }
    for group_id, group_events in log.groupby('resource_group')
])

# Derived columns are added in order, so the shares can use 'total_workload'.
rg_workload = rg_workload.assign(
    total_workload=lambda df: df['workload_specific'] + df['workload_nonspecific'],
    normalized_workload_specific=lambda df: (
        df['workload_specific'] / df['total_workload']
    ),
    normalized_workload_nonspecific=lambda df: (
        df['workload_nonspecific'] / df['total_workload']
    ),
)
rg_workload


Unnamed: 0,resource_group,workload_specific,workload_nonspecific,total_workload,normalized_workload_specific,normalized_workload_nonspecific
0,000-0,0,10,10,0.000000,1.000000
1,000-1,3,4,7,0.428571,0.571429
2,000-2,44,202,246,0.178862,0.821138
3,000-3,27,128,155,0.174194,0.825806
4,000-4,6,27,33,0.181818,0.818182
...,...,...,...,...,...,...
87,110-8,4,26,30,0.133333,0.866667
88,110-9,0,4,4,0.000000,1.000000
89,111-8,0,2,2,0.000000,1.000000
90,111-9,0,4,4,0.000000,1.000000


### Discover abnormally high workload of specific members

In [12]:
# consider only selected groups
# Explicit toggle instead of a bare `if True:` whose else-branch could never run.
# True  -> restrict the analysis to the hand-picked groups below.
# False -> empty selection; the plotting cell then charts all resources.
USE_SELECTED_GROUPS = True

# Alternative selection (disabled): every group with at least 3 distinct resources.
#     group_size = log.groupby('resource_group').agg(
#         group_size=pd.NamedAgg('resource', aggfunc='nunique')
#     ).reset_index().sort_values(by='group_size', ascending=False)
#     sel_rgs = group_size.loc[group_size['group_size'] >= 3, 'resource_group'].unique()
#     print(len(sel_rgs))
sel_rgs = ['010-23', '010-24'] if USE_SELECTED_GROUPS else []
print(len(sel_rgs))

2


In [13]:
def _workload_scatter(data, title):
    """Scatter of normalized non-specific (x) vs. specific (y) workload, one dot per resource."""
    return alt.Chart(data, title=title).mark_circle(size=60).encode(
        x='normalized_workload_nonspecific:Q',
        y='normalized_workload_specific:Q',
        color='resource:N',
        tooltip=[
            'resource',
            'workload_specific',
            'normalized_workload_specific',
            'workload_nonspecific',
            'normalized_workload_nonspecific'
        ]
    )


def _group_reference_rules(group_data):
    """Return the (vertical, horizontal) rules drawn at the group's normalized workloads."""
    vertical = alt.Chart(group_data).mark_rule().encode(
        x='normalized_workload_nonspecific:Q'
    )
    horizontal = alt.Chart(group_data).mark_rule().encode(
        y='normalized_workload_specific:Q'
    )
    return vertical, horizontal


if len(sel_rgs) > 0:
    # One layered chart per selected group: its members plus the group-level
    # reference lines.
    charts = [
        alt.layer(
            _workload_scatter(
                res_workload[res_workload['resource_group'] == rg],
                'Resources in group {}'.format(rg)
            ),
            *_group_reference_rules(
                rg_workload[rg_workload['resource_group'] == rg]
            )
        )
        for rg in sel_rgs
    ]
else:
    # No group selected: plot every resource in one chart, without reference lines.
    charts = [
        alt.layer(_workload_scatter(res_workload, 'All resources'))
    ]

alt.vconcat(*charts).resolve_scale(
    x='independent',
    y='independent',
    color='independent'
)