# Idling

## Description
When agents do not perform work during work time, but rather focus on non-work related activities or spend time socializing with their colleagues.

In [60]:
import pandas as pd
import numpy as np
import altair as alt
alt.data_transformers.enable('vegafusion')

DataTransformerRegistry.enable('vegafusion')

In [61]:
log_name = 'propr_test'

fn_logs = {
    'bpic15': '../data/preproc/bpic15.csv',
    'bpic15_ref': '../data/preproc/bpic15_train-70.csv',
    'bpic15_test': '../data/preproc/bpic15_test-30.csv',

    'bpic17': '../data/preproc/bpic2017.csv',

    'propr': '../data/preproc/proprietary.csv',
    'propr_ref': '../data/preproc/proprietary_train-70.csv',
    'propr_test': '../data/preproc/proprietary_test-30.csv'
}

In [62]:
log = pd.read_csv(fn_logs[log_name])
log

Unnamed: 0,activity,case_id,start timestamp,complete timestamp,resource,Weekday,"School holidays; 0 = no, 1 = yes",Approval; 1 = low sum to 4 = high sum,Type of loan; 0 = normal; 1 = special,Cost,_start timestamp,_complete timestamp
0,Precheck,0051574BDF30D681C1257888004DC40D,6/05/2011 16:09,6/05/2011 16:27,090-10-02,Friday,0.0,1.0,0.0,"0,87 € per minute",2011-05-06 16:09:00,2011-05-06 16:27:00
1,Application_Processing_Branches,0051574BDF30D681C1257888004DC40D,9/05/2011 11:14,9/05/2011 11:16,010-23-13,Monday,0.0,1.0,0.0,"1,02 € per minute",2011-05-09 11:14:00,2011-05-09 11:16:00
2,Precheck,0051574BDF30D681C1257888004DC40D,9/05/2011 15:44,9/05/2011 15:58,090-10-02,Monday,0.0,1.0,0.0,"0,87 € per minute",2011-05-09 15:44:00,2011-05-09 15:58:00
3,Processing_Incomplete_Orders,0051574BDF30D681C1257888004DC40D,9/05/2011 16:10,9/05/2011 16:10,010-23-13,Monday,0.0,1.0,0.0,"1,02 € per minute",2011-05-09 16:10:00,2011-05-09 16:10:00
4,Application_Processing_Branches,0051574BDF30D681C1257888004DC40D,10/05/2011 8:52,10/05/2011 9:32,010-23-13,Tuesday,0.0,1.0,0.0,"1,02 € per minute",2011-05-10 08:52:00,2011-05-10 09:32:00
...,...,...,...,...,...,...,...,...,...,...,...,...
5285,Precheck,FE3E1E0D3928202EC1257896004B8F00,20/05/2011 17:24,20/05/2011 17:26,000-3-01,Friday,0.0,2.0,0.0,"0,87 € per minute",2011-05-20 17:24:00,2011-05-20 17:26:00
5286,Check_of_Processing_Applications,FE3E1E0D3928202EC1257896004B8F00,23/05/2011 15:53,23/05/2011 16:06,000-2-01,Monday,0.0,2.0,0.0,"0,87 € per minute",2011-05-23 15:53:00,2011-05-23 16:06:00
5287,Processing_of_Applications,FE3E1E0D3928202EC1257896004B8F00,26/05/2011 9:25,26/05/2011 9:25,010-23-07,Thursday,0.0,2.0,0.0,"1,02 € per minute",2011-05-26 09:25:00,2011-05-26 09:25:00
5288,Archieving,FE3E1E0D3928202EC1257896004B8F00,26/05/2011 10:38,26/05/2011 10:47,010-23-07,Thursday,0.0,2.0,0.0,"1,02 € per minute",2011-05-26 10:38:00,2011-05-26 10:47:00


In [63]:
# proprietary
log['department'] = log['resource'].apply(
    lambda x: x.split('-')[0]
)
log['department_role'] = log['resource'].apply(
    lambda x: x.split('-')[0] + '-' + x.split('-')[1]
)
log

Unnamed: 0,activity,case_id,start timestamp,complete timestamp,resource,Weekday,"School holidays; 0 = no, 1 = yes",Approval; 1 = low sum to 4 = high sum,Type of loan; 0 = normal; 1 = special,Cost,_start timestamp,_complete timestamp,department,department_role
0,Precheck,0051574BDF30D681C1257888004DC40D,6/05/2011 16:09,6/05/2011 16:27,090-10-02,Friday,0.0,1.0,0.0,"0,87 € per minute",2011-05-06 16:09:00,2011-05-06 16:27:00,090,090-10
1,Application_Processing_Branches,0051574BDF30D681C1257888004DC40D,9/05/2011 11:14,9/05/2011 11:16,010-23-13,Monday,0.0,1.0,0.0,"1,02 € per minute",2011-05-09 11:14:00,2011-05-09 11:16:00,010,010-23
2,Precheck,0051574BDF30D681C1257888004DC40D,9/05/2011 15:44,9/05/2011 15:58,090-10-02,Monday,0.0,1.0,0.0,"0,87 € per minute",2011-05-09 15:44:00,2011-05-09 15:58:00,090,090-10
3,Processing_Incomplete_Orders,0051574BDF30D681C1257888004DC40D,9/05/2011 16:10,9/05/2011 16:10,010-23-13,Monday,0.0,1.0,0.0,"1,02 € per minute",2011-05-09 16:10:00,2011-05-09 16:10:00,010,010-23
4,Application_Processing_Branches,0051574BDF30D681C1257888004DC40D,10/05/2011 8:52,10/05/2011 9:32,010-23-13,Tuesday,0.0,1.0,0.0,"1,02 € per minute",2011-05-10 08:52:00,2011-05-10 09:32:00,010,010-23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5285,Precheck,FE3E1E0D3928202EC1257896004B8F00,20/05/2011 17:24,20/05/2011 17:26,000-3-01,Friday,0.0,2.0,0.0,"0,87 € per minute",2011-05-20 17:24:00,2011-05-20 17:26:00,000,000-3
5286,Check_of_Processing_Applications,FE3E1E0D3928202EC1257896004B8F00,23/05/2011 15:53,23/05/2011 16:06,000-2-01,Monday,0.0,2.0,0.0,"0,87 € per minute",2011-05-23 15:53:00,2011-05-23 16:06:00,000,000-2
5287,Processing_of_Applications,FE3E1E0D3928202EC1257896004B8F00,26/05/2011 9:25,26/05/2011 9:25,010-23-07,Thursday,0.0,2.0,0.0,"1,02 € per minute",2011-05-26 09:25:00,2011-05-26 09:25:00,010,010-23
5288,Archieving,FE3E1E0D3928202EC1257896004B8F00,26/05/2011 10:38,26/05/2011 10:47,010-23-07,Thursday,0.0,2.0,0.0,"1,02 € per minute",2011-05-26 10:38:00,2011-05-26 10:47:00,010,010-23


In [64]:
sel_resources = log.loc[log['department_role'] == '010-24', 'resource'].unique()
print(len(sel_resources))

11


## Social-Idling-1

In [65]:
log = log.sort_values(by='start timestamp')
log['start timestamp'] = pd.to_datetime(log['start timestamp'], format='mixed', dayfirst=True)
log['complete timestamp'] = pd.to_datetime(log['complete timestamp'], format='mixed', dayfirst=True)


### Calculate activity duration for each resource

In [66]:
df_res_actdur = log[[
    'activity', 'case_id', 'start timestamp', 'complete timestamp',
    'resource'
]]
df_res_actdur['activity duration'] = (
    df_res_actdur['complete timestamp'] -
    df_res_actdur['start timestamp']
).dt.total_seconds() / 60
df_res_actdur

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_res_actdur['activity duration'] = (


Unnamed: 0,activity,case_id,start timestamp,complete timestamp,resource,activity duration
811,Precheck,294185B20DDA8523C1257846002E877F,2011-03-01 10:00:00,2011-03-01 10:05:00,001-7-03,5.0
4980,Check_of_Processing_Applications,EEBDCCE3EDF9A9BAC1257845002E8C51,2011-03-01 10:06:00,2011-03-01 10:07:00,000-2-01,1.0
1338,Approval_Branch,3D5A42DCFDBD5453C12578450042A8A0,2011-03-01 10:34:00,2011-03-01 10:36:00,010-9-1,2.0
1803,Application_Processing_Branches,4DD951A609E3068FC125784500352436,2011-03-01 10:38:00,2011-03-01 16:27:00,080-10-03,349.0
1339,Refusal_Branches,3D5A42DCFDBD5453C12578450042A8A0,2011-03-01 10:39:00,2011-03-01 10:43:00,010-9-1,4.0
...,...,...,...,...,...,...
1485,Get_Signature_of_Customer,408284FA34AFF90AC125787600362779,2011-05-09 09:42:00,2011-05-09 09:44:00,010-24-23,2.0
2620,Precheck,6E7CD452F910DB82C1257877004B125D,2011-05-09 09:43:00,2011-05-09 09:45:00,010-8-1,2.0
1292,Processing_of_Applications,39BA4E367A0CE05CC1257884004B7DAF,2011-05-09 09:49:00,2011-05-09 09:49:00,010-25-07,0.0
1293,Archieving,39BA4E367A0CE05CC1257884004B7DAF,2011-05-09 09:50:00,2011-05-09 09:51:00,010-25-07,1.0


### Calculate waiting time between activity instances for each resource

In [67]:
df_res_waiting = list()
for res, events in log.groupby('resource'):
    for i in range(len(events) - 1):
        pre_event = events.iloc[i]
        next_event = events.iloc[i+1]
        waiting_time = (
            next_event['start timestamp'] - 
            pre_event['complete timestamp']
        )
        df_res_waiting.append({
            'resource': res,
            'preceding case_id': pre_event['case_id'],
            'preceding activity': pre_event['activity'],
            'preceding activity completion': pre_event['complete timestamp'],
            'next case_id': next_event['case_id'],
            'next activity': next_event['activity'],
            'next activity start': next_event['start timestamp'],
            'waiting time': waiting_time
        })
df_res_waiting = pd.DataFrame(df_res_waiting)
df_res_waiting['waiting time'] =  df_res_waiting['waiting time'].dt.total_seconds() / 60
# NOTE: only non-negative waiting time makes sense
df_res_waiting = df_res_waiting[df_res_waiting['waiting time'] >= 0]
# NOTE: consider only waiting time within the same case
df_res_waiting = df_res_waiting.loc[df_res_waiting['preceding case_id'] == df_res_waiting['next case_id'], :]

df_res_waiting = df_res_waiting[df_res_waiting['resource'].isin(sel_resources)]
df_res_waiting

Unnamed: 0,resource,preceding case_id,preceding activity,preceding activity completion,next case_id,next activity,next activity start,waiting time
2102,010-24-01,0E6B8348089B2783C1257881003E4022,Processing_of_Applications,2011-05-10 11:18:00,0E6B8348089B2783C1257881003E4022,Processing_of_Applications,2011-05-10 11:18:00,0.0
2103,010-24-01,0E6B8348089B2783C1257881003E4022,Processing_of_Applications,2011-05-10 11:26:00,0E6B8348089B2783C1257881003E4022,Processing_of_Applications,2011-05-10 12:47:00,81.0
2104,010-24-01,0E6B8348089B2783C1257881003E4022,Processing_of_Applications,2011-05-10 13:00:00,0E6B8348089B2783C1257881003E4022,Get_Signature_of_Customer,2011-05-10 13:32:00,32.0
2106,010-24-01,16C8AC76D2DCE29BC12578760057A819,Archieving,2011-05-10 16:17:00,16C8AC76D2DCE29BC12578760057A819,End,2011-05-10 16:17:00,0.0
2109,010-24-01,FCC2C249E601786CC125788B003A7A83,Processing_of_Applications,2011-05-10 08:44:00,FCC2C249E601786CC125788B003A7A83,Processing_of_Applications,2011-05-10 08:44:00,0.0
...,...,...,...,...,...,...,...,...
3684,010-24-26,0BFBC318631D1240C125784D002B64FD,Check_of_Processing_Applications,2011-03-09 11:04:00,0BFBC318631D1240C125784D002B64FD,Check_of_Processing_Applications,2011-03-09 11:05:00,1.0
3694,010-24-26,28589B85403CD09BC12578480055B2FA,Processing_of_Applications,2011-03-09 09:15:00,28589B85403CD09BC12578480055B2FA,Archieving,2011-03-09 09:16:00,1.0
3698,010-24-26,C568F496955AF585C1257888003303C3,Precheck,2011-05-09 14:22:00,C568F496955AF585C1257888003303C3,Rework_Branches,2011-05-09 14:28:00,6.0
3705,010-24-26,EF1857716C3DE738C12578870044040D,Precheck,2011-05-09 16:14:00,EF1857716C3DE738C12578870044040D,Approval,2011-05-09 16:14:00,0.0


### Identify resources with higher activity duration and waiting time

In [68]:
# waiting time
df_res_order = df_res_waiting.groupby('resource').agg(
    median_waiting_time=pd.NamedAgg('waiting time', aggfunc='median')
)
df_res_order = df_res_order.sort_values(by='median_waiting_time')
df_res_actdur = df_res_actdur[df_res_actdur['resource'].isin(sel_resources)]

# activity duration
df_res_mean_actdur = df_res_actdur.groupby(['resource', 'activity']).agg(
    mean_duration=pd.NamedAgg('activity duration', aggfunc='mean')
).reset_index()
df_res_mean_actdur = df_res_mean_actdur[df_res_mean_actdur['resource'].isin(sel_resources)]

alt.vconcat(
    alt.Chart(
        df_res_waiting,
        title='Average waiting time by resource'
    ).mark_boxplot().encode(
        x=alt.X('resource:O', sort=list(df_res_order.index)),
        y=alt.Y('waiting time:Q').title('Waiting time (minutes)')
    ),
    alt.Chart(
        df_res_mean_actdur,
        title='Mean activity duration by resource per activity'
    ).mark_rect().encode(
        x=alt.X('resource:O', sort=list(df_res_order.index)),
        y=alt.Y('activity:N'),
        color='mean_duration:Q',
        tooltip=['resource', 'activity', 'mean_duration']
    )
)