# Idling

## Description
When agents do not perform work during work time, but rather focus on non-work related activities or spend time socializing with their colleagues.

In [148]:
import pandas as pd
import numpy as np
import altair as alt
alt.data_transformers.enable('vegafusion')

DataTransformerRegistry.enable('vegafusion')

In [149]:
log_name = 'propr'

fn_logs = {
    'bpic15': '../data/preproc/bpic2015_disco.csv',
    'bpic17': '../data/preproc/bpic2017.csv',
    'propr': '../data/preproc/proprietary.csv'
}

In [150]:
log = pd.read_csv(fn_logs[log_name])
log

Unnamed: 0,activity,case_id,start timestamp,complete timestamp,resource,Weekday,"School holidays; 0 = no, 1 = yes",Approval; 1 = low sum to 4 = high sum,Type of loan; 0 = normal; 1 = special,Cost
0,Approval_Branch,005141EEB1240B31C12577DF004F6A77,18/11/2010 15:27,18/11/2010 15:58,004-9-1,Thursday,0.0,2.0,0.0,"0,87 € per minute"
1,Precheck,005141EEB1240B31C12577DF004F6A77,19/11/2010 12:45,19/11/2010 12:46,000-3-01,Friday,0.0,2.0,0.0,"0,87 € per minute"
2,Precheck,005141EEB1240B31C12577DF004F6A77,24/11/2010 8:18,24/11/2010 8:26,000-2-01,Wednesday,0.0,2.0,0.0,"0,87 € per minute"
3,Check_of_Processing_Applications,005141EEB1240B31C12577DF004F6A77,24/11/2010 10:35,24/11/2010 10:35,000-2-01,Wednesday,0.0,2.0,0.0,"0,87 € per minute"
4,Processing_of_Applications,005141EEB1240B31C12577DF004F6A77,2/12/2010 10:46,2/12/2010 10:46,010-23-11,Thursday,0.0,2.0,0.0,"1,02 € per minute"
...,...,...,...,...,...,...,...,...,...,...
18440,Precheck,FFFF329EF772D73CC12577EA00534A1C,13/12/2010 11:38,13/12/2010 11:40,000-2-01,Monday,0.0,2.0,0.0,"0,87 € per minute"
18441,Check_of_Processing_Applications,FFFF329EF772D73CC12577EA00534A1C,13/12/2010 12:06,13/12/2010 12:14,000-2-01,Monday,0.0,2.0,0.0,"0,87 € per minute"
18442,Processing_of_Applications,FFFF329EF772D73CC12577EA00534A1C,22/12/2010 11:48,22/12/2010 11:48,010-23-07,Wednesday,1.0,2.0,0.0,"1,02 € per minute"
18443,Archieving,FFFF329EF772D73CC12577EA00534A1C,22/12/2010 11:49,22/12/2010 13:19,010-23-07,Wednesday,1.0,2.0,0.0,"1,02 € per minute"


## Social-Idling-1

In [151]:
log = log.sort_values(by='start timestamp')
log['start timestamp'] = pd.to_datetime(log['start timestamp'], format='mixed', dayfirst=True)
log['complete timestamp'] = pd.to_datetime(log['complete timestamp'], format='mixed', dayfirst=True)


### Calculate activity duration for each resource

In [152]:
df_res_actdur = log[[
    'activity', 'case_id', 'start timestamp', 'complete timestamp',
    'resource'
]]
df_res_actdur['activity duration'] = (
    df_res_actdur['complete timestamp'] -
    df_res_actdur['start timestamp']
).dt.total_seconds() / 60
df_res_actdur

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_res_actdur['activity duration'] = (


Unnamed: 0,activity,case_id,start timestamp,complete timestamp,resource,activity duration
8231,Preparation_Draft_Executive_Board,6C9B95FFEBE57A0DC125782A002A3229,2011-02-01 10:01:00,2011-02-01 10:02:00,001-17-02,1.0
649,Administrative_Tasks,08BC7C266A535237C1257826002ADD6E,2011-02-01 10:02:00,2011-02-01 10:51:00,010-24-26,49.0
648,Processing_of_Applications_Sonderkredit,08BC7C266A535237C1257826002ADD6E,2011-02-01 10:02:00,2011-02-01 10:02:00,010-24-26,0.0
1527,End,14050B07C84527C5C1257824002E58A8,2011-02-01 10:03:00,2011-02-01 10:03:00,010-23-11,0.0
4528,Approval_Branch,3C3A3DDEF08D2225C1257829004E319B,2011-02-01 10:03:00,2011-02-01 10:09:00,050-8-1,6.0
...,...,...,...,...,...,...
12587,Precheck,AB750FD25DEABED1C12577F300518367,2010-12-09 09:26:00,2010-12-09 10:13:00,070-7-06,47.0
322,Precheck,059B8D0F8B58DFDEC12577F4002FAB94,2010-12-09 09:40:00,2010-12-09 14:18:00,093-18-1,278.0
4849,Precheck,401C68F8E2854EA7C12577F400309E68,2010-12-09 09:51:00,2010-12-09 09:54:00,020-9-1,3.0
15872,Second_vote_KfW,DB060AF533AF5D13C1257735002E3378,2010-12-09 09:55:00,2010-12-09 09:55:00,010-24-01,0.0


### Calculate waiting time between activity instances for each resource

In [153]:
df_res_waiting = list()
for res, events in log.groupby('resource'):
    for i in range(len(events) - 1):
        pre_event = events.iloc[i]
        next_event = events.iloc[i+1]
        waiting_time = (
            next_event['start timestamp'] - 
            pre_event['complete timestamp']
        )
        df_res_waiting.append({
            'resource': res,
            'preceding case_id': pre_event['case_id'],
            'preceding activity': pre_event['activity'],
            'preceding activity completion': pre_event['complete timestamp'],
            'next case_id': next_event['case_id'],
            'next activity': next_event['activity'],
            'next activity start': next_event['start timestamp'],
            'waiting time': waiting_time
        })
df_res_waiting = pd.DataFrame(df_res_waiting)
df_res_waiting['waiting time'] =  df_res_waiting['waiting time'].dt.total_seconds() / 60
# NOTE: only non-negative waiting time makes sense
df_res_waiting = df_res_waiting[df_res_waiting['waiting time'] >= 0]
df_res_waiting

Unnamed: 0,resource,preceding case_id,preceding activity,preceding activity completion,next case_id,next activity,next activity start,waiting time
0,000-0,B89B728E11AC618DC1257831005346B6,Approval_Branch,2011-02-10 16:34:00,CEAD29B5DD6BF7DDC12578760027B059,Approval_Branch,2011-04-18 09:13:00,96039.0
2,000-0,7B36BFC282352B0BC12578020051F856,Approval_Branch,2010-12-23 16:05:00,D26346C3F4E0D83AC125780F004B5250,Approval_Branch,2011-01-24 14:07:00,45962.0
3,000-0,D26346C3F4E0D83AC125780F004B5250,Approval_Branch,2011-01-24 14:08:00,D26346C3F4E0D83AC125780F004B5250,Trash,2011-01-27 17:18:00,4510.0
4,000-0,D26346C3F4E0D83AC125780F004B5250,Trash,2011-01-27 17:19:00,D26346C3F4E0D83AC125780F004B5250,End,2011-01-27 17:19:00,0.0
6,000-0,7B36BFC282352B0BC12578020051F856,Approval_Branch,2010-12-28 14:46:00,F5E6F91B4D545E83C12578080032931C,Approval_Branch,2010-12-29 10:12:00,1166.0
...,...,...,...,...,...,...,...,...
18218,112-18-1,0C277ADB12E1652FC125778300539F8A,Application_Processing_Branches,2010-08-25 16:52:00,0C277ADB12E1652FC125778300539F8A,Precheck,2010-08-26 12:18:00,1166.0
18220,112-18-1,0C277ADB12E1652FC125778300539F8A,Approval_Branch,2010-08-26 09:53:00,46B782EC5D1D68A6C12577AB0055BF50,Approval_Branch,2010-09-27 17:36:00,46543.0
18221,112-18-1,46B782EC5D1D68A6C12577AB0055BF50,Approval_Branch,2010-09-27 18:04:00,402FD23EE77D8EADC1257810006882E9,Application_Processing_Branches,2011-01-06 20:01:00,145557.0
18222,112-18-1,402FD23EE77D8EADC1257810006882E9,Application_Processing_Branches,2011-01-06 20:10:00,402FD23EE77D8EADC1257810006882E9,Application_Processing_Branches,2011-01-07 16:24:00,1214.0


### Identify resources with higher activity duration and waiting time

In [155]:
# waiting time
df_res_order = df_res_waiting.groupby('resource').agg(
    median_waiting_time=pd.NamedAgg('waiting time', aggfunc='median')
)
df_res_order = df_res_order.sort_values(by='median_waiting_time')

# activity duration
df_res_mean_actdur = df_res_actdur.groupby(['resource', 'activity']).agg(
    mean_duration=pd.NamedAgg('activity duration', aggfunc='mean')
).reset_index()

alt.vconcat(
    alt.Chart(
        df_res_waiting,
        title='Average waiting time by resource'
    ).mark_boxplot().encode(
        x=alt.X('resource:O', sort=list(df_res_order.index)),
        y=alt.Y('waiting time:Q').title('Waiting time (minutes)')
    ),
    alt.Chart(
        df_res_mean_actdur,
        title='Mean activity duration by resource per activity'
    ).mark_rect().encode(
        x=alt.X('resource:O', sort=list(df_res_order.index)),
        y=alt.Y('activity:N'),
        color='mean_duration:Q',
        tooltip=['resource', 'activity', 'mean_duration']
    )
)