# Generating Pseudo Time Clock Entries

For the purpose of testing whether a company is complying with labor law,
we do not first assume that employees are perfect automatons,
but instead assume that employees are:
1. Non-uniform with regard to being prompt, early, or late.
2. Relatively normal with regard to precision being within two standard deviations of their promptness.
3. Non-uniform with regard to sloppiness with regard to missing time clock entries.
4. Miss time clock entries based on fatigue or other unexplained/random factors such as distraction or unnoticed machine miss read.

The supporting structures and functions used to generate sets of time clock entries are not specifically special with regard to a generally simply United States labor law standard, but use these standards as well as a three-shift spread over the twenty-four hour day to be sufficiently complicated.

In [1]:
from enum import Enum

class WorkShiftName(Enum):
    graveyard = 0
    day = 1
    afternoon = 2
    
def work_shift_period(work_shift_name):
    switcher = {
        WorkShiftName.graveyard: (0, 8),
        WorkShiftName.day: (8, 16),
        WorkShiftName.afternoon: (16, 24)
    }
    
    return switcher.get(work_shift_name)

In [2]:
from enum import IntFlag

class TimeClockPunchBehaviorFlags(IntFlag):
    default = 0
    
    prompt = 1
    early = 2
    late = 4
    
    precise = 8
    stddev1 = 16
    stddev2 = 32
    
    slop0 = 256
    slop1 = 512
    slop2 = 1024
    slop3 = 2048
    slop4 = 4096
    
def time_clock_punch_behavior(time_clock_punch_behavior_flags):
    flags = time_clock_punch_behavior_flags
    
    if 0 == (flags & TimeClockPunchBehaviorFlags.prompt or
            flags & TimeClockPunchBehaviorFlags.early or
            flags & TimeClockPunchBehaviorFlags.late):
        flags = flags + TimeClockPunchBehaviorFlags.prompt

    if 0 == (flags & TimeClockPunchBehaviorFlags.precise or
            flags & TimeClockPunchBehaviorFlags.stddev1 or
            flags & TimeClockPunchBehaviorFlags.stddev2):
        flags = flags + TimeClockPunchBehaviorFlags.stddev1

    if 0 == (flags & TimeClockPunchBehaviorFlags.slop0 or
            flags & TimeClockPunchBehaviorFlags.slop1 or
            flags & TimeClockPunchBehaviorFlags.slop2 or
            flags & TimeClockPunchBehaviorFlags.slop3 or
            flags & TimeClockPunchBehaviorFlags.slop4):
        flags = flags + TimeClockPunchBehaviorFlags.slop1
        
    return flags

In [3]:
import random

class ShiftWorker:
    def __init__(self, id, work_shift_name, punch_behavior):
        self._id = id
        self._shift_start, self._shift_end = work_shift_period(work_shift_name)
        self._punch_behavior = punch_behavior
        self._punches_to_date = 0.0
        
    def punch_offset(self):
        offset = 0.0
        
        if self._punch_behavior & TimeClockPunchBehaviorFlags.precise:
            offset = random.randrange(0.0, 1.0) / 60.0
        if self._punch_behavior & TimeClockPunchBehaviorFlags.stddev1:
            offset = random.randrange(0.0, 4.0) / 60.0
        if self._punch_behavior & TimeClockPunchBehaviorFlags.stddev2:
            offset = random.randrange(0.0, 9.0) / 60.0

        if self._punch_behavior & TimeClockPunchBehaviorFlags.early:
            return -1.0 * offset
        if self._punch_behavior & TimeClockPunchBehaviorFlags.prompt:
            return 1.0 * offset
        if self._punch_behavior & TimeClockPunchBehaviorFlags.late:
            return 2.0 * offset
        
        return 1.0 * offset
        
    def slop_offset(self):
        if self._punch_behavior & TimeClockPunchBehaviorFlags.slop0:
            return 0
        if self._punch_behavior & TimeClockPunchBehaviorFlags.slop1:
            return 1
        if self._punch_behavior & TimeClockPunchBehaviorFlags.slop2:
            return 2
        if self._punch_behavior & TimeClockPunchBehaviorFlags.slop3:
            return 3
        if self._punch_behavior & TimeClockPunchBehaviorFlags.slop4:
            return 4
        return 0
    
    def punch_miss(self):
        self.punch_inc()
        slop = self.slop_offset()
        # 1 in 100ish w/ fatigue (punches_to_date) and sloppiness (slop)
        balls = 100 - self._punches_to_date - slop
        ball = random.randrange(0, balls)
        return 0 == ball
    
    def punch_inc(self):
        self._punches_to_date = self._punches_to_date + 1

    def punches(self):
        out = []
        
        # clock in
        if not self.punch_miss():
            out.append((self._id, self._shift_start + self.punch_offset()))
            
        # 10m break
        if not self.punch_miss():
            out.append((self._id, self._shift_start + 3 + self.punch_offset()))
        if not self.punch_miss():
            out.append((self._id, self._shift_start + 3 + 10.0 / 60.0 + self.punch_offset()))

        # 30m meal break
        if not self.punch_miss():
            out.append((self._id, self._shift_start + 5 + self.punch_offset()))
        if not self.punch_miss():
            out.append((self._id, self._shift_start + 5 + 10.0 / 60.0 + self.punch_offset()))

        # clock out
        if not self.punch_miss():
            out.append((self._id, self._shift_end + self.punch_offset()))

        return out

In [5]:
def work_shift(i):
    # 2/10 graveyard
    # 3/10 afternoon (aka swing)
    # 5/10 day
    part = i % 10
    if part >= 0 and part < 2:
        return WorkShiftName.graveyard
    if part >= 2 and part < 7:
        return WorkShiftName.day
    return WorkShiftName.afternoon

def worker_behavior(i):
    return TimeClockPunchBehaviorFlags.early + TimeClockPunchBehaviorFlags.stddev2

workers = [ShiftWorker(id, work_shift(id), worker_behavior(id)) for id in range(1, 101)]

wpunches = (worker.punches() for worker in workers)
flatten = lambda l: (item for sublist in l for item in sublist)
punches = sorted(flatten(wpunches), key=lambda punch: punch[1])
print(f"punches: {len(punches)} for {len(workers)} workers w/ {6} punches expected per shift")

punches: 596 for 100 workers w/ 6 punches expected per shift


In [6]:
import csv

with open('punches.csv', 'w') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["id", "ts"])
    for punch in punches:
        writer.writerow(punch)

In [7]:
import numpy as np
import pandas as pd

df = pd.read_csv('punches.csv')

In [8]:
df_by_id = df.groupby("id")

In [16]:
missing_punches = df_by_id.filter(lambda x: len(x) < 6)

In [17]:
missing_punches

Unnamed: 0,id,ts
8,40,-0.066667
16,91,-0.016667
24,40,2.9
38,91,3.0
55,91,3.133333
57,40,3.166667
67,40,4.916667
76,91,4.983333
89,91,5.1
95,40,5.133333


In [23]:
[id for id, _ in missing_punches.groupby("id")]

[17, 40, 89, 91]

In [33]:
def missing_punch_type(ts):
    if ts[-1] - ts[0] < 8.0:
        return "b_or_e"
    return "m"

[missing_punch_type(list(s["ts"])) for _id, s in missing_punches.groupby("id")]

['b_or_e', 'b_or_e', 'm', 'b_or_e']

In [73]:
[(id, missing_punch_type(list(ts["ts"]))) for id, ts in missing_punches.groupby("id")]

[(17, 'b_or_e'), (40, 'b_or_e'), (89, 'm'), (91, 'b_or_e')]

In [74]:
# since employee id 89 is missing some middle punches, ie breaks, let's hone in on them
missing_punches.groupby("id").filter(lambda g: g["id"].min() == 89)

Unnamed: 0,id,ts
377,89,15.866667
456,89,18.9
498,89,19.1
555,89,21.133333
595,89,24.0
