In [1]:
import pandas as pd
import numpy as np
import os
import requests
import datetime
from datetime import date, datetime, timedelta
from PyPDF2 import PdfReader, PdfWriter
import holidays
import calendar

In [2]:
# Extracting Clockify workspace ID (for the summary report)
url_base = 'https://api.clockify.me/api/v1/user'
# The key is read from the environment so it is never stored in the notebook;
# a missing variable raises KeyError here.
API_KEY = os.environ['Clockify-API-Key']

headers = {'content-type': 'application/json', 'X-Api-Key': API_KEY}
response = requests.get(url_base, headers=headers)
# Fail with the HTTP error itself (e.g. 401 for a bad key) instead of a
# confusing KeyError on 'activeWorkspace' further down.
response.raise_for_status()
json_response_base = response.json()

# The active workspace of the authenticated user is the one the report is pulled from
workspace_id = json_response_base['activeWorkspace']

In [3]:
# Take ONE snapshot of the clock so every value below describes the same
# instant, even when the notebook runs across midnight or a month boundary.
now = datetime.now()

# Getting current day, month, and year (for the PDF form)
current_day = now.day
current_month = now.month
current_year = now.year
current_year_short = str(current_year)[-2:] # Last two digits of the year

# Getting first and last day of the previous month (for the summary report)
current_first = now.date().replace(day=1)
prev_last = current_first - timedelta(days=1) # Day before the 1st = last day of the previous month
prev_first = prev_last.replace(day=1)
prev_first_str = prev_first.isoformat() # First day of the previous month (YYYY-MM-DD)
prev_last_str = prev_last.isoformat() # Last day of the previous month (YYYY-MM-DD)

# Getting previous month and year (for holidays and the PDF form)
last_date = prev_first.strftime("%Y-%m")
split_date = last_date.split("-")
year_prev, month_prev = split_date # Strings; month_prev is zero-padded (e.g. "03")
month_prev_int = prev_first.month # Previous month
year_prev_int = prev_first.year # Year of the previous month
year_prev_short = year_prev[-2:] # Last two digits of the year

In [4]:
# Function to get the Clockify summary report for the previous month
def get_summary_report():
    """Request the summary report for the previous month from Clockify.

    Reads the module-level ``workspace_id``, ``API_KEY``, ``prev_first_str``
    and ``prev_last_str``. The report is grouped by user first and by date
    second.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: if Clockify answers with a 4xx/5xx status.
    """
    url = f"https://reports.api.clockify.me/v1/workspaces/{workspace_id}/reports/summary"
    request_headers = {'X-Api-Key': API_KEY}
    payload = {
        "dateRangeStart": prev_first_str + "T00:00:00",
        "dateRangeEnd": prev_last_str + "T23:59:59",
        "summaryFilter": {
            "groups": [
                "USER",
                "DATE"
            ]
        }
    }

    response = requests.post(url, headers=request_headers, json=payload)
    # Surface API errors here rather than as a KeyError when the report is
    # flattened into a dataframe later on.
    response.raise_for_status()
    return response.json()

In [5]:
# Getting the summary report (this performs the HTTP request to Clockify)
summary_report = get_summary_report()

In [6]:
# Flatten the report JSON into a dataframe: the records are taken from the
# 'children' list of each 'groupOne' entry, and the 'children.name' metadata
# column is attached to every record.
dataframe = pd.json_normalize(
    summary_report,
    record_path=['groupOne', 'children'],
    meta=[['children', 'name']],
)

In [7]:
# Rename columns: the record's own 'name' holds the date, the metadata column holds the person
dataframe = dataframe.rename(columns={'name': 'date', 'children.name': 'name'})

# Calculate duration in hours (decimal form), rounded to 2 decimal places
dataframe['duration_hours'] = (dataframe['duration'] / 3600).apply(lambda hours: round(hours, 2))

# Calculate total duration per person; rounded again because summing the
# rounded daily values can leave float noise such as 12.340000000000002
total_hours = dataframe.groupby(['name'])['duration_hours'].transform('sum').round(2)
dataframe['total_hours'] = total_hours

# Drop unnecessary columns (ignore any that this report does not contain)
dataframe = dataframe.drop(columns=['amounts', 'amount', '_id', 'duration'], errors='ignore')

# Get first name and last name, and initial.
# n=2 keeps everything after the second word together, so longer names are not
# truncated; reindex guarantees columns 0..2 exist even when nobody in the
# report has that many name parts (otherwise sep_names[2] raises KeyError).
sep_names = dataframe['name'].str.split(' ', n=2, expand=True).reindex(columns=range(3))
dataframe['first_name'] = sep_names[0]
dataframe['last_name'] = sep_names[1]
dataframe['last_name2'] = sep_names[2] # Remainder of the name after the first last name
dataframe['initial'] = dataframe['first_name'].astype(str).str[0]

# Fill missing values (e.g. absent second last name) with empty string
dataframe = dataframe.fillna("")

In [8]:
# Transfer hours from weekends and holidays to the closest working day
one_day = timedelta(days=1)
last_day = calendar.monthrange(year_prev_int, month_prev_int)[1]

# Dutch public holidays for the reported year, built once instead of once per row
holidays_nl = holidays.NL(years=year_prev_int)


def _is_non_working_day(day):
    """Return True when `day` falls on a weekend or on a Dutch public holiday."""
    return day.weekday() in holidays.WEEKEND or day in holidays_nl


def _shift_to_working_day(start, step):
    """Walk from `start` in increments of `step` until a working day is reached."""
    candidate = start + step
    while _is_non_working_day(candidate):
        candidate += step
    return candidate


def next_business_day(date):
    """Return the working day on which the hours of `date` should be booked.

    A date that is already a working day is returned unchanged. A weekend day
    or holiday is moved forward to the next working day, except on the last
    two days of the month, where it is moved backward instead. If the forward
    search would leave the month of `date`, the backward search is used as
    well, so the hours always stay inside the reported month.

    Args:
        date: the day to check, as a 'YYYY-MM-DD' string or anything else
            accepted by ``pd.to_datetime``.

    Returns:
        pandas.Timestamp: the (possibly shifted) working day.
    """
    original = pd.to_datetime(date)
    if not _is_non_working_day(original):
        return original

    # The month length is taken from the date itself rather than from a global.
    if original.day < original.days_in_month - 1:
        forward = _shift_to_working_day(original, one_day)
        # Guard against a run of non-working days that reaches the month end:
        # the day number would otherwise point at a day of the next month.
        if (forward.year, forward.month) == (original.year, original.month):
            return forward

    return _shift_to_working_day(original, -one_day)

In [9]:
# Apply function to dataframe: map every reported date to the working day its hours are booked on
dataframe['date_weekday'] = dataframe['date'].apply(next_business_day)

In [10]:
# Drop unnecessary columns
dataframe = dataframe.drop(columns=['date'])

# Group rows with the same date: hours moved from a weekend/holiday are added
# to the hours already booked on the target working day
dataframe = dataframe.groupby(['name', 'date_weekday'], as_index=False).agg({
    'duration_hours': 'sum',
    'total_hours': 'first',
    'first_name': 'first',
    'last_name': 'first',
    'last_name2': 'first',
    'initial': 'first',
})

# Adding floats can leave noise (e.g. 2.1 + 4.2 = 6.300000000000001) that
# would be written to the form verbatim, so round the merged hours again
dataframe['duration_hours'] = dataframe['duration_hours'].round(2)

# Extract the zero-padded day of the month ("01".."31") from the new date
dataframe['day'] = pd.to_datetime(dataframe['date_weekday']).dt.strftime('%d')

In [11]:
# Group dataframe by name
dfs = [person_df for _, person_df in dataframe.groupby('name')]

# Get the number of people
people = len(dfs)

# Define the supervisor signing the form (identical on every form)
supervisor = 'Hannes Datta'

# Make sure the output folder exists before the first form is written
save_path = './generated_forms/'
os.makedirs(save_path, exist_ok=True)

# Loop through the people and produce one filled-in form for each
for df in dfs:
    # Create a dictionary with the day as key and the duration as value
    dict_of_hours = dict(zip(df['day'], df['duration_hours']))

    # Every row of a person's frame carries the same name parts and total, so
    # the first row is read directly. (Stringifying the array of unique values
    # printed whole totals as "12." and mangled names containing apostrophes.)
    first_name = df['first_name'].iloc[0]
    last_name = df['last_name'].iloc[0]
    last_name2 = df['last_name2'].iloc[0]
    initial = df['initial'].iloc[0]
    total_hours = str(round(float(df['total_hours'].iloc[0]), 2))

    # Second last name, prefixed with a space for the PDF field and with an
    # underscore for the file name; empty when the person has none
    last_name2_field = ' ' + last_name2 if last_name2 else ''
    last_name2_title = '_' + last_name2 if last_name2 else ''

    # Define reader and writer; the template is re-read for every person
    # because filling the fields modifies the page objects
    reader = PdfReader("tsh_form.pdf")
    writer = PdfWriter()

    # Read pages from the PDF file
    page1 = reader.pages[0]
    page2 = reader.pages[1]

    # Fill in the fields in the PDF file (snake_case API; the camelCase
    # aliases are deprecated and raise in PyPDF2 3.x)
    writer.update_page_form_field_values(page1, fields=dict_of_hours)
    writer.update_page_form_field_values(page1, {'Naam en voorletters': last_name + last_name2_field + ' ' + initial,
                                                 'maand1': month_prev,
                                                 'jaar1': year_prev_short,
                                                 'tot': total_hours})
    writer.update_page_form_field_values(page2, {'dag1': current_day,
                                                 'maand2': current_month,
                                                 'jaar2': current_year_short,
                                                 'Naam gemandateerde leidinggevende': supervisor,
                                                 'dag3': current_day,
                                                 'maand3': current_month,
                                                 'jaar3': current_year_short})

    # Add pages
    writer.add_page(page1)
    writer.add_page(page2)

    # Save the PDF file as <initial>_<last name>[_<second last name>]_<YYYY-MM>.pdf
    filename = initial + '_' + last_name + last_name2_title + '_' + last_date
    completeName = os.path.join(save_path, filename + ".pdf")
    with open(completeName, "wb") as new:
        writer.write(new)