# Toggl Reports Downloader

Script to Extract from Toggl API and create CSV Export of **Latest and Complete Timelogs** as as well as separate exports of Clients, Projects, Workspace Lists. 

Useful for back up purposes or additional data analysis. 

----

### Add Dependencies

In [1]:
import pandas as pd
from datetime import datetime
from dateutil.parser import parse
import time
import pytz

In [2]:
# Toggl Wrapper API 
# https://github.com/matthewdowney/TogglPy
import TogglPy

----

## Authentication

In [3]:
import json

with open("credentials.json", "r") as file:
    credentials = json.load(file)
    toggl_cr = credentials['toggl']
    APIKEY = toggl_cr['APIKEY']

In [4]:
toggl = TogglPy.Toggl()
toggl.setAPIKey(APIKEY) 

-----

## User Data

In [5]:
user = toggl.request("https://www.toggl.com/api/v8/me")

In [6]:
user_id = user['data']['id']

In [7]:
user['data']['fullname']

'Markwkoester'

In [8]:
join_date = parse(user['data']['created_at'])
join_date

datetime.datetime(2013, 2, 12, 13, 6, 33, tzinfo=tzutc())

In [9]:
# today = datetime.now()
def utcnow():
    return datetime.now(tz=pytz.utc)
today = utcnow()
dates = list(pd.date_range(join_date, today))
print("Days Since Joining: " + str(len(dates))) # days since joining

Days Since Joining: 2058


-----

## Clients

In [10]:
user_clients = toggl.request("https://www.toggl.com/api/v8/clients")

In [11]:
clients = pd.DataFrame()
for i in list(range(0, len(user_clients))):
    clients_df_temp = pd.DataFrame.from_dict(user_clients)
    clients = pd.concat([clients_df_temp, clients])

In [12]:
clients.to_csv('data/toggl-clients.csv')

-----

## Workplaces

API Ref: https://github.com/toggl/toggl_api_docs/blob/master/chapters/workspaces.md#get-workspaces

In [13]:
workspaces_list = toggl.request("https://www.toggl.com/api/v8/workspaces")

In [14]:
len(workspaces_list)

3

In [15]:
workspaces = pd.DataFrame.from_dict(workspaces_list)

In [16]:
workspaces_dict = dict(zip(workspaces.id, workspaces.name))

In [17]:
workspaces.to_csv('data/toggl-workspaces.csv')

----

## Workplace Projects

* API Ref: https://github.com/toggl/toggl_api_docs/blob/master/chapters/workspaces.md#get-workspace-projects
* Endpoint: https://www.toggl.com/api/v8/workspaces/{workspace_id}/projects

In [18]:
projects = pd.DataFrame()
for i in list(range(0, len(workspaces_list))):
    projects_list = toggl.request("https://www.toggl.com/api/v8/workspaces/" + str(workspaces_list[i]['id']) + "/projects")
    projects_df_temp = pd.DataFrame.from_dict(projects_list)
    projects = pd.concat([projects_df_temp, projects])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """


In [19]:
len(projects)

49

In [20]:
# map workspace name onto projects
projects['workspace_name'] = projects.wid.map(workspaces_dict)

In [21]:
projects.head(3)

Unnamed: 0,active,actual_hours,at,auto_estimates,billable,color,created_at,guid,hex_color,id,is_private,name,template,wid,workspace_name
0,True,42.0,2018-02-16T10:10:03+00:00,False,False,10,2018-02-16T10:10:03+00:00,,#f1c33f,100370156,True,BioMarker Tracker,False,1234339,My Startup Projects
1,True,56.0,2018-02-16T10:10:17+00:00,False,False,5,2018-02-16T10:10:10+00:00,,#4bc800,100370160,True,PhotoStats App,False,1234339,My Startup Projects
2,True,8.0,2018-02-16T10:10:30+00:00,False,False,8,2018-02-16T10:10:30+00:00,,#3750b5,100370166,True,Podcast Tracker,False,1234339,My Startup Projects


In [22]:
# total time of active projects
projects.actual_hours.sum()

7112.0

In [23]:
projects.to_csv('data/toggl-current-projects.csv')

----

# Collect Yearly Export of Detailed Timelogs

In [24]:
def get_detailed_reports(wid, since, until):  # max 365 days
    uid = user_id
    param = {
        'workspace_id': wid,
        'since': since,
        'until': until,
        'uid': uid
    }
    #print(str(workspace_id) + " " + since)
    toggl.getDetailedReportCSV(param, "data/detailed/toggl-detailed-report-" + wid + "-" + since + "-" + until + ".csv")

In [25]:
# years since joinging
last_year = today.year + 1
years = list(range(join_date.year, last_year))
years

[2013, 2014, 2015, 2016, 2017, 2018]

In [26]:
# list of workspace ids
workspace_ids = []
for i in workspaces_list:
    workspace_ids.append(i['id'])
# workspace_ids

In [27]:
workspace_ids

[341257, 373504, 1234339]

In [28]:
# Generate Detail CSV Tester
workspace_id = "373504"
since = "2017-01-01"
until = "2017-12-31"

get_detailed_reports(workspace_id, since, until)

In [29]:
# generate a yearly report for each workspace
for i in workspace_ids:
    wid = str(i)
    for y in years:
        try: 
            since = str(y) + "-01-01" # "2013-01-01"
            until = str(y) + "-12-31" # "2013-12-31"
            print("Generating CSV... " + "for Workspace: " + str(wid) + " from " + since + " until " + until)
            get_detailed_reports(wid, since, until)            
        except:
            print("ERROR On:  " + str(uid) + " " + str(wid) + " from " + since + " until " + until)

Generating CSV... for Workspace: 341257 from 2013-01-01 until 2013-12-31
Generating CSV... for Workspace: 341257 from 2014-01-01 until 2014-12-31
Generating CSV... for Workspace: 341257 from 2015-01-01 until 2015-12-31
Generating CSV... for Workspace: 341257 from 2016-01-01 until 2016-12-31
Generating CSV... for Workspace: 341257 from 2017-01-01 until 2017-12-31
Generating CSV... for Workspace: 341257 from 2018-01-01 until 2018-12-31
Generating CSV... for Workspace: 373504 from 2013-01-01 until 2013-12-31
Generating CSV... for Workspace: 373504 from 2014-01-01 until 2014-12-31
Generating CSV... for Workspace: 373504 from 2015-01-01 until 2015-12-31
Generating CSV... for Workspace: 373504 from 2016-01-01 until 2016-12-31
Generating CSV... for Workspace: 373504 from 2017-01-01 until 2017-12-31
Generating CSV... for Workspace: 373504 from 2018-01-01 until 2018-12-31
Generating CSV... for Workspace: 1234339 from 2013-01-01 until 2013-12-31
Generating CSV... for Workspace: 1234339 from 2014

-----

## Log of Latest Time Entries for that User 

* API Ref: https://github.com/toggl/toggl_api_docs/blob/master/chapters/time_entries.md#get-time-entries-started-in-a-specific-time-range
* Endpoint: https://www.toggl.com/api/v8/time_entries 
* Note: start_date and end_date must be ISO 8601 date and time strings.

In [30]:
# latest_time_entries from last 9 days
latest_time_entries = toggl.request("https://www.toggl.com/api/v8/time_entries")

In [31]:
len(latest_time_entries)

64

In [32]:
latest_time_entries[-1]

{'at': '2018-10-02T02:58:41+00:00',
 'billable': False,
 'description': 'Using Time Tracking Data Preparations',
 'duration': -1538449120,
 'duronly': False,
 'guid': 'd9a652c833581c6f8f14d5c68bae7146',
 'id': 988833651,
 'pid': 25620514,
 'start': '2018-10-02T02:58:40+00:00',
 'tags': ['ddy'],
 'uid': 440666,
 'wid': 341257}

In [33]:
latest_timelog = pd.DataFrame.from_dict(latest_time_entries)

In [34]:
latest_timelog.tail()

Unnamed: 0,at,billable,description,duration,duronly,guid,id,pid,start,stop,tags,uid,wid
59,2018-10-01T11:26:42+00:00,False,Using Time Data,4649,False,f326a873fe607074f0fc700b6b5a75b8,987893732,2759162,2018-10-01T10:09:11+00:00,2018-10-01T11:26:40+00:00,,440666,341257
60,2018-10-01T12:40:09+00:00,False,Invoices,2192,False,0964403755bc2f5391a3e0b6cdb43b73,988016673,5573084,2018-10-01T12:03:35+00:00,2018-10-01T12:40:07+00:00,,440666,341257
61,2018-10-01T13:08:22+00:00,False,Planning for Travels,1356,False,896b7e72d279873dae82bc3aef5e0651,988070101,2858673,2018-10-01T12:45:44+00:00,2018-10-01T13:08:20+00:00,,440666,341257
62,2018-10-02T02:58:36+00:00,False,Morning Pages,541,False,36487d25826cdb89255f5b1064962d00,988830684,2759162,2018-10-02T02:49:34+00:00,2018-10-02T02:58:35+00:00,,440666,341257
63,2018-10-02T02:58:41+00:00,False,Using Time Tracking Data Preparations,-1538449120,False,d9a652c833581c6f8f14d5c68bae7146,988833651,25620514,2018-10-02T02:58:40+00:00,,[ddy],440666,341257


In [35]:
latest_timelog.head()

Unnamed: 0,at,billable,description,duration,duronly,guid,id,pid,start,stop,tags,uid,wid
0,2018-09-24T03:05:32+00:00,False,Morning Pages - Half Marathon Analysis,2680,False,370f06b3e053946358f87f166edd4a07,981433205,2759162,2018-09-24T02:20:51+00:00,2018-09-24T03:05:31+00:00,,440666,341257
1,2018-09-24T04:55:10+00:00,False,DOC: Personal Data Collection with Code using ...,4746,False,a2fd1e7082e2e04320d0588e3a0cfd2c,981440483,2759162,2018-09-24T03:36:03+00:00,2018-09-24T04:55:09+00:00,[ddy],440666,341257
2,2018-09-24T07:40:15+00:00,False,Slides and RESEARCH: Anaconda Setup and Instal...,4410,False,96c1658d1756e4cddb4b5f14702962de,981501478,2759162,2018-09-24T06:26:43+00:00,2018-09-24T07:40:13+00:00,[ddy],440666,341257
3,2018-09-24T09:30:54+00:00,False,Overview of Data Collection,3735,False,896d6c435de8eaff6905ac8ed66852b4,981614498,2759162,2018-09-24T08:28:38+00:00,2018-09-24T09:30:53+00:00,[ddy],440666,341257
4,2018-09-25T10:11:44+00:00,False,Morning Pages,317,False,eaa91f325bcadae7e42a05dbb134e3a0,982970019,2759162,2018-09-25T10:06:26+00:00,2018-09-25T10:11:43+00:00,,440666,341257


In [36]:
latest_timelog.to_csv('data/toggl-timelog-latest.csv')

-----

# BONUS: Extract Times Entries for Every Single Day Using Toggl API

**NOTE:** A bit of a hackish solution. But this is a possible approach to getting individual day logs. 

In [37]:
extract_date_start = join_date.strftime("%Y-%m-%d") # join date
extract_date_end = today.strftime("%Y-%m-%d") # today

# UNCOMMENT TO Overide Full Extract 
extract_date_start = "2018-05-23"
# extract_date_end = "2018-05-01".strftime("%Y-%m-%d")
# extract_date_end = today.strftime("%Y-%m-%d") # today

# Function that turns datetimes back to strings since that's what the API likes
def date_only(datetimeVal):
      datePart = datetimeVal.strftime("%Y-%m-%d")
      return datePart

# List of Dates of Dates to Extract Time Entries
dates_range = list(pd.date_range(extract_date_start, extract_date_end))
dates_list = [date_only(x) for x in dates_range]

In [38]:
# Extract Timelogs Between Two Dates and Export to a CSV
def toggl_timelog_extractor(input_date1, input_date2):
    date1 = parse(input_date1).isoformat() + '+00:00'
    date2 = parse(input_date2).isoformat() + '+00:00'
    param = {
        'start_date': date1,
        'end_date': date2,
    } 
    try:
        temp_log =  pd.DataFrame.from_dict(toggl.request("https://www.toggl.com/api/v8/time_entries", parameters=param))
        temp_log.to_csv('data/detailed/toggl-time-entries-' + input_date1 + '.csv')
    except: 
        # try again if there is an issue the first time
        temp_log =  pd.DataFrame.from_dict(toggl.request("https://www.toggl.com/api/v8/time_entries", parameters=param))
        temp_log.to_csv('data/daily-detailed/toggl-time-entries-' + input_date1 + '.csv')

In [39]:
# UNCOMMENT to Test Between Two Date
# date1 = '2013-07-23'
# date2 = '2013-07-24'
# toggl_timelog_extractor(date1, date2)

In [40]:
# UNCOMMENT TO RUN
# Extract All Time Entry Data from Previous Days
#for count, item in enumerate(dates_list):
#    if item != dates_list[-1]:
#        date1 = item
#        date2 = (dates_list[count + 1])
#        # print(item + " ~ "+ date2)
#        time.sleep(1)
#        toggl_timelog_extractor(date1, date2)

-----

# Simple Data Analysis  (Using Exported CSV Logs)

In [41]:
import glob
import os

In [42]:
# import all days of time entries and create data frame
path = 'data/detailed/'
allFiles = glob.glob(path + "/*.csv")
timelogs = pd.DataFrame()
list_ = []
for file_ in allFiles:
    df = pd.read_csv(file_,index_col=None, header=0)
    list_.append(df)
timelog = pd.concat(list_)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  if __name__ == '__main__':


In [43]:
timelog.head()

Unnamed: 0,Amount (),Billable,Client,Description,Duration,Email,End date,End time,Project,Start date,Start time,Tags,Task,User
0,,No,,Morning Pages,00:09:01,markwkoester@gmail.com,2018-10-02,10:58:35,Writing,2018-10-02,10:49:34,,,Markwkoester
1,,No,,Planning for Travels,00:22:36,markwkoester@gmail.com,2018-10-01,21:08:20,Organizational Work,2018-10-01,20:45:44,,,Markwkoester
2,,No,,Invoices,00:36:32,markwkoester@gmail.com,2018-10-01,20:40:07,Financials,2018-10-01,20:03:35,,,Markwkoester
3,,No,,Using Time Data,01:17:29,markwkoester@gmail.com,2018-10-01,19:26:40,Writing,2018-10-01,18:09:11,,,Markwkoester
4,,No,,Using Time Data,01:41:13,markwkoester@gmail.com,2018-10-01,18:00:09,Writing,2018-10-01,16:18:56,ddy,,Markwkoester


In [44]:
len(timelog)

17967

In [45]:
# drop unused columns
timelog = timelog.drop(['Email', 'User', 'Amount ()', 'Client', 'Billable'], axis=1)

In [46]:
# helper functions to convert duration string to seconds
def get_sec(time_str):
    h, m, s = time_str.split(':')
    return int(h) * 3600 + int(m) * 60 + int(s)

# get_sec("01:16:36")

def dur2sec(row):
    return get_sec(row['Duration'])

# timelog.apply(dur2sec, axis=1)

In [47]:
timelog['seconds'] = timelog.apply(dur2sec, axis=1)

In [48]:
timelog.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 17967 entries, 0 to 219
Data columns (total 10 columns):
Description    17941 non-null object
Duration       17967 non-null object
End date       17967 non-null object
End time       17967 non-null object
Project        17842 non-null object
Start date     17967 non-null object
Start time     17967 non-null object
Tags           1148 non-null object
Task           0 non-null object
seconds        17967 non-null int64
dtypes: int64(1), object(9)
memory usage: 1.5+ MB


In [49]:
timelog.describe()

Unnamed: 0,seconds
count,17967.0
mean,1928.463628
std,2922.392983
min,0.0
25%,679.0
50%,1342.0
75%,2456.0
max,255420.0


In [50]:
timelog.head()

Unnamed: 0,Description,Duration,End date,End time,Project,Start date,Start time,Tags,Task,seconds
0,Morning Pages,00:09:01,2018-10-02,10:58:35,Writing,2018-10-02,10:49:34,,,541
1,Planning for Travels,00:22:36,2018-10-01,21:08:20,Organizational Work,2018-10-01,20:45:44,,,1356
2,Invoices,00:36:32,2018-10-01,20:40:07,Financials,2018-10-01,20:03:35,,,2192
3,Using Time Data,01:17:29,2018-10-01,19:26:40,Writing,2018-10-01,18:09:11,,,4649
4,Using Time Data,01:41:13,2018-10-01,18:00:09,Writing,2018-10-01,16:18:56,ddy,,6073


In [51]:
timelog.tail()

Unnamed: 0,Description,Duration,End date,End time,Project,Start date,Start time,Tags,Task,seconds
215,BioMarkerDB: Planning and Setup,00:01:25,2018-01-06,15:53:32,Startup Project Misc Work,2018-01-06,15:52:07,,,85
216,BioMarkerDB: Brainstorming,00:22:32,2018-01-05,23:32:28,Startup Project Misc Work,2018-01-05,23:09:56,,,1352
217,VO2 Max Estimator App,00:39:19,2018-01-03,15:16:35,Startup Project Misc Work,2018-01-03,14:37:16,,,2359
218,VO2 Max Estimator App,00:31:46,2018-01-03,14:12:53,Startup Project Misc Work,2018-01-03,13:41:07,,,1906
219,Medical Tourism in Thailand: Research,00:41:24,2018-01-03,13:14:30,Startup Project Misc Work,2018-01-03,12:33:06,,,2484


In [52]:
# Total hours
round((timelog.seconds.sum() / 60 / 60), 1)

9624.6

In [53]:
# total days
round((timelog.seconds.sum() / 60 / 60 / 24), 1)

401.0

In [54]:
timelog.to_csv("data/toggl-detailed-logs-full-export.csv")

-----

## Combine to a Daily Project Time Number

In [55]:
# combine to daily number
daily_project_time = timelog.groupby(['Start date'])['seconds'].sum()
print('{:,} total project time data'.format(len(daily_project_time)))
daily_project_time.to_csv('data/daily_project_time.csv')
daily_project_time.tail(5)

1,924 total project time data


Start date
2018-09-28    30898
2018-09-29    19114
2018-09-30    20571
2018-10-01    34178
2018-10-02      541
Name: seconds, dtype: int64