In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import json
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats
import pickle

In [3]:
def _goal_stats(goals, completed_goals):
    """Return (share of goals completed, goals achieved, goals set) for one session."""
    if len(goals) == 0:
        # Without any goals the share is reported as 0 instead of dividing by zero.
        return 0, 0, 0
    share = round(len(completed_goals) / len(goals), 2)
    return share, len(completed_goals), len(goals)


def _broken_focus_stats(broken_focus):
    """Return (total break minutes rounded to 2 decimals, number of breaks)."""
    total_minutes = 0
    nr_of_breaks = 0
    for interval in broken_focus:
        # Only complete [start, end] pairs are counted; other lengths are skipped.
        if len(interval) == 2:
            total_minutes += (interval[1] - interval[0]) / 60000
            nr_of_breaks += 1
    return round(total_minutes, 2), nr_of_breaks


def _distraction_stats(active_windows, end_time):
    """Return (number of distracting windows, minutes spent in them)."""
    count = 0
    minutes = 0
    last_position = len(active_windows) - 1
    for position, window in enumerate(active_windows):
        if not window["isDistraction"]:
            continue
        count += 1
        if position < last_position:
            # A window lasts until the next window's timestamp ...
            duration = active_windows[position + 1]["timestamp"] - window["timestamp"]
        else:
            # ... and the final window lasts until the session ends.
            duration = end_time - window["timestamp"]
        minutes += duration / 60000
    return count, minutes


def _message_stats(services):
    """Return (messages received, auto-replies sent) summed over all services."""
    received = sum(len(service["messages"]) for service in services)
    autoreplies = sum(len(service["autoReplied"]) for service in services)
    return received, autoreplies


def create_focusDF(data, filenr, min_minutes=10, max_minutes=300):
    """Build a per-session DataFrame from one participant's export.

    Parameters
    ----------
    data : dict
        Parsed JSON; ``data["focusSessions"]`` holds the session records.
        Timestamps are treated as epoch milliseconds.
    filenr : str
        Name of the source file; its first three characters become ``PSNR``.
    min_minutes, max_minutes : number, optional
        Exclusive bounds on ``minutesInFocus``; sessions outside the open
        interval ``(min_minutes, max_minutes)`` are dropped. The defaults
        (10 and 300) reproduce the previous hard-coded filter.
        NOTE(review): the original comment spoke of dropping sessions longer
        than 6 hours, but the threshold has always been 300 minutes (5 hours)
        -- confirm which one is intended.

    Returns
    -------
    pandas.DataFrame
        One row per retained session with the derived columns ``focusID``,
        ``PSNR``, ``distractionCount``, ``distractionTime``,
        ``numberOfServices``, ``messagesReceivedInFocus``, ``brokenFocus``
        (total break minutes), ``nrOfBreaks``, ``percentageCompletedGoals``,
        ``goalsAchieved``, ``goalsSet``, ``autoReplied`` and ``hourOfDay``.
        An empty DataFrame is returned when the file contains no sessions.
    """
    # transform json part to pandas dataframe
    df = pd.json_normalize(data["focusSessions"])

    # A file without sessions yields a frame with no columns; return it as-is
    # instead of failing on the missing "endTime" column below.
    if df.empty:
        return df

    # how long the user stayed in focus, in minutes
    df["minutesInFocus"] = round((df["endTime"] - df["startTime"]) / 60000, 2)

    records = []
    for _, row in df.iterrows():
        share, achieved, goals_set = _goal_stats(row["goals"], row["completedGoals"])
        break_minutes, nr_of_breaks = _broken_focus_stats(row["brokenFocus"])
        distraction_count, distraction_time = _distraction_stats(
            row["activeWindows"], row["endTime"]
        )
        messages_received, autoreplies = _message_stats(row["services"])

        # Key order defines the order in which columns are added to the frame.
        records.append({
            "focusID": row["id"],
            "PSNR": filenr[:3],
            "distractionCount": distraction_count,
            "distractionTime": distraction_time,
            "numberOfServices": len(row["services"]),
            "messagesReceivedInFocus": messages_received,
            # overwrites the raw list of break intervals with their total length
            "brokenFocus": break_minutes,
            "nrOfBreaks": nr_of_breaks,
            "percentageCompletedGoals": share,
            "goalsAchieved": achieved,
            "goalsSet": goals_set,
            "autoReplied": autoreplies,
            # NOTE(review): the hour is read from the epoch timestamp without any
            # timezone conversion -- confirm this matches the participants' local time.
            "hourOfDay": pd.Timestamp(row["startTime"], unit="ms").hour,
        })

    for column in records[0]:
        df[column] = [record[column] for record in records]

    # Drop implausibly short or long sessions (e.g. a participant forgot to
    # close the focus session).
    df = df[(df["minutesInFocus"] < max_minutes) & (df["minutesInFocus"] > min_minutes)]

    # drop the raw columns that were only needed to derive the features above
    df = df.drop(
        ["id", "services", "calendarSubject", "startTime", "endTime",
         "originalEndTime", "goals", "completedGoals", "activeWindows", "comments"],
        axis=1,
    )
    return df

In [4]:
from os import listdir
from os.path import isfile, join

# Directory holding the per-participant JSON exports.
mypath = "./data"
# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# the combined frame reproducible across machines and runs.
onlyfiles = sorted(f for f in listdir(mypath) if isfile(join(mypath, f)))

# Collect the per-file frames and concatenate once at the end: concatenating
# inside the loop re-copies the accumulated frame for every file.
frames = []
for i in onlyfiles:
    # Open via mypath so the directory is defined in exactly one place.
    with open(join(mypath, i), 'r') as infile:
        data = json.load(infile)
    df_temp = create_focusDF(data, i)
    frames.append(df_temp)

# pd.concat raises on an empty list, so fall back to an empty frame.
df = pd.concat(frames) if frames else pd.DataFrame()

# reset_index() without drop=True keeps each row's position inside its source
# file as an "index" column next to the new running index.
df = df.reset_index()

In [5]:
df

Unnamed: 0,index,brokenFocus,rating,scheduled,appVersion,minutesInFocus,focusID,PSNR,distractionCount,distractionTime,numberOfServices,messagesReceivedInFocus,nrOfBreaks,percentageCompletedGoals,goalsAchieved,goalsSet,autoReplied,hourOfDay
0,0,0.00,6,False,exman,40.00,0d40ff22-a6e2-472c-b2e8-f3632adb08f0,p01,0,0.0,2,1,0,0.62,5,8,0,8
1,1,1.23,4,False,exman,25.00,ef98194e-2687-4985-bcdb-16c9d8937b2d,p01,0,0.0,2,0,1,0.33,1,3,0,9
2,2,0.00,,False,exman,55.00,95fbdff0-1a8d-4691-b0b7-e7033fa853cd,p01,0,0.0,2,0,0,1.00,4,4,0,10
3,3,0.00,5,False,exman,15.00,1e035bbf-b591-46df-ad39-f2928e546469,p01,0,0.0,2,1,0,0.00,0,1,0,17
4,4,0.00,5,False,exman,35.00,1af896b8-5224-4ac0-a258-b63c51f1ffc6,p01,0,0.0,3,0,0,0.00,0,1,0,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
222,12,0.00,7,False,exman,120.00,571eb507-7bef-4612-98e5-122202665e7c,p17,0,0.0,4,4,0,0.50,1,2,0,19
223,13,0.00,7,False,exman,30.00,c1a0898d-f8a5-46c8-87fd-32b038a96d5e,p17,0,0.0,4,0,0,1.00,1,1,0,0
224,14,0.00,4,False,exman,90.00,e235bd8f-4ba9-4c45-a3e5-57f8e6627864,p17,0,0.0,4,0,0,0.50,1,2,0,23
225,15,1.62,7,False,exman,107.61,99cb53b0-40e8-4970-8ca5-f628d3dca1c9,p17,0,0.0,4,1,3,1.00,2,2,0,14


In [6]:
# Split the combined sessions into one frame per app version.
pomodoro = df.loc[df["appVersion"].eq("pomodoro")]
exman = df.loc[df["appVersion"].eq("exman")]

In [7]:
pomodoro

Unnamed: 0,index,brokenFocus,rating,scheduled,appVersion,minutesInFocus,focusID,PSNR,distractionCount,distractionTime,numberOfServices,messagesReceivedInFocus,nrOfBreaks,percentageCompletedGoals,goalsAchieved,goalsSet,autoReplied,hourOfDay
18,18,0.0,,False,pomodoro,25.00,1adb005c-9585-4c60-96ab-a5b187ce04b0,p01,0,0.0,3,1,0,1.00,2,2,0,11
19,19,0.0,6,False,pomodoro,25.00,83905b46-b51b-4975-8902-c72ce3e33c01,p01,0,0.0,3,0,0,1.00,2,2,0,12
20,20,0.0,,False,pomodoro,110.91,25f3ce1d-e67f-472a-a475-f8d4142d714e,p01,0,0.0,3,16,0,1.00,3,3,0,13
21,21,0.0,4,False,pomodoro,40.00,1d0fd1f8-97c9-4b4d-9a5d-47620943a4d3,p01,0,0.0,3,0,0,0.00,0,4,0,7
22,22,0.0,6,False,pomodoro,25.00,41399a10-751f-4d51-8805-d31f3419fcf9,p01,0,0.0,3,0,0,0.00,0,0,0,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
215,5,0.0,5,False,pomodoro,30.00,8de361ee-9801-4282-ba06-82d90edd4994,p17,0,0.0,4,0,0,1.00,1,1,0,3
216,6,0.0,5,False,pomodoro,57.82,f19776a6-e5e4-4250-8f9f-347012fca735,p17,0,0.0,4,0,0,0.50,1,2,0,16
217,7,0.0,6,False,pomodoro,120.00,435981ca-a369-4bfc-8a29-259aaccc4d58,p17,0,0.0,4,6,0,0.67,2,3,0,0
218,8,0.0,5,False,pomodoro,60.00,255d9c15-aaf9-48c1-a145-1e5aa4f6a8f3,p17,0,0.0,4,2,0,0.00,0,1,0,17


In [8]:
exman

Unnamed: 0,index,brokenFocus,rating,scheduled,appVersion,minutesInFocus,focusID,PSNR,distractionCount,distractionTime,numberOfServices,messagesReceivedInFocus,nrOfBreaks,percentageCompletedGoals,goalsAchieved,goalsSet,autoReplied,hourOfDay
0,0,0.00,6,False,exman,40.00,0d40ff22-a6e2-472c-b2e8-f3632adb08f0,p01,0,0.0,2,1,0,0.62,5,8,0,8
1,1,1.23,4,False,exman,25.00,ef98194e-2687-4985-bcdb-16c9d8937b2d,p01,0,0.0,2,0,1,0.33,1,3,0,9
2,2,0.00,,False,exman,55.00,95fbdff0-1a8d-4691-b0b7-e7033fa853cd,p01,0,0.0,2,0,0,1.00,4,4,0,10
3,3,0.00,5,False,exman,15.00,1e035bbf-b591-46df-ad39-f2928e546469,p01,0,0.0,2,1,0,0.00,0,1,0,17
4,4,0.00,5,False,exman,35.00,1af896b8-5224-4ac0-a258-b63c51f1ffc6,p01,0,0.0,3,0,0,0.00,0,1,0,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
222,12,0.00,7,False,exman,120.00,571eb507-7bef-4612-98e5-122202665e7c,p17,0,0.0,4,4,0,0.50,1,2,0,19
223,13,0.00,7,False,exman,30.00,c1a0898d-f8a5-46c8-87fd-32b038a96d5e,p17,0,0.0,4,0,0,1.00,1,1,0,0
224,14,0.00,4,False,exman,90.00,e235bd8f-4ba9-4c45-a3e5-57f8e6627864,p17,0,0.0,4,0,0,0.50,1,2,0,23
225,15,1.62,7,False,exman,107.61,99cb53b0-40e8-4970-8ca5-f628d3dca1c9,p17,0,0.0,4,1,3,1.00,2,2,0,14


In [9]:
# Persist the combined frame and the two per-version subsets as pickle files.
# NOTE(review): the ./pickle directory presumably has to exist already -- confirm.
df.to_pickle("./pickle/df.pkl")
exman.to_pickle("./pickle/exman.pkl")
pomodoro.to_pickle("./pickle/pomodoro.pkl")

In [10]:
# The 25 exman sessions with the most received messages, in ascending order.
# Despite the "top50" name, only 25 rows are kept.
exman_top50 = exman.sort_values(by="messagesReceivedInFocus").iloc[-25:]

In [11]:
# The 25 pomodoro sessions with the most received messages, in ascending order.
# Despite the "top50" name, only 25 rows are kept.
pomodoro_top50 = pomodoro.sort_values(by="messagesReceivedInFocus").iloc[-25:]

In [12]:
# productivity per version and participants needs to be fixed
df_top100 = pd.concat([
    pomodoro_top50,
    exman_top50
],axis=0)
#df_top100 = df_top100.reset_index()

In [13]:
df_top100

Unnamed: 0,index,brokenFocus,rating,scheduled,appVersion,minutesInFocus,focusID,PSNR,distractionCount,distractionTime,numberOfServices,messagesReceivedInFocus,nrOfBreaks,percentageCompletedGoals,goalsAchieved,goalsSet,autoReplied,hourOfDay
53,2,0.0,5.0,False,pomodoro,54.62,5605c4cc-e5c1-4832-93bf-2988d178813d,p07,0,0.0,5,1,0,1.0,1,1,0,8
154,26,0.0,,False,pomodoro,25.0,55930028-53aa-4a99-aa47-4896ffd523e2,p09,0,0.0,4,1,0,0.0,0,1,1,17
23,23,0.0,6.0,False,pomodoro,102.71,2976cf4b-9ae4-47d5-b17a-9c72ba3e69cc,p01,0,0.0,3,2,0,1.0,4,4,0,8
213,3,0.0,7.0,False,pomodoro,55.0,e210b4af-1cc1-4f51-97ae-5a27179382c2,p17,0,0.0,4,2,0,0.5,1,2,0,1
212,2,49.93,4.0,False,pomodoro,80.0,6c6fa9a7-315c-4075-8f7c-8363fc5de7bb,p17,0,0.0,4,2,2,0.33,2,6,0,20
211,1,11.15,5.0,False,pomodoro,53.32,c4b0771f-3ff2-4711-9aad-929e5f4c69d0,p17,0,0.0,5,2,7,1.0,4,4,0,16
218,8,0.0,5.0,False,pomodoro,60.0,255d9c15-aaf9-48c1-a145-1e5aa4f6a8f3,p17,0,0.0,4,2,0,0.0,0,1,0,17
209,24,0.0,6.0,False,pomodoro,25.0,a2a3b0d2-93b3-4acc-b156-5a0096bdf238,p12,16,7.920617,4,2,0,0.0,0,2,0,21
33,34,0.0,,False,pomodoro,25.0,ba129272-2a0d-4bbc-9fef-e3790cfce4be,p01,4,0.300233,3,2,0,0.0,0,0,0,9
47,13,39.1,3.0,False,pomodoro,40.0,f3b3f7e7-d99f-4a05-aa48-121acf63153d,p06,0,0.0,4,2,1,1.0,1,1,0,14


In [14]:
# Persist the high-message-volume selections as pickle files. The names say
# top100/top50, but the frames hold at most 50 and 25 rows respectively.
# NOTE(review): the ./pickle directory presumably has to exist already -- confirm.
df_top100.to_pickle("./pickle/df_top100.pkl")
exman_top50.to_pickle("./pickle/exman_top50.pkl")
pomodoro_top50.to_pickle("./pickle/pomodoro_top50.pkl")