## Timediaries

In [None]:
import pandas as pd

In [None]:
# Load the time-diary answers CSV (the path is relative to the working directory).
diaries = pd.read_csv('SHB_data_luisa/timediariesanswers.csv')

In [None]:
diaries.head()

In [None]:
# Discard the columns that are not processed in the cells below.
diaries = diaries.drop(
    columns=[
        "experimentid",
        "userid",
        "instanceid",
        "notificationtimestamp",
        "answerduration",
        "day",
        "answertimestamp",
        "payload",
    ]
)

In [None]:
diaries
# Column meanings:
#   instancetimestamp --> time at which the notification was sent
#   delta             --> difference between the answer time and the notification time, in milliseconds
#   answer            --> JSON with the answers

## Timestamp preprocessing

In [None]:
# True if at least one row has a missing instancetimestamp
diaries['instancetimestamp'].isnull().values.any()

In [None]:
# rename "instancetimestamp" to "timestamp"
diaries = diaries.rename(columns={"instancetimestamp": "timestamp"})

In [None]:
diaries

In [None]:
# Keep only the first 12 characters of the timestamp's string form,
# which removes the seconds and decimals.
diaries['timestamp'] = diaries['timestamp'].astype(str).str[:12]
diaries

##

##  Answer processing: identifying all possible cases:

### Four question time diaries (what, where, with who, mood)

In [None]:
diaries.answer[400] # filled

### Expired time diaries

In [None]:
diaries.answer[4] # expired

Filled and expired time diaries both contain four dictionary-like elements, so they can be treated as a single case (in an expired time diary the "what" answer is "expired", the "with who" and "where" answers are "null", and the "mood" is 0).

### Snoozed time diaries

In [None]:
diaries.answer[0] # snoozed

### Food and drinks questions (asked two hours outside the main meals)

In [None]:
diaries.answer[37] # one/two questions (about food and drinks)

In [None]:
diaries[37:38] # at midnight

### Start of the day (8am)

In [None]:
# Disabled exploration snippet, kept inside a string literal so it does not run.
# NOTE(review): i is incremented before it is used in the slice, so if enabled
# the snippet would print the row *after* each timestamp ending in '0800'.
'''
i=0
for t in diaries.timestamp:
    i+=1
    if t.endswith('0800'):
        print(diaries[i:i+1])
'''

In [None]:
diaries.answer[701]  # two questions (sleep quality & expectations on the day)

In [None]:
diaries[701:702]

### End of the day time diaries (two/three questions at 10pm)

- Questions at 10:00 PM:
participants were asked (a) to rate their day, (b) whether they had any problems during the day, and (c) how they solved them.

In [None]:
diaries.answer[43] # how was your day aka daily mood / did you had problem today

In [None]:
diaries[43:44] # at 10pm

### Other 3 particular cases:

1. When the answer to "what are you doing?" is "Travelling", different follow-up questions are asked, namely "what is the reason of the travel?" and "which means of transport are you using?".

2. If the answer to "what are you doing?" is "Sport", besides "where", "with who" and "mood", there is another question added ("which sport?").

3. If the answer to "what are you doing?" is "Eating", besides "where", "with who" and "mood", there is another question added ("what are you eating?")

## Testing time diaries recoding

In [None]:
a = diaries.answer[38]

import re
# Non-greedy regular expression: every "{...}" chunk of the answer string
# becomes one element of the returned list.
# General template: re.findall("START.*?(?:END|ALTERNATIVE_END|ALTERNATIVE_END2)", s)
re.findall("{.*?(?:})", a)

In [None]:
import json

# Each "{...}" chunk of the answer is a string that only looks like a dictionary.
r = re.findall("{.*?(?:})", a)

# Decode every chunk into an actual dictionary.
res = [json.loads(chunk) for chunk in r]

In [None]:
res

In [None]:
type(res[0])

In [None]:
res[0]["cnt"]

## Recode time diaries

In [None]:
import json
import re

# Placeholder written for every question that was not asked or not answered.
# It is the literal string 'NaN', not a float NaN.
_MISSING = 'NaN'

# One flat "{...}" object inside the raw answer string. The match is
# non-greedy, so every object is returned separately; nested braces are not
# supported. Compiled once instead of once per row.
_ANSWER_OBJECT_RE = re.compile(r"\{.*?\}")

# Every recoded field; each one ends up as one list / dataframe column.
_FIELDS = (
    'what', 'where', 'with_who', 'mood',  # what, where, with who, mood
    'sleep', 'pred_day',                  # 8am questions
    'daily_mood', 'probl', 'solution',    # 10pm questions
    'yn', 'food',                         # food/drinks during breaks
)


def _parse_answer(raw):
    """Return the list of dictionaries embedded in one raw answer string.

    A non-string value (e.g. a missing answer) yields an empty list instead
    of letting the regular expression raise TypeError.
    """
    if not isinstance(raw, str):
        return []
    return [json.loads(chunk) for chunk in _ANSWER_OBJECT_RE.findall(raw)]


def _recode_answer(res):
    """Turn the parsed dictionaries of one diary entry into a {field: value} row.

    Every field starts as the 'NaN' placeholder and only the fields that
    belong to the detected kind of diary are overwritten. The kind is decided
    from the number of parsed dictionaries and from the first answer.
    """
    row = dict.fromkeys(_FIELDS, _MISSING)
    n_items = len(res)
    if n_items == 0:
        # nothing could be parsed: keep the whole row missing
        return row

    first = res[0]["cnt"]

    if n_items == 4:
        # filled or expired
        row['what'] = first
        if res[1]["cnt"] != 'null':
            # filled (an expired diary keeps where / with who / mood missing)
            row['where'] = res[1]["cnt"]
            row['with_who'] = res[2]["cnt"]
            row['mood'] = res[3]["cnt"]

    elif n_items > 4 and first == 'Eating':
        # if what==eating, there is another question added ("what are you eating?"):
        # the last three answers are where / with who / mood, everything
        # before them (except the 'Eating' marker) is the food that was eaten
        row['what'] = first
        row['where'] = res[-3]["cnt"]
        row['with_who'] = res[-2]["cnt"]
        row['mood'] = res[-1]["cnt"]
        row['food'] = [item["cnt"] for item in res[:-3] if item["cnt"] != 'Eating']

    elif n_items > 4 and first == 'Sport':
        # if what==sport, there is another question added ("which sport?")
        row['what'] = first + ': ' + res[1]["cnt"]
        row['where'] = res[2]["cnt"]
        row['with_who'] = res[3]["cnt"]
        row['mood'] = res[4]["cnt"]

    elif n_items == 1:
        # either food or snoozed
        if first == 'No':
            # food/drink question answered "no", so "what food" was not asked
            row['yn'] = 'No'
        else:
            # it's a snoozed question
            row['what'] = first

    elif n_items == 2:
        if first.startswith('Yes'):
            # food/drink question answered "yes", so "what food" was asked too
            row['yn'] = 'Yes'
            row['food'] = res[1]["cnt"]
        elif res[1]["cnt"] == 'No':
            # 10pm question about the day: "did you have any problem today"
            # was answered "no", so the question about the solution was not asked
            row['daily_mood'] = first
            row['probl'] = res[1]["cnt"]
        else:
            # last case when len=2: it's a 8am question
            row['sleep'] = first
            row['pred_day'] = res[1]["cnt"]

    elif first == 'Travelling':
        # when what==travelling it asked for the reason why you are travelling
        # and the means of transport
        row['what'] = first + ' for: ' + res[1]['cnt'] + ', ' + res[2]['cnt']

    else:
        # three answers: 10pm question where "did you have problem today" was
        # answered "yes", so the question about the solution was also asked.
        # NOTE(review): entries with more than four answers whose first answer
        # is neither Eating nor Sport also end up here (and in the Travelling
        # branch above), exactly as in the previous logic - confirm intended.
        row['daily_mood'] = first
        row['probl'] = res[1]["cnt"]
        row['solution'] = res[2]["cnt"]

    return row


# one recoded row per diary entry, in dataframe order
_rows = [_recode_answer(_parse_answer(a)) for a in diaries.answer]

# what, where, with who, mood
what_lst = [row['what'] for row in _rows]  # first question of filled, expired and snoozed here
where_lst = [row['where'] for row in _rows]
with_who_lst = [row['with_who'] for row in _rows]
mood_lst = [row['mood'] for row in _rows]

# 8am questions
sleep = [row['sleep'] for row in _rows]
pred_day = [row['pred_day'] for row in _rows]

# 10pm questions
daily_mood = [row['daily_mood'] for row in _rows]
probl = [row['probl'] for row in _rows]
solution = [row['solution'] for row in _rows]

# food/drinks during breaks
yn = [row['yn'] for row in _rows]
food = [row['food'] for row in _rows]

In [None]:
# Attach every recoded list to the dataframe as a new column,
# in the order in which the entries are listed here.
recoded_columns = {
    # what, where, with who, mood
    'what': what_lst,
    'where': where_lst,
    'with_who': with_who_lst,
    'mood': mood_lst,
    # 8am questions
    'sleep_quality': sleep,
    'pred_day': pred_day,
    # 10pm questions
    'daily_mood': daily_mood,
    'problem': probl,
    'solution': solution,
    # food/drinks during breaks
    'yn_food': yn,
    'food': food,
}
for column_name, column_values in recoded_columns.items():
    diaries[column_name] = column_values

In [None]:
diaries

In [None]:
# The raw (uncleaned) answer column is dropped from the dataframe.
diaries = diaries.drop(columns=["answer"])
diaries

## Recodify mood: from emoji to number (add in a new column)

In [None]:
diaries.mood.unique()

In [None]:
# Numeric scale for the mood emojis: '🙁' = 1, '😐' = 2, '🙂' = 3, '😁' = 4.
# Any other value (e.g. the 'NaN' placeholder) is recoded as 'NaN'.
mood_scale = (('🙁', 1), ('😐', 2), ('🙂', 3), ('😁', 4))
num_mood = [
    next((score for emoji, score in mood_scale if mood == emoji), 'NaN')
    for mood in diaries.mood
]
# add column to df
diaries['num_mood'] = num_mood

In [None]:
diaries

## Save processed dataset

In [None]:
# Save the processed diaries. to_csv is called with its defaults, so the
# dataframe index is written too (as the first, unnamed column).
diaries.to_csv('timediaries.csv')