In [34]:
import pandas as pd
import numpy as np
import os
import glob

# Locate every participant export in ./data relative to the current working directory.
path = os.getcwd()
# glob returns matches in arbitrary (filesystem-dependent) order; sort them so the
# order in which files are processed is reproducible from run to run.
csv_files = sorted(glob.glob(os.path.join(path, "data/*.csv")))

In [44]:
# Per-file frames are collected here; one entry per CSV that is not skipped.
dataframes = []

# Number of seconds in one week (7 * 24 * 60 * 60).
SECONDS_PER_WEEK = 604800

for f in csv_files:

    # read the csv file
    df = pd.read_csv(f)

    # skip setup files (anything with fewer than two rows)
    if len(df) < 2:
        continue

    # add UUID as column: the last 36 characters of the path without its extension
    df["participantUuid"] = os.path.splitext(f)[0][-36:]

    # convert timestamps to datetime
    df["timestamp"] = pd.to_datetime(df["timestamp"])

    # Pick the reference instant that relative times are measured from.
    # Default: the earliest timestamp in the file.
    study_start = df["timestamp"].min()
    if "didUpdateDailyBudget" in df.columns:
        # Prefer the first daily budget update, which indicates study setup.
        first_setup = df.loc[df["didUpdateDailyBudget"].notna(), "timestamp"].min()
        # If the column exists but has no non-null entry, the minimum is NaT and
        # every relative time would turn into <NA>; keep the default in that case.
        if pd.notna(first_setup):
            study_start = first_setup

    # Whole seconds elapsed since the reference instant (negative for earlier rows).
    seconds_since_setup = (df["timestamp"] - study_start) // pd.Timedelta('1s')

    # Nullable Int64 keeps rows with a missing timestamp as <NA> instead of failing.
    df["secondsSinceStudySetup"] = seconds_since_setup.astype("Int64")

    # NOTE(review): rounding up means an offset of exactly 0 s lands in week 0 while
    # every other row of the first seven days lands in week 1 - confirm this is intended.
    df["weeksSinceStudySetup"] = np.ceil(seconds_since_setup / SECONDS_PER_WEEK).astype("Int64")

    # append to array
    dataframes.append(df)

In [45]:
# Stack all per-file frames row-wise into a single frame. Each frame keeps its
# own row labels, so index values repeat across files in the result.
merged = pd.concat(dataframes, axis=0, ignore_index=False)

# Render the combined frame as the cell output.
display(merged)

Unnamed: 0,resolution,timestamp,interventionDuration,interventionType,app,purpose,participantUuid,secondsSinceStudySetup,weeksSinceStudySetup,startOfWeek,pre_study_healthyAlternativesEnabled,pre_study_showUsageStats,pre_study_interventionsSelected,didUpdateDailyBudget,terminationReason
0,dismissedAppOpening,2023-02-27 13:51:55.931578112,6.0,minimalBreathingExercise,instagram,,B6E779A4-8559-4419-8049-9A169882B7DD,0,0,,,,,,
1,dismissedAppOpening,2023-02-27 13:52:21.650917120,6.0,minimalBreathingExercise,instagram,,B6E779A4-8559-4419-8049-9A169882B7DD,25,1,,,,,,
2,dismissedAppOpening,2023-02-27 15:00:42.015766016,6.0,minimalBreathingExercise,clashOfClans,,B6E779A4-8559-4419-8049-9A169882B7DD,4126,1,,,,,,
3,dismissedAppOpening,2023-02-27 15:01:17.642582016,6.0,minimalBreathingExercise,clashOfClans,,B6E779A4-8559-4419-8049-9A169882B7DD,4161,1,,,,,,
4,dismissedAppOpening,2023-02-27 15:01:35.701256960,6.0,minimalBreathingExercise,clashOfClans,,B6E779A4-8559-4419-8049-9A169882B7DD,4179,1,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84,closedApp,2023-03-09 12:27:44.682811136,,,youtube,,EC55AC57-0AE9-431E-A63D-BDC5BCAB9DB2,216566,1,,,,,,
85,openedApp,2023-03-09 21:55:43.585468928,6.0,minimalBreathingExercise,instagram,Checking Messages,EC55AC57-0AE9-431E-A63D-BDC5BCAB9DB2,250645,1,,,,,,
86,closedApp,2023-03-09 21:57:24.257032960,,,instagram,,EC55AC57-0AE9-431E-A63D-BDC5BCAB9DB2,250745,1,,,,,,
87,closedApp,2023-03-09 22:00:06.163576064,,,instagram,,EC55AC57-0AE9-431E-A63D-BDC5BCAB9DB2,250907,1,,,,,,


In [46]:
# Persist the combined frame (row labels included) to merged.csv,
# resolved against the current working directory.
merged.to_csv(path_or_buf="merged.csv")