In [None]:
import pandas as pd
import os

In [None]:
def get_app_usage_from_csv(file_path):
    """
    Reads app usage events from a csv file and returns one row per
    foreground session.

    Only MOVE_TO_FOREGROUND / MOVE_TO_BACKGROUND events are considered. Each
    foreground event is treated as the start of a session that ends at the
    timestamp of the next remaining event. The chronologically last event has
    no successor and is therefore dropped.

    Parameters
    ----------
    file_path : str
        Path of a csv file with at least the columns ``timestamp``
        (parsed as epoch milliseconds), ``type``, ``name`` and ``packageName``.

    Returns
    -------
    pandas.DataFrame
        Indexed by the start ``timestamp`` in epoch milliseconds, with the
        columns ``name``, ``packageName``, ``start_timestamp`` and
        ``end_timestamp`` (both timezone-aware, Asia/Seoul) and ``duration``
        in seconds. Rows sharing a start timestamp are collapsed into one row
        holding the first non-null value of each column.
    """
    events = pd.read_csv(file_path)
    events = events[events['type'].isin(['MOVE_TO_FOREGROUND', 'MOVE_TO_BACKGROUND'])]
    # Pairing an event with "the next row" is only meaningful in chronological
    # order; the stable sort keeps the file order for equal timestamps.
    events = events.sort_values('timestamp', kind='stable')

    # Every event but the last is paired with the timestamp of its successor.
    event_ms = events['timestamp'].to_numpy()
    sessions = events.iloc[:-1].copy()  # explicit copy: safe to add columns
    sessions['start_timestamp'] = event_ms[:-1]
    sessions['end_timestamp'] = event_ms[1:]

    # Only foreground events open a session; background events just close one.
    sessions = sessions[sessions['type'] == 'MOVE_TO_FOREGROUND']
    sessions = sessions.set_index('timestamp')[
        ['name', 'packageName', 'start_timestamp', 'end_timestamp']
    ].copy()

    for column in ('start_timestamp', 'end_timestamp'):
        sessions[column] = (
            pd.to_datetime(sessions[column], unit='ms', utc=True)
            .dt.tz_convert('Asia/Seoul')
        )
    sessions['duration'] = (
        sessions['end_timestamp'] - sessions['start_timestamp']
    ).dt.total_seconds()

    # Collapse duplicate start timestamps so the index is unique and sorted.
    return sessions.groupby(level='timestamp').first()




In [None]:
def get_files_with_event_entity(user_id):
    """
    Lists the AppUsageEventEntity files of one user.

    Scans the directory ./users/<user_id> and returns the names (not full
    paths) of all entries whose name starts with 'AppUsageEventEntity', in
    the order os.listdir reports them.
    """
    user_dir = f'./users/{user_id}'
    matching = []
    for entry in os.listdir(user_dir):
        if entry.startswith('AppUsageEventEntity'):
            matching.append(entry)
    return matching
    

In [None]:
def read_app_usage_for_user_id(user_id):
    """
    Reads all AppUsageEventEntity files for a user id and returns a dataframe.

    Every matching file in ./users/<user_id> is parsed with
    get_app_usage_from_csv and the results are stacked. Two columns are then
    added: ``user_id`` (the given id) and ``hour`` (hour of day of
    ``start_timestamp``).

    Returns an empty, column-less DataFrame when the user has no
    AppUsageEventEntity files.
    """
    # Collect first and concatenate once; growing a frame with concat inside
    # the loop copies all previously read rows on every iteration.
    frames = [
        get_app_usage_from_csv(f'./users/{user_id}/{file}')
        for file in get_files_with_event_entity(user_id)
    ]
    if not frames:
        # Nothing to read: there is no start_timestamp column to derive the
        # hour from, so return an empty frame instead of raising.
        return pd.DataFrame()

    df = pd.concat(frames)
    df['user_id'] = user_id
    df['hour'] = df.start_timestamp.dt.hour
    return df


In [None]:
def get_user_folders():
    """
    Returns the names of the user folders located directly inside ./users.

    Only directories are returned; plain files that happen to sit in ./users
    are skipped because they cannot be listed as a user folder. The names are
    sorted so that repeated runs process the users in the same order.
    """
    users_root = './users'
    return sorted(
        entry
        for entry in os.listdir(users_root)
        if os.path.isdir(os.path.join(users_root, entry))
    )
    
    

In [None]:
def read_all_folders():
    """
    Reads the app usage data of every user folder and returns one dataframe.

    Calls read_app_usage_for_user_id for each name returned by
    get_user_folders and stacks the per-user frames in that order. Returns an
    empty DataFrame when there is nothing to stack.
    """
    # Collect first and concatenate once; growing a frame with concat inside
    # the loop copies all previously read rows on every iteration.
    frames = [read_app_usage_for_user_id(folder) for folder in get_user_folders()]
    # Frames without any columns carry no data, so they are left out.
    frames = [frame for frame in frames if len(frame.columns) > 0]
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)

In [None]:
# Build the combined usage table from everything found under ./users and
# print a summary of it (DataFrame.info).
app_usage_data = read_all_folders()
app_usage_data.info()
# Disabled exploratory query, kept for reference:
# app_usage_data.groupby(['name', 'hour']).nunique().reset_index().sort_values(by='hour').head(15) 


In [None]:
# Persist the combined table; with to_csv defaults the index is written as
# the first column of the file.
app_usage_data.to_csv('./app_usage_data2.csv')