In [None]:
import json
import pandas as pd
from pathlib import Path
from datetime import datetime
import numpy as np
from utils import UUIDS, UUID2ID_DICT

In [None]:
# Location of the JSON export to load (relative to the working directory).
filepath = Path('test.json')

# JSON text is UTF-8 by specification; pass the encoding explicitly so the
# file is decoded the same way regardless of the platform's locale default.
with filepath.open('r', encoding='utf-8') as fp:
    data = json.load(fp)

# Alias used by the cells that follow.
ema_data = data

In [None]:
# Flatten the nested mapping in `ema_data` into one record per entry.
# Top level: UUID -> mapping of date keys (plus the metadata keys
# 'package_name' / 'app_name', which are not entries and are skipped).
rows = []

for uuid_key, per_uuid in ema_data.items():
    pkg = per_uuid.get('package_name', None)

    for stamp, entry in per_uuid.items():
        # Metadata keys live alongside the date keys; they are not entries.
        if stamp in ('package_name', 'app_name'):
            continue

        if isinstance(entry, dict) and len(entry) == 1:
            # A dict with exactly one key is treated as a wrapper: the record
            # fields are taken from the value stored under that single key.
            for payload in entry.values():
                if isinstance(payload, dict):
                    rows.append({
                        'UUID': uuid_key,
                        'date': stamp,
                        'package': pkg,
                        **payload,
                    })
                else:
                    print(f"Unexpected data type for nested_data: {type(payload)}, {payload}")
        elif isinstance(entry, dict):
            # Any other dict is used directly as the record fields.
            rows.append({
                'UUID': uuid_key,
                'date': stamp,
                'package': pkg,
                **entry,
            })
        else:
            print(f"Unexpected data type for date_data: {type(entry)}, {entry}")

df = pd.DataFrame(rows)


In [None]:
# Preview the first rows of the flattened frame built from `rows`.
df.head()

In [None]:
def is_valid_uuid(uuid) -> bool:
    """Return True when `uuid` is a key of `UUID2ID_DICT`, False otherwise."""
    return uuid in UUID2ID_DICT

In [None]:
# Keep only rows whose UUID is known, then drop repeated entries.
# Built as one chain that returns a new frame, so no column is assigned on a
# filtered slice (which is what triggers pandas' SettingWithCopyWarning).
df = (
    df[df['UUID'].apply(is_valid_uuid)]
    .assign(
        # First 15 characters of the raw date string, used as a coarse
        # timestamp key for de-duplication.
        date_=lambda frame: frame['date'].str[:15],
        # Stringify the answer so it can be compared by drop_duplicates.
        # NOTE(review): presumably question1 can hold unhashable values
        # (e.g. lists) - confirm against the export format.
        question1=lambda frame: frame['question1'].apply(lambda x: str([x])),
    )
    .drop_duplicates(subset=['UUID', 'date_', 'question1', 'question4'])
)

In [None]:
# Look up the ID for every UUID; UUIDs missing from the mapping become "NA".
col = 'PID'
last_column = df['UUID'].map(lambda x: UUID2ID_DICT.get(x, "NA"))

# Place the column at position 1 by name. The earlier approach moved
# "whatever column is last", which on a re-run (when 'PID' already exists and
# is no longer last) relocated an unrelated column instead.
if col in df.columns:
    df = df.drop(columns=col)
df.insert(1, col, last_column)

In [None]:
# Parse the raw date strings into timezone-aware (UTC) timestamps and order
# the rows by PID, then chronologically within each PID.
df = (
    df
    .assign(datetime=pd.to_datetime(df['date'], utc=True))
    .sort_values(['PID', 'datetime'], ascending=True)
)

In [None]:
# Write the current frame to a CSV named after today's date (YYYY-MM-DD).
export_name = f'./{datetime.now().strftime("%Y-%m-%d")}_ema.csv'
df.to_csv(export_name)

# split date: replace each raw date string with its whitespace-separated tokens
df['date'] = df['date'].map(lambda stamp: stamp.split())

# get today's date and its abbreviated month / day-of-month / year as strings
date = datetime.today()
month, day, year = (date.strftime(code) for code in ("%b", "%d", "%Y"))



In [None]:
# For every day in the checked window, flag the entries recorded on that day
# and print the IDs that have no entry for it.
# Window: Jul 15-31 and Aug 1-31 (17 + 31 days); `months` and `days` are
# parallel sequences indexed together.
jul = ['Jul'] * 17
aug = ['Aug'] * 31
months = jul + aug

juldays = np.arange(15, 32, 1)
augdays = np.arange(1, 32, 1)
days = np.concatenate((juldays, augdays), axis=0)

for i in range(len(months)):
    print(f"{months[i]} {days[i]} 2023")

    # df['date'] holds the whitespace-split tokens of the date string, so the
    # day token is text while days[i] is a NumPy integer. Comparing the two
    # directly is always False, which flagged every ID as missing on every
    # day. Compare as zero-padded text so both '7' and '07' match day 7.
    day_token = f"{int(days[i]):02d}"
    df['today'] = df['date'].apply(
        lambda x: x[1] == months[i]
        and str(x[2]).zfill(2) == day_token
        and x[5] == '2023'
    )

    # keep only the rows recorded on the day being checked
    today_df = df[df['today']]

    # print ids of everyone that doesn't have a response
    responded = set(today_df['UUID'])
    for key in UUID2ID_DICT.keys():
        if key not in responded:
            print(f"{UUID2ID_DICT[key]}")

In [None]:
# `today_df` is reassigned on every pass of the loop above, so this displays
# only the rows for the last day checked, not a combined result.
today_df