# Load libraries

In [1]:
import numpy as np
import pandas as pd
from pandasql import sqldf
import json

# Configurations & Constants

In [2]:
# Participant alias; valid values are participant1 ... participant10.
user = "participant1"

# Target frequency: an integer count (15 or 1) plus its unit, spelled two ways
# ('min' or 's', and 'm' or 's').
target_freq_as_int = 15
target_freq_unit1 = "min"
target_freq_unit2 = "m"

# Dataset type prefix: either '' or 'time_series_'.
dataset_type = ""

In [3]:
# Participant information is hard-coded in this cell. The per-participant
# lookup it replaces is kept here for reference:
#   participant_dictionary = json.load(open('./data/participant_dictionary.json'))

# Frequency strings: the integer count followed by each unit spelling.
target_freq = str(target_freq_as_int) + target_freq_unit1
target_freq2 = str(target_freq_as_int) + target_freq_unit2

user_id = 3290  # was participant_dictionary[user]['fonlog_id']
start_date = '2025-02-01'  # was participant_dictionary[user]['start_date']
end_date_plus_one = '2025-02-11'  # was participant_dictionary[user]['end_date_plus_one']
end_date_plus_two = '2025-02-12'  # was participant_dictionary[user]['end_date_plus_two']

# Colors keyed by box-plot element name.
# NOTE(review): presumably passed as the `color` argument of a box plot; it is
# not used in this part of the notebook — confirm.
color = dict(
    boxes="Blue",
    whiskers="Black",
    medians="Red",
    caps="Gray",
)

# Maps source column names to the short snake_case names used for the
# wearing-off dataset. The groups are merged in order, so key order is kept.
wo_columns = {
    # Timestamp and sensor-style readings.
    **{
        "Timestamp": "timestamp",
        "Heart Rate (in Beats per minute)": "heart_rate",
        "Stress Score": "stress_score",
        "Stress Interpretation": "stress_level",
        "Number of Steps": "steps",
    },
    # Wearing-off label and the reported period.
    **{
        "Wearing Off": "wearing_off",
        "started_at": "wo_start",
        "finished_at": "wo_end",
    },
    # Reported symptoms.
    **{
        "Tremors": "wo_tremors",
        "Slowing down of movement": "wo_slowdown",
        "Change in mood or depression": "wo_moodchange",
        "Rigidity of muscles": "wo_rigidity",
        "Sharp pain or prolonged dull pain": "wo_pain",
        "Impairment of complex movements of the hand and fingers": "wo_impairment_hands",
        "Difficulty integrating thoughts or slowing down of thought": "wo_slow_thoughts",
        "Anxiety or panic attacks": "wo_anxiety",
        "Muscle spasm": "wo_muscle_spasm",
    },
    # Identifier of the report.
    **{
        "activity_target.activity_id": "report_id",
    },
}

# Maps source column names to the short snake_case names used for drug-intake
# records. Each symptom gets the same suffix as in `wo_columns`, with the
# `drug_intake_` prefix.
#
# The previous version paired the symptom names with the wrong targets (for
# example "Sharp pain or prolonged dull pain" -> "drug_intake_tremors"), so a
# rename by this dict would have mislabelled the symptom columns.
drug_intake_columns = {
    "started_at": "drug_intake_start",
    "finished_at": "drug_intake_end",
    "Tremors": "drug_intake_tremors",
    "Slowing down of movement": "drug_intake_slowdown",
    "Change in mood or depression": "drug_intake_moodchange",
    "Rigidity of muscles": "drug_intake_rigidity",
    "Sharp pain or prolonged dull pain": "drug_intake_pain",
    "Impairment of complex movements of the hand and fingers": "drug_intake_impairment_hands",
    "Difficulty integrating thoughts or slowing down of thought": "drug_intake_slow_thoughts",
    "Anxiety or panic attacks": "drug_intake_anxiety",
    "Muscle spasm": "drug_intake_muscle_spasm"
}

# Japanese symptom labels and the English column names they are renamed to.
# The Japanese text is matched against the data as-is, so it must not be edited.
symptoms_dictionary = dict([
    ("ふるえる", "Tremors"),
    ("動作が遅くなる", "Slowing down of movement"),
    ("気分が変化する、または落ち込む", "Change in mood or depression"),
    ("体のどこかがこわばる", "Rigidity of muscles"),
    ("するどい痛み、または長く続くこぶい痛みがある", "Sharp pain or prolonged dull pain"),
    ("手先の細かい作業がうまくできない", "Impairment of complex movements of the hand and fingers"),
    ("思考がまとまらない、または頭の回転がおそい", "Difficulty integrating thoughts or slowing down of thought"),
    ("不安になる、またはパニック状態になる", "Anxiety or panic attacks"),
    ("筋肉がひきつる", "Muscle spasm"),
])

# FonLog

## Process wearing-off dataset

In [4]:
# Activity type id of the wearing-off records.
activity_type_id = 2988

# Read the exported records sheet.
# (A previous path, now disabled: './data/fonlog/records ABC.xlsx'.)
fonlog_data = pd.read_excel('./records.xlsx', sheet_name='records', engine='openpyxl')

# Parse both activity timestamps as datetimes.
fonlog_data['activity.started_at'] = pd.to_datetime(fonlog_data['activity.started_at'])
fonlog_data['activity.finished_at'] = pd.to_datetime(fonlog_data['activity.finished_at'])

# Row filter: this participant, the wearing-off record group, activities that
# started in [start_date, end_date_plus_two), and the wearing-off activity type.
# Previously tried alternatives (disabled): matching the participant on
# 'activity_target.customer_id', and the group names 'Wearing-Off for PD' and
# 'Wearing-off Questionnaire (WOQ)'.
belongs_to_user = fonlog_data['activity_target.user_id'] == user_id
in_wearing_off_group = fonlog_data['activity_type_group.name'] == 'ウェアリングオフの記録'
started_in_window = (
    (fonlog_data['activity.started_at'] >= start_date)
    & (fonlog_data['activity.started_at'] < end_date_plus_two)
)
has_wearing_off_type = fonlog_data['record_type.activity_type_id'] == activity_type_id
filtered_fonlog_data = fonlog_data.loc[
    belongs_to_user & in_wearing_off_group & started_in_window & has_wearing_off_type
]

# Extract wearing-off periods: one row per distinct (start, end, activity id)
# triple. The values are compared as strings, so np.unique returns the rows in
# string sort order.
period_columns = ['activity.started_at', 'activity.finished_at', 'activity_target.activity_id']
selected_fonlog_data = filtered_fonlog_data[period_columns].to_numpy().astype(str)
wearing_off_periods = pd.DataFrame(
    np.unique(selected_fonlog_data, axis=0),
    columns=period_columns,
)

# Convert the strings back to datetimes and a numeric id.
wearing_off_periods['activity.started_at'] = pd.to_datetime(wearing_off_periods['activity.started_at'])
wearing_off_periods['activity.finished_at'] = pd.to_datetime(wearing_off_periods['activity.finished_at'])
wearing_off_periods['activity_target.activity_id'] = (
    wearing_off_periods['activity_target.activity_id'].astype(float)
)

# Index by activity id (keeping it as a column too), then shorten the column names.
wearing_off_periods = (
    wearing_off_periods
    .set_index('activity_target.activity_id', drop=False)
    .rename(columns={
        'activity.started_at': 'started_at',
        'activity.finished_at': 'finished_at',
        'activity_target.activity_id': 'wearing_off_id',
    })
)

# # Compute wearing-off duration
# wearing_off_periods['Duration'] = ( (
#     wearing_off_periods['finished_at'] - wearing_off_periods['started_at']
# ) / np.timedelta64(1, "s") ) / 60. # in minutes

# # Show wearing-off summary
# display(wearing_off_periods['Duration'].describe())
# display(wearing_off_periods)

# Update finished_at when finished_at == started_at
def update_finished_at(row, padding_minutes=15):
    """Return the end time of a report, padding zero-length reports.

    Parameters
    ----------
    row : mapping-like (for example a DataFrame row)
        Must provide "started_at" and "finished_at".
    padding_minutes : int or float, default 15
        Minutes added to "finished_at" when it equals "started_at".

    Returns
    -------
    The value of row["finished_at"], shifted by `padding_minutes` when the
    report has zero length and unchanged otherwise.
    """
    finished_at = row["finished_at"]
    if row["started_at"] == finished_at:
        return finished_at + pd.Timedelta(minutes=padding_minutes)
    return finished_at

# Give zero-length reports (start == end) a 15-minute duration. This is done
# column-wise instead of with a row-wise apply, so it also works when the
# table has no rows.
zero_length_report = wearing_off_periods['started_at'] == wearing_off_periods['finished_at']
wearing_off_periods['finished_at'] = wearing_off_periods['finished_at'].mask(
    zero_length_report,
    wearing_off_periods['finished_at'] + pd.Timedelta(minutes=15),
)

In [5]:
# Display the wearing-off periods table (start, end and id of each report).
wearing_off_periods

Unnamed: 0_level_0,started_at,finished_at,wearing_off_id
activity_target.activity_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1814507.0,2025-02-01 06:58:00,2025-02-01 07:33:00,1814507.0
1818163.0,2025-02-02 06:54:48,2025-02-02 07:00:00,1818163.0
1823444.0,2025-02-03 10:30:47,2025-02-03 10:45:47,1823444.0
1826262.0,2025-02-03 21:14:56,2025-02-03 21:29:56,1826262.0
1828405.0,2025-02-04 10:38:07,2025-02-04 10:53:07,1828405.0
1832474.0,2025-02-05 06:53:53,2025-02-05 07:08:53,1832474.0
1835719.0,2025-02-05 20:17:13,2025-02-05 20:32:13,1835719.0
1837962.0,2025-02-06 10:08:00,2025-02-06 10:22:00,1837962.0
1839275.0,2025-02-06 15:00:22,2025-02-06 15:15:22,1839275.0
1840269.0,2025-02-06 18:42:49,2025-02-06 18:57:49,1840269.0


## Process symptoms dataset

In [6]:
# Pivot the records into one row per report and one column per question,
# then rename the symptom columns to English.
symptoms_wearing_off = (
    filtered_fonlog_data
    .pivot(index='activity_target.activity_id', columns='record_type.name', values='value')
    .rename(columns=symptoms_dictionary)
    # The "Are there other symptoms you would like to share?" question is not
    # one of the mapped symptoms. errors='ignore' keeps the cell working when
    # no such record exists in the selected period.
    .drop(columns='共有したい他の症状はありますか？', errors='ignore')
)

In [7]:
# Merge symptoms to wearing-off periods (default inner join on the index of
# both tables)
wearing_off_with_symptoms = pd.merge(
    wearing_off_periods,
    symptoms_wearing_off,
    left_index=True, right_index=True
)
display(wearing_off_with_symptoms)

# Encode the answers as integers: 'ある' -> 1, 'ない' -> 0, missing -> 0.
# Any other value is left unchanged. Only the symptom columns are touched, and
# the mapping is explicit, so the result does not depend on the silent dtype
# downcasting that `fillna(...).replace(...)` relied on, which pandas has
# deprecated.
symptom_answer_codes = {'ある': 1, 'ない': 0}
symptom_columns = list(symptoms_wearing_off.columns)
wearing_off_with_symptoms[symptom_columns] = wearing_off_with_symptoms[symptom_columns].apply(
    lambda answers: answers.map(
        lambda answer: 0 if pd.isna(answer) else symptom_answer_codes.get(answer, answer)
    )
)
# # Show wearing-off symptoms
# display(wearing_off_with_symptoms.describe())
# display(wearing_off_with_symptoms.head())
# display(wearing_off_with_symptoms.tail())

Unnamed: 0_level_0,started_at,finished_at,wearing_off_id,Sharp pain or prolonged dull pain,Tremors,Anxiety or panic attacks,Rigidity of muscles,Slowing down of movement,Difficulty integrating thoughts or slowing down of thought,Impairment of complex movements of the hand and fingers,Change in mood or depression,Muscle spasm
activity_target.activity_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1814507.0,2025-02-01 06:58:00,2025-02-01 07:33:00,1814507.0,ない,ない,ある,ない,ない,ない,ない,ある,ない
1818163.0,2025-02-02 06:54:48,2025-02-02 07:00:00,1818163.0,ない,ない,ある,ない,ない,ない,ない,ない,ない
1823444.0,2025-02-03 10:30:47,2025-02-03 10:45:47,1823444.0,ない,ない,ある,ない,ない,ない,ない,ない,ない
1826262.0,2025-02-03 21:14:56,2025-02-03 21:29:56,1826262.0,ない,ない,ある,ない,ない,ない,ない,ない,ない
1828405.0,2025-02-04 10:38:07,2025-02-04 10:53:07,1828405.0,ない,ない,ある,ない,ない,ない,ない,ない,ない
1832474.0,2025-02-05 06:53:53,2025-02-05 07:08:53,1832474.0,ある,ない,ない,ない,ない,ない,ない,ない,ない
1835719.0,2025-02-05 20:17:13,2025-02-05 20:32:13,1835719.0,ない,ない,ある,ない,ない,ない,ない,ない,ない
1837962.0,2025-02-06 10:08:00,2025-02-06 10:22:00,1837962.0,ない,ない,ある,ない,ない,ない,ない,ない,ない
1839275.0,2025-02-06 15:00:22,2025-02-06 15:15:22,1839275.0,ない,ない,ある,ない,ない,ない,ない,ない,ない
1840269.0,2025-02-06 18:42:49,2025-02-06 18:57:49,1840269.0,ない,ない,ある,ない,ない,ない,ない,ない,ない


  wearing_off_with_symptoms = wearing_off_with_symptoms.fillna(0).replace({'ある':1, 'ない':0})


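Remove an overarching report when another report is embedded within it: a report is dropped if it starts before and ends after some other report. In the example below, a report spanning 11:00 AM–1:00 PM is removed because the 12:00 PM–12:30 PM report lies entirely inside it.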

11:00 AM      **12:00 PM**        **12:30 PM**     1:00 PM  
    |-------------|----------------|-----------|


In [8]:
def pysqldf(q):
    """Run the SQL query `q` with pandasql against this notebook's global variables."""
    return sqldf(q, globals())


# Self-join: select every report (wearing_off_2) that starts before and ends
# after some other report (wearing_off_1), i.e. that strictly contains it.
cond_join= '''
    select distinct
        wearing_off_2.[wearing_off_id] as for_remove_id
    from wearing_off_with_symptoms as wearing_off_1
    join wearing_off_with_symptoms as wearing_off_2
    on (wearing_off_2.[started_at] < wearing_off_1.[finished_at] AND wearing_off_2.[started_at] < wearing_off_1.[started_at]) AND
            (wearing_off_2.[finished_at] > wearing_off_1.[finished_at] AND wearing_off_2.[finished_at] > wearing_off_1.[started_at]) AND
            (wearing_off_1.[wearing_off_id] <> wearing_off_2.[wearing_off_id])
'''
for_remove_ids = pysqldf(cond_join)

# Keep only the reports whose id was not selected for removal.
ids_to_drop = for_remove_ids['for_remove_id']
is_overarching = wearing_off_with_symptoms['wearing_off_id'].isin(ids_to_drop)
wearing_off_with_symptoms = wearing_off_with_symptoms[~is_overarching]

# # Show wearing-off symptoms
# display(wearing_off_with_symptoms.describe())
# display(wearing_off_with_symptoms.head())
# display(wearing_off_with_symptoms.tail())

# Give zero-length reports (start == end) a 15-minute duration. `assign` builds
# a new frame instead of writing a column into the row-filtered selection
# (which can raise SettingWithCopyWarning), and the column-wise form also
# works when the table has no rows.
zero_length_report = wearing_off_with_symptoms['started_at'] == wearing_off_with_symptoms['finished_at']
wearing_off_with_symptoms = wearing_off_with_symptoms.assign(
    finished_at=wearing_off_with_symptoms['finished_at'].mask(
        zero_length_report,
        wearing_off_with_symptoms['finished_at'] + pd.Timedelta(minutes=15),
    )
)

In [9]:
# Write the preprocessed wearing-off table to the 'fonlog' sheet of an Excel file.
wearing_off_with_symptoms.to_excel(
    './fonlog_preprocessed.xlsx',
    sheet_name='fonlog',
)