In [None]:
from sshtunnel import SSHTunnelForwarder
import psycopg2
import os
import pandas as pd
import numpy as np

# SSH connection details (username and password are read from environment variables).
ssh_host = 'loki.research.cs.dal.ca'
ssh_port = 22
ssh_username = os.getenv('LOKI_USERNAME')
ssh_password = os.getenv('LOKI_PASSWORD')

# Database connection details.
db_host = '127.0.0.1'
db_port = 5432
db_name = 'staging_db'
db_user = os.getenv('STAGING_DB_USERNAME')
db_password = os.getenv('STAGING_DB_PASSWORD')

# Establish SSH tunnel and connect to PostgreSQL.
try:
    with SSHTunnelForwarder(
        (ssh_host, ssh_port),
        ssh_username=ssh_username,
        ssh_password=ssh_password,
        remote_bind_address=('127.0.0.1', db_port)  # Forwarding PostgreSQL port.
    ) as tunnel:

        # Connect to PostgreSQL database through the SSH tunnel.
        conn = psycopg2.connect(
            host=db_host,
            port=tunnel.local_bind_port,  # use the local port set by the tunnel.
            dbname=db_name,
            user=db_user,
            password=db_password
        )

        try:
            print("Database connection established")
            lock_state_query = "SELECT * FROM study_prositvd.lock_state;"
            accelerometer_query = "SELECT * FROM study_prositvd.accelerometer;"
            df_lock_state = pd.read_sql_query(lock_state_query, conn)
            df_accelerometer = pd.read_sql_query(accelerometer_query, conn)
        finally:
            # Close the database connection even when one of the queries fails,
            # so a failed load does not leave a connection open.
            conn.close()

except Exception as e:
    print(f"An error occurred: {e}")
    # Re-raise: the following cells need df_lock_state / df_accelerometer, so
    # continuing after a failed load would only surface as a confusing NameError.
    raise

In [None]:
# Raw data: preview the first ten lock-state rows loaded from the database.
df_lock_state.head(10)

In [None]:
# Raw data: preview the first ten accelerometer rows loaded from the database.
df_accelerometer.head(10)

## Screen time from Lock state data.

In [None]:
# Parse the event timestamps so they can be compared and subtracted as datetimes.
df_lock_state['measuredat'] = df_lock_state['measuredat'].pipe(pd.to_datetime)

# Nightly window of interest: it opens at 10 PM and closes at 7 AM.
window_start_time, window_end_time = (
    pd.to_datetime(clock_text).time() for clock_text in ('22:00:00', '07:00:00')
)

# Function to assign logical day based on the window (because window can span over two calendar days).
def assign_logical_day(row, window_start=None):
    """Return the logical day that one row belongs to.

    A logical day begins at ``window_start`` instead of at midnight, so a
    measurement taken before that time of day is attributed to the previous
    calendar date.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a ``'measuredat'`` entry whose value offers ``.time()``
        and ``.date()`` (e.g. a ``pandas.Timestamp``).
    window_start : datetime.time, optional
        Time of day at which a logical day begins. When omitted, the
        notebook-level ``window_start_time`` is used.

    Returns
    -------
    datetime.date
        The calendar date of ``measuredat`` when its time of day is at or after
        ``window_start``; otherwise the calendar date one day earlier.
    """
    if window_start is None:
        # Fall back to the window configured at notebook level.
        window_start = window_start_time

    measured_at = row['measuredat']
    if measured_at.time() >= window_start:
        return measured_at.date()  # Assign to the current day.
    return (measured_at - pd.Timedelta(days=1)).date()  # Assign to the previous day.

# Tag every lock-state event with its logical day (evaluated one row at a time).
df_lock_state['logical_day'] = df_lock_state.apply(
    assign_logical_day,
    axis='columns',
)

# Function to calculate screen times and metrics within logical windows.
def calculate_metrics(group, window_start=None, window_end=None):
    """Summarise locked / unlocked screen time inside one nightly window.

    The window opens at ``window_start`` on the group's logical day and closes
    at ``window_end`` on the following day. Only LOCKED -> UNLOCKED and
    UNLOCKED -> LOCKED transitions between consecutive events contribute; an
    interval that runs past the window end is clipped to the window end.

    Parameters
    ----------
    group : pandas.DataFrame
        Events of one participant and logical day. Reads the columns
        ``'logical_day'`` (first row only), ``'measuredat'`` and ``'value0'``
        (compared against the strings ``'LOCKED'`` / ``'UNLOCKED'``).
        NOTE(review): the boundaries are built as timezone-naive timestamps,
        so ``measuredat`` is assumed to be timezone-naive too - confirm.
    window_start, window_end : datetime.time, optional
        Window boundaries. When omitted, the notebook-level
        ``window_start_time`` / ``window_end_time`` are used.

    Returns
    -------
    pandas.Series
        ``total_screen_locked_time``, ``total_screen_unlocked_time``,
        ``mean_unlocked_duration``, ``median_unlocked_duration`` and
        ``max_unlocked_duration`` in minutes, plus ``total_number_of_unlocks``.
        The three duration statistics are 0 when no unlock was recorded.
    """
    if window_start is None:
        window_start = window_start_time
    if window_end is None:
        window_end = window_end_time

    # Set the logical start and end time boundaries based on the logical day.
    first_day = group['logical_day'].iloc[0]
    logical_start = pd.Timestamp.combine(first_day, window_start)
    logical_end = pd.Timestamp.combine(first_day, window_end) + pd.Timedelta(days=1)

    # Transitions only make sense in chronological order, and the rows are not
    # guaranteed to arrive sorted. A stable sort keeps ties in their original order.
    events = group.sort_values('measuredat', kind='mergesort')

    locked_time = pd.Timedelta(0)
    unlocked_time = pd.Timedelta(0)
    unlock_durations = []

    previous_state = None
    previous_time = None

    for current_state, current_time in zip(events['value0'], events['measuredat']):
        if previous_time is not None:
            # Once the previous event sits at or after the window end, every
            # remaining interval lies outside the window. Stopping here keeps
            # daytime unlocks from being recorded as zero-minute unlocks.
            if previous_time >= logical_end:
                break

            # Adjust previous time if crossing into the time window.
            if previous_time < logical_start < current_time:
                previous_time = logical_start

        # Adjust current time if crossing the window end.
        if current_time > logical_end:
            current_time = logical_end

        starts_in_window = previous_time is not None and previous_time >= logical_start
        if starts_in_window:
            # Valid transition: LOCKED -> UNLOCKED
            if previous_state == 'LOCKED' and current_state == 'UNLOCKED':
                locked_time += current_time - previous_time

            # Valid transition: UNLOCKED -> LOCKED
            elif previous_state == 'UNLOCKED' and current_state == 'LOCKED':
                duration = current_time - previous_time
                unlocked_time += duration
                unlock_durations.append(duration.total_seconds() / 60)  # Convert to minutes.

        # Update the previous state and time.
        previous_state = current_state
        previous_time = current_time

    # Calculate statistics.
    mean_unlock = np.mean(unlock_durations) if unlock_durations else 0
    median_unlock = np.median(unlock_durations) if unlock_durations else 0
    max_unlock = max(unlock_durations) if unlock_durations else 0

    return pd.Series({
        'total_screen_locked_time': locked_time.total_seconds() / 60,
        'total_screen_unlocked_time': unlocked_time.total_seconds() / 60,
        'total_number_of_unlocks': len(unlock_durations),
        'mean_unlocked_duration': mean_unlock,
        'median_unlocked_duration': median_unlock,
        'max_unlocked_duration': max_unlock
    })

# Compute the lock-state metrics once per (participant, logical day) pair and
# turn the group keys back into ordinary columns.
result_df_lock_state = df_lock_state.groupby(
    by=['participantid', 'logical_day']
).apply(calculate_metrics).reset_index()

In [None]:
# Preview the first ten rows of the per-participant, per-logical-day lock-state metrics.
result_df_lock_state.head(10)

## Sleep metrics from Accelerometer data

In [None]:
# Parse the sample timestamps so they can be compared and subtracted as datetimes.
df_accelerometer['measuredat'] = df_accelerometer['measuredat'].pipe(pd.to_datetime)

# Coerce the three accelerometer value columns to numbers; unparsable entries become NaN.
df_accelerometer['value0'] = df_accelerometer['value0'].pipe(pd.to_numeric, errors='coerce')
df_accelerometer['value1'] = df_accelerometer['value1'].pipe(pd.to_numeric, errors='coerce')
df_accelerometer['value2'] = df_accelerometer['value2'].pipe(pd.to_numeric, errors='coerce')

# Movement magnitude: Euclidean norm (square root of the sum of squares) of the three values.
df_accelerometer['movement_magnitude'] = np.sqrt(
    df_accelerometer['value0'] ** 2
    + df_accelerometer['value1'] ** 2
    + df_accelerometer['value2'] ** 2
)

In [None]:
# Nightly window used for the accelerometer analysis: opens at 10 PM, closes at 7 AM.
window_start_time, window_end_time = (
    pd.to_datetime(clock_text).time() for clock_text in ('22:00:00', '07:00:00')
)

In [None]:
# Assign logical day based on the time window.
def assign_logical_day(row, window_start=None):
    """Map one row to the logical day on which its window opened.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a ``'measuredat'`` entry whose value offers ``.time()``
        and ``.date()`` (e.g. a ``pandas.Timestamp``).
    window_start : datetime.time, optional
        Time of day at which a logical day begins. When omitted, the
        notebook-level ``window_start_time`` is used.

    Returns
    -------
    datetime.date
        The calendar date of ``measuredat`` when its time of day is at or after
        ``window_start``; otherwise the calendar date one day earlier.
    """
    if window_start is None:
        # Fall back to the window configured at notebook level.
        window_start = window_start_time

    measured_at = row['measuredat']
    if measured_at.time() < window_start:
        return (measured_at - pd.Timedelta(days=1)).date()  # Assign to the previous day
    return measured_at.date()  # Assign to the current day

# Tag every accelerometer sample with its logical day (evaluated one row at a time).
df_accelerometer['logical_day'] = df_accelerometer.apply(
    assign_logical_day,
    axis='columns',
)

In [None]:
# Broadcast each (participant, logical day) group's median movement magnitude
# back onto every sample of that group.
df_accelerometer['daily_median'] = (
    df_accelerometer
    .groupby(['participantid', 'logical_day'])['movement_magnitude']
    .transform('median')
)

In [None]:
# Flag a sample as asleep when its movement magnitude is strictly below the
# median of its own participant and logical day.
df_accelerometer['is_asleep'] = df_accelerometer['movement_magnitude'].lt(
    df_accelerometer['daily_median']
)

In [None]:
# Function to calculate sleep times and other metrics, similar to how we handled the lock_state data.
def calculate_sleep_metrics(group, window_start=None, window_end=None):
    """Summarise asleep / awake time inside one nightly window.

    Consecutive samples sharing the same ``is_asleep`` value form a run. A run
    is measured from its first sample to the first sample of the next run,
    clipped to the window, and credited to sleep or wake time according to the
    run's state. Runs with no positive overlap with the window are ignored, and
    a trailing run that is never followed by a state change is not counted.

    Unlike lock-state events, accelerometer rows are per-sample flags, so
    measuring only the gap between the last two samples before a state change
    would drop most of each run; hence the run start is tracked here.

    Parameters
    ----------
    group : pandas.DataFrame
        Samples of one participant and logical day. Reads the columns
        ``'logical_day'`` (first row only), ``'measuredat'`` and ``'is_asleep'``.
        NOTE(review): the boundaries are built as timezone-naive timestamps,
        so ``measuredat`` is assumed to be timezone-naive too - confirm.
    window_start, window_end : datetime.time, optional
        Window boundaries. When omitted, the notebook-level
        ``window_start_time`` / ``window_end_time`` are used.

    Returns
    -------
    pandas.Series
        ``total_sleep_time``, ``total_wake_time``, ``mean_sleep_duration``,
        ``median_sleep_duration`` and ``max_sleep_duration`` in minutes, plus
        ``total_sleep_segments``. The three duration statistics are 0 when no
        sleep segment was recorded.
    """
    if window_start is None:
        window_start = window_start_time
    if window_end is None:
        window_end = window_end_time

    # Set the logical start and end time boundaries based on the logical day.
    first_day = group['logical_day'].iloc[0]
    logical_start = pd.Timestamp.combine(first_day, window_start)
    logical_end = pd.Timestamp.combine(first_day, window_end) + pd.Timedelta(days=1)

    # Runs only make sense in chronological order, and the rows are not
    # guaranteed to arrive sorted. A stable sort keeps ties in their original order.
    samples = group.sort_values('measuredat', kind='mergesort')

    sleep_time = pd.Timedelta(0)
    wake_time = pd.Timedelta(0)
    sleep_durations = []

    run_state = None
    run_start = None

    for current_state, current_time in zip(samples['is_asleep'], samples['measuredat']):
        if run_start is None:
            # First sample opens the first run.
            run_state, run_start = current_state, current_time
            continue

        if current_state == run_state:
            # Same state as before: the current run simply continues.
            continue

        # State change: close the run that just ended, clipped to the window.
        segment_start = max(run_start, logical_start)
        segment_end = min(current_time, logical_end)
        if segment_end > segment_start:
            duration = segment_end - segment_start
            if run_state:
                # Transition: Asleep -> Awake
                sleep_time += duration
                sleep_durations.append(duration.total_seconds() / 60)  # Convert to minutes.
            else:
                # Transition: Awake -> Asleep
                wake_time += duration

        run_state, run_start = current_state, current_time

        # A run that starts at or after the window end cannot overlap the
        # window, and neither can any later one.
        if run_start >= logical_end:
            break

    # Calculate statistics.
    mean_sleep_duration = np.mean(sleep_durations) if sleep_durations else 0
    median_sleep_duration = np.median(sleep_durations) if sleep_durations else 0
    max_sleep_duration = max(sleep_durations) if sleep_durations else 0

    return pd.Series({
        'total_sleep_time': sleep_time.total_seconds() / 60,
        'total_wake_time': wake_time.total_seconds() / 60,
        'total_sleep_segments': len(sleep_durations),
        'mean_sleep_duration': mean_sleep_duration,
        'median_sleep_duration': median_sleep_duration,
        'max_sleep_duration': max_sleep_duration
    })

# Compute the sleep metrics once per (participant, logical day) pair and
# turn the group keys back into ordinary columns.
result_df_accel = df_accelerometer.groupby(
    by=['participantid', 'logical_day']
).apply(calculate_sleep_metrics).reset_index()

In [None]:
# Preview the first ten rows of the per-participant, per-logical-day sleep metrics.
result_df_accel.head(10)

In [None]:
# Re-display the first ten rows of the lock-state metrics.
result_df_lock_state.head(10)