In [None]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset
import os
from sys import exit as e
from datetime import datetime
from datetime import timedelta
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.model_selection import GroupShuffleSplit
import math
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score
import torch.optim as optim
import torch.cuda.amp
import warnings


In [None]:
# Select the compute device once for the whole notebook:
# the first CUDA GPU when PyTorch can see one, otherwise the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if DEVICE.type == "cuda":
    print(f"PyTorch tasks will run on: {torch.cuda.get_device_name(0)}")
else:
    print("PyTorch tasks will run on: CPU (GPU not available or detected)")

PyTorch tasks will run on: NVIDIA GeForce RTX 3090


In [None]:
# Notebook-wide configuration constants.

# Number of previous days of Fitbit HR used as context for each EMA sample.
MAX_CONTEXT_LENGTH = 7

# Mini-batch size.
# NOTE(review): the DataLoader cells in this notebook pass batch_size=32
# literally and do not read this constant -- confirm which value is intended.
BATCH_SIZE = 16

In [None]:
# Read the two filtered CSV exports used throughout the notebook.
# Paths are relative to the notebook's working directory.

# EMA responses; later cells read the columns 'User_ID', 'datetime', 'Q1'..'Q4'.
df_ema = pd.read_csv('../data/EMA/Filtered_EMA_Q1_Q4.csv')

# Fitbit heart-rate readings; later cells read 'subject', 'datetime', 'value'.
df_hr = pd.read_csv('../data/Combined_Fitbit/Filtered_Fitbit_HR.csv')


In [None]:
# Build a lookup {(participant_id, date): hr_data_array}.
#
# Each value is a length-1440 float array with one slot per minute of the day
# (index = hour * 60 + minute). Minutes with no reading stay at 0, which the
# rest of the notebook treats as "missing".
#
# The timestamp column is parsed ONCE up front instead of calling
# pd.to_datetime twice per row inside iterrows(), which dominates runtime on
# minute-level data.
hr_data_dict = {}

_hr_timestamps = pd.to_datetime(df_hr['datetime'])
_hr_dates = _hr_timestamps.dt.date
_hr_minutes = (_hr_timestamps.dt.hour * 60 + _hr_timestamps.dt.minute).to_numpy()

for participant_id, date, minute_of_day, hr_value in zip(
    df_hr['subject'], _hr_dates, _hr_minutes, df_hr['value']
):
    key = (participant_id, date)
    if key not in hr_data_dict:
        # First reading seen for this participant/day: allocate the empty day.
        hr_data_dict[key] = np.zeros((1440,))

    # If several rows fall in the same minute, the last one wins.
    hr_data_dict[key][minute_of_day] = hr_value



In [None]:
# Keep a shallow snapshot of the raw (un-normalized) lookup so the
# normalization cell can be re-run from a clean starting point.
# Only the dict is copied; the per-day arrays are shared with hr_data_dict.
hr_data_dict2 = dict(hr_data_dict)

In [None]:
# Per-user z-score normalization of the heart-rate lookup.
#
# Start from the raw snapshot so re-running this cell never normalizes
# already-normalized data.
hr_data_dict = hr_data_dict2.copy()

per_user_data = df_hr.groupby('subject')

# Per-subject mean and (sample) standard deviation of all HR readings.
per_sub_mean = per_user_data['value'].mean()
per_sub_std = per_user_data['value'].std()

# Single pass over the lookup: the subject is the first element of each key,
# so there is no need to rescan every key once per subject.
for key, raw_day in list(hr_data_dict.items()):
    sub = key[0]
    if sub not in per_sub_mean.index:
        # No statistics for this subject: leave the day untouched.
        continue

    sub_mean = per_sub_mean[sub]
    sub_std = per_sub_std[sub]

    # A subject with a single reading (std is NaN) or constant readings
    # (std == 0) would otherwise inject NaN/inf into the features.
    if not np.isfinite(sub_std) or sub_std == 0:
        sub_std = 1.0

    # Only normalize real readings; zeros mark missing minutes and stay 0.
    # NOTE: a real reading exactly equal to the subject mean also becomes 0.0
    # and is therefore indistinguishable from a missing minute downstream.
    hr_data_dict[key] = np.where(raw_day != 0,
                                 (raw_day - sub_mean) / sub_std,
                                 0)


In [None]:
# Example lookup: fetch one participant's normalized HR for a single day.
participant_id, date = 'CR001', pd.to_datetime('2021-07-28').date()

# The lookup is keyed by (participant_id, datetime.date).
hr_array = hr_data_dict[(participant_id, date)]

# Show the per-minute array for that participant and day.
print(hr_array)

[-0.45133713 -0.54642306 -0.64150898 ... -0.16607936 -0.35625121
 -0.35625121]


In [None]:
# Build one sample per EMA response.
#
# X_list   : N arrays of shape (MAX_CONTEXT_LENGTH, 1440, 1)
# Y_list   : N lists [Q1, Q2, Q3, Q4]
# sub_list : N participant ids (used later as groups for the train/test split)
# where N is the total number of EMA rows.

X_list, Y_list, sub_list = [], [], []

for idx, row in df_ema.iterrows():
    participant = row['User_ID']
    target_date = pd.to_datetime(row['datetime']).date()

    # Empty context block for this response; days without HR data stay all-zero.
    sample_block = np.zeros((MAX_CONTEXT_LENGTH, 1440, 1))

    # Slot i holds the day (i + 1) days before the EMA date:
    # index 0 is yesterday, the last index is the oldest day.
    for i in range(MAX_CONTEXT_LENGTH):
        look_back_date = target_date - timedelta(days=(i + 1))

        day_hr = hr_data_dict.get((participant, look_back_date))
        if day_hr is not None:
            sample_block[i, :, 0] = day_hr

    # Append exactly once per EMA row, AFTER the look-back loop has filled the
    # block. Appending inside the loop added every response
    # MAX_CONTEXT_LENGTH times (all pointing at the same array), inflating the
    # dataset with exact duplicates.
    X_list.append(sample_block)
    Y_list.append([row['Q1'], row['Q2'], row['Q3'], row['Q4']])
    sub_list.append(participant)


In [None]:
# Stack the per-sample context blocks along a new leading axis and cast to
# float32 so torch.from_numpy yields float32 tensors.
X_final = np.stack(X_list).astype(np.float32)

# Labels: one row of [Q1, Q2, Q3, Q4] per sample, also float32.
Y_final = np.array(Y_list).astype(np.float32)

In [None]:
class FitbitEMADataset(Dataset):
    """Map-style dataset pairing HR context blocks with EMA labels.

    Each item is a dict with:
      'x'        : the feature block for one sample (indexed from ``x_data``)
      'y'        : the label vector for that sample (indexed from ``y_data``)
      'day_mask' : bool tensor, True for days whose values are all exactly 0
      'min_mask' : bool tensor, True for individual entries equal to 0

    Zeros are the "missing" sentinel: masks are derived from the data on the
    fly rather than stored.
    """

    def __init__(self, x_data, y_data):
        # torch.from_numpy shares memory with the numpy arrays (no copy).
        self.x_data = torch.from_numpy(x_data)
        self.y_data = torch.from_numpy(y_data)

    def __len__(self):
        # One sample per label row.
        return len(self.y_data)

    def __getitem__(self, idx):
        features, labels = self.x_data[idx], self.y_data[idx]

        # Day-level mask: a day whose absolute values sum to zero over the
        # minute and channel axes contains no readings at all.
        empty_day = features.abs().sum(dim=(1, 2)) == 0

        # Minute-level mask: drop the trailing channel axis, then flag zeros.
        empty_minute = features.squeeze(-1) == 0

        return dict(x=features, y=labels, day_mask=empty_day, min_mask=empty_minute)

In [None]:
# Smoke test of the Dataset/DataLoader plumbing on the full (unsplit) arrays.
dataset = FitbitEMADataset(X_final, Y_final)

train_loader = DataLoader(
    dataset,
    batch_size=32,      # leading dimension B of every batch tensor
    shuffle=True,       # shuffling is only appropriate for training data
    num_workers=0       # load in the main process
)

# Pull a single batch and report the shape of every field.
batch = next(iter(train_loader))

for label, field in (
    ("Input Batch Shape:", 'x'),          # Should be [32, 7, 1440, 1]
    ("Label Batch Shape:", 'y'),          # Should be [32, 4]
    ("Day Mask Shape:   ", 'day_mask'),   # Should be [32, 7]
    ("Min Mask Shape:   ", 'min_mask'),   # Should be [32, 7, 1440]
):
    print(label, batch[field].shape)

Input Batch Shape: torch.Size([32, 7, 1440, 1])
Label Batch Shape: torch.Size([32, 4])
Day Mask Shape:    torch.Size([32, 7])
Min Mask Shape:    torch.Size([32, 7, 1440])


In [None]:

# Participant-grouped train/test split.
#
# Inputs built by earlier cells:
#   X_list   -- per-sample context blocks
#   Y_list   -- per-sample [Q1, Q2, Q3, Q4] labels
#   sub_list -- per-sample participant id, used as the grouping key

X_all = np.stack(X_list).astype(np.float32)
Y_all = np.array(Y_list).astype(np.float32)
groups = np.array(sub_list)

# GroupShuffleSplit keeps all samples of a participant on the same side of
# the split, so no user appears in both train and test.
splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, test_idx = next(splitter.split(X_all, Y_all, groups=groups))

# Slice features and labels with the same index arrays.
X_train, Y_train = X_all[train_idx], Y_all[train_idx]
X_test, Y_test = X_all[test_idx], Y_all[test_idx]

print(f"Training Samples: {len(X_train)}")
print(f"Testing Samples:  {len(X_test)}")
print(f"Unique Users in Train: {len(np.unique(groups[train_idx]))}")
print(f"Unique Users in Test:  {len(np.unique(groups[test_idx]))}")

# Wrap each split in the dataset class; only the training loader shuffles.
train_dataset = FitbitEMADataset(X_train, Y_train)
test_dataset = FitbitEMADataset(X_test, Y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

Training Samples: 9058
Testing Samples:  2163
Unique Users in Train: 18
Unique Users in Test:  5


In [None]:


# ==========================================
# 1. THE POSITION "BADGE" FACTORY
# ==========================================
class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding for batch-first sequences.

    Precomputes a ``(1, max_len, d_model)`` table in which even feature
    columns hold ``sin(pos * w_k)`` and odd columns hold ``cos(pos * w_k)``,
    with ``w_k = exp(-ln(10000) * 2k / d_model)``. The table is registered as
    a buffer: it is part of the module state (moves with ``.to(device)``, is
    saved in ``state_dict``) but is not a learnable parameter.

    Args:
        d_model: feature dimension of the sequence elements. Both even and
            odd values are supported.
        max_len: number of positions to precompute; inputs passed to
            ``forward`` must not be longer than this.
    """

    def __init__(self, d_model, max_len=5000):
        super().__init__()
        # Table of encodings, one row per position.
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)

        # One angular frequency per (sin, cos) column pair.
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term

        # Even columns get the sine, odd columns the cosine.
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd column than even columns,
        # so trim the frequencies to match instead of failing on a shape
        # mismatch.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        # Add batch dimension: (1, max_len, d_model)
        pe = pe.unsqueeze(0)

        # Part of the model state, but not learnable.
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Add the encoding for positions ``0..x.size(1)-1`` to ``x``.

        Args:
            x: tensor of shape (batch, seq_len, d_model).

        Returns:
            Tensor of the same shape as ``x``.
        """
        return x + self.pe[:, :x.size(1)]

# ==========================================
# 2. THE MAIN HIERARCHICAL MODEL
# ==========================================
class HierarchicalPAT(nn.Module):
    """Two-level transformer: minutes -> day vectors -> one prediction.

    Level 1 encodes each day's per-minute sequence with a transformer encoder
    and summarizes it with a learned CLS token. Level 2 encodes the sequence
    of day summaries with a second transformer encoder, again summarized by a
    CLS token. A linear head maps the final summary to the output logits.

    All arguments default to the values that were previously hard-coded, so
    ``HierarchicalPAT()`` builds the same architecture as before.

    Args:
        d_model: embedding width used at both levels.
        nhead: number of attention heads in every encoder layer.
        dim_feedforward: hidden width of each encoder layer's feed-forward net.
        num_layers: number of encoder layers at each level.
        max_days: maximum number of context days per sample.
        mins_per_day: maximum number of minute slots per day.
        num_outputs: number of logits produced by the head.
    """

    def __init__(self, d_model=64, nhead=4, dim_feedforward=128, num_layers=2,
                 max_days=7, mins_per_day=1440, num_outputs=4):
        super().__init__()

        # --- Config ---
        self.d_model = d_model
        self.nhead = nhead
        self.dim_feedforward = dim_feedforward

        # --- LEVEL 1: MINUTE ENCODER ---
        # Each scalar HR value is projected to a d_model-wide embedding.
        self.input_proj = nn.Linear(1, self.d_model)
        # +1 position for the CLS token prepended to every day.
        self.min_pos_encoder = PositionalEncoding(self.d_model, max_len=mins_per_day + 1)
        self.min_cls_token = nn.Parameter(torch.randn(1, 1, self.d_model))

        min_layer = nn.TransformerEncoderLayer(
            d_model=self.d_model,
            nhead=self.nhead,
            dim_feedforward=self.dim_feedforward,
            batch_first=True
        )
        self.min_transformer = nn.TransformerEncoder(min_layer, num_layers=num_layers)

        # --- LEVEL 2: DAY ENCODER ---
        # +1 position for the CLS token prepended to the day sequence.
        self.day_pos_encoder = PositionalEncoding(self.d_model, max_len=max_days + 1)
        self.day_cls_token = nn.Parameter(torch.randn(1, 1, self.d_model))

        day_layer = nn.TransformerEncoderLayer(
            d_model=self.d_model, nhead=self.nhead,
            dim_feedforward=self.dim_feedforward, batch_first=True
        )
        self.day_transformer = nn.TransformerEncoder(day_layer, num_layers=num_layers)

        # --- LEVEL 3: HEAD ---
        self.head = nn.Linear(self.d_model, num_outputs)

    def forward(self, x, min_mask, day_mask):
        """Compute logits for a batch of multi-day HR context blocks.

        Args:
            x: (B, D, M, 1) float tensor of per-minute values.
            min_mask: (B, D, M) bool tensor, True where a minute is missing
                and must be ignored by attention.
            day_mask: (B, D) bool tensor, True where a whole day is missing
                and must be ignored by attention.

        Returns:
            (B, num_outputs) tensor of raw logits (no activation applied).
        """
        batch_size, num_days, num_mins, _ = x.shape

        # ---------------------------------------------------------
        # STAGE 1: Process minutes (fold days into the batch axis)
        # ---------------------------------------------------------
        # reshape (rather than view) also accepts non-contiguous inputs.
        x_flat = x.reshape(batch_size * num_days, num_mins, 1)        # (B*D, M, 1)
        min_mask_flat = min_mask.reshape(batch_size * num_days, num_mins)

        # Project each value and prepend the per-day CLS token.
        x_emb = self.input_proj(x_flat)                               # (B*D, M, d_model)
        cls_tokens = self.min_cls_token.expand(batch_size * num_days, -1, -1)
        x_emb = torch.cat((cls_tokens, x_emb), dim=1)                 # (B*D, M+1, d_model)

        # Position 0 is the CLS token; minute m sits at position m + 1.
        x_emb = self.min_pos_encoder(x_emb)

        # The CLS position is never masked, so every sequence (even a fully
        # missing day) keeps at least one attendable key.
        cls_mask_col = torch.zeros((batch_size * num_days, 1), dtype=torch.bool, device=x.device)
        min_mask_with_cls = torch.cat((cls_mask_col, min_mask_flat), dim=1)

        day_embeddings = self.min_transformer(x_emb, src_key_padding_mask=min_mask_with_cls)

        # The CLS output is the summary vector of each day.
        day_vectors = day_embeddings[:, 0, :]                         # (B*D, d_model)

        # ---------------------------------------------------------
        # STAGE 2: Process days (unfold and aggregate)
        # ---------------------------------------------------------
        day_vectors = day_vectors.reshape(batch_size, num_days, self.d_model)  # (B, D, d_model)

        # Prepend the sequence-level CLS token.
        cls_tokens_day = self.day_cls_token.expand(batch_size, -1, -1)
        day_seq = torch.cat((cls_tokens_day, day_vectors), dim=1)     # (B, D+1, d_model)

        # Encode the position of each day within the context window.
        day_seq = self.day_pos_encoder(day_seq)

        # Missing days are excluded here; the CLS position is always kept.
        cls_day_mask = torch.zeros((batch_size, 1), dtype=torch.bool, device=x.device)
        day_mask_with_cls = torch.cat((cls_day_mask, day_mask), dim=1)

        final_seq = self.day_transformer(day_seq, src_key_padding_mask=day_mask_with_cls)

        # The CLS output summarizes the whole context window.
        final_vector = final_seq[:, 0, :]                             # (B, d_model)

        # ---------------------------------------------------------
        # STAGE 3: Prediction
        # ---------------------------------------------------------
        logits = self.head(final_vector)                              # (B, num_outputs)
        return logits



In [None]:
# ==========================================
# 4. QUICK TEST RUN
# ==========================================
# End-to-end shape check on random data: dataset -> loader -> model.

# Random inputs: 32 samples, 7 days, 1440 minutes, 1 channel.
dummy_x = np.random.randn(32, 7, 1440, 1).astype(np.float32)
# Zero out days 3..6 for every sample to emulate missing days.
dummy_x[:, 3:] = 0
# Random binary labels, one per question.
dummy_y = np.random.randint(0, 2, (32, 4)).astype(np.float32)

# Wrap in the dataset and pull the single batch it contains.
dataset = FitbitEMADataset(dummy_x, dummy_y)
loader = DataLoader(dataset, batch_size=32)
batch = next(iter(loader))

# Fresh, untrained model with default hyperparameters.
model = HierarchicalPAT()

# Forward pass using the masks derived by the dataset.
logits = model(x=batch['x'], min_mask=batch['min_mask'], day_mask=batch['day_mask'])

print("Input Shape: ", batch['x'].shape)     # [32, 7, 1440, 1]
print("Output Shape:", logits.shape)         # [32, 4]
print("Success! The pipes are connected.")

Input Shape:  torch.Size([32, 7, 1440, 1])
Output Shape: torch.Size([32, 4])
Success! The pipes are connected.
