In [None]:
import os
import argparse
import torch
from sklearn.model_selection import train_test_split
from lib.utils import (
    load_config,
    get_experiment_dir,
    make_windowed_dataset_from_sessions,
    get_participant_id,
    get_participant_projects,
    get_raw_dataset_path,
    get_sessions_for_project,
    generate_dataset_summary,
    load_data_for_participant
)

import pandas as pd
import torch
import matplotlib.pyplot as plt
import plotly.express as px
from torch import nn
from torch.nn.functional import relu
from sklearn.metrics import f1_score
from lib.models import ObnoxiouslySimpleCNN

# ejaz
# asfik
# alsaad
# anam
# iftakhar
# mariah, tj, stephanie, ashlin
#  might be fine with window split
participant = 'ejaz'
labeling = f'andrew smoking labels'
window_size = 3000
window_stride = 3000
sensor_config = {'use_accelerometer': True, 'use_gyroscope': True}
model = 'ObnoxiouslySimpleCNN'

X_train, y_train, X_val, y_val, train_sessions, val_sessions = load_data_for_participant(participant, window_size, window_stride, labeling, sensor_config, split_across_windows=True)

Participant ID: 14
Projects for ashlin: ['ashlin_phase1', 'ashlin_phase2']
Processing project: ashlin_phase1
Processing project: ashlin_phase2
Splitting data across windows for cross-validation.
torch.Size([5067, 6, 3000]) torch.Size([5067, 1])
torch.Size([3379, 6, 3000]) torch.Size([3379, 1])


In [4]:
model = ObnoxiouslySimpleCNN(input_channels=6, channels=[64,64,64,64,128], kernel_sizes=[7,3,3,3,3], dilations=[1,2,4,8,16], dropout=0.5)
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
criterion = nn.BCEWithLogitsLoss()
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', factor=0.5, patience=5)
trainloader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(X_train, y_train), batch_size=32, shuffle=True)
valloader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(X_val, y_val), batch_size=32, shuffle=False)

In [5]:
lossi = []
val_lossi = []
val_f1i = []
val_i = []

epoch = 0


model.train()

model.to('cuda')
criterion = criterion.to('cuda')

for _ in range(500):
    loss_epoch = 0
    for Xi,yi in trainloader:
        Xi = Xi.to('cuda')
        yi = yi.to('cuda')
        logits = model(Xi)
        loss = criterion(logits, yi)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_epoch += loss.item()
    loss_epoch /= len(trainloader)
    lossi.append(loss_epoch)

    model.eval()
    val_loss_total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for Xi, yi in valloader:
            Xi = Xi.to('cuda')
            yi = yi.to('cuda')
            logits = model(Xi)
            loss = criterion(logits, yi)
            val_loss_total += loss.item()

            all_preds.append(torch.sigmoid(logits).round().cpu())
            all_labels.append(yi.cpu())

    val_loss = val_loss_total / len(valloader)
    val_f1 = f1_score(
    torch.cat(all_labels).numpy(),
    torch.cat(all_preds).numpy(),
    average='macro'
    )

    val_lossi.append(val_loss)
    val_i.append(epoch)
    val_f1i.append(val_f1)

    fig,ax= plt.subplots(nrows=2,ncols=1,figsize=(7.2,10))
    ax[0].plot(lossi)
    ax[0].plot(val_i, val_lossi, color='red')
    ax[0].set_yscale('log')

    ax[1].plot(val_i, val_f1i, color='green')

    plt.savefig(f'loss.png')
    plt.close()
        
    print(f'Epoch {epoch}: train loss {loss_epoch:.4f}, val loss {val_loss:.4f}, val f1 {val_f1:.4f}, lr {optimizer.param_groups[0]["lr"]:.6f}')
    model.train()
    epoch += 1

Epoch 0: train loss 0.1684, val loss 0.1130, val f1 0.4934, lr 0.000300
Epoch 1: train loss 0.1134, val loss 0.1124, val f1 0.4934, lr 0.000300
Epoch 2: train loss 0.1128, val loss 0.1027, val f1 0.4934, lr 0.000300
Epoch 3: train loss 0.1064, val loss 0.1058, val f1 0.4934, lr 0.000300
Epoch 4: train loss 0.1056, val loss 0.1029, val f1 0.4934, lr 0.000300
Epoch 5: train loss 0.1064, val loss 0.1029, val f1 0.4934, lr 0.000300
Epoch 6: train loss 0.1041, val loss 0.1034, val f1 0.4934, lr 0.000300
Epoch 7: train loss 0.1034, val loss 0.0986, val f1 0.4934, lr 0.000300
Epoch 8: train loss 0.1013, val loss 0.1004, val f1 0.4934, lr 0.000300
Epoch 9: train loss 0.0998, val loss 0.1061, val f1 0.4934, lr 0.000300
Epoch 10: train loss 0.0984, val loss 0.0971, val f1 0.5158, lr 0.000300
Epoch 11: train loss 0.1013, val loss 0.0970, val f1 0.5569, lr 0.000300
Epoch 12: train loss 0.0978, val loss 0.0954, val f1 0.5569, lr 0.000300
Epoch 13: train loss 0.0960, val loss 0.0989, val f1 0.5475, 

In [None]:
labeling = f'andrew smoking labels'
window_stride_eval = 3000
sensor_config = {'use_accelerometer': True, 'use_gyroscope': True}
from lib.utils import load_data, resample

for index,session in enumerate(val_sessions):
    session_name = session['session_name']
    raw_dataset_path = session['raw_dataset_path']
    start_ns = session.get('start_ns')
    stop_ns = session.get('stop_ns')

    X = []
    y = []

    sensor_columns = []
    if sensor_config.get('use_accelerometer', True):
        sensor_columns.extend(['accel_x', 'accel_y', 'accel_z'])
    if sensor_config.get('use_gyroscope', False):
        sensor_columns.extend(['gyro_x', 'gyro_y', 'gyro_z'])

    bouts = [b for b in session['bouts'] if b['label'] == labeling]
    df = load_data(raw_dataset_path, session_name, sensor_config, start_ns, stop_ns)
    df = resample(df)
    df['label'] = 0

    for bout in bouts:
        start = bout['start']
        end = bout['end']
        df.loc[(df['ns_since_reboot'] >= start) & (df['ns_since_reboot'] <= end), 'label'] = 1

    if 'accel_x' not in df.columns and sensor_config.get('use_accelerometer', True):
        if 'x' in df.columns:
            df.rename(columns={'x': 'accel_x', 'y': 'accel_y', 'z': 'accel_z'}, inplace=True)
    data_columns = sensor_columns + ['label']
    missing_columns = [col for col in data_columns if col not in df.columns]
    if missing_columns:
        print(f"Warning: Missing columns {missing_columns} in session {session_name}")
        raise ValueError(f"Missing columns {missing_columns} in session {session_name}")

    data = torch.tensor(df[data_columns].values, dtype=torch.float32)
    if data.shape[0] < window_size:
        padding_length = window_size - data.shape[0]
        padding = torch.zeros((padding_length, data.shape[1]), dtype=torch.float32)
        data = torch.cat([data, padding], dim=0)
        print(f"Zero-padded session {session_name} from {data.shape[0] - padding_length} to {data.shape[0]} samples")

    windowed_data = data.unfold(dimension=0,size=window_size,step=window_stride_eval)
    X.append(windowed_data[:,:-1,:])
    y.append(windowed_data[:,-1,:])

    X = torch.cat(X)
    y = (~(torch.cat(y) == 0).all(axis=1)).float()

    model.eval()

    with torch.no_grad():
        logits = model(X.to('cuda'))
        predictions = torch.sigmoid(logits).cpu().numpy()

    num_windows = predictions.shape[0]  # 907

    # Initialize output
    time_domain_length = (num_windows - 1) * window_stride_eval + window_size
    time_domain_preds = torch.zeros(time_domain_length)
    overlap_counts = torch.zeros(time_domain_length)

    # Accumulate predictions
    for i, pred in enumerate(predictions):
        start_idx = i * window_stride_eval
        end_idx = start_idx + window_size
        time_domain_preds[start_idx:end_idx] += pred.item()
        overlap_counts[start_idx:end_idx] += 1

    # Average where overlapping
    time_domain_preds = time_domain_preds / overlap_counts

    df = df.iloc[:len(time_domain_preds)]
    df['logits'] = time_domain_preds
    df['y_pred'] = (time_domain_preds > .5).int()

    from sklearn.metrics import ConfusionMatrixDisplay, classification_report
    y_true = df['label'].values
    y_pred = df['y_pred'].astype(int).values
    # ConfusionMatrixDisplay.from_predictions(y_true, y_pred)
    # print(classification_report(y_true, y_pred))
    print(index,session_name,f1_score(y_true, y_pred, average='macro'))

In [None]:
# Visualize one of x_train sessions in time domain with model predictions
session = val_sessions[0]
window_size = 3000
window_stride_eval = 3000
sensor_config = {'use_accelerometer': True, 'use_gyroscope': True}

session_name = session['session_name']
raw_dataset_path = session['raw_dataset_path']
start_ns = session.get('start_ns')
stop_ns = session.get('stop_ns')


X = []
y = []

# Determine which columns to use based on sensor config
sensor_columns = []
if sensor_config.get('use_accelerometer', True):
    sensor_columns.extend(['accel_x', 'accel_y', 'accel_z'])
if sensor_config.get('use_gyroscope', False):
    sensor_columns.extend(['gyro_x', 'gyro_y', 'gyro_z'])

bouts = [b for b in session['bouts'] if b['label'] == labeling]

from lib.utils import load_data, resample

df = load_data(raw_dataset_path, session_name, sensor_config, start_ns, stop_ns)
df = resample(df)
df['label'] = 0

for bout in bouts:
    start = bout['start']
    end = bout['end']
    df.loc[(df['ns_since_reboot'] >= start) & (df['ns_since_reboot'] <= end), 'label'] = 10

if 'accel_x' not in df.columns and sensor_config.get('use_accelerometer', True):
    if 'x' in df.columns:
        df.rename(columns={'x': 'accel_x', 'y': 'accel_y', 'z': 'accel_z'}, inplace=True)
data_columns = sensor_columns + ['label']
missing_columns = [col for col in data_columns if col not in df.columns]
if missing_columns:
    print(f"Warning: Missing columns {missing_columns} in session {session_name}")
    raise ValueError(f"Missing columns {missing_columns} in session {session_name}")

data = torch.tensor(df[data_columns].values, dtype=torch.float32)

if data.shape[0] < window_size:
    # Zero pad the data to window size
    padding_length = window_size - data.shape[0]
    padding = torch.zeros((padding_length, data.shape[1]), dtype=torch.float32)
    data = torch.cat([data, padding], dim=0)
    print(f"Zero-padded session {session_name} from {data.shape[0] - padding_length} to {data.shape[0]} samples")

windowed_data = data.unfold(dimension=0,size=window_size,step=window_stride_eval)
X.append(windowed_data[:,:-1,:])
y.append(windowed_data[:,-1,:])

X = torch.cat(X)
y = (~(torch.cat(y) == 0).all(axis=1)).float()

model.eval()

with torch.no_grad():
    logits = model(X.to('cuda'))
    predictions = torch.sigmoid(logits).cpu().numpy()

num_windows = predictions.shape[0]  # 907
time_domain_length = (num_windows - 1) * window_stride_eval + window_size  # 909000
time_domain_preds = torch.zeros(time_domain_length)
overlap_counts = torch.zeros(time_domain_length)
for i, pred in enumerate(predictions):
    start_idx = i * window_stride_eval
    end_idx = start_idx + window_size
    time_domain_preds[start_idx:end_idx] += pred.item()
    overlap_counts[start_idx:end_idx] += 1
time_domain_preds = time_domain_preds / overlap_counts
df = df.iloc[:len(time_domain_preds)]
df['logits'] = time_domain_preds*10
df['y_pred'] = (time_domain_preds > .5).int()*10
from sklearn.metrics import ConfusionMatrixDisplay, classification_report
y_true = df['label'].values
y_pred = df['y_pred'].astype(int).values
ConfusionMatrixDisplay.from_predictions(y_true, y_pred)
print(classification_report(y_true, y_pred))
import plotly.express as px
fig = px.line(df.iloc[::30], x='ns_since_reboot', y=['accel_x','accel_y','accel_z','label','y_pred','logits'], title=f'Session {session_name} Smoking Prediction')
fig.show(renderer='browser')
session['raw_dataset_path']