In [1]:
import numpy as np
import random
import time
import sys
from functools import partial
import torch
import torch.nn as nn
from torchvision.datasets import DatasetFolder
from torch.utils.data import DataLoader, WeightedRandomSampler
from torch.utils.tensorboard.writer import SummaryWriter
from tqdm.notebook import tqdm
import pandas as pd

# from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.utils.class_weight import compute_class_weight

# from sklearn.model_selection import LeaveOneGroupOut, cross_validate

#### PYTORCH CONFIGURATION SETTINGS ######
if not torch.backends.mps.is_available():
    if not torch.backends.mps.is_built():
        print(
            "MPS not available because the current PyTorch install was not "
            "built with MPS enabled."
        )
    else:
        print(
            "MPS not available because the current MacOS version is not 12.3+ "
            "and/or you do not have an MPS-enabled device on this machine."
        )
device = torch.device("mps") if torch.has_mps else torch.device("cpu")
print(f"Using {device.type} device ")

### DATA LOCATION ###
data_dir = "/Users/jrudoler/data/small_scalp_features/"


Using mps device 


## Test MPS speed

In [2]:
import time
import torch
import torchvision


def test_speed(device):
    print(f"Running on {device}...")

    model = torchvision.models.resnet18(pretrained=True).to(device)
    model.eval()

    input_data = torch.randn(64, 3, 224, 224).to(device)

    start_time = time.time()

    with torch.no_grad():
        for _ in range(100):
            _ = model(input_data)

    end_time = time.time()
    elapsed_time = end_time - start_time

    print(f"Elapsed time: {elapsed_time:.2f} seconds")


In [3]:
test_speed(device)


Running on mps...




Elapsed time: 5.52 seconds


In [4]:
test_speed(torch.device("cpu"))


Running on cpu...
Elapsed time: 100.65 seconds


## Define helper functions

In [7]:
def set_seed(seed=None, seed_torch=True):
    """
    Function that controls randomness. NumPy and random modules must be imported.

    Args:
      seed : Integer
        A non-negative integer that defines the random state. Default is `None`.
      seed_torch : Boolean
        If `True` sets the random seed for pytorch tensors, so pytorch module
        must be imported. Default is `True`.

    Returns:
      Nothing.
    """
    if seed is None:
        seed = np.random.choice(2**32)
    random.seed(seed)
    np.random.seed(seed)
    if seed_torch:
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

    print(f"Random seed {seed} has been set.")


class SimpleDataset(torch.utils.data.Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]


class LogisticRegressionTorch(torch.nn.Module):
    def __init__(self, input_dim, output_dim=1):
        super().__init__()
        self.logistic = torch.nn.Sequential(
            torch.nn.Linear(input_dim, output_dim, bias=True), torch.nn.Sigmoid()
        )

    def forward(self, x):
        # logits = torch.sigmoid(self.linear(x))
        probs = self.logistic(x)
        return probs


def train_loop(
    dataloader,
    model,
    loss_fn,
    optimizer,
    profiler=None,
    writer=None,
    log_num=4,
    global_step=0,
):
    size = len(dataloader.dataset)
    running_loss = 0.0
    for batch, (X, y) in enumerate(dataloader):
        X = X.float().to(device)
        y = y.float().to(device)
        # Compute prediction and loss
        pred = torch.squeeze(model(X))
        loss = loss_fn(pred, y)
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Profile code according to schedule
        if profiler:
            if profiler.schedule(batch).value > 0:
                profiler.step()
            else:
                profiler.stop()
        running_loss += loss.item()
        if batch % log_num == (log_num - 1):
            avg_loss = running_loss / log_num
            running_loss = 0.0
            pred_eval, y_eval = pred.detach().cpu().numpy(), y.detach().cpu().numpy()
            train_auc = roc_auc_score(y_true=y_eval, y_score=pred_eval)
            if writer:
                # Log the training loss and performance
                writer.add_scalar("Loss/training", avg_loss, global_step)
                writer.add_scalar("AUC/training", train_auc, global_step)
                # Update the global step
                global_step += 1
                writer.flush()
            else:
                print(
                    f"loss: {avg_loss:>7f}  [{batch * len(X):>5d}/{size:>5d}] "
                    + f"\tAUC:{train_auc}"
                )
    return global_step


# def test_loop(dataloader, model, loss_fn):
#     size = len(dataloader.dataset)
#     num_batches = len(dataloader)
#     test_loss, correct = 0, 0

#     # we don't want to track gradients here because we're just doing
#     # a forward pass to evaluate predictions
#     with torch.no_grad():
#         for X, y in dataloader:
#             pred = torch.squeeze(model(X))
#             test_loss += loss_fn(pred, y).item()
#             # round predicted probs to get label prediction, compute n correct
#             correct += (pred.round() == y).type(torch.float).sum().item()

#     test_loss /= num_batches
#     correct /= size
#     print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")


def test_auc_score(dataloader, model, loss_fn, writer=None, global_step=0):
    num_batches = len(dataloader)
    test_loss, test_auc = 0, 0
    with torch.no_grad():
        scores = []
        for X, y in dataloader:
            X = X.float().to(device)
            y = y.float().to(device)
            pred = torch.squeeze(model(X))
            test_loss += loss_fn(pred, y).item()
            pred, y = pred.detach().cpu().numpy(), y.detach().cpu().numpy()
            test_auc += roc_auc_score(y_true=y, y_score=pred)
        test_loss /= num_batches
        test_auc /= num_batches
        if writer:
            writer.add_scalar("Loss/test", test_loss, global_step)
            writer.add_scalar("AUC/test", test_auc, global_step)
            global_step += 1
        else:
            print("Average Test AUC:", test_auc)
    return test_auc, global_step


## Preconditioning Model (Features)

In [25]:
class PrecondFeatLogisticRegressionTorch(torch.nn.Module):
    def __init__(self, input_dim, output_dim=None):
        super().__init__()
        if output_dim is None:
            output_dim = input_dim
        self.condition = nn.Sequential(
            nn.Conv1d(
                in_channels=input_dim,
                out_channels=2 * input_dim,
                kernel_size=1,
                padding=1,
                groups=input_dim,
            ),
            nn.ReLU(),
            nn.AvgPool1d(kernel_size=2),
            nn.Conv1d(
                in_channels=2 * input_dim,
                out_channels=4 * input_dim,
                kernel_size=2,
                padding=1,
                groups=2 * input_dim,
            ),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
            nn.Conv1d(
                in_channels=4 * input_dim,
                out_channels=2 * input_dim,
                kernel_size=4,
                padding=1,
                groups=2 * input_dim,
            ),
            nn.ReLU(),
            nn.AvgPool1d(kernel_size=4),
            nn.Conv1d(
                in_channels=2 * input_dim,
                out_channels=input_dim,
                kernel_size=2,
                padding=0,
                groups=input_dim,
            ),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=4),
            nn.Flatten(),
        )
        self.logistic = nn.Sequential(nn.Linear(output_dim, 1, bias=True), nn.Sigmoid())

    def forward(self, x):
        x_cond = self.condition(x)
        probs = self.logistic(x_cond)
        return probs


model = PrecondFeatLogisticRegressionTorch(2480)
dummy_input = torch.zeros(
    1, 2480, 140
)  # Adjust the shape of this tensor to match the input shape of your model
writer = SummaryWriter("./logs/model_graph_precond_feats")
writer.add_graph(model, dummy_input)


In [23]:
input_dim = 2480
dummy_input = torch.ones(5, 2480, 140)
condition = nn.Sequential(
    nn.Conv1d(
        in_channels=input_dim,
        out_channels=2 * input_dim,
        kernel_size=2,
        padding=1,
        groups=input_dim,
    ),
    nn.ReLU(),
    nn.AvgPool1d(kernel_size=4),
    nn.Conv1d(
        in_channels=2 * input_dim,
        out_channels=2 * input_dim,
        kernel_size=4,
        padding=1,
        groups=2 * input_dim,
    ),
    nn.ReLU(),
    nn.MaxPool1d(kernel_size=4),
    nn.Conv1d(
        in_channels=2 * input_dim,
        out_channels=input_dim,
        kernel_size=4,
        padding=1,
        groups=input_dim,
    ),
    nn.ReLU(),
    nn.AvgPool1d(kernel_size=4),
)
condition(dummy_input).shape


torch.Size([5, 2480, 1])

#### Training

In [10]:
subject = "LTP093"
set_seed(56)
torch.use_deterministic_algorithms(True)
torch.backends.cudnn.deterministic = True  # type: ignore
torch_auc_list = []
timestr = time.strftime("%m%d-%H%M%S")
### HYPERPARAMETERS ####
lr = 1e-2  # 1e-2
weight_decay = 1e-8  # 1e-4
batch_size = 256
########################
log_dir = "/Users/jrudoler/Library/CloudStorage/Box-Box/JR_CML/pytorch_logs/"
# profiler = torch.profiler.profile(
#     schedule=torch.profiler.schedule(wait=0, warmup=2, active=3, repeat=2),
#     on_trace_ready=torch.profiler.tensorboard_trace_handler(f'./logs/precond_LR_{lr:.0e}_WD_{weight_decay:.0e}_BS_{batch_size}_{subject}_{timestr}'),
#     record_shapes=True, with_stack=True, profile_memory=True
# )
# profiler.start()
# writer = SummaryWriter(f'./logs/precond_LR_{lr:.0e}_WD_{weight_decay:.0e}_BS_{batch_size}_{subject}_{timestr}')

for sess in range(24):
    print(f"{'#'*30}\nSESSION {sess}\n{'#'*30}")
    ## log run ##
    run_dir = f"smaller_precond_LR_{lr:.0e}_WD_{weight_decay:.0e}_BS_{batch_size}_{subject}_{sess}_{timestr}"
    profiler = None
    writer = SummaryWriter(log_dir + run_dir)
    start = time.time()
    ## data ##
    try:
        test_file_crit = (
            lambda s: s.endswith(".pt")
            and s.count(f"sub_{subject}")
            and s.count(f"sess_{sess}")
        )
        test_dataset = DatasetFolder(
            data_dir,
            loader=partial(torch.load),  # map_location=device
            #  target_transform=partial(torch.tensor, device=device),
            is_valid_file=test_file_crit,
        )
        train_file_crit = (
            lambda s: s.endswith(".pt")
            and s.count(f"sub_{subject}")
            and not s.count(f"sess_{sess}")
        )
        train_dataset = DatasetFolder(
            data_dir,
            loader=partial(torch.load),
            #   target_transform=partial(torch.tensor, device=device),
            is_valid_file=train_file_crit,
        )
    except FileNotFoundError:
        print(f"no session {sess}")
        continue
    ## class balancing ##
    cls_weights = compute_class_weight(
        class_weight="balanced",
        classes=np.unique(train_dataset.targets),
        y=train_dataset.targets,
    )
    weights = cls_weights[train_dataset.targets]
    sampler = WeightedRandomSampler(weights, len(train_dataset), replacement=True)
    ## data loaders ##
    train_dataloader = DataLoader(
        train_dataset, batch_size=batch_size, sampler=sampler
    )  # , num_workers=2, prefetch_factor=2, persistent_workers=True)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
    ## create model ##
    n_features = train_dataset[0][0].shape[0]
    model = PrecondFeatLogisticRegressionTorch(n_features).to(device)
    loss_fn = torch.nn.BCELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay)
    ## training epochs ##
    print(f"{time.time()-start} s to initialize session training")
    EPOCHS = 200
    global_step = 0
    for t in range(EPOCHS):
        # print(f"{'-'*30}\nEpoch {t+1}\n{'-'*30}")
        train_loop(
            train_dataloader, model, loss_fn, optimizer, profiler, writer, log_num=2
        )
        out = test_auc_score(test_dataloader, model, writer)
    torch_auc_list.append(out)
np.save("torch_precond_auc.npy", np.array(torch_auc_list))


Random seed 56 has been set.
##############################
SESSION 0
##############################
0.18145179748535156 s to initialize session training
------------------------------
Epoch 1
------------------------------


STAGE:2023-04-20 17:49:45 78736:4191434 ActivityProfilerController.cpp:311] Completed Stage: Warm Up
STAGE:2023-04-20 17:49:51 78736:4191434 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-04-20 17:49:51 78736:4191434 ActivityProfilerController.cpp:321] Completed Stage: Post Processing
STAGE:2023-04-20 17:49:58 78736:4191434 ActivityProfilerController.cpp:311] Completed Stage: Warm Up
STAGE:2023-04-20 17:50:02 78736:4191434 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-04-20 17:50:02 78736:4191434 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


Average Test AUC: 0.5384615384615385
------------------------------
Epoch 2
------------------------------
Average Test AUC: 0.5378395189715944
------------------------------
Epoch 3
------------------------------
Average Test AUC: 0.5380468588015759
------------------------------
Epoch 4
------------------------------
Average Test AUC: 0.5380468588015759
------------------------------
Epoch 5
------------------------------
Average Test AUC: 0.5368028198216876
------------------------------
Epoch 6
------------------------------
Average Test AUC: 0.5359734605017623
------------------------------
Epoch 7
------------------------------
Average Test AUC: 0.5358697905867718
------------------------------
Epoch 8
------------------------------
Average Test AUC: 0.5346257516068837
------------------------------
Epoch 9
------------------------------
Average Test AUC: 0.5322413435620983
------------------------------
Epoch 10
------------------------------
Average Test AUC: 0.5328633630520423

KeyboardInterrupt: 

## Preconditioning with Fewer Layers

In [13]:
class SmallerPrecondFeatLogisticRegressionTorch(torch.nn.Module):
    def __init__(self, input_dim, output_dim=None):
        super().__init__()
        if output_dim is None:
            output_dim = input_dim
        self.condition = nn.Sequential(
            nn.Conv1d(
                in_channels=input_dim,
                out_channels=2 * input_dim,
                kernel_size=2,
                padding=1,
                groups=input_dim,
            ),
            nn.ReLU(),
            nn.AvgPool1d(kernel_size=4),
            nn.Conv1d(
                in_channels=2 * input_dim,
                out_channels=2 * input_dim,
                kernel_size=4,
                padding=1,
                groups=2 * input_dim,
            ),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=4),
            nn.Conv1d(
                in_channels=2 * input_dim,
                out_channels=input_dim,
                kernel_size=4,
                padding=1,
                groups=input_dim,
            ),
            nn.ReLU(),
            nn.AvgPool1d(kernel_size=4),
            nn.Flatten(),
        )
        self.logistic = nn.Sequential(nn.Linear(output_dim, 1, bias=True), nn.Sigmoid())

    def forward(self, x):
        x_cond = self.condition(x)
        probs = self.logistic(x_cond)
        return probs


In [21]:
subject = "LTP093"
set_seed(56)
torch.use_deterministic_algorithms(True)
torch.backends.cudnn.deterministic = True
torch_auc_list = []
timestr = time.strftime("%m%d-%H%M%S")
### HYPERPARAMETERS ####
lr = 1e-2
weight_decay = 1e-4  # 1e-4
batch_size = 256
########################
log_dir = "/Users/jrudoler/Library/CloudStorage/Box-Box/JR_CML/pytorch_logs/"

for sess in range(24):
    print(f"{'#'*30}\nSESSION {sess}\n{'#'*30}")
    start = time.time()
    ## data ##
    try:
        test_file_crit = (
            lambda s: s.endswith(".pt")
            and s.count(f"sub_{subject}")
            and s.count(f"sess_{sess}")
        )
        test_dataset = DatasetFolder(
            data_dir, loader=partial(torch.load), is_valid_file=test_file_crit
        )
        train_file_crit = (
            lambda s: s.endswith(".pt")
            and s.count(f"sub_{subject}")
            and not s.count(f"sess_{sess}")
        )
        train_dataset = DatasetFolder(
            data_dir, loader=partial(torch.load), is_valid_file=train_file_crit
        )
    except FileNotFoundError:
        print(f"no session {sess}")
        continue
    ## log run ##
    run_dir = f"smaller_precond_LR_{lr:.0e}_WD_{weight_decay:.0e}_BS_{batch_size}_{subject}_{sess}_{timestr}"
    profiler = None
    writer = SummaryWriter(log_dir + run_dir)
    ## class balancing ##
    cls_weights = compute_class_weight(
        class_weight="balanced",
        classes=np.unique(train_dataset.targets),
        y=train_dataset.targets,
    )
    weights = cls_weights[train_dataset.targets]
    sampler = WeightedRandomSampler(weights, len(train_dataset), replacement=True)
    ## data loaders ##
    train_dataloader = DataLoader(
        train_dataset, batch_size=batch_size, sampler=sampler
    )  # , num_workers=2, prefetch_factor=2, persistent_workers=True)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
    ## create model ##
    n_features = train_dataset[0][0].shape[0]
    model = SmallerPrecondFeatLogisticRegressionTorch(n_features).to(device)
    loss_fn = torch.nn.BCELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay)
    ## training epochs ##
    print(f"{time.time()-start} s to initialize session training")
    EPOCHS = 150
    global_step = 0
    for t in range(EPOCHS):
        # print(f"{'-'*30}\nEpoch {t+1}\n{'-'*30}")
        global_step = train_loop(
            train_dataloader,
            model,
            loss_fn,
            optimizer,
            profiler,
            writer,
            log_num=4,
            global_step=global_step,
        )
        out, global_step = test_auc_score(
            test_dataloader, model, loss_fn, writer, global_step=global_step
        )
    torch_auc_list.append(out)
np.save("torch_small_precond_auc.npy", np.array(torch_auc_list))


Random seed 56 has been set.
##############################
SESSION 0
##############################
0.028130054473876953 s to initialize session training
##############################
SESSION 1
##############################
0.026775121688842773 s to initialize session training
##############################
SESSION 2
##############################
0.01861715316772461 s to initialize session training
##############################
SESSION 3
##############################
0.02695488929748535 s to initialize session training
##############################
SESSION 4
##############################
0.026250839233398438 s to initialize session training
##############################
SESSION 5
##############################
0.028114795684814453 s to initialize session training
##############################
SESSION 6
##############################
0.028380870819091797 s to initialize session training
##############################
SESSION 7
##############################
0.01862883567810058

## Logistic Regression Network

In [5]:
subject = "LTP093"
set_seed(56)
torch.use_deterministic_algorithms(True)
torch.backends.cudnn.deterministic = True  # type: ignore
torch_auc_list = []
timestr = time.strftime("%Y%m%d-%H%M%S")
### HYPERPARAMETERS ####
lr = 1e-4
weight_decay = 10
batch_size = 256
########################
log_dir = "/Users/jrudoler/Library/CloudStorage/Box-Box/JR_CML/pytorch_logs/"
# profiler = torch.profiler.profile(
#     schedule=torch.profiler.schedule(wait=0, warmup=2, active=3, repeat=2),
#     on_trace_ready=torch.profiler.tensorboard_trace_handler(log_dir+run_dir),
#     record_shapes=True, with_stack=True, profile_memory=True
# )
# profiler.start()

for sess in range(24):
    print(f"{'#'*30}\nSESSION {sess}\n{'#'*30}")
    start = time.time()
    ## data ##
    try:
        test_file_crit = (
            lambda s: s.endswith(".pt")
            and s.count(f"sub_{subject}")
            and s.count(f"sess_{sess}")
        )
        test_dataset = DatasetFolder(
            data_dir,
            loader=partial(torch.load),
            transform=partial(torch.mean, dim=-1),
            is_valid_file=test_file_crit,
        )
        train_file_crit = (
            lambda s: s.endswith(".pt")
            and s.count(f"sub_{subject}")
            and not s.count(f"sess_{sess}")
        )
        train_dataset = DatasetFolder(
            data_dir,
            loader=partial(torch.load),
            transform=partial(torch.mean, dim=-1),
            is_valid_file=train_file_crit,
        )
    except FileNotFoundError:
        print(f"no session {sess}")
        continue
    ## log run ##
    run_dir = f"logreg_LR_{lr:.0e}_WD_{weight_decay:.0e}_BS_{batch_size}_{subject}_{sess}_{timestr}"
    profiler = None
    writer = SummaryWriter(log_dir + run_dir)
    ## class balancing ##
    cls_weights = compute_class_weight(
        class_weight="balanced",
        classes=np.unique(train_dataset.targets),
        y=train_dataset.targets,
    )
    weights = cls_weights[train_dataset.targets]
    sampler = WeightedRandomSampler(weights, len(train_dataset), replacement=True)
    ## data loaders ##
    train_dataloader = DataLoader(
        train_dataset, batch_size=batch_size, sampler=sampler
    )  # , num_workers=2, prefetch_factor=2, persistent_workers=True)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
    ## create model ##
    n_features = train_dataset[0][0].shape[0]
    model = LogisticRegressionTorch(n_features).to(device)
    loss_fn = torch.nn.BCELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay)
    ## training epochs ##
    print(f"{time.time()-start} s to initialize session training")
    EPOCHS = 150
    #     scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=[2, 4, 6, 8])
    for t in range(EPOCHS):
        train_loop(
            train_dataloader, model, loss_fn, optimizer, profiler, writer, log_num=4
        )
        out = test_auc_score(test_dataloader, model, loss_fn, writer)
    #         if t in scheduler.milestones:
    #             scheduler.step()
    torch_auc_list.append(out)
np.save("torch_logreg_auc.npy", np.array(torch_auc_list))


Random seed 56 has been set.
##############################
SESSION 0
##############################
0.031739234924316406 s to initialize session training
##############################
SESSION 1
##############################
0.02824878692626953 s to initialize session training
##############################
SESSION 2
##############################
0.028312206268310547 s to initialize session training
##############################
SESSION 3
##############################
0.03293585777282715 s to initialize session training
##############################
SESSION 4
##############################
0.028208017349243164 s to initialize session training
##############################
SESSION 5
##############################
0.02835702896118164 s to initialize session training
##############################
SESSION 6
##############################
0.028221845626831055 s to initialize session training
##############################
SESSION 7
##############################
0.02892613410949707 

KeyboardInterrupt: 

## Scratch

In [16]:
train_dataloader = DataLoader(
    train_dataset,
    batch_size=256,
    sampler=sampler,
    num_workers=4,
    prefetch_factor=10,
    persistent_workers=True,
)
times = []
last = time.time()
i = 0
for X, y in train_dataloader:
    if i > 100:
        break
    diff = time.time() - last
    times.append(diff)
    i += 1
    last = time.time()
print("mean:", np.mean(times))
print("median:", np.median(times))
print("std:", np.std(times, ddof=1))
print("max:", np.max(times))
print("min:", np.min(times))


mean: 0.8604953289031982
median: 9.202957153320312e-05
std: 2.7879848599983923
max: 9.26577091217041
min: 4.792213439941406e-05


In [13]:
train_dataloader = DataLoader(
    train_dataset, batch_size=256, sampler=sampler, num_workers=0
)
times = []
tot = time.time()
last = time.time()
i = 0
for X, y in train_dataloader:
    if i > 100:
        break
    diff = time.time() - last
    times.append(diff)
    i += 1
    last = time.time()
print("mean:", np.mean(times))
print("median:", np.median(times))
print("std:", np.std(times, ddof=1))
print("max:", np.max(times))
print("min:", np.min(times))
print("total time:", time.time() - tot)


mean: 0.11424001780423251
median: 0.11072301864624023
std: 0.022017329251564764
max: 0.15985512733459473
min: 0.07237982749938965
total time: 1.2573590278625488


## Preconditioning Model (Raw signal)

In [None]:
class PrecondLogisticRegressionTorch(torch.nn.Module):
    def __init__(self, input_dim, output_dim=3968, n_elec=124):
        super().__init__()
        self.condition = nn.Sequential(
            nn.Conv1d(
                in_channels=n_elec,
                out_channels=2 * n_elec,
                kernel_size=1,
                padding=0,
                groups=n_elec,
            ),
            nn.ReLU(),
            nn.AvgPool1d(kernel_size=2),
            nn.Conv1d(
                in_channels=2 * n_elec,
                out_channels=4 * n_elec,
                kernel_size=2,
                padding=0,
                groups=2 * n_elec,
            ),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
            nn.Conv1d(
                in_channels=4 * n_elec,
                out_channels=8 * n_elec,
                kernel_size=4,
                padding=0,
                groups=4 * n_elec,
            ),
            nn.ReLU(),
            nn.AvgPool1d(kernel_size=2),
            nn.Conv1d(
                in_channels=8 * n_elec,
                out_channels=16 * n_elec,
                kernel_size=8,
                padding=0,
                groups=8 * n_elec,
            ),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
            nn.Conv1d(
                in_channels=16 * n_elec,
                out_channels=8 * n_elec,
                kernel_size=8,
                padding=0,
                groups=8 * n_elec,
            ),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=8),
            nn.Flatten(),
        )
        self.logistic = nn.Sequential(nn.Linear(output_dim, 1), nn.Sigmoid())

    def forward(self, x):
        x_cond = self.condition(x)
        probs = self.logistic(x_cond)
        return probs


#### Training

In [None]:
# ts = TimeSeries.from_hdf("/scratch/jrudoler/scalp_features/LTP093_eeg.h5")
# ts = ts.stack(row = ("event", "channel"), create_index=False).T
# sessions = ts.session.values
# trial = ts.trial.values
# serialpos = ts.serialpos.values
# y = torch.tensor(ts.recalled.values).float()
# X = torch.tensor(ts.data).float()#[:, None, :]
# del ts


In [None]:
# # conv = nn.Conv1d(in_channels=1, out_channels=2, kernel_size=2, stride=1, groups=1)
# # avgpool = nn.AvgPool1d(kernel_size=2)
# # out = conv(X[0:1])
# # print(out.shape)
# # pooled = avgpool(out)
# # print(pooled.shape)

# n_elec = 124
# condition = nn.Sequential(
#     nn.Conv1d(in_channels=n_elec, out_channels=2*n_elec, kernel_size=1, padding=0, groups=n_elec),
#     nn.ReLU(),
#     nn.AvgPool1d(kernel_size=2),
#     nn.Conv1d(in_channels=2*n_elec, out_channels=4*n_elec, kernel_size=2, padding=0, groups=2*n_elec),
#     nn.ReLU(),
#     nn.MaxPool1d(kernel_size=2),
#     nn.Conv1d(in_channels=4*n_elec, out_channels=8*n_elec, kernel_size=4, padding=0, groups=4*n_elec),
#     nn.ReLU(),
#     nn.AvgPool1d(kernel_size=2),
#     nn.Conv1d(in_channels=8*n_elec, out_channels=16*n_elec, kernel_size=8, padding=0, groups=8*n_elec),
#     nn.ReLU(),
#     nn.MaxPool1d(kernel_size=2),
# #     nn.Conv1d(in_channels=16*n_elec, out_channels=16*n_elec, kernel_size=8, padding=0, groups=16*n_elec),
# #     nn.ReLU(),
#     nn.AvgPool1d(kernel_size=2),
#     nn.Conv1d(in_channels=16*n_elec, out_channels=8*n_elec, kernel_size=8, padding=0, groups=8*n_elec),
#     nn.ReLU(),
#     nn.MaxPool1d(kernel_size=8),
#     nn.Flatten()
# )

# condition = nn.Sequential(
#     nn.Conv1d(in_channels=1, out_channels=2, kernel_size=1, padding=0),
#     nn.AvgPool1d(kernel_size=2),
#     nn.Conv1d(in_channels=2, out_channels=4, kernel_size=2, padding=0),
#     nn.MaxPool1d(kernel_size=2),
#     nn.Conv1d(in_channels=4, out_channels=8, kernel_size=4, padding=0),
#     nn.AvgPool1d(kernel_size=2),
#     nn.Conv1d(in_channels=8, out_channels=16, kernel_size=8, padding=0),
#     nn.MaxPool1d(kernel_size=2),
#     nn.Conv1d(in_channels=16, out_channels=16, kernel_size=8, padding=0),
#     nn.AvgPool1d(kernel_size=2),
#     nn.Flatten()
# )


In [None]:
# out = condition(X[0:5])
# print(out.shape)
# pooled = avgpool(out)
# print(pooled.shape)


torch.Size([5, 992])


In [None]:
# sched = torch.profiler.schedule(wait=0, warmup=2, active=3, repeat=2)
# prof = torch.profiler.profile(
#         schedule=sched,
#         on_trace_ready=torch.profiler.tensorboard_trace_handler('/home1/jrudoler/logs/precond_raw'),
#         record_shapes=True,
#         with_stack=True)


In [None]:
# prof.start()
# set_seed(56)
# torch.use_deterministic_algorithms(True)
# # torch.backends.cudnn.deterministic = True
# torch_auc_list = []
# sk_auc_list = []
# logo = LeaveOneGroupOut()
# for (i, (train_idx, test_idx)) in tqdm(list(enumerate(logo.split(X, y, groups=sessions)))):
#     print(f"{'#'*30}\nSESSION {i}\n{'#'*30}")
#     ## create model ##
#     model = PrecondLogisticRegressionTorch(X.shape[-1])
#     loss_fn = torch.nn.BCELoss()
#     lr = 1e-4
#     weight_decay = 1
#     optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay)
#     ## data ##
#     train_set = SimpleDataset(X[train_idx], y[train_idx])
#     test_set = SimpleDataset(X[test_idx], y[test_idx])
#     ## class balancing ##
#     cls_weights = compute_class_weight(
#         class_weight="balanced",
#         classes=np.unique(train_set.y.detach().numpy()),
#         y=train_set.y.detach().numpy(),
#     )
#     weights = cls_weights[train_set.y.detach().numpy().astype(int)]
#     sampler = WeightedRandomSampler(
#         weights, len(train_set.y.detach().numpy()), replacement=True
#     )

#     train_dataloader = DataLoader(train_set, batch_size=200, sampler=sampler)
#     test_dataloader = DataLoader(test_set, batch_size=200, shuffle=True)

#     ## training epochs ##
#     EPOCHS = 10
# #     scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=[2, 4, 6, 8])
#     for t in range(EPOCHS):
#         print(f"{'-'*30}\nEpoch {t+1}\n{'-'*30}")
#         train_loop(train_dataloader, model, loss_fn, optimizer, prof, log_num=2)
#         test_loop(test_dataloader, model, loss_fn)
#         out = test_auc_score(test_set, model)
# #         if t in scheduler.milestones:
# #             scheduler.step()
#     torch_auc_list.append(out)


Random seed 56 has been set.


NameError: name 'X' is not defined