In [1]:
from lib.imports import *
import random

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def load_data():
    skip_idx = [19,24,26,32,34,38,40,45,52,55,70]
    all_idx = list(range(71))
    for idx in skip_idx:
        all_idx.remove(idx)
    random.seed(0)
    random.shuffle(all_idx)
    k_folds = 5
    foldi = 0
    fold_size = int(len(all_idx)/k_folds)
    test_idx = all_idx[foldi*fold_size:(foldi+1)*fold_size]
    for idx in test_idx:
        all_idx.remove(idx)
    train_idx = all_idx
    window_size = 101
    X,y = load_and_window_nursing_list(train_idx,window_size=window_size,data_dir=f'/home/andrew/smoking/data/nursingv1/',label_dir=f'/home/andrew/smoking/data/nursingv1_andrew/')
    X_train,X_dev,y_train,y_dev = train_test_split(X,y,test_size=.05,stratify=y,random_state=0)

    return X_train,X_dev,y_train,y_dev,test_idx

In [3]:
from ray import tune
from ray.air import Checkpoint, session
from ray.tune.schedulers import ASHAScheduler

In [11]:
def train_mlp(config,data_dir=None):
    model = MLP(window_size=101)
    device = 'cuda'
    model.to(device)

    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(),lr=config["lr"])

    checkpoint = session.get_checkpoint()

    if checkpoint:
        checkpoint_state = checkpoint.to_dict()
        start_epoch = checkpoint_state["epoch"]
        model.load_state_dict(checkpoint_state["net_state_dict"])
        optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"])
    else:
        start_epoch = 0

    X_train,X_dev,y_train,y_dev,test_idx = load_data()

    trainloader = DataLoader(TensorDataset(X_train,y_train),batch_size=config['batch_size'],shuffle=True)
    devloader = DataLoader(TensorDataset(X_dev,y_dev),batch_size=config['batch_size'],shuffle=True)

    pbar = tqdm(range(start_epoch,10))

    for epoch in pbar:
        running_loss = 0.0
        epoch_steps = 0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            epoch_steps += 1
            if i % 2000 == 1999:  # print every 2000 mini-batches
                print(
                    "[%d, %5d] loss: %.3f"
                    % (epoch + 1, i + 1, running_loss / epoch_steps)
                )
                running_loss = 0.0
        # Validation loss
        val_loss = 0.0
        val_steps = 0
        total = 0
        correct = 0
        for i, data in enumerate(devloader, 0):
            with torch.no_grad():
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.cpu().numpy()
                val_steps += 1

        checkpoint_data = {
            "epoch": epoch,
            "net_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
        }
        checkpoint = Checkpoint.from_dict(checkpoint_data)

        session.report(
            {"loss": val_loss / val_steps},
            checkpoint=checkpoint,
        )
    print("Finished Training")

In [12]:
from functools import partial

def main(num_samples=1, max_num_epochs=5, gpus_per_trial=1):
    config = {
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([16,32,64,128]),
    }
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2,
    )
    result = tune.run(
        partial(train_mlp),
        resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
    )

    best_trial = result.get_best_trial("loss", "min", "last")
    print(f"Best trial config: {best_trial.config}")
    print(f"Best trial final validation loss: {best_trial.last_result['loss']}")
    print(f"Best trial final validation accuracy: {best_trial.last_result['accuracy']}")

    best_trained_model = MLP(window_size=101)
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    best_checkpoint = best_trial.checkpoint.to_air_checkpoint()
    best_checkpoint_data = best_checkpoint.to_dict()

    best_trained_model.load_state_dict(best_checkpoint_data["net_state_dict"])

    # test_acc = test_accuracy(best_trained_model, device)
    # print("Best trial test set accuracy: {}".format(test_acc))


In [13]:
main()

0,1
Current time:,2023-05-22 13:03:22
Running for:,00:05:42.94
Memory:,14.2/31.3 GiB

Trial name,status,loc,batch_size,lr,iter,total time (s),loss
train_mlp_c290c_00000,TERMINATED,192.168.1.148:25221,32,0.000104125,5,341.084,0.119332


[2m[36m(func pid=25221)[0m E0522 12:57:41.361542416   25255 fork_posix.cc:76]           Other threads are currently calling into gRPC, skipping fork() handlers
[2m[36m(func pid=25221)[0m E0522 12:57:41.372231545   25255 fork_posix.cc:76]           Other threads are currently calling into gRPC, skipping fork() handlers
  0%|          | 0/10 [00:00<?, ?it/s]


[2m[36m(func pid=25221)[0m [1,  2000] loss: 0.326
[2m[36m(func pid=25221)[0m [1,  4000] loss: 0.133
[2m[36m(func pid=25221)[0m [1,  6000] loss: 0.082
[2m[36m(func pid=25221)[0m [1,  8000] loss: 0.056
[2m[36m(func pid=25221)[0m [1, 10000] loss: 0.041
[2m[36m(func pid=25221)[0m [1, 12000] loss: 0.032
[2m[36m(func pid=25221)[0m [1, 14000] loss: 0.026
[2m[36m(func pid=25221)[0m [1, 16000] loss: 0.021
[2m[36m(func pid=25221)[0m [1, 18000] loss: 0.018
[2m[36m(func pid=25221)[0m [1, 20000] loss: 0.016
[2m[36m(func pid=25221)[0m [1, 22000] loss: 0.014
[2m[36m(func pid=25221)[0m [1, 24000] loss: 0.012
[2m[36m(func pid=25221)[0m [1, 26000] loss: 0.011
[2m[36m(func pid=25221)[0m [1, 28000] loss: 0.010
[2m[36m(func pid=25221)[0m [1, 30000] loss: 0.009
[2m[36m(func pid=25221)[0m [1, 32000] loss: 0.009
[2m[36m(func pid=25221)[0m [1, 34000] loss: 0.008
[2m[36m(func pid=25221)[0m [1, 36000] loss: 0.007
[2m[36m(func pid=25221)[0m [1, 38000] loss:

Trial name,date,done,hostname,iterations_since_restore,loss,node_ip,pid,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,training_iteration,trial_id
train_mlp_c290c_00000,2023-05-22_13-03-22,True,tau,5,0.119332,192.168.1.148,25221,True,341.084,62.2923,341.084,1684775002,5,c290c_00000


 10%|█         | 1/10 [01:07<10:03, 67.03s/it]


[2m[36m(func pid=25221)[0m [2,  2000] loss: 0.133
[2m[36m(func pid=25221)[0m [2,  4000] loss: 0.065
[2m[36m(func pid=25221)[0m [2,  6000] loss: 0.043
[2m[36m(func pid=25221)[0m [2,  8000] loss: 0.032
[2m[36m(func pid=25221)[0m [2, 10000] loss: 0.026
[2m[36m(func pid=25221)[0m [2, 12000] loss: 0.021
[2m[36m(func pid=25221)[0m [2, 14000] loss: 0.019
[2m[36m(func pid=25221)[0m [2, 16000] loss: 0.016
[2m[36m(func pid=25221)[0m [2, 18000] loss: 0.014
[2m[36m(func pid=25221)[0m [2, 20000] loss: 0.013
[2m[36m(func pid=25221)[0m [2, 22000] loss: 0.012
[2m[36m(func pid=25221)[0m [2, 24000] loss: 0.010
[2m[36m(func pid=25221)[0m [2, 26000] loss: 0.010
[2m[36m(func pid=25221)[0m [2, 28000] loss: 0.009
[2m[36m(func pid=25221)[0m [2, 30000] loss: 0.008
[2m[36m(func pid=25221)[0m [2, 32000] loss: 0.008
[2m[36m(func pid=25221)[0m [2, 34000] loss: 0.007
[2m[36m(func pid=25221)[0m [2, 36000] loss: 0.007
[2m[36m(func pid=25221)[0m [2, 38000] loss:

 20%|██        | 2/10 [02:12<08:48, 66.05s/it]


[2m[36m(func pid=25221)[0m [3,  2000] loss: 0.122
[2m[36m(func pid=25221)[0m [3,  4000] loss: 0.063
[2m[36m(func pid=25221)[0m [3,  6000] loss: 0.041
[2m[36m(func pid=25221)[0m [3,  8000] loss: 0.031
[2m[36m(func pid=25221)[0m [3, 10000] loss: 0.025
[2m[36m(func pid=25221)[0m [3, 12000] loss: 0.021
[2m[36m(func pid=25221)[0m [3, 14000] loss: 0.018
[2m[36m(func pid=25221)[0m [3, 16000] loss: 0.016
[2m[36m(func pid=25221)[0m [3, 18000] loss: 0.014
[2m[36m(func pid=25221)[0m [3, 20000] loss: 0.012
[2m[36m(func pid=25221)[0m [3, 22000] loss: 0.011
[2m[36m(func pid=25221)[0m [3, 24000] loss: 0.010
[2m[36m(func pid=25221)[0m [3, 26000] loss: 0.010
[2m[36m(func pid=25221)[0m [3, 28000] loss: 0.009
[2m[36m(func pid=25221)[0m [3, 30000] loss: 0.008
[2m[36m(func pid=25221)[0m [3, 32000] loss: 0.008
[2m[36m(func pid=25221)[0m [3, 34000] loss: 0.007
[2m[36m(func pid=25221)[0m [3, 36000] loss: 0.007
[2m[36m(func pid=25221)[0m [3, 38000] loss:

 30%|███       | 3/10 [03:14<07:29, 64.21s/it]


[2m[36m(func pid=25221)[0m [4,  2000] loss: 0.121
[2m[36m(func pid=25221)[0m [4,  4000] loss: 0.059
[2m[36m(func pid=25221)[0m [4,  6000] loss: 0.041
[2m[36m(func pid=25221)[0m [4,  8000] loss: 0.031
[2m[36m(func pid=25221)[0m [4, 10000] loss: 0.025
[2m[36m(func pid=25221)[0m [4, 12000] loss: 0.021
[2m[36m(func pid=25221)[0m [4, 14000] loss: 0.018
[2m[36m(func pid=25221)[0m [4, 16000] loss: 0.016
[2m[36m(func pid=25221)[0m [4, 18000] loss: 0.014
[2m[36m(func pid=25221)[0m [4, 20000] loss: 0.012
[2m[36m(func pid=25221)[0m [4, 22000] loss: 0.011
[2m[36m(func pid=25221)[0m [4, 24000] loss: 0.010
[2m[36m(func pid=25221)[0m [4, 26000] loss: 0.009
[2m[36m(func pid=25221)[0m [4, 28000] loss: 0.009
[2m[36m(func pid=25221)[0m [4, 30000] loss: 0.008
[2m[36m(func pid=25221)[0m [4, 32000] loss: 0.007
[2m[36m(func pid=25221)[0m [4, 34000] loss: 0.007
[2m[36m(func pid=25221)[0m [4, 36000] loss: 0.006
[2m[36m(func pid=25221)[0m [4, 38000] loss:

 40%|████      | 4/10 [04:17<06:21, 63.60s/it]


[2m[36m(func pid=25221)[0m [5,  2000] loss: 0.117
[2m[36m(func pid=25221)[0m [5,  4000] loss: 0.059
[2m[36m(func pid=25221)[0m [5,  6000] loss: 0.039
[2m[36m(func pid=25221)[0m [5,  8000] loss: 0.030
[2m[36m(func pid=25221)[0m [5, 10000] loss: 0.024
[2m[36m(func pid=25221)[0m [5, 12000] loss: 0.020
[2m[36m(func pid=25221)[0m [5, 14000] loss: 0.017
[2m[36m(func pid=25221)[0m [5, 16000] loss: 0.015
[2m[36m(func pid=25221)[0m [5, 18000] loss: 0.013
[2m[36m(func pid=25221)[0m [5, 20000] loss: 0.012
[2m[36m(func pid=25221)[0m [5, 22000] loss: 0.011
[2m[36m(func pid=25221)[0m [5, 24000] loss: 0.010
[2m[36m(func pid=25221)[0m [5, 26000] loss: 0.009
[2m[36m(func pid=25221)[0m [5, 28000] loss: 0.008
[2m[36m(func pid=25221)[0m [5, 30000] loss: 0.008
[2m[36m(func pid=25221)[0m [5, 32000] loss: 0.008
[2m[36m(func pid=25221)[0m [5, 34000] loss: 0.007
[2m[36m(func pid=25221)[0m [5, 36000] loss: 0.007
[2m[36m(func pid=25221)[0m [5, 38000] loss:

2023-05-22 13:03:22,442	INFO tune.py:945 -- Total run time: 342.95 seconds (342.93 seconds for the tuning loop).


Best trial config: {'lr': 0.00010412540797618336, 'batch_size': 32}
Best trial final validation loss: 0.11933238725527238


KeyError: 'accuracy'

In [None]:
from sklearn.metrics import f1_score,recall_score,precision_score
f1i = []
recalli = []
precisioni = []
for idx in test_idx:
    X,y = load_and_window_nursing_list([idx])
    loss,y_true,y_pred = test_evaluation(DataLoader(TensorDataset(X,y),batch_size=32,shuffle=True),model,criterion,plot=True)
    f1i.append(f1_score(y_true=y_true,y_pred=y_pred.round(),average='macro'))
    recalli.append(recall_score(y_true=y_true,y_pred=y_pred.round(),average='macro'))
    precisioni.append(precision_score(y_true=y_true,y_pred=y_pred.round(),average='macro'))

In [None]:
sns.kdeplot(recalli,bw_adjust=.4)
sns.rugplot(recalli)
print(torch.tensor(recalli).mean()) #.4958
print(torch.tensor(recalli).std()) #.0063

In [None]:
sns.kdeplot(f1i,bw_adjust=.4)
sns.rugplot(f1i)
print(torch.tensor(f1i).mean()) #.4958
print(torch.tensor(f1i).std()) #.0063

In [None]:
sns.kdeplot(precisioni,bw_adjust=.4)
sns.rugplot(precisioni)
print(torch.tensor(precisioni).mean()) #.4958
print(torch.tensor(precisioni).std()) #.0063