In [27]:
%pip install torch parameterfree pandas numpy scikit-learn matplotlib cloudpickle --quiet

Note: you may need to restart the kernel to use updated packages.


### Load the data

In [1]:
import pandas as pd
df = pd.read_csv('sims_features.csv')

### Utility Methods

In [2]:
def is_pow2(i):
    return i & (i-1) == 0

class OnlineMean:
    def __init__(self):
        self.state = [0,0]
    def add(self, num):
        n,v = self.state
        self.state[1] += (num-v)/(n+1)
        self.state[0] += 1
    def __str__(self):
        return str(round(self.state[1],4))
    def __repr__(self):
        return str(round(self.state[1],4))

def plot_results(args,xlabel=None,ylabel='AUC',llabel=None):
    from operator import itemgetter
    from itertools import groupby
    from matplotlib import pyplot as plt
    
    args = sorted(args)
    
    if len(args[0]) == 3:
        for lbl, group in groupby(args,key=itemgetter(0)):
            x,y = zip(*[g[1:] for g in group])
            plt.plot(x,y,label=lbl)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend(title=llabel)
    plt.show()

### Helpful Sources about Architectures

+ If normalizing do it before activation https://forums.fast.ai/t/why-perform-batch-norm-before-relu-and-not-after/81293/4
+ What are residual networks https://arxiv.org/pdf/1512.03385.pdf
+ When building residual networks use pre-activation https://arxiv.org/abs/1603.05027
+ There is not one universal best normalization method https://proceedings.neurips.cc/paper/2021/hash/2578eb9cdf020730f77793e8b58e165a-Abstract.html
+ Adam is the standard optimizer but takes a lot of tuning. COCOB tunes automatically https://github.com/bremen79/parameterfree
+ Smaller batches tend to be better? (this seems to be with fixed epochs though so may not relevant) https://arxiv.org/pdf/1804.07612.pdf

### Basic Architectures

In [3]:
from itertools import chain
import torch

class Linear(torch.nn.Module):
    def __init__(self, in_features, out_features, norm='l'):
        super().__init__()

        if norm == 'l': norm = [torch.nn.LayerNorm  (in_features)]
        if norm == 'b': norm = [torch.nn.BatchNorm1d(in_features)]

        input_norm   = norm or []
        output_layer = torch.nn.Linear(in_features=in_features, out_features=out_features)
        self.layers  = torch.nn.Sequential(*input_norm, output_layer)

    def forward(self, Xs):
        return self.layers(Xs)

class Mlp(torch.nn.Module):
    def __init__(self, in_features, out_features, depth=3, width=None, norm='l'):
        super().__init__()

        def norm_layer(in_width, out_width, norm):
            norm = torch.nn.LayerNorm(out_width) if norm == 'l' else torch.nn.BatchNorm1d(out_width)
            return torch.nn.Sequential(torch.nn.Linear(in_features=in_width, out_features=out_width), norm, torch.nn.ReLU())

        def no_norm_layer(in_width, out_width):
            return torch.nn.Sequential(torch.nn.Linear(in_features=in_width, out_features=out_width), torch.nn.ReLU())

        def layer(in_width, out_width, norm):
            if norm:
                return norm_layer(in_width, out_width, norm)
            else:
                return no_norm_layer(in_width, out_width)

        width = width or in_features

        input_layer   =   layer(in_features, width       , norm)
        hidden_layers = [ layer(width      , width       , norm) for _ in range(depth) ]
        output_layer  =   torch.nn.Linear(in_features=width, out_features=out_features)

        self.layers  = torch.nn.Sequential(input_layer,*hidden_layers,output_layer)

    def forward(self, Xs):
        return self.layers(Xs)

class ResNet(torch.nn.Module):
    def __init__(self, in_features, out_features, depth=3, width=None, norm='l'):
        super().__init__()        

        def norm_layer(in_width, out_width, norm):
            norm = torch.nn.LayerNorm(in_width) if norm == 'l' else torch.nn.BatchNorm1d(width)
            return torch.nn.Sequential(norm,torch.nn.ReLU(),torch.nn.Linear(in_features=in_width,out_features=out_width))

        def no_norm_layer(in_width, out_width):
            return torch.nn.Sequential(torch.nn.ReLU(),torch.nn.Linear(in_features=in_width, out_features=out_width))

        def layer(in_width, out_width, norm):
            return norm_layer(in_width, out_width, norm) if norm else no_norm_layer(in_width, out_width)

        class PreActivationResidualBlock(torch.nn.Module):
            def __init__(self, in_width, norm) -> None:
                super().__init__()
                self.layers = torch.nn.Sequential(layer(in_width,in_width,norm),layer(in_width,in_width,norm))
            def forward(self, Xs):
                return Xs+self.layers(Xs)

        width  = width or in_features

        input_layer   = torch.nn.Linear(in_features=in_features, out_features=width)
        hidden_layers = [PreActivationResidualBlock(width,norm) for _ in range(depth)]
        output_layer  = layer(width,out_features,norm)

        self.layers  = torch.nn.Sequential(input_layer,*hidden_layers,output_layer)

    def forward(self, Xs):
        return self.layers(Xs)

class ResNetDropout(torch.nn.Module):
    def __init__(self, in_features, out_features, depth=3, width=None, drop=[.2,.5]):
        super().__init__()

        def layer(in_width,out_width,drop):
            return torch.nn.Sequential(torch.nn.ReLU(),torch.nn.Dropout(drop),torch.nn.Linear(in_features=in_width, out_features=out_width))
        
        class PreActivationResidualBlock(torch.nn.Module):
            def __init__(self, in_width, drop) -> None:
                super().__init__()
                self.layers = torch.nn.Sequential(layer(in_width,in_width,drop),layer(in_width,in_width,drop))
            def forward(self, Xs):
                return Xs+self.layers(Xs)

        width  = width or in_features

        input_layer   = torch.nn.Linear(in_features=in_features, out_features=width)
        hidden_layers = [PreActivationResidualBlock(width,drop[1]) for _ in range(depth)]
        output_layer  = layer(width,out_features,drop[1])

        self.layers  = torch.nn.Sequential(torch.nn.Dropout(drop[0]),input_layer,*hidden_layers,output_layer)

    def forward(self, Xs):
        return self.layers(Xs)


### Simple Training Loop

In [23]:
%%time
import cloudpickle
from eval import eval
from itertools import product
from concurrent.futures import ProcessPoolExecutor

numthreads = 1
maxworkers = 12

b = 16

def local(device):
    from itertools import product
    from sklearn.metrics import roc_auc_score 
    from collections import defaultdict

    ins  = []
    outs = defaultdict(lambda:([],[]))

    for d,w,n,e in product([4,5],[16],['b'],[1,2]):
        model = cloudpickle.dumps(ResNet(13,1,depth=d,width=w,norm=n))
        key   = (d,w,n,e)
        for _ in range(3):
            for pid in sorted(set(df.participant_id)):
                ins.append( (model,pid,key,b,e,numthreads,device,None) )

    with ProcessPoolExecutor(max_workers=maxworkers) as executor:
        mapper = map if maxworkers == 1 else executor.map
        for k,s,l in mapper(eval,ins):
            outs[k][0].extend(s)
            outs[k][1].extend(l)

    return [ (*k,roc_auc_score(labels,scores)) for k,(scores,labels) in outs.items() ]

local('cpu')

CPU times: total: 62.5 ms
Wall time: 25.6 s


[(4, 16, 'b', 1, 0.559564942543666),
 (4, 16, 'b', 2, 0.5828707672679304),
 (5, 16, 'b', 1, 0.5706564252663544),
 (5, 16, 'b', 2, 0.5920975530904609)]

### Hyperparameter Sweeps

In [15]:
def local(device):
    import parameterfree
    from itertools import product
    from sklearn.metrics import roc_auc_score 

    X_all = torch.tensor(df.iloc[:,7:].to_numpy())
    y_all = torch.tensor(((df['experience_id'] != 1) & (df['phase_id'] == 1)).astype(int).to_numpy())[:,None]
    
    X_all = X_all.float()
    y_all = y_all.float()

    outs = []
    for d,w,n in product([2,4,6],[8,16,24],['l','b',None]):
        scores,labels=[],[]
        for pid in sorted(set(df.participant_id)):
            X_trn = X_all[df.participant_id!=pid]
            y_trn = y_all[df.participant_id!=pid]
            X_tst = X_all[df.participant_id==pid]
            y_tst = y_all[df.participant_id==pid]
    
            model = ResNet(13,1,depth=d,width=w,norm=n).to(device)
            loss  = torch.nn.BCEWithLogitsLoss()
            opt   = parameterfree.COCOB(model.parameters())
    
            model = train_model(X_trn,y_trn,model,opt,None,loss,24,3,device)
            with torch.no_grad():
                scores.extend(model(X_tst.to(device)).squeeze().tolist())
                labels.extend(y_tst.squeeze().tolist())
        
        outs.append((d,w,n,roc_auc_score(labels,scores)))
    return outs
outs1 = local('cpu')
from operator import itemgetter
sorted(outs1,key=itemgetter(-1),reverse=True)

[(4, 24, 'b', 0.6274477844250891),
 (4, 16, 'l', 0.6249837388295325),
 (6, 24, 'b', 0.6247434561223881),
 (2, 16, 'b', 0.6241366275021158),
 (2, 16, 'l', 0.6236882018129866),
 (6, 16, 'b', 0.6228793520631408),
 (2, 24, None, 0.6227844633507652),
 (6, 8, None, 0.6224531180890026),
 (2, 24, 'b', 0.6212180343650184),
 (4, 16, 'b', 0.6212126777441586),
 (6, 16, None, 0.6209134722075553),
 (6, 16, 'l', 0.6198742877607335),
 (6, 24, None, 0.618057628057674),
 (4, 24, None, 0.6173256840787453),
 (4, 16, None, 0.616851240516868),
 (2, 24, 'l', 0.6157642290980828),
 (2, 16, None, 0.612146214322992),
 (4, 24, 'l', 0.6117957382724439),
 (6, 8, 'b', 0.6112187536826769),
 (6, 24, 'l', 0.6107703279935476),
 (2, 8, None, 0.6097028299793235),
 (4, 8, 'l', 0.6047758866355369),
 (4, 8, None, 0.6010951228732302),
 (4, 8, 'b', 0.6002973689808799),
 (2, 8, 'l', 0.5992711934704322),
 (2, 8, 'b', 0.5971844070297231),
 (6, 8, 'l', 0.5901519290722179)]

In [5]:
def local(device):
    import parameterfree
    from itertools import product
    from sklearn.metrics import roc_auc_score 

    X_all = torch.tensor(df.iloc[:,7:].to_numpy())
    y_all = torch.tensor(((df['experience_id'] != 1) & (df['phase_id'] == 1)).astype(int).to_numpy())[:,None]
    
    X_all = X_all.float()
    y_all = y_all.float()

    outs = []
    for d,w,n,e in product([4,6,8],[16,24,32],[.2,.4],[3,6]):
        scores,labels=[],[]
        for pid in sorted(set(df.participant_id)):
            X_trn = X_all[df.participant_id!=pid]
            y_trn = y_all[df.participant_id!=pid]
            X_tst = X_all[df.participant_id==pid]
            y_tst = y_all[df.participant_id==pid]
    
            model = ResNetDropout(13,1,depth=d,width=w,drop=[.2,n]).to(device)
            loss  = torch.nn.BCEWithLogitsLoss()
            opt   = parameterfree.COCOB(model.parameters())
    
            model = train_model(X_trn,y_trn,model,opt,None,loss,24,e,device)
            with torch.no_grad():
                scores.extend(model(X_tst.to(device)).squeeze().tolist())
                labels.extend(y_tst.squeeze().tolist())
        
        outs.append((d,w,n,e,roc_auc_score(labels,scores)))
    return outs
outs2 = local('cpu')
from operator import itemgetter
sorted(outs2,key=itemgetter(-1),reverse=True)

[(6, 32, 0.2, 6, 0.6316634450418352),
 (4, 32, 0.4, 6, 0.6297855668146625),
 (4, 16, 0.2, 3, 0.628193885187719),
 (6, 32, 0.4, 6, 0.6228797346789166),
 (8, 32, 0.2, 6, 0.6222977760840653),
 (6, 24, 0.4, 6, 0.620748947424001),
 (8, 16, 0.2, 6, 0.6207160424672902),
 (8, 16, 0.4, 6, 0.6206747199635138),
 (6, 32, 0.2, 3, 0.6193355647485372),
 (4, 16, 0.2, 6, 0.6183920342456423),
 (8, 24, 0.2, 6, 0.6179512608720272),
 (8, 32, 0.4, 6, 0.6177836751622673),
 (6, 24, 0.2, 3, 0.6177779359256317),
 (4, 24, 0.4, 6, 0.6169564598551875),
 (6, 16, 0.2, 3, 0.6167865784507734),
 (8, 24, 0.4, 6, 0.6156701056172588),
 (6, 32, 0.4, 3, 0.6155859301466031),
 (6, 24, 0.2, 6, 0.6155407814850696),
 (8, 32, 0.4, 3, 0.6148130462796737),
 (4, 24, 0.2, 6, 0.6140524061175672),
 (8, 32, 0.2, 3, 0.6133797675838731),
 (6, 24, 0.4, 3, 0.6133782371207703),
 (6, 16, 0.2, 6, 0.6129183329583698),
 (4, 32, 0.2, 3, 0.6120635693154391),
 (4, 24, 0.4, 3, 0.6104190867114481),
 (8, 16, 0.2, 3, 0.6104175562483452),
 (6, 16, 0.4, 

In [6]:
def local(device):
    import parameterfree
    from itertools import product
    from sklearn.metrics import roc_auc_score 

    X_all = torch.tensor(df.iloc[:,7:].to_numpy())
    y_all = torch.tensor(((df['experience_id'] != 1) & (df['phase_id'] == 1)).astype(int).to_numpy())[:,None]
    
    X_all = X_all.float()
    y_all = y_all.float()

    outs = []
    for d,w,n,e in product([4],[16,24],['b'],[3,4,5]):
        scores,labels=[],[]
        for _ in range(3):
            for pid in sorted(set(df.participant_id)):
                X_trn = X_all[df.participant_id!=pid]
                y_trn = y_all[df.participant_id!=pid]
                X_tst = X_all[df.participant_id==pid]
                y_tst = y_all[df.participant_id==pid]
        
                model = ResNet(13,1,depth=d,width=w,norm=n).to(device)
                loss  = torch.nn.BCEWithLogitsLoss()
                opt   = parameterfree.COCOB(model.parameters())
        
                model = train_model(X_trn,y_trn,model,opt,None,loss,24,e,device)
                with torch.no_grad():
                    scores.extend(model(X_tst.to(device)).squeeze().tolist())
                    labels.extend(y_tst.squeeze().tolist())
        outs.append((d,w,n,e,roc_auc_score(labels,scores)))
    return outs
outs3 = local('cpu')
from operator import itemgetter
sorted(outs3,key=itemgetter(-1),reverse=True)

[(4, 16, 'b', 5, 0.6304536139590479),
 (4, 24, 'b', 5, 0.6247887748353774),
 (4, 16, 'b', 4, 0.6229591487088078),
 (4, 24, 'b', 3, 0.6180165181182173),
 (4, 24, 'b', 4, 0.6167185578684254),
 (4, 16, 'b', 3, 0.6072754304725066)]

In [7]:
def local(device):
    import parameterfree
    from itertools import product
    from sklearn.metrics import roc_auc_score
    
    X_all = torch.tensor(df.iloc[:,7:].to_numpy())
    y_all = torch.tensor(((df['experience_id'] != 1) & (df['phase_id'] == 1)).astype(int).to_numpy())[:,None]
    
    X_all = X_all.float()
    y_all = y_all.float()

    outs = []
    for d,w,n,e in product([4,5],[16],['b'],[6,7]):
        scores,labels=[],[]
        for _ in range(3):
            for pid in sorted(set(df.participant_id)):
                X_trn = X_all[df.participant_id!=pid]
                y_trn = y_all[df.participant_id!=pid]
                X_tst = X_all[df.participant_id==pid]
                y_tst = y_all[df.participant_id==pid]
        
                model = ResNet(13,1,depth=d,width=w,norm=n).to(device)
                loss  = torch.nn.BCEWithLogitsLoss()
                opt   = parameterfree.COCOB(model.parameters())
        
                model = train_model(X_trn,y_trn,model,opt,None,loss,24,e,device)
                with torch.no_grad():
                    scores.extend(model(X_tst.to(device)).squeeze().tolist())
                    labels.extend(y_tst.squeeze().tolist())
        outs.append((d,w,n,e,roc_auc_score(labels,scores)))
    return outs
outs3 = local('cpu')
from operator import itemgetter
sorted(outs3,key=itemgetter(-1),reverse=True)

[(4, 16, 'b', 6, 0.6272364954911707),
 (5, 16, 'b', 7, 0.624532804881429),
 (4, 16, 'b', 7, 0.619482446693545),
 (5, 16, 'b', 6, 0.6168667151993521)]

In [522]:
def local(device):
    import parameterfree
    from itertools import product
    from sklearn.metrics import roc_auc_score

    sigmoid = torch.nn.Sigmoid()
    
    X_all = torch.tensor(df.iloc[:,7:].to_numpy())
    y_all = torch.tensor(((df['experience_id'] != 1) & (df['phase_id'] == 1)).astype(int).to_numpy())[:,None]
    
    X_all = X_all.float()
    y_all = y_all.float()

    outs = []
    for d,w,n,e in product([4,5],[16],['b'],[6,7]):
        scores,labels=[],[]
        for _ in range(1):
            for pid in sorted(set(df.participant_id))[:1]:
                X_trn = X_all[df.participant_id!=pid]
                y_trn = y_all[df.participant_id!=pid]
                X_tst = X_all[df.participant_id==pid]
                y_tst = y_all[df.participant_id==pid]
        
                model = ResNet(13,1,depth=d,width=w,norm=n).to(device)
                loss  = torch.nn.BCEWithLogitsLoss()
                opt   = parameterfree.COCOB(model.parameters())
        
                model = train_model(X_trn,y_trn,model,opt,None,loss,24,e,device)
                with torch.no_grad():
                    scores.extend(sigmoid(model(X_tst.to(device))).squeeze().tolist())
                    labels.extend(y_tst.squeeze().tolist())
        outs.append((d,w,n,e,roc_auc_score(labels,scores)))
    return outs
outs3 = local('cpu')
from operator import itemgetter
sorted(outs3,key=itemgetter(-1),reverse=True)

[(5, 16, 'b', 7, 0.7040816326530612),
 (4, 16, 'b', 7, 0.5918367346938775),
 (4, 16, 'b', 6, 0.3137755102040816),
 (5, 16, 'b', 6, 0.24744897959183673)]

In [148]:
import timeit


def func1():
    torch.max(L, torch.abs(grad), out=L)
    sum_negative_gradients.sub_(grad)
    grad_norm_sum.add_(torch.abs(grad))
    reward.addcmul_(grad, data.sub(x0), value=-1)
    torch.maximum(reward, torch.zeros_like(reward), out=reward)
    den = torch.maximum(grad_norm_sum.add(L), L.mul(alpha)).mul(L)
    data.copy_(reward.add(L).mul(sum_negative_gradients).div(den).add(x0))
    print(data)
    
def func2():
    absgrad = torch.abs(grad)
    torch.maximum(L, absgrad, out=L)
    sum_negative_gradients.sub_(grad)
    grad_norm_sum.add_(absgrad)
    reward.sub_(grad*old).clamp_(0)
    den = torch.maximum(grad_norm_sum+L, L*alpha)*L
    new = reward.add(L)*sum_negative_gradients.div(den)
    data.sub_(old).add_(new)
    old.copy_(new)
    print(data)

sum_negative_gradients, grad_norm_sum, L, reward, x0 = state['sum_negative_gradients'],state['grad_norm_sum'],state['L'],state['reward'],state['x0']
data,grad,sum_negative_gradients = torch.clone(p.data),torch.clone(p.grad),torch.clone(sum_negative_gradients)
grad_norm_sum,L,reward,x0 = torch.clone(grad_norm_sum),torch.clone(L),torch.clone(reward),torch.clone(x0)
alpha,Z,O = 100,torch.zeros_like(reward),torch.ones_like(reward)
print(timeit.timeit(func1,number=3))

sum_negative_gradients, grad_norm_sum, L, reward, x0 = state['sum_negative_gradients'],state['grad_norm_sum'],state['L'],state['reward'],state['x0']
data,grad,sum_negative_gradients = torch.clone(p.data),torch.clone(p.grad),torch.clone(sum_negative_gradients)
grad_norm_sum,L,reward,x0 = torch.clone(grad_norm_sum),torch.clone(L),torch.clone(reward),torch.clone(x0)
old = p.data-x0
alpha,Z = 100,torch.zeros_like(reward)
print(timeit.timeit(func2,number=3))

tensor([-0.2075])
tensor([-0.2182])
tensor([-0.2298])
0.0032805311493575573
tensor([-0.2075])
tensor([-0.2182])
tensor([-0.2298])
0.0017711040563881397


In [304]:
%%time
from torch.utils.data import TensorDataset, DataLoader
from itertools import islice

def train_model(Xs, ys, model, opt, sched, loss, batch=8, epoch=1, device='cpu',autotype=None):
    loader = DataLoader(TensorDataset(Xs,ys),batch_size=batch,pin_memory=(device!='cpu'),drop_last=True,shuffle=False)
    for _ in range(epoch):
        for X,y in loader:
            opt.zero_grad()
            X,y = X.to(device),y.to(device)
            if not autotype:
                l = loss(model(X),y)
            else:
                with torch.autocast(device_type=device,dtype=autotype):
                    l = loss(model(X),y)
            l.backward()
            opt.step()
        if sched: sched.step()
    return model.eval()

def local(device):
    import parameterfree
    from itertools import product
    from sklearn.metrics import roc_auc_score 

    sigmoid = torch.nn.Sigmoid()
    
    X_all = torch.tensor(df.iloc[:,7:].to_numpy())
    y_all = torch.tensor(((df['experience_id'] != 1) & (df['phase_id'] == 1)).astype(int).to_numpy())[:,None]

    X_all = X_all.float()
    y_all = y_all.float()

    outs = []
    for d,w,n,e in product([4,5],[16],['b'],[1]):
        scores,labels=[],[]
        for _ in range(1):
            for pid in sorted(set(df.participant_id)):
                
                X_trn = X_all[df.participant_id!=pid]
                y_trn = y_all[df.participant_id!=pid]
                X_tst = X_all[df.participant_id==pid]
                y_tst = y_all[df.participant_id==pid]

                #model = ResNet(13,1,depth=d,width=w,norm=n).to(device)
                model = copy.deepcopy(M).to(device)
                loss  = torch.nn.BCEWithLogitsLoss()

                opt   = parameterfree.COCOB(model.parameters())
                #opt   = COCOB(model.parameters(),i=None)

                model = train_model(X_trn,y_trn,model,opt,None,loss,16,1,device)
                #model = train_model(X_trn,y_trn,model,opt,None,loss,16,e,device,autotype=torch.bfloat16)
                
                with torch.no_grad():
                    scores.extend(sigmoid(model(X_tst.to(device))).squeeze().tolist())
                    labels.extend(y_tst.squeeze().tolist())
        outs.append((d,w,n,e,roc_auc_score(labels,scores)))
    return outs
local('cpu')

CPU times: user 39min 50s, sys: 1.2 s, total: 39min 51s
Wall time: 2min 1s


[(4, 16, 'b', 1, 0.536012562041148), (5, 16, 'b', 1, 0.536012562041148)]

ValueError: Found array with 0 sample(s) (shape=(0,)) while a minimum of 1 is required.

In [105]:
t1 = torch.tensor([1,2])
t2 = torch.tensor([3,4])

import timeit

print(timeit.timeit(lambda:t1.add(t2),number=100000))
print(timeit.timeit(lambda:t1.sub(t2),number=100000))

torch.maximum(t1,t2)
#t1.clamp(min=t2)

0.27971268631517887
0.2534223939292133


tensor([3, 4])

In [None]:
import copy
M = ResNet(13,1,depth=4,width=16,norm='b')
copy.deepcopy(M)


In [19]:
%%time
def local(device):
    import parameterfree
    from itertools import product
    from sklearn.metrics import roc_auc_score 

    sigmoid = torch.nn.Sigmoid()
    
    X_all = torch.tensor(df.iloc[:,7:].to_numpy())
    y_all = torch.tensor(((df['experience_id'] != 1) & (df['phase_id'] == 1)).astype(int).to_numpy())[:,None]
    
    X_all = X_all.float()
    y_all = y_all.float()

    outs = []
    for d,w,n,e in product([4,5],[16],['b'],[5]):
        scores,labels=[],[]
        for _ in range(1):
            for pid in sorted(set(df.participant_id))[:1]:
                X_trn = X_all[df.participant_id!=pid]
                y_trn = y_all[df.participant_id!=pid]
                X_tst = X_all[df.participant_id==pid]
                y_tst = y_all[df.participant_id==pid]
        
                model = ResNet2(13,1,depth=d,width=w,norm=n).to(device)
                loss  = torch.nn.BCELoss()
                opt   = parameterfree.COCOB(model.parameters())
        
                model = train_model(X_trn,y_trn,model,opt,None,loss,24,e,device)
                with torch.no_grad():
                    scores.extend((model(X_tst)).squeeze().tolist())
                    labels.extend(y_tst.squeeze().tolist())
        outs.append((d,w,n,e,roc_auc_score(labels,scores)))
    return outs
outs3 = local('cpu')

CPU times: user 4min, sys: 163 ms, total: 4min
Wall time: 12.7 s


In [39]:
from torch.utils.data import TensorDataset, DataLoader
def train_model(Xs, ys, model, opt, sched, loss, batch=8, epoch=1, device='cpu'):
    loader = DataLoader(TensorDataset(Xs,ys),batch_size=batch,drop_last=True,shuffle=True)
    for _ in range(epoch):
        for X,y in loader:
            #opt.zero_grad()
            loss(model(X),y).backward()
            #opt.step()
        if sched: sched.step()
    return model.eval()