In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
#Expects timeseries.py in same folder as nb. 
from fastai.vision import *
from pathlib import Path
import pdb
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from timeseries import TimeSeriesItem, TimeSeriesList, UCRArchive
from scipy.signal import resample

from sklearn.cluster import KMeans
from sklearn.utils.linear_assignment_ import linear_assignment
from scipy.optimize import linear_sum_assignment
import sklearn.metrics

In [None]:
#init UCR archive helper class
ucr = UCRArchive()

#### Step 1: Create dataloaders
To run this notebook, you'll need the archive available here <https://www.cs.ucr.edu/~eamonn/time_series_data_2018/>
The password to the archive is 'someone'. Place the unzipped directory in the same folder as this notebook.

The UCR Time Series Classification Archive contains 128 univariate time series data sets of varying lengths and numbers of classes. They are presorted into training and validation sets; however, the ratio between the two differs wildly from set to set. Below, I combine all the data and make my own validation set, set at 30% of the total data.

In [None]:
#Below are some datasets that seem to have good transfer properties:
#CinCECGTorso, MedicalImages, TwoPatterns, InsectWingbeatSound, UWaveGestureLibraryAll, WordSynonyms, SemgHandGenderCh2
dataset_name = "InsectWingbeatSound"
#Visualize class distribution of initial train and validation sets
vc = ucr.category_distribution(dataset_name)

In [None]:
#All input is resampled down to a length of 96
class Resample(PreProcessor):
    def process_one(self,item):
        return np.concatenate([item[[0]],resample(item[1:],96)]) if len(item) > 97 else item

In [None]:
#Here we create the classification dataset. The fastai data_block api doesn't directly support what I am trying to do, namely
#remove most of the labels AFTER creating the validation set. Thus, a bit of hackery is required. 
#Not sure about the warning, seems to be ok...

src = TimeSeriesList.from_csv_list(ucr.get_csv_files(dataset_name),labelCol=0,processor=Resample())
data = src.split_by_csv_name([f"{dataset_name}_TEST.tsv"])
data = data.label_from_col()
bs = min(64,len(data.x)//50)
data = data.databunch(bs=bs,num_workers=0)
len(data.train_ds) #We end up with a training data set consisting of 10% of the training data as well as the full validation set

In [None]:
#Here we create the autocoder dataset. The differences are the data is labeled with itself, and that we use 
#the full amount of training data

src = TimeSeriesList.from_csv_list(ucr.get_csv_files(dataset_name),labelCol=0,processor=Resample())
dataAE = src.split_by_csv_name([f"{dataset_name}_TEST.tsv"])
dataAE = dataAE.label_from_col()
dataAE = dataAE.databunch(bs=bs,num_workers=0)
len(dataAE.train_ds) #Should be ~10x the classification size

In [None]:
#Show a few inputs from the dataset
data.show_batch()

#### Step 2: Define models

In [None]:
#Helper functions, modified to 1d from fastai
def create_head_1d(nf:int, nc:int, lin_ftrs:Optional[Collection[int]]=None, ps:Floats=0.5, bn_final:bool=False):
    "Model head that takes `nf` features, runs through `lin_ftrs`, and about `nc` classes."
    lin_ftrs = [nf, 512, nc] if lin_ftrs is None else [nf] + lin_ftrs + [nc]
    ps = listify(ps)
    if len(ps)==1: ps = [ps[0]/2] * (len(lin_ftrs)-2) + ps
    actns = [nn.ReLU(inplace=True)] * (len(lin_ftrs)-2) + [None]
    layers = []
    for ni,no,p,actn in zip(lin_ftrs[:-1],lin_ftrs[1:],ps,actns):
        layers += bn_drop_lin(ni,no,True,p,actn)
    if bn_final: layers.append(nn.BatchNorm1d(lin_ftrs[-1], momentum=0.01))
    return nn.Sequential(*layers)

def conv1d(ni:int, nf:int, ks:int=3, stride:int=1, padding:int=None, bias=False, init:LayerFunc=nn.init.kaiming_normal_) -> nn.Conv1d:
    "Create and initialize `nn.Conv1d` layer. `padding` defaults to `ks//2`."
    if padding is None: padding = ks//2
    return init_default(nn.Conv1d(ni, nf, kernel_size=ks, stride=stride, padding=padding, bias=bias), init)

In [None]:
#This is the hero network, which serves as a backbone to all the other models
class HeroConvnet(nn.Module):
    def __init__(self, num_layers=3, start_nf=16, drop_p = 0.0):
        super().__init__()
        
        layers = [nn.Sequential(conv1d(1,start_nf,3,1),nn.LeakyReLU(0.2))] #First layer is stride 1, creates initial set of filters
        nf = start_nf
        for d in np.linspace(drop_p/10,drop_p,num_layers): #Then num_layers stride 2 convs, doubling the number of filters each layer
            layers.append(nn.Sequential(conv1d(nf,nf*2,3,2),nn.LeakyReLU(0.2)))
            nf *= 2
        
        self.nf = nf
        self.layers = nn.ModuleList(layers)
        self.avg = nn.AdaptiveAvgPool1d(1)
            
            
    def forward(self, x):
        actvns = [x]
        
        for l in self.layers:
            actvns.append(l(actvns[-1]))

        return self.avg(actvns[-1]), actvns[1:]

In [None]:
class BasicClassifier(nn.Module):
    def __init__(self,numClasses,num_layers=3,start_nf=16,drop_p=0.0):
        super().__init__()
        self.conv = HeroConvnet(num_layers,start_nf,drop_p=drop_p)
        self.out = create_head_1d(self.conv.nf,numClasses,ps=0.0)
              
    def forward(self,ts):
        ts = self.conv(ts.unsqueeze(1))[0].squeeze(-1)
        return self.out(ts)

In [None]:
#Basic variational autoencoder with hero net serving as encoder and a few linear layers as decoder
class TSAutoencoder(nn.Module):
    def __init__(self,seqLen,latentDim=12,num_layers=3,start_nf=16):
        super().__init__()
        self.conv = HeroConvnet(num_layers,start_nf)
        self.ls = torch.nn.Linear(self.conv.nf,latentDim)
        self.out = create_head_1d(latentDim,seqLen,lin_ftrs=[256,512],ps=0.0)
        
    def getLS(self,ts):
        ts, _ = self.conv(ts.unsqueeze(1))
        ts = ts.squeeze(-1)
        ls = self.ls(ts)
        return ls

    def forward(self,ts):
        seqLen = ts.shape[1]
        ots = ts
        ts, _ = self.conv(ts.unsqueeze(1))
        ts = ts.squeeze(-1)

        ls = self.ls(ts)
               
        return self.out(ls), ots

In [None]:
class Cluster(nn.Module):
    def __init__(self,num_classes,num_layers,start_nf):
        super().__init__()
        self.conv = HeroConvnet(num_layers,start_nf)
        self.ps = nn.Linear(self.conv.nf,num_classes)
        
    def forward(self,ts):
        ts, _ = self.conv(ts.unsqueeze(1))
        return self.ps(ts.squeeze(-1))

In [None]:
class MOA(nn.Module):
    def __init__(self,num_classes,seqLen,latentDim=12,num_layers=3,start_nf=16):
        super().__init__()
        self.aes = nn.ModuleList([TSAutoencoder(seqLen,latentDim,6,4) for _ in range(num_classes)])
        self.cluster = Cluster(num_classes,num_layers,start_nf)
        
    def getCluster(self,ts):
        ps = self.cluster(ts)
        ps = torch.softmax(ps,dim=1)
        return torch.argmax(ps,dim=1)
        
    def forward(self, ts):
        recon = [ae(ts)[0] for ae in self.aes]
        ps = self.cluster(ts)
        return ps, recon, ts

In [None]:
#The sidekick network mirrors the structure of the hero, but concats the output of each layer of the hero to the input of each 
#layer of the sidekick
class SidekickConvnet(nn.Module):
    def __init__(self, num_classes, num_layers=3, start_nf=16, start_nf_hero=16):
        super().__init__()
        
        self.hero = HeroConvnet(num_layers,start_nf_hero)
        
        layers = [nn.Sequential(conv1d(1,start_nf,3,1),nn.LeakyReLU(0.2))] 
        nf = start_nf
        nf_hero = start_nf_hero
        for _ in range(num_layers):
            layers.append(nn.Sequential(conv1d(nf+nf_hero,nf*2,3,2),nn.LeakyReLU(0.2)))
            nf *= 2
            nf_hero *= 2
        
        self.layers = nn.ModuleList(layers)
        #self.ll = conv1d(nf+nf_hero,nf+nf_hero,3,1)
        self.avg = nn.AdaptiveAvgPool1d(1)
        self.out = create_head_1d(nf + nf_hero,num_classes,ps=0.0)
    
    def forward(self,ts):
        ts = ts.unsqueeze(1)
        pt, actvns = self.hero(ts)
        
        x = self.layers[0](ts)
        for l,a in zip(self.layers[1:],actvns):
            x = l(torch.cat([x,a],dim=1))
            
        x = torch.cat([self.avg(x),pt],dim=1).squeeze(-1)
        #x = self.ll(torch.cat([x,actvns[-1]],dim=1))
        #x = self.avg(x).squeeze(-1)
        return self.out(x)

#### Step 3: Get Baseline

In [None]:
learnBase = Learner(data,BasicClassifier(data.train_ds.c,8,8,drop_p=0.0),loss_func=F.cross_entropy,metrics=[accuracy])

In [None]:
# Run one batch through the model to make sure the output size is correct
learnBase.model(next(iter(data.train_dl))[0]).size()

In [None]:
#Learning rate finder
learnBase.lr_find()
learnBase.recorder.plot()

In [None]:
learnBase.fit_one_cycle(20,1e-3,wd=0.2)

#### Step 4: Autoencoder

In [16]:
class ClusterAccuracy(LearnerCallback):
    _order=-20
    def __init__(self,learn):
        super().__init__(learn)
    def on_train_begin(self, **kwargs):
        self.learn.recorder.add_metric_names(["cluster_accuracy"])
        
    def on_epoch_end(self,**kwargs):
        self.learn.model.eval()
        dl = torch.utils.data.DataLoader(self.learn.data.train_ds,batch_size=64,collate_fn=data_collate)
        labels_pred, labels_true = [],[]
        
        with torch.no_grad():
            for ts,labels in iter(dl):
                ts = ts.cuda()
                labels_pred.append(self.learn.model.getCluster(ts).cpu().detach().numpy())
                labels_true.append(labels.numpy())
        labels_pred = np.concatenate(labels_pred)
        labels_true = np.concatenate(labels_true)
        labels_true = labels_true.astype(np.int64)
        assert labels_pred.size == labels_true.size
        
        D = max(labels_pred.max(), labels_true.max()) + 1
        w = np.zeros((D, D), dtype=np.int64)
        for i in range(labels_pred.size):
            w[labels_pred[i], labels_true[i]] += 1
        ind = linear_assignment(w.max() - w)
        #self.metric = torch.tensor(sum([w[i, j] for i, j in ind]) * 1.0 / labels_pred.size)
        self.learn.recorder.add_metrics([torch.tensor(sum([w[i, j] for i, j in ind]) * 1.0 / labels_pred.size)])

In [17]:
def AELoss(pred,target):
    return F.mse_loss(pred[0],pred[1])

In [18]:
class MOALoss(torch.nn.Module):
    def forward(self,p,target):
        p,recon, ts = p
        p = torch.softmax(p,dim=1)
        rerrs = torch.stack([torch.exp(-0.5 * ((ts-r)**2).sum(dim=1)) for r in recon],dim=1)
        berr = (p * rerrs).sum(dim=1)
        berr = torch.log(berr)
        return -berr.sum()

In [19]:
learnAE = Learner(dataAE,MOA(data.train_ds.c,len(data.train_ds[0][0].data),latentDim=12,num_layers=8,start_nf=8),
                  loss_func=MOALoss(),callback_fns=[ClusterAccuracy])

In [None]:
learnAE.model(next(iter(data.train_dl))[0])[0].size()

In [None]:
learnAEInit = Learner(dataAE,TSAutoencoder(len(data.train_ds[0][0].data),num_layers=8,start_nf=8),loss_func=AELoss)

In [None]:
learnAEInit.fit_one_cycle(20,1e-2)

In [None]:
learnAEInit.model.eval()
out = []
dl = torch.utils.data.DataLoader(dataAE.train_ds,batch_size=64,collate_fn=data_collate)
with torch.no_grad():
    for x,y in iter(dl):
        ls = learnAEInit.model.getLS(x.cuda())
        out.append(ls)
out = torch.cat(out).cpu().detach().numpy()

kmeans = KMeans(n_clusters=data.train_ds.c,n_init=20)
kmeans.fit_predict(out)

def assignClusters(x):
    x = resample(x[1:],96)
    ls = learnAEInit.model.getLS(torch.tensor(x,dtype=torch.float).unsqueeze(0).cuda())
    pred =  kmeans.predict(ls.cpu().detach().numpy())[0]
    return pred

In [None]:
src = TimeSeriesList.from_csv_list(ucr.get_csv_files(dataset_name),labelCol=0,processor=Resample())
dataAEInit = src.filter_out_idx(valIdxs)
dataAEInit = dataAEInit.no_split()
dataAEInit = dataAEInit.label_from_func(assignClusters)
dataAEInit = dataAEInit.databunch(bs=bs,num_workers=0)

In [None]:
plt.hist(dataAEInit.train_ds.y.items)

In [20]:
learnClusterInit = Learner(data,learnAE.model.cluster,loss_func=F.cross_entropy,metrics=accuracy)

In [21]:
learnClusterInit.fit_one_cycle(20,1e-3)

epoch,train_loss,valid_loss,accuracy
1,1.877053,1.377259,0.481818
2,1.388547,1.470252,0.522222
3,1.218181,1.601922,0.502525
4,1.315376,1.415524,0.451010
5,1.258670,1.499835,0.481313
6,1.225418,1.446105,0.486869
7,1.147927,1.547378,0.491919
8,1.126443,1.365668,0.559596
9,0.946688,1.723986,0.519697
10,0.787562,1.612592,0.548485
11,0.736719,2.054502,0.508081
12,0.646941,1.894672,0.581818
13,0.505790,2.188583,0.534343
14,0.314599,2.206996,0.543434
15,0.202570,2.247856,0.576768
16,0.105510,2.412492,0.583333
17,0.049539,2.560115,0.572727
18,0.029894,2.576289,0.576263
19,0.016012,2.599968,0.578788
20,0.011139,2.605683,0.578788


In [None]:
for c,m in enumerate(learnAE.model.aes):
    src = TimeSeriesList.from_csv_list(ucr.get_csv_files(dataset_name),labelCol=0,processor=Resample())
    dataExpert = src.filter_out_idx(valIdxs)
    dataExpert = dataExpert.no_split()
    dataExpert.train.filter_by_func(lambda x: assignClusters(x) == c)
    dataExpert = dataExpert.label_from_self()
    dataExpert = dataExpert.databunch(bs=4,num_workers=0)
    
    learnExpert = Learner(dataExpert,m,loss_func=AELoss)
    learnExpert.fit_one_cycle(5,1e-2)

In [22]:
for c,m in enumerate(learnAE.model.aes):
    d = data.train_ds.x.items.copy()
    src = TimeSeriesList.from_numpy(d,labeled=0)
    src = src.filter_by_func(lambda x: int(x[0]) - 1 == c)
    dataExpert = src.no_split()
    dataExpert = dataExpert.label_from_self()
    dataExpert = dataExpert.databunch(bs=4,num_workers=0)
    
    learnExpert = Learner(dataExpert,m,loss_func=AELoss)
    learnExpert.fit_one_cycle(10,1e-2)

epoch,train_loss,valid_loss
1,1.060426,Unnamed: 2_level_1
2,0.985367,Unnamed: 2_level_2
3,0.965654,Unnamed: 2_level_3
4,0.898124,Unnamed: 2_level_4
5,0.802084,Unnamed: 2_level_5
6,0.716432,Unnamed: 2_level_6
7,0.635550,Unnamed: 2_level_7
8,0.597151,Unnamed: 2_level_8
9,0.547002,Unnamed: 2_level_9
10,0.503328,Unnamed: 2_level_10


epoch,train_loss,valid_loss
1,1.099081,Unnamed: 2_level_1
2,1.032355,Unnamed: 2_level_2
3,0.951976,Unnamed: 2_level_3
4,0.893443,Unnamed: 2_level_4
5,0.830667,Unnamed: 2_level_5
6,0.761764,Unnamed: 2_level_6
7,0.695304,Unnamed: 2_level_7
8,0.643739,Unnamed: 2_level_8
9,0.590973,Unnamed: 2_level_9
10,0.552113,Unnamed: 2_level_10


epoch,train_loss,valid_loss
1,1.077224,Unnamed: 2_level_1
2,1.008290,Unnamed: 2_level_2
3,0.955195,Unnamed: 2_level_3
4,0.861006,Unnamed: 2_level_4
5,0.757190,Unnamed: 2_level_5
6,0.676751,Unnamed: 2_level_6
7,0.598056,Unnamed: 2_level_7
8,0.535293,Unnamed: 2_level_8
9,0.483133,Unnamed: 2_level_9
10,0.441026,Unnamed: 2_level_10


epoch,train_loss,valid_loss
1,1.056646,Unnamed: 2_level_1
2,0.994214,Unnamed: 2_level_2
3,0.927890,Unnamed: 2_level_3
4,0.809273,Unnamed: 2_level_4
5,0.723959,Unnamed: 2_level_5
6,0.644767,Unnamed: 2_level_6
7,0.597317,Unnamed: 2_level_7
8,0.532441,Unnamed: 2_level_8
9,0.483826,Unnamed: 2_level_9
10,0.454376,Unnamed: 2_level_10


epoch,train_loss,valid_loss
1,1.067476,Unnamed: 2_level_1
2,1.002641,Unnamed: 2_level_2
3,0.987068,Unnamed: 2_level_3
4,0.897574,Unnamed: 2_level_4
5,0.811551,Unnamed: 2_level_5
6,0.744630,Unnamed: 2_level_6
7,0.671986,Unnamed: 2_level_7
8,0.609906,Unnamed: 2_level_8
9,0.562215,Unnamed: 2_level_9
10,0.532331,Unnamed: 2_level_10


epoch,train_loss,valid_loss
1,1.031040,Unnamed: 2_level_1
2,0.941828,Unnamed: 2_level_2
3,0.910953,Unnamed: 2_level_3
4,0.869127,Unnamed: 2_level_4
5,0.778186,Unnamed: 2_level_5
6,0.727049,Unnamed: 2_level_6
7,0.657575,Unnamed: 2_level_7
8,0.602799,Unnamed: 2_level_8
9,0.558779,Unnamed: 2_level_9
10,0.528167,Unnamed: 2_level_10


epoch,train_loss,valid_loss
1,1.081624,Unnamed: 2_level_1
2,1.030462,Unnamed: 2_level_2
3,0.953761,Unnamed: 2_level_3
4,0.867896,Unnamed: 2_level_4
5,0.775339,Unnamed: 2_level_5
6,0.710761,Unnamed: 2_level_6
7,0.633717,Unnamed: 2_level_7
8,0.572890,Unnamed: 2_level_8
9,0.528989,Unnamed: 2_level_9
10,0.492527,Unnamed: 2_level_10


epoch,train_loss,valid_loss
1,1.048855,Unnamed: 2_level_1
2,0.999273,Unnamed: 2_level_2
3,0.907800,Unnamed: 2_level_3
4,0.824950,Unnamed: 2_level_4
5,0.746260,Unnamed: 2_level_5
6,0.680167,Unnamed: 2_level_6
7,0.642973,Unnamed: 2_level_7
8,0.598781,Unnamed: 2_level_8
9,0.548385,Unnamed: 2_level_9
10,0.506800,Unnamed: 2_level_10


epoch,train_loss,valid_loss
1,1.063784,Unnamed: 2_level_1
2,0.957814,Unnamed: 2_level_2
3,0.893077,Unnamed: 2_level_3
4,0.836349,Unnamed: 2_level_4
5,0.746387,Unnamed: 2_level_5
6,0.693338,Unnamed: 2_level_6
7,0.648483,Unnamed: 2_level_7
8,0.593870,Unnamed: 2_level_8
9,0.554925,Unnamed: 2_level_9
10,0.515399,Unnamed: 2_level_10


epoch,train_loss,valid_loss
1,1.094623,Unnamed: 2_level_1
2,1.038693,Unnamed: 2_level_2
3,0.977180,Unnamed: 2_level_3
4,0.878901,Unnamed: 2_level_4
5,0.819255,Unnamed: 2_level_5
6,0.744536,Unnamed: 2_level_6
7,0.688277,Unnamed: 2_level_7
8,0.631247,Unnamed: 2_level_8
9,0.579034,Unnamed: 2_level_9
10,0.530782,Unnamed: 2_level_10


epoch,train_loss,valid_loss
1,1.070312,Unnamed: 2_level_1
2,1.034127,Unnamed: 2_level_2
3,0.974269,Unnamed: 2_level_3
4,0.835119,Unnamed: 2_level_4
5,0.723476,Unnamed: 2_level_5
6,0.634145,Unnamed: 2_level_6
7,0.564484,Unnamed: 2_level_7
8,0.500232,Unnamed: 2_level_8
9,0.447612,Unnamed: 2_level_9
10,0.402224,Unnamed: 2_level_10


In [24]:
torch.save(learnAE.model.state_dict(),"init.pth")

In [27]:
learnAE.model.load_state_dict(torch.load("init.pth"))

In [None]:
learnAE.lr_find()
learnAE.recorder.plot()

In [None]:
learnAE = Learner(dataAE,learnAE.model,loss_func=MOALoss(),callback_fns=[ClusterAccuracy])

In [28]:
learnAE.fit_one_cycle(1,1e-4)

epoch,train_loss,valid_loss,cluster_accuracy
1,51.454105,60.328915,0.990909


In [None]:
x,y = next(iter(dataAE.train_dl))
learnAE.model.eval()
ps,recon,ts = learnAE.model(x)
ps = torch.softmax(ps,dim=1)
ps = torch.argmax(ps,dim=1)
fig, axes = plt.subplots(3,3,figsize=(16,14))
for i,ax in enumerate(axes.flatten()):
    pred = ps[i].item()
    ax.plot(x[i])
    ax.plot(recon[pred][i])

In [None]:
#Optional: Visualize latent space
learnAE.model.eval()
out = []
classes = []
with torch.no_grad():
    for x,y in iter(data.valid_dl):
        ls = learnAE.model.mean(learnAE.model.conv(x.unsqueeze(1))[0].squeeze(-1))
        out.append(ls)
        classes.append(y)
out = torch.cat(out).cpu().detach().numpy()
classes = torch.cat(classes).cpu().detach().numpy()

In [None]:
pca = PCA(n_components=2)
f = pca.fit(out.T)
f = f.components_.T
plt.subplots(figsize=(20,15))
style = classes if data.train_ds.c < 8 else None
sns.scatterplot(x=f[:,0],y=f[:,1],hue=classes,palette="Dark2",style=style,legend="full",s=50)

In [None]:
tsne = TSNE(n_components=2)
f = tsne.fit_transform(out)
plt.subplots(figsize=(20,15))
sns.scatterplot(x=f[:,0],y=f[:,1],hue=classes,palette="Dark2",style=style,legend="full",s=50)

#### Step 5: Traditional transfer
Here we train a classifier by just adding a classifier head onto the encoder from the previous step

In [None]:
learnDT = Learner(data,BasicClassifier(data.train_ds.c,8,8),loss_func=F.cross_entropy,metrics=[accuracy],
                 callback_fns=BnFreeze,bn_wd=False,train_bn=False)
learnDT.split([*learnDT.model.conv.layers,learnDT.model.conv.avg,learnDT.model.out])

In [None]:
#Load the parameters from the encoder
learnDT.model.conv.load_state_dict(learnAE.model.cluster.conv.state_dict())

In [None]:
#Freeze up to the classifier layer
learnDT.freeze_to(-1)

In [None]:
learnDT.fit_one_cycle(20,1e-3)

In [None]:
#Fine tune
learnDT.unfreeze()
learnDT.fit_one_cycle(100,1e-4)

#### Step 6: Sidekick network

In [29]:
learnSidekick = Learner(data,SidekickConvnet(data.train_ds.c,8,8,8), loss_func=F.cross_entropy,metrics=[accuracy],
                        callback_fns=BnFreeze,bn_wd=False,train_bn=False)
learnSidekick.split([learnSidekick.model.hero,learnSidekick.model.layers[0],learnSidekick.model.out])

In [30]:
learnSidekick.model(next(iter(data.train_dl))[0]).size()

torch.Size([4, 11])

In [31]:
learnSidekick.model.hero.load_state_dict(learnAE.model.cluster.conv.state_dict())
learnSidekick.freeze_to(1)

In [None]:
learnSidekick.lr_find()
learnSidekick.recorder.plot()

In [32]:
learnSidekick.fit_one_cycle(20,1e-3,wd=0.2)

epoch,train_loss,valid_loss,accuracy


RuntimeError: CUDA out of memory. Tried to allocate 48.00 MiB (GPU 0; 8.00 GiB total capacity; 570.49 MiB already allocated; 30.06 MiB free; 9.01 MiB cached)