In [1]:
import torch
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import random
import torchvision
from pathlib import Path
from torch.utils.tensorboard import SummaryWriter
from torch.optim import lr_scheduler, swa_utils
from torchvision.datasets import ImageFolder, DatasetFolder
import time
# from joblib import Memory 

In [4]:
from diskcache import Cache
cache = Cache('.cache/diskcache')




class Ageset(Dataset):

    def __init__(self, path, transforms = None, valid=False, split_pct = 0.3):
        self.image_paths = list(Path(path).rglob("*.png"))
        random.seed(42)
        random.shuffle(self.image_paths)
        split_point = int(len(self)*0.3)
        if valid:
            self.image_paths = self.image_paths[:split_point]
            print('len validation dataset', len(self.image_paths))
        else:
            self.image_paths = self.image_paths[split_point:]
            print('len train dataset', len(self.image_paths))
    def __len__(self):
        return len(self.image_paths)

    def imgpath_to_normalized_tensor(self, imgpath):
        # makes a tensor, scales range to 0-1 and normalizes to same as imagenet
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
        return normalize(transforms.PILToTensor()(Image.open(imgpath)).float()/255)
    
    @cache.memoize(typed=True)
    def __getitem__(self,i):
        print(i)
        # return self.image_paths[i]

        if isinstance(i, slice):
            return [self[n] for n,_ in enumerate(self.image_paths[i])]
        
        return (self.imgpath_to_normalized_tensor(self.image_paths[i]),
                int(self.image_paths[i].parent.name))

In [5]:
a = Ageset("data/face_age")

len train dataset 13690


In [23]:
a[40]

40


(tensor([[[ 2.1975,  2.2147,  2.2318,  ..., -0.0972, -0.1143, -0.0972],
          [ 2.1975,  2.2147,  2.2318,  ..., -0.1314, -0.1657, -0.1486],
          [ 2.1975,  2.1975,  2.2147,  ..., -0.1657, -0.2171, -0.1999],
          ...,
          [-1.4843, -1.3987, -1.3987,  ..., -1.6384, -1.6042, -1.6042],
          [-1.4329, -1.2959, -1.2959,  ..., -1.6727, -1.7925, -1.8782],
          [-1.4329, -1.2788, -1.2445,  ..., -1.6727, -1.7754, -1.8439]],
 
         [[ 2.1134,  2.1134,  2.1310,  ..., -1.0728, -1.0728, -1.0028],
          [ 2.1134,  2.1134,  2.0959,  ..., -1.1078, -1.1253, -1.0553],
          [ 2.0784,  2.0959,  2.0784,  ..., -1.1429, -1.1779, -1.1078],
          ...,
          [-1.3004, -1.2129, -1.2129,  ..., -1.4230, -1.3880, -1.3880],
          [-1.2479, -1.1078, -1.1078,  ..., -1.4230, -1.5455, -1.6331],
          [-1.2479, -1.0903, -1.0553,  ..., -1.4230, -1.5280, -1.5980]],
 
         [[ 1.9603,  1.9254,  1.9428,  ..., -1.1073, -1.0724, -1.0201],
          [ 1.9603,  1.9254,

In [None]:
b = Ageset("data/face_age", valid=True)[:]

In [38]:
class adaptedRes(nn.Module):
    def __init__(self):
        super(adaptedRes, self).__init__()
        resnet = torchvision.models.resnet18(pretrained=True)
        modules=list(resnet.children())[:-1]
        self.resnet =nn.Sequential(*modules)
        self.fc = nn.Linear(in_features=512, out_features=1, bias=True)

        # for m in self.modules():
        # if isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(self.fc.weight)

    def forward(self,x):
        out = self.resnet(x)
        x = torch.flatten(out, 1)
        return self.fc(x)

In [43]:
def train():
    loss = {'train':[], 'val':[]}
    for epoch in range(NUM_EPOCH):
        print(f'Starting epoch {epoch}')
        start_time = time.time()
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            total_loss = 0
            for data in dls[phase]:
                x, y = data[0].to(DEVICE), data[1].to(DEVICE)
                with torch.set_grad_enabled(phase == 'train'):
                    pred = model(x)
                    loss = loss_fn(y, pred)
                    total_loss += loss * len(y)
                    if phase == 'train':
                        loss.backward()
                        opt.step()
                        opt.zero_grad()
                        if SWA_ENABLED and epoch > SWA_START:
                            swa_model.update_parameters(model)
                            swa_sched.step()
                        elif SCHED_ENABLED:
                            sched.step(loss)
                            writer.add_scalar('lr/scheduler', sched.get_last_lr()[0], epoch)
                        writer.add_scalar('lr/optparamgroup0', opt.param_groups[0]['lr'], epoch)
                writer.add_scalar('batchloss/'+phase, loss, epoch)
            
            writer.add_scalar('loss/'+phase, total_loss/len(dls[phase].dataset), epoch)
            
        print(f"loss after epoch {epoch} : {total_loss / len(dls['val'].dataset)}")
        writer.add_scalar('time', (time.time()-start_time)/60, epoch)


    if SWA_ENABLED:
        # swa_utils.update_bn(train_dl, swa_model)
        total_loss_train = 0
        total_loss_val = 0
        with torch.no_grad():
            for data in train_dl:
                    x, y = data[0].to(DEVICE), data[1].to(DEVICE)
                    total_loss_train += loss_fn(y, model(x)) * len(y)
            writer.add_scalar('loss/train', total_loss_train/len(train_set), epoch+1)

            for data in val_dl:
                    x, y = data[0].to(DEVICE), data[1].to(DEVICE)
                    total_loss_val += loss_fn(y, model(x)) * len(y)
            writer.add_scalar('loss/val', total_loss_train/len(val_set), epoch+1)

    writer.flush()
    writer.close()

In [50]:
def mae_loss(y, pred):
    return (torch.abs(y-pred.T)).mean()
loss_fn = mae_loss

NUM_EPOCH = 20
SWA_START = 20
LR = 0.001
BATCH_SIZE = 64
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
SWA_ENABLED = False
SCHED_ENABLED = True

train_set = Ageset("data/face_age")[:4000]
train_dl = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)

val_set = Ageset("data/face_age", valid=True)[:1000]
val_dl = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=True)
dls = {'train': train_dl, 'val': val_dl}



for i in range(2,3):

    runs = {0:'ReduceLROnPlateau', 1:'OneCycleLR', 2:'MultiplicativeLR'}

    writer = SummaryWriter(comment=f'{runs[i]} epoch {NUM_EPOCH} SWA_START {SWA_START} LR BATCH_SIZE {LR}')
    model = adaptedRes()
    model.to(DEVICE)
    opt = torch.optim.Adam(model.parameters(), LR)
    if SCHED_ENABLED:
        if i ==0:
            sched = torch.optim.lr_scheduler.ReduceLROnPlateau(opt)
            sched.get_last_lr = lambda: [1]
        if i == 1:
            sched = torch.optim.lr_scheduler.OneCycleLR(opt, LR, steps_per_epoch=len(train_dl), epochs=NUM_EPOCH)
        if i == 2:
            sched = torch.optim.lr_scheduler.MultiplicativeLR(opt, lr_lambda=lambda x: 1)
    if SWA_ENABLED:
        swa_model = swa_utils.AveragedModel(model)
        swa_sched = swa_utils.SWALR(opt, swa_lr = 0.0005)
    train()

len train dataset 13690
len validation dataset 5866
Starting epoch 0
loss after epoch 0 : 8.340116500854492
Starting epoch 1
loss after epoch 1 : 9.198281288146973
Starting epoch 2
loss after epoch 2 : 8.051300048828125
Starting epoch 3
loss after epoch 3 : 5.771472454071045
Starting epoch 4
loss after epoch 4 : 5.379964351654053
Starting epoch 5
loss after epoch 5 : 6.048651218414307
Starting epoch 6
loss after epoch 6 : 5.4881391525268555
Starting epoch 7
loss after epoch 7 : 4.725648880004883
Starting epoch 8
loss after epoch 8 : 5.262847423553467
Starting epoch 9
loss after epoch 9 : 4.763493061065674
Starting epoch 10
loss after epoch 10 : 4.864866733551025
Starting epoch 11
loss after epoch 11 : 4.70595121383667
Starting epoch 12
loss after epoch 12 : 4.754103660583496
Starting epoch 13
loss after epoch 13 : 5.09583854675293
Starting epoch 14
loss after epoch 14 : 4.579208850860596
Starting epoch 15
loss after epoch 15 : 4.413903713226318
Starting epoch 16
loss after epoch 16 : 5

In [46]:
sched.

AttributeError: 'ReduceLROnPlateau' object has no attribute 'lr'

In [24]:

for i in test_set[:10]:
    preds = i[1],model(i[0][None].to(DEVICE)).item()
    print(f'target {preds[0]}, predicted {preds[1]}')
    loss = abs(preds[0]-preds[1])
    # print('loss',loss)
    running += loss
print(running, running/2)

target 16, predicted -0.47240400314331055


NameError: name 'running' is not defined

In [12]:
!tensorboard.main --logdir='runs' ip

'tensorboard.main' is not recognized as an internal or external command,
operable program or batch file.


In [11]:
a=time.time()

In [13]:
(a-time.time())/60

-0.2206122080485026

In [None]:
#launch tensorboard
python -m tensorboard.main --logdir=runs --host=0.0.0.0 --port=6006

In [None]:
tensorboard dev upload --logdir runs \
--name "My latest experiment" \ # optional
--description "Simple comparison of several hyperparameters" # optional

In [42]:
class Learner():
    def __init__(self, dls, model, opt, metric, parameters, sched=None, swa_model=None, swa_sched=None):
        self.dls, self.model, self.opt, self.metric, self.p = dls, model, opt, metric, parameters
        self.sched, self.swa_model, self.swa_sched = sched, swa_model, swa_sched
        
    def train(self:)
        for epoch in range(NUM_EPOCH):
                print('epoch start')
                self.model.train()
                total_loss_train = 0
                for data in self.train_dl:
                    x, y = data[0].to(DEVICE), data[1].to(DEVICE)
                    opt.zero_grad()
                    pred = model(x)
                    loss = loss_fn(y, pred)
                    total_loss_train += loss * len(y)
                    loss.backward()
                    opt.step()
                    if epoch > SWA_START:
                        swa_model.update_parameters(model)
                        swa_sched.step(loss)
                    else:
                        sched.step()
                
                model.eval()
                total_loss_test = 0
                with torch.no_grad():
                    for data in test_dl:
                        x, y = data[0].to(DEVICE), data[1].to(DEVICE)
                        total_loss_test += loss_fn(y, model(x)) * len(y)
                register_runs(total_loss_train, total_loss_test, epoch)

    def register_runs(total_loss_train, total_loss_test, epoch): 
        writer.add_scalar('loss/train', total_loss_train/len(train_set), epoch)
        writer.add_scalar('loss/test', total_loss_test/len(test_set), epoch)
        writer.add_scalar('lr/optparamgroup0', opt.param_groups[0]['lr'], epoch)
        writer.add_scalar('lr/scheduler', sched.get_last_lr()[0], epoch)
        print(f'loss after epoch {epoch}: {total_loss_train/len(train_set)}, {total_loss_test/len(test_set)}')

    def validate_swa_model(swa_model):
        # swa_utils.update_bn(train_dl, swa_model)
        total_loss_train = 0
            total_loss_test = 0
            with torch.no_grad():
            for data in train_dl:
                    x, y = data[0].to(DEVICE), data[1].to(DEVICE)
                    total_loss_train += loss_fn(y, model(x)) * len(y)
            writer.add_scalar('loss/train', total_loss_train/len(train_set), epoch+1)

            for data in test_dl:
                    x, y = data[0].to(DEVICE), data[1].to(DEVICE)
                    total_loss_test += loss_fn(y, model(x)) * len(y)
            writer.add_scalar('loss/test', total_loss_train/len(test_set), epoch+1)

            writer.flush()
            writer.close()

SyntaxError: invalid syntax (<ipython-input-42-fcd86dbf9b07>, line 6)