In [1]:
import torch
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import random
import torchvision
from pathlib import Path
from torch.utils.tensorboard import SummaryWriter
from torch.optim import lr_scheduler, swa_utils


In [13]:
class Ageset(Dataset):

    def __init__(self, path, transforms = None, valid=False, split_pct = 0.3):
        self.image_paths = list(Path(path).rglob("*.png"))
        random.seed(42)

        random.shuffle(self.image_paths)
        print(len(self.image_paths))
        split_point = int(len(self)*0.3)
        if valid:
            self.image_paths = self.image_paths[:split_point]
            print(len(self.image_paths))
        else:
            self.image_paths = self.image_paths[split_point:]
            print(len(self.image_paths))
    def __len__(self):
        return len(self.image_paths)

    def imgpath_to_tensor(self, imgpath):
        return transforms.PILToTensor()(Image.open(imgpath)).float()

    def __getitem__(self,i):
        # return self.image_paths[i]
        if isinstance(i, slice):
            return [self[n] for n,_ in enumerate(self.image_paths[i])]
        return (self.imgpath_to_tensor(self.image_paths[i]),
                int(self.image_paths[i].parent.name))

In [3]:
class adaptedRes(nn.Module):
    def __init__(self):
        super(adaptedRes, self).__init__()
        resnet = torchvision.models.resnet18(pretrained=True)
        modules=list(resnet.children())[:-1]
        self.resnet =nn.Sequential(*modules)
        self.fc = nn.Linear(in_features=512, out_features=1, bias=True)

        # for m in self.modules():
        # if isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(self.fc.weight)

    def forward(self,x):
        out = self.resnet(x)
        x = torch.flatten(out, 1)
        return self.fc(x)

In [6]:
def mae_loss(y, pred):
    return (torch.abs(y-pred.T)).mean()
loss_fn = mae_loss

NUM_EPOCH = 7
SWA_START = 3
# LR = 0.0005
BATCH_SIZE = 64
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
SWA_ENABLED = False
SCHED_ENABLED = False

train_set = Ageset("data/face_age")[:64*50]
train_dl = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)

test_set = Ageset("data/face_age", valid=True)[:64*10]
test_dl = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=True)
dls = {'train': train_dl, 'test': test_dl}

for LR in [0.1,0.01,0.001]:
    writer = SummaryWriter(comment=f'epoch {NUM_EPOCH} SWA_START {SWA_START} LR BATCH_SIZE {LR}')
    model = adaptedRes()
    model.to(DEVICE)
    opt = torch.optim.Adam(model.parameters(), LR)

    if SCHED_ENABLED:
        sched = lr_scheduler.OneCycleLR(opt, max_lr=0.001,steps_per_epoch=len(train_dl), epochs=NUM_EPOCH)

    if SWA_ENABLED:
        swa_model = swa_utils.AveragedModel(model)
        swa_sched = swa_utils.SWALR(opt, swa_lr = 0.0005)
    # train()

In [None]:
def train():

    loss = {'train':[], 'test':[]}
    for epoch in range(NUM_EPOCH):
        print('epoch start')
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            total_loss = 0
            for data in dls[phase]:
                x, y = data[0].to(DEVICE), data[1].to(DEVICE)
                with torch.set_grad_enabled(phase == 'train'):
                    pred = model(x)
                    loss = loss_fn(y, pred)
                    total_loss += loss * len(y)
                    if phase == 'train':
                        loss.backward()
                        opt.step()
                        opt.zero_grad()
                        if SWA_ENABLED and epoch > SWA_START:
                            swa_model.update_parameters(model)
                            swa_sched.step()
                        elif SCHED_ENABLED:
                            sched.step()
                            writer.add_scalar('lr/scheduler', sched.get_last_lr()[0], epoch)
                        writer.add_scalar('lr/optparamgroup0', opt.param_groups[0]['lr'], epoch)
            
                    writer.add_scalar('loss/'+phase', total_loss/len(train_set), epoch)
            
        print(f'loss after epoch {epoch}: {total_loss_train/len(train_set)}, {total_loss_test/len(test_set)}')

    if SWA_ENABLED:
        # swa_utils.update_bn(train_dl, swa_model)
        total_loss_train = 0
        total_loss_test = 0
        with torch.no_grad():
            for data in train_dl:
                    x, y = data[0].to(DEVICE), data[1].to(DEVICE)
                    total_loss_train += loss_fn(y, model(x)) * len(y)
            writer.add_scalar('loss/train', total_loss_train/len(train_set), epoch+1)

            for data in test_dl:
                    x, y = data[0].to(DEVICE), data[1].to(DEVICE)
                    total_loss_test += loss_fn(y, model(x)) * len(y)
            writer.add_scalar('loss/test', total_loss_train/len(test_set), epoch+1)

    writer.flush()
    writer.close()

In [24]:

for i in test_set[:10]:
    preds = i[1],model(i[0][None].to(DEVICE)).item()
    print(f'target {preds[0]}, predicted {preds[1]}')
    loss = abs(preds[0]-preds[1])
    # print('loss',loss)
    running += loss
print(running, running/2)

target 16, predicted -0.47240400314331055


NameError: name 'running' is not defined

In [12]:
!tensorboard.main --logdir='runs'

'tensorboard.main' is not recognized as an internal or external command,
operable program or batch file.


In [21]:
opt.param_groups[0]['lr']

0.0005

In [None]:
#launch tensorboard
python -m tensorboard.main --logdir=path

In [None]:
tensorboard dev upload --logdir runs \
--name "My latest experiment" \ # optional
--description "Simple comparison of several hyperparameters" # optional

In [None]:
class Learner():
    def __init__(self, dls, model, opt, metric, parameters, sched=None, swa_model=None, swa_sched=None):
        self.dls, self.model, self.opt, self.metric, self.p = dls, model, opt, metric, parameters
        self.sched, self.swa_model, self.swa_sched = sched, swa_model, swa_sched
        
    def train(self:)
        for epoch in range(NUM_EPOCH):
                print('epoch start')
                self.model.train()
                total_loss_train = 0
                for data in self.train_dl:
                    x, y = data[0].to(DEVICE), data[1].to(DEVICE)
                    opt.zero_grad()
                    pred = model(x)
                    loss = loss_fn(y, pred)
                    total_loss_train += loss * len(y)
                    loss.backward()
                    opt.step()
                    if epoch > SWA_START:
                        swa_model.update_parameters(model)
                        swa_sched.step()
                    else:
                        sched.step()
                
                model.eval()
                total_loss_test = 0
                with torch.no_grad():
                    for data in test_dl:
                        x, y = data[0].to(DEVICE), data[1].to(DEVICE)
                        total_loss_test += loss_fn(y, model(x)) * len(y)
                register_runs(total_loss_train, total_loss_test, epoch)

    def register_runs(total_loss_train, total_loss_test, epoch): 
        writer.add_scalar('loss/train', total_loss_train/len(train_set), epoch)
        writer.add_scalar('loss/test', total_loss_test/len(test_set), epoch)
        writer.add_scalar('lr/optparamgroup0', opt.param_groups[0]['lr'], epoch)
        writer.add_scalar('lr/scheduler', sched.get_last_lr()[0], epoch)
        print(f'loss after epoch {epoch}: {total_loss_train/len(train_set)}, {total_loss_test/len(test_set)}')

    def validate_swa_model(swa_model):
        # swa_utils.update_bn(train_dl, swa_model)
        total_loss_train = 0
            total_loss_test = 0
            with torch.no_grad():
            for data in train_dl:
                    x, y = data[0].to(DEVICE), data[1].to(DEVICE)
                    total_loss_train += loss_fn(y, model(x)) * len(y)
            writer.add_scalar('loss/train', total_loss_train/len(train_set), epoch+1)

            for data in test_dl:
                    x, y = data[0].to(DEVICE), data[1].to(DEVICE)
                    total_loss_test += loss_fn(y, model(x)) * len(y)
            writer.add_scalar('loss/test', total_loss_train/len(test_set), epoch+1)

            writer.flush()
            writer.close()