In [1]:
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import  DataLoader
from mydataset import MyDataset
import torch
from myunet import Unet
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

In [4]:
batch_size = 100
device = "cuda" if torch.cuda.is_available() else "cpu"
writer = SummaryWriter()

In [5]:
train_set = MyDataset(root="data", is_train=True, transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
]))

val_set = MyDataset(root="data", is_train=False, transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
]))

train_loader = DataLoader(
    train_set,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0
)

val_loader = DataLoader(
    val_set,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0
)
len(train_set), len(val_set), len(train_loader), len(val_loader)

(3800, 200, 38, 2)

In [6]:
model = Unet(6, 2)
model.cuda()

criterion = nn.CrossEntropyLoss()
optim = torch.optim.AdamW(model.parameters(), lr=0.005, weight_decay=1e-9)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optim, mode='max', factor=0.5, patience=2,
    verbose=True)

In [7]:
def train_model(epoch):
    model.train()
    print(f"Epoch {epoch} Training")
    running_loss = 0.0
    step = len(train_loader) // 50 * epoch
    with tqdm(train_loader, desc=str(epoch)) as it:
        for idx, (img1, img2, mask) in enumerate(it, 0):
            img1, img2, mask = img1.cuda(), img2.cuda(), mask.cuda()
            mask = mask.long()
            optim.zero_grad()
            outputs = model(img1, img2)
            loss = criterion(outputs, mask)
            loss.backward()
            optim.step()
            _, pred = torch.max(outputs.data, 1)
            p, r, f1, iou = get_index(pred, mask)
            it.set_postfix_str(f"loss: {loss.item(): .4f} p: {p: .4f}  r: {r: .4f}  f1: {f1: .4f}  iou: {iou: .4f}")
            running_loss += loss.item()
            if idx % 50 == 49:
                running_loss /= 50
                writer.add_scalars("loss", {"": running_loss}, global_step=step)
                step += 1
                running_loss = 0.0

In [8]:
def get_index(pred, label):
    eps = 1e-7
    tp = torch.sum(label * pred)
    fp = torch.sum(pred) - tp
    fn = torch.sum(label) - tp

    p = (tp + eps) / (tp + fp + eps)
    r = (tp + eps) / (tp + fn + eps)
    f1 = (2 * p * r + eps) / (p + r + eps)
    iou = (tp + eps) / (tp + fn + fp + eps)
    return p, r, f1, iou


def test_model(epoch):
    model.eval()
    global max_score
    f1s = 0
    print(f"Epoch {epoch} Testing")
    with torch.no_grad():
        with tqdm(val_loader, desc=str(epoch)) as it:
            for img1, img2, mask in it:
                img1, img2, mask = img1.cuda(), img2.cuda(), mask.cuda()
                outputs = model(img1, img2)
                _, pred = torch.max(outputs.data, 1)
                p, r, f1, iou = get_index(pred, mask)
                f1s += f1
                it.set_postfix_str(f"p: {p: .4f}  r: {r: .4f}  f1: {f1: .4f}  iou: {iou: .4f}")
    f1s /= len(val_loader)
    scheduler.step(f1s)
    print("f1", f1s.item())
    if max_score < f1s:
        max_score = f1s
        print('max_score', max_score.item())
        torch.save(model, './best_model.pth')
        print('Model saved!')

In [9]:
num_epoch = 10
max_score = 0
for epoch in range(0, num_epoch):
    train_model(epoch=epoch)
    test_model(epoch=epoch)
writer.close()
print("completed!")
print('max_score', max_score)

Epoch 0 Training


0: 100%|██████████| 38/38 [00:33<00:00,  1.12it/s, loss:  0.5041 p:  0.6099  r:  0.2366  f1:  0.3409  iou:  0.2055]


Epoch 0 Testing


0: 100%|██████████| 2/2 [00:01<00:00,  1.59it/s, p:  0.4643  r:  0.5464  f1:  0.5020  iou:  0.3351]


f1 0.5136547157642134
max_score 0.5136547157642134
Model saved!
Epoch 1 Training


1: 100%|██████████| 38/38 [00:33<00:00,  1.13it/s, loss:  0.4660 p:  0.6287  r:  0.3874  f1:  0.4794  iou:  0.3153]


Epoch 1 Testing


1: 100%|██████████| 2/2 [00:01<00:00,  1.59it/s, p:  0.4935  r:  0.7994  f1:  0.6103  iou:  0.4391]


f1 0.6258139048167637
max_score 0.6258139048167637
Model saved!
Epoch 2 Training


2: 100%|██████████| 38/38 [00:33<00:00,  1.13it/s, loss:  0.4525 p:  0.7470  r:  0.5310  f1:  0.6207  iou:  0.4501]


Epoch 2 Testing


2: 100%|██████████| 2/2 [00:01<00:00,  1.61it/s, p:  0.6129  r:  0.5608  f1:  0.5857  iou:  0.4141]


f1 0.6081200188552918
Epoch 3 Training


3: 100%|██████████| 38/38 [00:33<00:00,  1.13it/s, loss:  0.4874 p:  0.6525  r:  0.4059  f1:  0.5005  iou:  0.3338]


Epoch 3 Testing


3: 100%|██████████| 2/2 [00:01<00:00,  1.59it/s, p:  0.7295  r:  0.4245  f1:  0.5367  iou:  0.3668]


f1 0.5652822592411064
Epoch 4 Training


4: 100%|██████████| 38/38 [00:33<00:00,  1.13it/s, loss:  0.4666 p:  0.6432  r:  0.5253  f1:  0.5783  iou:  0.4067]


Epoch 4 Testing


4: 100%|██████████| 2/2 [00:01<00:00,  1.60it/s, p:  0.7460  r:  0.3748  f1:  0.4989  iou:  0.3324]


Epoch     5: reducing learning rate of group 0 to 2.5000e-03.
f1 0.532017633241686
Epoch 5 Training


5: 100%|██████████| 38/38 [00:33<00:00,  1.12it/s, loss:  0.4423 p:  0.6570  r:  0.5532  f1:  0.6006  iou:  0.4292]


Epoch 5 Testing


5: 100%|██████████| 2/2 [00:01<00:00,  1.58it/s, p:  0.6929  r:  0.6043  f1:  0.6456  iou:  0.4766]


f1 0.6644767759607764
max_score 0.6644767759607764
Model saved!
Epoch 6 Training


6: 100%|██████████| 38/38 [00:33<00:00,  1.13it/s, loss:  0.4532 p:  0.6642  r:  0.4674  f1:  0.5487  iou:  0.3780]


Epoch 6 Testing


6: 100%|██████████| 2/2 [00:01<00:00,  1.56it/s, p:  0.7452  r:  0.4826  f1:  0.5858  iou:  0.4142]


f1 0.6035093611980233
Epoch 7 Training


7: 100%|██████████| 38/38 [00:33<00:00,  1.12it/s, loss:  0.4748 p:  0.6230  r:  0.4570  f1:  0.5273  iou:  0.3580]


Epoch 7 Testing


7: 100%|██████████| 2/2 [00:01<00:00,  1.57it/s, p:  0.7687  r:  0.4206  f1:  0.5437  iou:  0.3733]


f1 0.5712399486705011
Epoch 8 Training


8: 100%|██████████| 38/38 [00:33<00:00,  1.12it/s, loss:  0.4587 p:  0.6521  r:  0.4700  f1:  0.5463  iou:  0.3758]


Epoch 8 Testing


8: 100%|██████████| 2/2 [00:01<00:00,  1.59it/s, p:  0.7600  r:  0.4437  f1:  0.5603  iou:  0.3892]


Epoch     9: reducing learning rate of group 0 to 1.2500e-03.
f1 0.5853991597429862
Epoch 9 Training


9: 100%|██████████| 38/38 [00:33<00:00,  1.13it/s, loss:  0.4908 p:  0.7091  r:  0.4578  f1:  0.5564  iou:  0.3854]


Epoch 9 Testing


9: 100%|██████████| 2/2 [00:01<00:00,  1.60it/s, p:  0.7544  r:  0.4098  f1:  0.5311  iou:  0.3615]

f1 0.5731750871974679
completed!
max_score tensor(0.6645, device='cuda:0', dtype=torch.float64)



