In [1]:
import configparser
import logging
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import MultiStepLR
from pathlib import Path
from model import DnCNN
import data_generator as dg
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import random
from timeit import default_timer as timer
import cv2
from skimage.metrics import peak_signal_noise_ratio
from skimage.metrics import structural_similarity
import glob
from noise_model import poissonpoissonnoise as nm
import datetime
from vgg import Vgg16
from torchvision import transforms




In [2]:
STYLE_WEIGHT = 1e5
CONTENT_WEIGHT = 1e0

# manualSeed = 999
# # manualSeed = random.randint(1, 10000) # use if you want new results
# print("Random Seed: ", manualSeed)
# random.seed(manualSeed)
# np.random.seed(manualSeed)
# torch.manual_seed(manualSeed)
def gram(x):
    (bs, ch, h, w) = x.size()
    f = x.view(bs, ch, w*h)
    f_T = f.transpose(1, 2)
    G = f.bmm(f_T) / (ch * h * w)
    return G

def _tensor_transform():
    return transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

def save_model(net: nn.Module, model_save_dir, step, dose_total):
    """
    Save the trained model.

    Args:
        net: trained model.
        model_save_dir: saved model directory.
        step: checkpoint.
    """
    model_save_dir = Path(model_save_dir) / "dose{}".format(str(int(dose_total)))
    if not Path(model_save_dir).exists():
        Path.mkdir(model_save_dir)
    model_path = Path(model_save_dir) / "{}.pth".format(step + 1)

    torch.save(net.state_dict(), model_path)

    print("Saved model checkpoints {} into {}".format(step + 1, model_save_dir))


def restore_model(resume_iters, model_save_dir, net: nn.Module, train=True):
    """
    Restore the trained model.

    Args:
        resume_iters: the iteration to be loaded.
        model_save_dir: the directory for saving the model.
        net: the model instance to be loaded.
        train: if True, then the model is set to training;
               else set it to test.

    Returns:
        net: loaded model instance.

    """
    print("Loading the trained model from step {}".format(resume_iters))
    model_path = Path(model_save_dir) / "{}.pth".format(resume_iters)

    # Restore the model.
    net.load_state_dict(torch.load(model_path))

    if train:
        net.train()
    else:
        net.eval()

    return net


class LossFunc(nn.Module):
    def __init__(self, reduction="sum"):
        super(LossFunc, self).__init__()
        self.reduction = reduction
        self.mse_loss = nn.MSELoss(reduction=reduction)
        # TODO: to add TV loss.
        # self.tv_loss =
        # TODO: to add likelihood
        # self.log_loss =

    def forward(self, logits, target):
        # Return the average MSE loss.
        mse_loss = self.mse_loss(logits, target).div_(2)
        loss = mse_loss
        return loss


def train_model(config):
    # Define hyper-parameters.
    depth = int(config["DnCNN"]["depth"])
    n_channels = int(config["DnCNN"]["n_channels"])
    img_channel = int(config["DnCNN"]["img_channel"])
    kernel_size = int(config["DnCNN"]["kernel_size"])
    use_bnorm = config.getboolean("DnCNN", "use_bnorm")
    epochs = int(config["DnCNN"]["epoch"])
    batch_size = int(config["DnCNN"]["batch_size"])
    train_data_dir = config["DnCNN"]["train_data_dir"]
    test_data_dir = config["DnCNN"]["test_data_dir"]
    eta_min = float(config["DnCNN"]["eta_min"])
    eta_max = float(config["DnCNN"]["eta_max"])
    dose = float(config["DnCNN"]["dose"])
    model_save_dir = config["DnCNN"]["model_save_dir"]

    # Save logs to txt file.
    log_dir = config["DnCNN"]["log_dir"]
    log_dir = Path(log_dir) / "dose{}".format(str(int(dose * 100)))
    log_file = log_dir / "train_result.txt"

    # Define device.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Initiate a DnCNN instance.
    # Load the model to device and set the model to training.
    model = DnCNN(depth=depth, n_channels=n_channels,
                  img_channel=img_channel,
                  use_bnorm=use_bnorm,
                  kernel_size=kernel_size)

    model = model.to(device)
    restore_model(18, model_save_dir, model, train=True)
#     model.train()

    # Define loss criterion and optimizer
    optimizer = optim.Adam(model.parameters(), lr=5e-5)
    scheduler = MultiStepLR(optimizer, milestones=[30, 60, 90], gamma=0.2)
    criterion = LossFunc(reduction="mean")

    # Get a validation test set and corrupt with noise for validation performance.
    # For every epoch, use this pre-determined noisy images.
    test_file_list = glob.glob(test_data_dir + "/*.png")
    xs_test = []
    # Can't directly convert the xs_test from list to ndarray because some images are 512*512
    # while the rest are 256*256.
    for i in range(len(test_file_list)):
        img = cv2.imread(test_file_list[i], 0)
        img = np.array(img, dtype="float32") / 255.0
        img = np.expand_dims(img, axis=0)
        img_noisy, _ = nm(img, eta_min, eta_max, dose, t=100)
        xs_test.append((img_noisy, img))

    # Train the model.
    loss_store = []
    epoch_loss_store = []
    psnr_store = []
    ssim_store = []

    psnr_tr_store = []
    ssim_tr_store = []
    
    loss_mse = torch.nn.MSELoss()

    dtype = torch.cuda.FloatTensor
    # load vgg network
    vgg = Vgg16().type(dtype)
    
    
    for epoch in range(epochs):
        # For each epoch, generate clean augmented patches from the training directory.
        # Convert the data from uint8 to float32 then scale them to make it in [0, 1].
        # Then make the patches to be of shape [N, C, H, W],
        # where N is the batch size, C is the number of color channels.
        # H and W are height and width of image patches.
        xs = dg.datagenerator(data_dir=train_data_dir)
        xs = xs.astype("float32") / 255.0
        xs = torch.from_numpy(xs.transpose((0, 3, 1, 2)))

        train_set = dg.DenoisingDatatset(xs, eta_min, eta_max, dose)
        train_loader = DataLoader(dataset=train_set, num_workers=4,
                                  drop_last=True, batch_size=batch_size,
                                  shuffle=True)  # TODO: if drop_last=True, the dropping in the
                                                 # TODO: data_generator is not necessary?

        # train_loader_test = next(iter(train_loader))

        t_start = timer()
        epoch_loss = 0
        for idx, data in enumerate(train_loader):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            img_batch_read = len(inputs)

            optimizer.zero_grad()

            outputs = model(inputs)
            
            # We can use labels for both style and content image
            
                # style image
#             style_transform = transforms.Compose([
#             normalize_tensor_transform()      # normalize with ImageNet values
#             ])
            
#             labels_t = style_transform(labels)
                        
#             labels_t = labels.repeat(1, 3, 1, 1)
#             outputs_t = outputs.repeat(1, 3, 1, 1)            
            
            labels_t = torch.zeros(128, 3, 40, 40).cuda()
            labels_t[:,1,:,:] = labels.squeeze()
            
            outputs_t = torch.zeros(128, 3, 40, 40).cuda()
            outputs_t[:,1,:,:] = outputs.squeeze()
                        
            y_c_features = vgg(labels_t)
            style_gram = [gram(fmap) for fmap in y_c_features]
            
            y_hat_features = vgg(outputs_t)
            y_hat_gram = [gram(fmap) for fmap in y_hat_features]    
            
            
            # calculate style loss
            style_loss = 0.0
            for j in range(4):
                style_loss += loss_mse(y_hat_gram[j], style_gram[j][:img_batch_read])
            style_loss = STYLE_WEIGHT*style_loss
            aggregate_style_loss = style_loss

            # calculate content loss (h_relu_2_2)
            recon = y_c_features[1]      
            recon_hat = y_hat_features[1]
            content_loss = CONTENT_WEIGHT*loss_mse(recon_hat, recon)
            aggregate_content_loss = content_loss
            
            diff_i = torch.sum(torch.abs(outputs_t[:, :, :, 1:] - outputs_t[:, :, :, :-1]))
            diff_j = torch.sum(torch.abs(outputs_t[:, :, 1:, :] - outputs_t[:, :, :-1, :]))
            tv_loss = (1e-4)*(diff_i + diff_j)
            aggregate_tv_loss = tv_loss
            
            loss = aggregate_content_loss + aggregate_style_loss + aggregate_tv_loss
#             loss = criterion(outputs, labels)
            
            loss_store.append(loss.item())
            epoch_loss += loss.item()

            loss.backward()

            optimizer.step()

            if idx % 100 == 0:
                print("Epoch [{} / {}], step [{} / {}], loss = {:.5f}, lr = {:.6f}, elapsed time = {:.2f}s".format(
                    epoch + 1, epochs, idx, len(train_loader), loss.item(), *scheduler.get_last_lr(), timer()-t_start))

        epoch_loss_store.append(epoch_loss / len(train_loader))

        # At each epoch validate the result.
        model = model.eval()

        # # Firstly validate on training sets. This takes a long time so I commented.
        # tr_psnr = []
        # tr_ssim = []
        # # t_start = timer()
        # with torch.no_grad():
        #     for idx, train_data in enumerate(train_loader):
        #         inputs, labels = train_data
        #         # print(inputs.shape)
        #         # inputs = np.expand_dims(inputs, axis=0)
        #         # inputs = torch.from_numpy(inputs).to(device)
        #         inputs = inputs.to(device)
        #         labels = labels.squeeze().numpy()
        #
        #         outputs = model(inputs)
        #         outputs = outputs.squeeze().cpu().detach().numpy()
        #
        #         tr_psnr.append(peak_signal_noise_ratio(labels, outputs))
        #         tr_ssim.append(structural_similarity(outputs, labels))
        # psnr_tr_store.append(sum(tr_psnr) / len(tr_psnr))
        # ssim_tr_store.append(sum(tr_ssim) / len(tr_ssim))
        # # print("Elapsed time = {}".format(timer() - t_start))
        #
        # print("Validation on train set: epoch [{} / {}], aver PSNR = {:.2f}, aver SSIM = {:.4f}".format(
        #     epoch + 1, epochs, psnr_tr_store[-1], ssim_tr_store[-1]))

        # Validate on test set
        val_psnr = []
        val_ssim = []
        with torch.no_grad():
            for idx, test_data in enumerate(xs_test):
                inputs, labels = test_data
                inputs = np.expand_dims(inputs, axis=0)
                inputs = torch.from_numpy(inputs).to(device)
                labels = labels.squeeze()

                outputs = model(inputs)
                outputs = outputs.squeeze().cpu().detach().numpy()

                val_psnr.append(peak_signal_noise_ratio(labels, outputs))
                val_ssim.append(structural_similarity(outputs, labels))

        psnr_store.append(sum(val_psnr) / len(val_psnr))
        ssim_store.append(sum(val_ssim) / len(val_ssim))
        
        fig, ax = plt.subplots()
        ax.plot(psnr_store)
        ax.set_title("PSNR")
        ax.set_xlabel("epoch")
        fig.show()
        
        fig, ax = plt.subplots()
        ax.plot(ssim_store)
        ax.set_title("SSIM")
        ax.set_xlabel("epoch")
        fig.show()

        print("Validation on test set: epoch [{} / {}], aver PSNR = {:.2f}, aver SSIM = {:.4f}".format(
            epoch + 1, epochs, psnr_store[-1], ssim_store[-1]))

        # Set model to train mode again.
        model = model.train()

        scheduler.step()

        # Save model
        save_model(model, model_save_dir, epoch, dose * 100)

        # Save the loss and validation PSNR, SSIM.

        if not log_dir.exists():
            Path.mkdir(log_dir)
        with open(log_file, "a+") as fh:
            # fh.write("{} Epoch [{} / {}], loss = {:.6f}, train PSNR = {:.2f}dB, train SSIM = {:.4f}, "
            #          "validation PSNR = {:.2f}dB, validation SSIM = {:.4f}".format(
            #          datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S:"),
            #          epoch + 1, epochs, epoch_loss_store[-1],
            #          psnr_tr_store[-1], ssim_tr_store[-1],
            #          psnr_store[-1], ssim_store[-1]))
            fh.write("{} Epoch [{} / {}], loss = {:.6f}, "
                     "validation PSNR = {:.2f}dB, validation SSIM = {:.4f}\n".format(
                     datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S:"),
                     epoch + 1, epochs, epoch_loss_store[-1],
                     psnr_store[-1], ssim_store[-1]))

        # np.savetxt(log_file, np.hstack((epoch + 1, epoch_loss_store[-1], psnr_store[-1], ssim_store[-1])), fmt="%.6f", delimiter=",  ")

        fig, ax = plt.subplots()
        ax.plot(loss_store[-len(train_loader):])
        ax.set_title("Last 1862 losses")
        ax.set_xlabel("iteration")
        fig.show()

    # print("Continue")





In [None]:
if __name__=="__main__":
    config = configparser.ConfigParser()

    config.read("/projectnb/ec523/mcokbas/Final_project/DnCNN_poisson_perceptual/cfg_tv.ini")

    train_model(config)

Loading the trained model from step 18
Training data process finished.
Epoch [1 / 30], step [0 / 1862], loss = 3.82666, lr = 0.000050, elapsed time = 1.91s
Epoch [1 / 30], step [100 / 1862], loss = 3.63008, lr = 0.000050, elapsed time = 37.70s
Epoch [1 / 30], step [200 / 1862], loss = 3.66965, lr = 0.000050, elapsed time = 73.50s
Epoch [1 / 30], step [300 / 1862], loss = 3.62165, lr = 0.000050, elapsed time = 109.37s
Epoch [1 / 30], step [400 / 1862], loss = 4.00361, lr = 0.000050, elapsed time = 145.26s
Epoch [1 / 30], step [500 / 1862], loss = 3.74176, lr = 0.000050, elapsed time = 181.01s
Epoch [1 / 30], step [600 / 1862], loss = 3.95030, lr = 0.000050, elapsed time = 216.91s
Epoch [1 / 30], step [700 / 1862], loss = 3.59763, lr = 0.000050, elapsed time = 252.19s
Epoch [1 / 30], step [800 / 1862], loss = 3.56265, lr = 0.000050, elapsed time = 287.13s
Epoch [1 / 30], step [900 / 1862], loss = 3.73924, lr = 0.000050, elapsed time = 322.57s
Epoch [1 / 30], step [1000 / 1862], loss = 3.

Epoch [5 / 30], step [500 / 1862], loss = 3.44763, lr = 0.000050, elapsed time = 178.66s
Epoch [5 / 30], step [600 / 1862], loss = 4.01834, lr = 0.000050, elapsed time = 213.92s
Epoch [5 / 30], step [700 / 1862], loss = 3.83434, lr = 0.000050, elapsed time = 248.45s
Epoch [5 / 30], step [800 / 1862], loss = 3.49231, lr = 0.000050, elapsed time = 283.75s
Epoch [5 / 30], step [900 / 1862], loss = 3.98067, lr = 0.000050, elapsed time = 319.11s
Epoch [5 / 30], step [1000 / 1862], loss = 3.82155, lr = 0.000050, elapsed time = 354.43s
Epoch [5 / 30], step [1100 / 1862], loss = 3.83323, lr = 0.000050, elapsed time = 390.05s
Epoch [5 / 30], step [1200 / 1862], loss = 3.42923, lr = 0.000050, elapsed time = 425.24s
Epoch [5 / 30], step [1300 / 1862], loss = 3.52505, lr = 0.000050, elapsed time = 460.56s
Epoch [5 / 30], step [1400 / 1862], loss = 3.98573, lr = 0.000050, elapsed time = 495.87s
Epoch [5 / 30], step [1500 / 1862], loss = 3.64190, lr = 0.000050, elapsed time = 531.10s
Epoch [5 / 30],



Training data process finished.
Epoch [8 / 30], step [0 / 1862], loss = 4.02661, lr = 0.000050, elapsed time = 1.97s
Epoch [8 / 30], step [100 / 1862], loss = 3.91728, lr = 0.000050, elapsed time = 37.31s
Epoch [8 / 30], step [200 / 1862], loss = 3.56886, lr = 0.000050, elapsed time = 72.79s
Epoch [8 / 30], step [300 / 1862], loss = 3.43161, lr = 0.000050, elapsed time = 108.07s
Epoch [8 / 30], step [400 / 1862], loss = 4.05611, lr = 0.000050, elapsed time = 143.06s
Epoch [8 / 30], step [500 / 1862], loss = 3.71875, lr = 0.000050, elapsed time = 178.23s
Epoch [8 / 30], step [600 / 1862], loss = 4.24191, lr = 0.000050, elapsed time = 213.45s
Epoch [8 / 30], step [700 / 1862], loss = 3.68540, lr = 0.000050, elapsed time = 248.66s
Epoch [8 / 30], step [800 / 1862], loss = 3.79869, lr = 0.000050, elapsed time = 283.84s
Epoch [8 / 30], step [900 / 1862], loss = 3.61723, lr = 0.000050, elapsed time = 319.10s
Epoch [8 / 30], step [1000 / 1862], loss = 3.82956, lr = 0.000050, elapsed time = 35



Validation on test set: epoch [8 / 30], aver PSNR = 16.49, aver SSIM = 0.7007
Saved model checkpoints 8 into /projectnb/ec523/mcokbas/Final_project/DnCNN_poisson_perceptual/results/model_path_just_last/dose20/dose20
Training data process finished.
Epoch [9 / 30], step [0 / 1862], loss = 3.73983, lr = 0.000050, elapsed time = 1.98s
Epoch [9 / 30], step [100 / 1862], loss = 3.38970, lr = 0.000050, elapsed time = 37.20s
Epoch [9 / 30], step [200 / 1862], loss = 3.48867, lr = 0.000050, elapsed time = 72.65s
Epoch [9 / 30], step [300 / 1862], loss = 3.70417, lr = 0.000050, elapsed time = 108.12s
Epoch [9 / 30], step [400 / 1862], loss = 3.69824, lr = 0.000050, elapsed time = 143.36s
Epoch [9 / 30], step [500 / 1862], loss = 3.43960, lr = 0.000050, elapsed time = 178.49s
Epoch [9 / 30], step [600 / 1862], loss = 3.70229, lr = 0.000050, elapsed time = 213.57s
Epoch [9 / 30], step [700 / 1862], loss = 3.63471, lr = 0.000050, elapsed time = 248.86s
Epoch [9 / 30], step [800 / 1862], loss = 3.50