In [6]:
import os, time, pickle, json
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torchvision import transforms
from lib import utils, networks
from lib.edge_promoting import edge_promoting
import itertools

## cleanup last results

In [18]:
results_folder_name = 'project_name_results'
attempt_folder_name = results_folder_name + '/' +'cycle_attempt_3'
os.system('mkdir ' + attempt_folder_name)
os.system('mv ' + results_folder_name + '/' + 'D_A.pkl' +' ' + attempt_folder_name + '/' + 'D_A.pkl')
os.system('mv ' + results_folder_name + '/' + 'D_B.pkl' +' ' + attempt_folder_name + '/' + 'D_B.pkl')
os.system('mv ' + results_folder_name + '/' + 'G_A.pkl' +' ' + attempt_folder_name + '/' + 'G_A.pkl')
os.system('mv ' + results_folder_name + '/' + 'G_B.pkl' +' ' + attempt_folder_name + '/' + 'G_B.pkl')

result_folder_A = results_folder_name +'/Cycle_G_A'
result_folder_B = results_folder_name +'/Cycle_G_B'
result_zip = results_folder_name +'/results.zip'
os.system('zip -r '+result_zip + ' ' + result_folder_A +' '+result_folder_B )
os.system('mv '+ result_zip+' '+attempt_folder_name +'/results.zip')
os.system('rm -r ' + result_folder_A)
os.system('rm -r ' + result_folder_B)

train_hist_file_name = 'train_hist.json'
os.system('mv '+ results_folder_name + '/' + train_hist_file_name + ' ' + attempt_folder_name+'/'+train_hist_file_name)

0

In [20]:
train_hist_path

'project_name_results/train_hist.json'

### Parameters 

In [2]:
name='project_name'
#source data path
src_data='src_data_path_new'
#target data path
tgt_data='tgt_data_path'
#input channel for generator
in_ngc=3
#output channel for generator
out_ngc=3
#input channel for discriminator
in_ndc=3
#output channel for discriminator
out_ndc=1
batch_size=8
ngf=64
ndf=32
#the number of resnet block layer for generator
nb=9
#input size
input_size=256
train_epoch=5
#Discriminator learning rate, default=0.0002
lrD=0.0002
#Generator learning rate, default=0.0002
lrG=0.0002
#lambda for loss
lambdaA=1
lambdaB=1
lambda_idt = 0.1
decay_epoch = 10

#beta1 for Adam optimizer
beta1=0.5
#beta2 for Adam optimizer
beta2=0.999

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if torch.backends.cudnn.enabled:
    torch.backends.cudnn.benchmark = True

In [4]:
device

device(type='cuda')

In [19]:
# results save path
if not os.path.isdir(os.path.join(name + '_results', 'Cycle_G_A')):
    os.makedirs(os.path.join(name + '_results', 'Cycle_G_A'))
if not os.path.isdir(os.path.join(name + '_results', 'Cycle_G_B')):
    os.makedirs(os.path.join(name + '_results', 'Cycle_G_B'))

In [6]:
#setup source and target folder
if not os.path.isdir(os.path.join('data',tgt_data,'train')):
    os.makedirs(os.path.join('data',tgt_data,'train'))
if not os.path.isdir(os.path.join('data',tgt_data,'test')):
    os.makedirs(os.path.join('data',tgt_data,'test'))
if not os.path.isdir(os.path.join('data',src_data,'train')):
    os.makedirs(os.path.join('data',src_data,'train'))
if not os.path.isdir(os.path.join('data',src_data,'test')):
    os.makedirs(os.path.join('data',src_data,'test'))

In [5]:
# data_loader
transform = transforms.Compose([
        transforms.Resize((input_size, input_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
])
train_loader_A = utils.data_load(os.path.join('data', src_data), 'train', transform, batch_size, shuffle=True, drop_last=True)
train_loader_B = utils.data_load(os.path.join('data', tgt_data), 'train', transform, batch_size, shuffle=True, drop_last=True)
test_loader_A = utils.data_load(os.path.join('data', src_data), 'test', transform, 1, shuffle=True, drop_last=True)

In [6]:
# network
G_A = networks.generator(in_ngc, out_ngc, ngf, nb)
G_B = networks.generator(in_ngc, out_ngc, ngf, nb)
D_A = networks.discriminator(in_ndc, out_ndc, ndf)
D_B = networks.discriminator(in_ndc, out_ndc, ndf)

G_A.to(device)
G_B.to(device)
D_A.to(device)
D_B.to(device)

discriminator(
  (convs): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.2, inplace)
    (2): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (3): LeakyReLU(negative_slope=0.2, inplace)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (6): LeakyReLU(negative_slope=0.2, inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (8): LeakyReLU(negative_slope=0.2, inplace)
    (9): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (11): LeakyReLU(negative_slope=0.2, inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=False, trac

### Load existing model parameters

In [8]:
G_A.load_state_dict(torch.load(os.path.join(name + '_results', 'G_A.pkl')))
G_B.load_state_dict(torch.load(os.path.join(name + '_results', 'G_B.pkl')))

### end loading parameters 

In [9]:
# loss
MSE_loss = nn.MSELoss().to(device)
L1_loss = nn.L1Loss().to(device)

In [10]:
G_optimizer = optim.Adam(itertools.chain(G_A.parameters(), G_B.parameters()), lr=lrG, betas=(beta1, beta2))
D_A_optimizer = optim.Adam(D_A.parameters(), lr=lrD, betas=(beta1, beta2))
D_B_optimizer = optim.Adam(D_B.parameters(), lr=lrD, betas=(beta1, beta2))

## Load train hist

In [11]:
train_hist = {}

In [17]:
train_hist_path = os.path.join(name+'_results', 'train_hist.json')
with open(train_hist_path, 'r') as file:
    train_hist = json.load(file)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

## End loading

In [12]:
train_hist['per_epoch_time'] = []
train_hist['total_time'] = []
train_hist['G_loss_one_epoch']=[]
train_hist['D_A_loss_one_epoch']=[]
train_hist['D_B_loss_one_epoch']=[]

In [15]:
#change the starting_epoch if needed
starting_epoch = 17

In [16]:
print('training start!')
start_time = time.time()
num_pool = 50
fake_A_pool = utils.ImagePool(num_pool)
fake_B_pool = utils.ImagePool(num_pool)
for epoch in range(train_epoch):
    epoch_start_time = time.time()
    print("==> Epoch {}/{}".format(epoch + 1, train_epoch))
    if (epoch + 1) > decay_epoch:
        D_A_optimizer.param_groups[0]['lr'] -= lrD / 10
        D_B_optimizer.param_groups[0]['lr'] -= lrD / 10
        G_optimizer.param_groups[0]['lr'] -= lrG / 10
    
    G_losses = []
    D_A_losses = []
    D_B_losses = []
    for (real_A,_),(real_B,_) in zip(train_loader_A, train_loader_B):
        G_A.train()
        G_B.train()

        # input image data
        real_A = real_A.to(device)
        real_B = real_B.to(device)

        # Train generator G
        # A -> B
        fake_B = G_A(real_A)
        D_B_fake_decision = D_B(fake_B)
        G_A_loss = MSE_loss(D_B_fake_decision, torch.ones(D_B_fake_decision.size(), device=device))

        # identity loss
        G_A_idt_loss = L1_loss(fake_B, real_A) * lambdaA * lambda_idt
        
        # forward cycle loss
        recon_A = G_B(fake_B)
        cycle_A_loss = L1_loss(recon_A, real_A) * lambdaA

        # B -> A
        fake_A = G_B(real_B)
        D_A_fake_decision = D_A(fake_A)
        G_B_loss = MSE_loss(D_A_fake_decision, torch.ones(D_A_fake_decision.size(), device=device))
        
        # identity loss
        G_B_idt_loss = L1_loss(fake_A, real_B) * lambdaB * lambda_idt
        
        # backward cycle loss
        recon_B = G_A(fake_A)
        cycle_B_loss = L1_loss(recon_B, real_B) * lambdaB
        
        #fix D parameters
        for model in [D_A, D_B]:
            for param in D_A.parameters():
                param.requires_grad = False
        
        
        # Back propagation
        G_loss = G_A_loss + G_B_loss + cycle_A_loss + cycle_B_loss + G_A_idt_loss + G_B_idt_loss
        G_losses.append(G_loss)
        G_optimizer.zero_grad()
        G_loss.backward()
        G_optimizer.step()

        #train D parameters
        for model in [D_A, D_B]:
            for param in D_A.parameters():
                param.requires_grad = True
                
        # Train discriminator D_A
        D_A_real_decision = D_A(real_A)
        D_A_real_loss = MSE_loss(D_A_real_decision, torch.ones(D_A_real_decision.size(), device=device))
        fake_A = fake_A_pool.query(fake_A)
        D_A_fake_decision = D_A(fake_A)
        D_A_fake_loss = MSE_loss(D_A_fake_decision, torch.zeros(D_A_fake_decision.size(), device=device))

        # Back propagation
        D_A_loss = (D_A_real_loss + D_A_fake_loss) * 0.5
        D_A_losses.append(D_A_loss)
        D_A_optimizer.zero_grad()
        D_A_loss.backward()
        D_A_optimizer.step()

        # Train discriminator D_B
        D_B_real_decision = D_B(real_B)
        D_B_real_loss = MSE_loss(D_B_real_decision, torch.ones(D_B_real_decision.size(), device=device))
        fake_B = fake_B_pool.query(fake_B)
        D_B_fake_decision = D_B(fake_B)
        D_B_fake_loss = MSE_loss(D_B_fake_decision, torch.zeros(D_B_fake_decision.size(), device=device))

        # Back propagation
        D_B_loss = (D_B_real_loss + D_B_fake_loss) * 0.5
        D_B_losses.append(D_B_loss)
        D_B_optimizer.zero_grad()
        D_B_loss.backward()
        D_B_optimizer.step()
    
    per_epoch_time = time.time() - epoch_start_time
    train_hist['per_epoch_time'].append(per_epoch_time)
    
    G_loss_avg = float(torch.mean(torch.FloatTensor(G_losses)).cpu().numpy())
    D_A_loss_avg = float(torch.mean(torch.FloatTensor(D_A_losses)).cpu().numpy())
    D_B_loss_avg =  float(torch.mean(torch.FloatTensor(D_B_losses)).cpu().numpy())
    
    train_hist['G_loss_one_epoch'].append(G_loss_avg)
    train_hist['D_A_loss_one_epoch'].append(D_A_loss_avg)
    train_hist['D_B_loss_one_epoch'].append(D_B_loss_avg)
    
    print(
    '[%d/%d] - time: %.2f, G loss: %.3f, D_A loss: %.3f, D_B loss: %.3f' % ((epoch + 1), train_epoch, per_epoch_time, G_loss_avg, D_A_loss_avg, D_B_loss_avg))
    
    #Save image result
    with torch.no_grad():
        G_A.eval()
        G_B.eval()
        for n, (x, _) in enumerate(train_loader_A):
            x = x.to(device)
            G_A_result = G_A(x)
            G_A_recon = G_B(G_A_result)
            result = torch.cat((x[0], G_A_result[0], G_A_recon[0]), 2)
            path = os.path.join(name + '_results', 'Cycle_G_A', str(epoch+starting_epoch) + '_epoch_'  + '_train_' + str(n + 1) + '.png')
            plt.imsave(path, (result.cpu().numpy().transpose(1, 2, 0) + 1) / 2)
            if n == 4:
                break

        for n, (x, _) in enumerate(test_loader_A):
            x = x.to(device)
            G_A_result = G_A(x)
            G_A_recon = G_B(G_A_result)
            result = torch.cat((x[0], G_A_result[0], G_A_recon[0]), 2)
            path = os.path.join(name + '_results', 'Cycle_G_A', str(epoch+starting_epoch) + '_epoch_'  + '_test_' + str(n + 1) + '.png')
            plt.imsave(path, (result.cpu().numpy().transpose(1, 2, 0) + 1) / 2)
            if n == 4:
                break

        for n, (x,_) in enumerate(train_loader_B):
            x = x.to(device)
            G_B_result = G_B(x)
            G_B_recon = G_A(G_B_result)
            result = torch.cat((x[0],G_B_result[0],G_B_recon[0]),2)
            path = os.path.join(name+'_results','Cycle_G_B',str(epoch+starting_epoch) + '_epoch_' +'_train_'+str(n+1)+'.png')
            plt.imsave(path, (result.cpu().numpy().transpose(1, 2, 0) + 1) / 2)
            if n == 4:
                break
                
        torch.save(G_A.state_dict(), os.path.join(name + '_results', 'G_A.pkl'))
        torch.save(G_B.state_dict(), os.path.join(name + '_results', 'G_B.pkl')) 
        torch.save(D_A.state_dict(), os.path.join(name + '_results', 'D_A.pkl'))
        torch.save(D_B.state_dict(), os.path.join(name + '_results', 'D_B.pkl'))


training start!
==> Epoch 1/5
[1/5] - time: 2600.00, G loss: 0.952, D_A loss: 0.211, D_B loss: 0.202
==> Epoch 2/5
[2/5] - time: 2598.45, G loss: 1.026, D_A loss: 0.207, D_B loss: 0.181
==> Epoch 3/5
[3/5] - time: 2597.95, G loss: 1.131, D_A loss: 0.200, D_B loss: 0.157
==> Epoch 4/5
[4/5] - time: 2597.55, G loss: 1.114, D_A loss: 0.198, D_B loss: 0.162
==> Epoch 5/5
[5/5] - time: 2597.43, G loss: 1.122, D_A loss: 0.189, D_B loss: 0.159


In [25]:
for i,(x,y,z) in enumerate(zip(train_hist['G_loss_one_epoch'], train_hist['D_A_loss_one_epoch'], train_hist['D_B_loss_one_epoch'])):
    train_hist['G_loss_one_epoch'][i] = float(x.cpu().numpy())
    train_hist['D_A_loss_one_epoch'][i] = float(y.cpu().numpy())
    train_hist['D_B_loss_one_epoch'][i] = float(z.cpu().numpy())

In [17]:
train_hist_path = os.path.join(name+'_results', 'train_hist.json')
with open(train_hist_path, 'w') as file:
    json.dump(train_hist,file)