In [1]:
# connect to drive
from google.colab import drive
drive.mount("/content/drive", force_remount=True)
path = 'drive/My Drive/senior_1/DLCV/HW/hw3'

Mounted at /content/drive


In [None]:
!gdown --id '1Cz5eLSP7QRMkO0PqxZLldJ36nK6EWHR8' --output hw3_data.zip # 下載資料集
!unzip hw3_data.zip # 解壓縮
'''
1.  hw3_data/digits/mnistm/
      # num of data: 60,000 (training) / 10,000 (testing) # num of classes: 10 (0~9) # Image size: 28*28*3
      train/images.png
      test/images.png
      train.csv
      test.csv
2.  hw3_data/digits/svhn/
      # num of data: 73,257 (training) / 26,032 (testing) # num of classes: 10 (0~9) # Image size: 28*28*3
      train/images.png
      test/images.png
      train.csv
      test.csv
3.  hw3_data/digits/usps/
      # num of data: 7,291 (training) / 2,007 (testing) # num of classes: 10 (0~9) # Image size: 28*28*1
      train/images.png  
      test/images.png  
      train.csv
      test.csv
'''

In [None]:
# set packages
!pip3 install certifi==2020.6.20
!pip3 install cycler==0.10.0
!pip3 install joblib==0.17.0
!pip3 install kiwisolver==1.2.0
!pip3 install matplotlib==3.3.2
!pip3 install numpy==1.18.1
!pip3 install pandas==1.1.3
!pip3 install Pillow==8.0.0
!pip3 install pyparsing==2.4.7
!pip3 install python-dateutil==2.8.1
!pip3 install pytz==2020.1
!pip3 install scikit-learn==0.21.3
!pip3 install scipy==1.2.1
!pip3 install six==1.15.0
!pip3 install torch==1.4.0
!pip3 install torchvision==0.5.0

In [2]:
# Import 需要的套件
import os
import numpy as np
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import time
import pandas as pd
import random
import scipy.misc
import argparse
import imageio
from torch.autograd import Variable
from torch.optim import Adam, AdamW
import csv
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import manifold
from torch.autograd import Function
import torch.optim as optim

# 固定隨機種子
def same_seeds(seed):
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU.
    np.random.seed(seed)  # Numpy module.
    random.seed(seed)  # Python random module.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    print("torchvision.__version__ =", torchvision.__version__)
    print("torch.cuda.is_available() =", torch.cuda.is_available())

same_seeds(0)

torchvision.__version__ = 0.5.0
torch.cuda.is_available() = True


## Dataset

In [3]:
def sortfile(path):
    index = []
    image_dir = sorted(os.listdir(path)) # 把圖檔按照編號排列
    for i, file in enumerate(image_dir):
        index.append([file, int(file.replace('.png', ''))])
    index = sorted(index, key = lambda s: s[1])
    return index

def readfile(path, index, mode):
    x = np.zeros((len(index), 28, 28, 3), dtype=np.uint8)
    y = np.zeros((len(index)), dtype=np.uint8)
    pd_y = pd.read_csv(os.path.join(path, mode + ".csv"))
    for i, file in enumerate(index):
        img = imageio.imread(os.path.join(os.path.join(path, mode), file[0]))
        if 'usps' in path:
            temp_x = img.reshape(28, 28, 1) # expand dim
            x[i, :, :] = np.concatenate((temp_x, temp_x, temp_x), axis = 2)
        else:
            x[i, :, :] = img 
        y[i] = pd_y['label'][i] 
    return x, y

In [5]:
# 分別將 training set、testing set 用 sortfile, readfile 函式讀進來
workspace_dir = './hw3_data/digits'
digits_data_list = ['mnistm', 'svhn', 'usps']

for index in digits_data_list:
    print("Reading " + index + " data")
    split_ratio = 0.2
    train_index = sortfile(os.path.join(workspace_dir, os.path.join(index, "train")))
    test_index = sortfile(os.path.join(workspace_dir, os.path.join(index, "test")))
    if index == 'mnistm':
        mnistm_train_x, mnistm_train_y = readfile(os.path.join(workspace_dir, index), train_index, "train")
        # split train / val, ratio = split_ratio
        mnistm_train_x, mnistm_val_x, mnistm_train_y, mnistm_val_y = train_test_split(mnistm_train_x, mnistm_train_y, test_size = split_ratio, random_state = 3)
        mnistm_test_x, mnistm_test_y = readfile(os.path.join(workspace_dir, index), test_index, "test")
        mnistm_test_index = pd.DataFrame(test_index)[0].values.tolist()
    elif index == 'svhn':
        svhn_train_x, svhn_train_y = readfile(os.path.join(workspace_dir, index), train_index, "train")
        # split train / val, ratio = split_ratio
        svhn_train_x, svhn_val_x, svhn_train_y, svhn_val_y = train_test_split(svhn_train_x, svhn_train_y, test_size = split_ratio, random_state = 3) 
        svhn_test_x, svhn_test_y = readfile(os.path.join(workspace_dir, index), test_index, "test")
        svhn_test_index = pd.DataFrame(test_index)[0].values.tolist()  
    else:
        usps_train_x, usps_train_y = readfile(os.path.join(workspace_dir, index), train_index, "train")
        # split train / val, ratio = split_ratio
        usps_train_x,usps_val_x, usps_train_y, usps_val_y = train_test_split(usps_train_x, usps_train_y, test_size = split_ratio, random_state = 3)
        usps_test_x, usps_test_y = readfile(os.path.join(workspace_dir, index), test_index, "test") 
        usps_test_index = pd.DataFrame(test_index)[0].values.tolist()
    print("Size of {} training data = {}".format(index, round(len(train_index)*(1-split_ratio))))
    print("Size of {} validation data = {}".format(index, round(len(train_index)*split_ratio)))
    print("Size of {} testing data = {}".format(index, round(len(test_index))))

Reading mnistm data
Size of mnistm training data = 48000
Size of mnistm validation data = 12000
Size of mnistm testing data = 10000
Reading svhn data
Size of svhn training data = 58606
Size of svhn validation data = 14651
Size of svhn testing data = 26032
Reading usps data
Size of usps training data = 5833
Size of usps validation data = 1458
Size of usps testing data = 2007


In [6]:
# training 時需做 data augmentation
train_transform = transforms.Compose([
    transforms.ToPILImage(), # 轉成 python 圖片
    transforms.RandomHorizontalFlip(), # 隨機將圖片水平翻轉
    transforms.RandomRotation(15), # 隨機旋轉圖片，表示在（-15，+15）之間隨機旋轉，旋轉後空的地方補 0
    transforms.ToTensor(), # 將圖片轉成 Tensor，並把數值 normalize 到 [0,1] (data normalization) ps. Tensor 為多維張量
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)) # normalize
])

# testing 時不需做 data augmentation
test_transform = transforms.Compose([
    transforms.ToPILImage(),                                    
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)) # normalize
])

class ImgDataset(Dataset):
    def __init__(self, x, y=None, transform=None): # transform 自己決定
        self.x = x
        # label is required to be a LongTensor
        self.y = y
        if y is not None:
            self.y = torch.LongTensor(y)
        self.transform = transform
    def __len__(self):
        return len(self.x)
    def __getitem__(self, index):
        X = self.x[index]
        if self.transform is not None:
            X = self.transform(X)
        if self.y is not None:
            Y = self.y[index]
            return X, Y
        else:
            return X

In [7]:
batch_size = 64
train_x_list = [mnistm_train_x, svhn_train_x, usps_train_x]
val_x_list = [mnistm_val_x, svhn_val_x, usps_val_x]
test_x_list = [mnistm_test_x, svhn_test_x, usps_test_x]
train_y_list = [mnistm_train_y, svhn_train_y, usps_train_y]
val_y_list = [mnistm_val_y, svhn_val_y, usps_val_y]
test_y_list = [mnistm_test_y, svhn_test_y, usps_test_y]
for i in range(len(train_x_list)):
    train_set = ImgDataset(train_x_list[i], train_y_list[i], transform=train_transform)
    val_set = ImgDataset(val_x_list[i], val_y_list[i], transform=test_transform)
    test_set = ImgDataset(test_x_list[i], test_y_list[i], transform=test_transform)
    if i == 0: # mnistm
        mnistm_train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
        mnistm_val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
        mnistm_test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
        print('finish mnistm_loader')
    elif i == 1: # svhn
        svhn_train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
        svhn_val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
        svhn_test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
        print('finish svhn_loader')
    else: # usps
        usps_train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
        usps_val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
        usps_test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
        print('finish usps_loader')

finish mnistm_loader
finish svhn_loader
finish usps_loader


## Model

In [34]:
class USPS2MNISTM_SourceClassifier(nn.Module):

    def __init__(self):
        super(USPS2MNISTM_SourceClassifier, self).__init__()

        self.conv = nn.Sequential(
            # input_size = (28, 28, 3)
            # output_size = (input_size-kernel_size+2*padding)/stride + 1
            nn.Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(3, 3)), 
            nn.BatchNorm2d(64),
            nn.ReLU(),
            # [batch_size, 64, 32, 32]

            nn.Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            # [batch_size, 128, 32, 32]

            nn.Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),
             # [batch_size, 256, 16, 16]

            nn.Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),
            # [batch_size, 512, 8, 8]

            nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),
            # [batch_size, 512, 4, 4]

            nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),
            # [batch_size, 512, 2, 2]      
        )   
        
        self.linear = nn.Sequential(
            # flatten
            nn.Linear(in_features=512*2*2, out_features=1024, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5, inplace=False),

            nn.Linear(in_features=1024, out_features=512, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5, inplace=False),

            nn.Linear(in_features=512, out_features=10, bias=True),
        )       

    def forward(self, x):
        x = self.conv(x)
        x = x.view(x.size()[0], -1)
        out = self.linear(x)
        return x

"""Discriminator model for ADDA."""

class Discriminator(nn.Module):
    """Discriminator model for source domain."""

    def __init__(self, input_dims, hidden_dims, output_dims):
        """Init discriminator."""
        super(Discriminator, self).__init__()
        self.layer = nn.Sequential(
            nn.Linear(input_dims, hidden_dims),
            # nn.BatchNorm1d(hidden_dims),
            nn.ReLU(),
            nn.Linear(hidden_dims, hidden_dims),
            # nn.BatchNorm1d(hidden_dims),
            nn.ReLU(),
            nn.Linear(hidden_dims, output_dims),
            # nn.LogSoftmax()
            nn.Softmax()
        )

    def forward(self, x):
        """Forward the discriminator."""
        out = self.layer(x)
        return out

#util

In [None]:
"""Utilities for ADDA."""
def init_weights(layer):
    """Init weights for layers w.r.t. the original paper."""
    layer_name = layer.__class__.__name__
    if layer_name.find("Conv") != -1:
        layer.weight.data.normal_(0.0, 0.02)
    elif layer_name.find("BatchNorm") != -1:
        layer.weight.data.normal_(1.0, 0.02)
        layer.bias.data.fill_(0)

def init_model(net, restore):
    """Init models with cuda and weights."""
    # init weights of model
    # net.apply(init_weights)

    # restore model weights
    if restore is not None and os.path.exists(restore):
        net.load_state_dict(torch.load(restore))
        net.restored = True
        print("Restore model from: {}".format(os.path.abspath(restore)))

    return net

In [37]:
same_seeds(0)
num_epochs = 100
path = 'drive/My Drive/senior_1/DLCV/HW/hw3'
best_val_acc = 0.0

# load dataset
src_data_loader = usps_train_loader
src_data_loader_eval = usps_test_loader
tgt_data_loader = mnistm_train_loader
tgt_data_loader_eval = mnistm_test_loader

# set pretrained model
USPS2MNISTM_source_model = USPS2MNISTM_SourceClassifier().cuda()
USPS2MNISTM_source_model.load_state_dict(torch.load(os.path.join(path, 'p3_USPS2MNISTM_source_model.pkl')))

# set model
src_encoder = USPS2MNISTM_source_model.conv.cuda() # LenetEncoder
src_classifier = USPS2MNISTM_source_model.linear.cuda() # LenetClassifier
tgt_encoder = USPS2MNISTM_SourceClassifier().conv.cuda()
critic = Discriminator(input_dims=512*2*2, hidden_dims=512, output_dims=2).cuda()

criterion = nn.CrossEntropyLoss()
# criterion = nn.BCEWithLogitsLoss()
optimizer_tgt = optim.Adam(tgt_encoder.parameters(), lr=0.0001, betas=(0.5,0.9))
optimizer_critic = optim.Adam(critic.parameters(), lr=0.0001, betas=(0.5,0.9))
len_data_loader = min(len(src_data_loader), len(tgt_data_loader))

for epoch in range(num_epochs):
    tgt_encoder.train()
    critic.train()
    domain_acc = 0.0
    for step, ((images_src, _), (images_tgt, _)) in enumerate(zip(src_data_loader, tgt_data_loader)):
        ###########################
        # 2.1 train discriminator #
        ###########################
        images_src = images_src.cuda()
        images_tgt = images_tgt.cuda()

        # zero gradients for optimizer
        optimizer_critic.zero_grad()

        # extract and concat features
        feat_src = src_encoder(images_src)
        feat_tgt = tgt_encoder(images_tgt)
        feat_concat = torch.cat((feat_src, feat_tgt), dim=0)

        # predict on discriminator
        pred_concat = critic(feat_concat.view(-1, 512*2*2).detach())

        # prepare real and fake label
        label_src = torch.ones(feat_src.size(0)).long().cuda() # source = 1
        label_tgt = torch.zeros(feat_tgt.size(0)).long().cuda() # target = 1
        label_concat = torch.cat((label_src, label_tgt), dim=0)

        # compute loss for critic
        loss_critic = criterion(pred_concat, label_concat)
        loss_critic.backward()

        # optimize critic
        optimizer_critic.step()
        pred_cls = torch.squeeze(pred_concat.max(1)[1])
        domain_acc += (pred_cls == label_concat).float().mean()

        ############################
        # 2.2 train target encoder #
        ############################

        # zero gradients for optimizer
        optimizer_critic.zero_grad()
        optimizer_tgt.zero_grad()

        # extract and target features
        feat_tgt = tgt_encoder(images_tgt)

        # predict on discriminator
        pred_tgt = critic(feat_tgt.view(-1, 512*2*2))

        # prepare fake labels
        label_tgt = torch.ones(feat_tgt.size(0)).long().cuda()

        # compute loss for target encoder
        loss_tgt = criterion(pred_tgt, label_tgt)
        loss_tgt.backward()

        # optimize target encoder
        optimizer_tgt.step()

    tgt_encoder.eval()
    src_classifier.eval()
    # init loss and accuracy
    target_val_loss = 0.0
    target_val_acc = 0.0
    # set loss function
    criterion = nn.CrossEntropyLoss()
    # evaluate network
    with torch.no_grad():
        target_val_loss = 0.0
        target_val_acc = 0.0
        for (images, labels) in tgt_data_loader_eval:
            images = images.cuda()
            labels = labels.cuda()
            preds = src_classifier(tgt_encoder(images).view(-1, 512*2*2))
            target_val_loss += criterion(preds, labels).item()
            target_val_acc += torch.sum(torch.argmax(preds, dim=1) == labels).item()
    if target_val_acc > best_val_acc:
        best_val_acc = target_val_acc
        torch.save(tgt_encoder.state_dict(), os.path.join(path, 'p4_USPS2MNISTM_tgt_encoder.pkl'))
        torch.save(src_classifier.state_dict(), os.path.join(path, 'p3_USPS2MNISTM_src_classifier.pkl'))
        print('save model')

    print('target val acc: {:.4f}, target val loss: {:.5f}, domain acc: {:.5f}'.format(
        target_val_acc/len(tgt_data_loader_eval.dataset), target_val_loss/len(tgt_data_loader_eval), domain_acc/len_data_loader
    ))  

torchvision.__version__ = 0.5.0
torch.cuda.is_available() = True


  input = module(input)


save model
target val acc: 0.0924, target val loss: 2.31350, domain acc: 0.69585
target val acc: 0.0919, target val loss: 2.31511, domain acc: 0.73512
save model
target val acc: 0.0974, target val loss: 2.32059, domain acc: 0.90820
target val acc: 0.0924, target val loss: 2.31620, domain acc: 0.90191
save model
target val acc: 0.1010, target val loss: 2.31660, domain acc: 0.89079
target val acc: 0.0965, target val loss: 2.31292, domain acc: 0.95050


KeyboardInterrupt: ignored

#pretrain

In [None]:
"""Pre-train encoder and classifier for source dataset."""

def train_src(encoder, classifier, src_train_data_loader, src_eval_data_loader, tgt_eval_data_loader):
    """Train classifier for source domain."""
    ####################
    # 1. setup network #
    ####################

    # setup criterion and optimizer
    # optimizer = optim.Adam(list(encoder.parameters()) + list(classifier.parameters()), lr=1e-3, betas=(0.5, 0.9))
    optimizer_c = optim.Adam(classifier.parameters(), lr=1e-3)
    optimizer_e = optim.Adam(encoder.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()

    ####################
    # 2. train network #
    ####################
    num_epochs_pre = 100
    for epoch in range(num_epochs_pre):
        encoder.train()
        classifier.train()
        for step, (images, labels) in enumerate(src_train_data_loader):
            images = images.cuda()
            labels = labels.cuda()
            # zero gradients for optimizer
            optimizer_c.zero_grad()
            optimizer_e.zero_grad()

            # compute loss for critic
            preds = classifier(encoder(images))
            loss = criterion(preds, labels)

            # optimize source classifier
            loss.backward()
            optimizer_c.step()
            optimizer_e.step()

        print("Epoch [{}/{}]: loss={}".format(
                    epoch + 1, num_epochs_pre, loss.data.item()))

        # eval model on test set
        eval_src(encoder, classifier, src_eval_data_loader, tgt_eval_data_loader)

        # save model parameters
        if ((epoch + 1) % 20 == 0):
            save_model(encoder, "ADDA-source-encoder-{}.pt".format(epoch + 1))
            save_model(classifier, "ADDA-source-classifier-{}.pt".format(epoch + 1))

    # # save final model
    save_model(encoder, "ADDA-source-encoder-final.pt")
    save_model(classifier,"ADDA-source-classifier-final.pt")

    return encoder, classifier


def eval_src(encoder, classifier, src_eval_data_loader, tgt_eval_data_loader):
    """Evaluate classifier for source domain."""
    # set eval state for Dropout and BN layers
    encoder.eval()
    classifier.eval()

    # init loss and accuracy
    loss_source = 0
    loss_target = 0
    acc_source = 0
    acc_target = 0

    # set loss function
    criterion = nn.CrossEntropyLoss()

    # evaluate network
    with torch.no_grad():
        for (images, labels) in src_eval_data_loader:
            images = images.cuda()
            labels = labels.cuda()
            preds = classifier(encoder(images))
            loss_source += criterion(preds, labels).data.item()
            acc_source += torch.sum(torch.argmax(preds, dim=1) == labels).item()

        for (images, labels) in tgt_eval_data_loader:
            images = images.cuda()
            labels = labels.cuda()
            preds = classifier(encoder(images))
            loss_target += criterion(preds, labels).item()
            acc_target += torch.sum(torch.argmax(preds, dim=1) == labels).item()

    print("Source Avg Loss = {}, Source Avg Accuracy = {:2%}".format(
        loss_source/len(src_eval_data_loader), acc_source/len(src_eval_data_loader.dataset)))
    print("Target Avg Loss = {}, Target Avg Accuracy = {:2%}".format(
        loss_target/len(tgt_eval_data_loader), acc_target/len(tgt_eval_data_loader.dataset)))

# ADDA


In [None]:
def train_tgt(src_encoder, tgt_encoder, critic,
              src_data_loader, tgt_data_loader):
    """Train encoder for target domain."""
    ####################
    # 1. setup network #
    ####################
    # setup criterion and optimizer
    # criterion = nn.CrossEntropyLoss()
    criterion = nn.BCEWithLogitsLoss()
    optimizer_tgt = optim.Adam(tgt_encoder.parameters(), lr=1e-4, betas=(0.5,0.9))
    optimizer_critic = optim.Adam(critic.parameters(), lr=1e-4, betas=(0.5,0.9))
    len_data_loader = min(len(src_data_loader), len(tgt_data_loader))

    ####################
    # 2. train network #
    ####################
    num_epochs = 5
    for epoch in range(num_epochs):
        tgt_encoder.train()
        critic.train()
        # zip source and target data pair
        data_zip = enumerate(zip(src_data_loader, tgt_data_loader))
        for step, ((images_src, _), (images_tgt, _)) in data_zip:
            ###########################
            # 2.1 train discriminator #
            ###########################
            images = images.cuda()
            labels = labels.cuda()
            # zero gradients for optimizer
            optimizer_critic.zero_grad()

            # extract and concat features
            feat_src = src_encoder(images_src)
            feat_tgt = tgt_encoder(images_tgt)
            feat_concat = torch.cat((feat_src, feat_tgt), 0)

            # predict on discriminator
            pred_concat = critic(feat_concat.detach())

            # prepare real and fake label
            label_src = torch.ones(feat_src.size(0)).long().cuda()
            label_tgt = torch.zeros(feat_tgt.size(0)).long().cuda()
            label_concat = torch.cat((label_src, label_tgt), 0)

            # compute loss for critic
            loss_critic = criterion(pred_concat, label_concat)
            loss_critic.backward()

            # optimize critic
            optimizer_critic.step()

            pred_cls = torch.squeeze(pred_concat.max(1)[1])
            acc = (pred_cls == label_concat).float().mean()

            ############################
            # 2.2 train target encoder #
            ############################

            # zero gradients for optimizer
            optimizer_critic.zero_grad()
            optimizer_tgt.zero_grad()

            # extract and target features
            feat_tgt = tgt_encoder(images_tgt)

            # predict on discriminator
            pred_tgt = critic(feat_tgt)

            # prepare fake labels
            label_tgt = torch.ones(feat_tgt.size(0)).long().cuda()

            # compute loss for target encoder
            loss_tgt = criterion(pred_tgt, label_tgt)
            loss_tgt.backward()

            # optimize target encoder
            optimizer_tgt.step()

            #######################
            # 2.3 print step info #
            #######################
            if ((step + 1) % 50 == 0):
                print("Epoch [{}/{}] Step [{}/{}]: d_loss={:.5f} g_loss={:.5f} acc={:.5f}".format(
                          epoch + 1, num_epochs, step + 1, len_data_loader, loss_critic.item(), loss_tgt.item(), acc.item()))

        #############################
        # 2.4 save model parameters #
        #############################
        if ((epoch + 1) % 50 == 0):
            torch.save(critic.state_dict(), os.path.join(model_path, "ADDA-critic-{}.pt".format(epoch + 1)))
            torch.save(tgt_encoder.state_dict(), os.path.join(model_path, "ADDA-target-encoder-{}.pt".format(epoch + 1)))
            print('save model')

    torch.save(critic.state_dict(), os.path.join(model_path, "ADDA-critic-final.pt"))
    torch.save(tgt_encoder.state_dict(), os.path.join(model_path, "ADDA-target-encoder-final.pt"))
    return tgt_encoder

def eval_tgt(encoder, classifier, data_loader):
    """Evaluation for target encoder by source classifier on target dataset."""
    # set eval state for Dropout and BN layers
    encoder.eval()
    classifier.eval()

    # init loss and accuracy
    loss = 0
    acc = 0

    # set loss function
    criterion = nn.CrossEntropyLoss()

    # evaluate network
    with torch.no_grad():
        for (images, labels) in data_loader:
            images = images.cuda()
            labels = labels.cuda()

            preds = classifier(encoder(images))
            loss += criterion(preds, labels).item()
            acc += torch.sum(torch.argmax(preds, dim=1) == labels).item()
    print("Avg Loss = {}, Avg Accuracy = {}".format(loss/len(data_loader), acc/len(data_loader.dataset)))

In [None]:
#usps_mnistm
import torch.backends.cudnn as cudnn
def save_model(net, filename):
    """Save trained model."""
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    torch.save(net.state_dict(),
               os.path.join(model_path, filename))
    print("save pretrained model to: {}".format(os.path.join(model_path,filename)))

if __name__ == '__main__':
    # init random seed
    same_seeds(0)
    model_path = './'
    # load dataset
    src_data_loader = usps_train_loader
    src_data_loader_eval = usps_test_loader
    tgt_data_loader = mnistm_train_loader
    tgt_data_loader_eval = mnistm_test_loader

    # load models
    src_encoder = init_model(net=LeNetEncoder().cuda(), restore=os.path.join(model_path, "ADDA-source-encoder-final.pkl"))
    src_classifier = init_model(net=LeNetClassifier().cuda(), restore= os.path.join(model_path, "ADDA-source-classifier-final.pkl"))
    tgt_encoder = init_model(net=LeNetEncoder().cuda(), restore=os.path.join(model_path, "tgt_encoder.pkl"))
    critic = init_model(Discriminator(input_dims=500, hidden_dims=500, output_dims=2), restore=os.path.join(model_path, "ADDA_discriminator.pkl"))

    # train source model
    print("=== Training classifier for source domain ===")
    print(">>> Source Encoder <<<")
    print(src_encoder)
    print(">>> Source Classifier <<<")
    print(src_classifier)
    src_model_trained = True
    if not (src_encoder.restored and src_classifier.restored and src_model_trained):
        src_encoder, src_classifier = train_src(src_encoder, src_classifier, src_data_loader, src_data_loader_eval, tgt_data_loader_eval)

    # train target encoder by GAN
    print("=== Training encoder for target domain ===")
    print(">>> Target Encoder <<<")
    print(tgt_encoder)
    print(">>> Critic <<<")
    print(critic)

    # init weights of target encoder with those of source encoder
    if not tgt_encoder.restored:
        tgt_encoder.load_state_dict(src_encoder.state_dict())
    tgt_model_trained = True
    tgt_encoder_restore = "snapshots/ADDA-target-encoder-final.pkl"
    if not (tgt_encoder.restored and critic.restored and tgt_model_trained):
        tgt_encoder = train_tgt(src_encoder, tgt_encoder, critic, src_data_loader, tgt_data_loader)

    # eval target encoder on test set of target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> source only <<<")
    eval_tgt(src_encoder, src_classifier, tgt_data_loader_eval)
    print(">>> domain adaption <<<")
    eval_tgt(tgt_encoder, src_classifier, tgt_data_loader_eval)

torchvision.__version__ = 0.5.0
torch.cuda.is_available() = True
=== Training classifier for source domain ===
>>> Source Encoder <<<
LeNetEncoder(
  (encoder): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU()
    (13): Max

UnboundLocalError: ignored