In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload 
%autoreload 2

In [2]:
import os 
# Switch the working directory to the project folder (presumably so the local
# `utils_HD` module imported later can be found — confirm).
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
os.chdir('/home/oldrain123/MMD/')
# Restrict this process to physical GPU 1. With a single visible device, the
# "cuda:0" used later in this notebook refers to that GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [3]:
import numpy as np
import torch
import pickle
from tqdm.auto import tqdm
from utils_HD import MatConvert, MMDu, TST_MMD_u

In [4]:
class ModelLatentF(torch.nn.Module):
    """Fully connected feature extractor used as the deep part of the kernel.

    The network is four ``Linear`` layers with a ``Softplus`` after each of
    the first three: ``x_in -> H -> H -> H -> x_out``.
    """

    def __init__(self, x_in, H, x_out):
        """Build the layer stack.

        Args:
            x_in: size of each input sample.
            H: width of the three hidden layers.
            x_out: size of the produced feature vector.
        """
        super().__init__()
        self.restored = False

        # Three hidden (Linear + Softplus) stages followed by a plain Linear head.
        widths = (x_in, H, H, H)
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(torch.nn.Linear(fan_in, fan_out, bias=True))
            stack.append(torch.nn.Softplus())
        stack.append(torch.nn.Linear(H, x_out, bias=True))
        self.latent = torch.nn.Sequential(*stack)

    def forward(self, input):
        """Map ``input`` through the layer stack and return the features."""
        return self.latent(input)

In [5]:
# Global reproducibility settings.
is_cuda = True  # when set, the model is moved to the GPU with .cuda()
torch.backends.cudnn.deterministic = True
# Seed the torch CPU generator, the torch CUDA generator and NumPy's global RNG.
torch.manual_seed(1102)
torch.cuda.manual_seed(1102)
np.random.seed(1102)

In [6]:
# Experiment configuration

# Tensor placement
dtype = torch.float
device = torch.device("cuda:0")

# Two-sample test
N_per = 200    # number of permutations per test
alpha = 0.05   # significance level of the test

# Data
n = 4000       # samples drawn per mixture component
d = 10         # data dimension

# Feature network sizes (all derived from the data dimension)
x_in = d
H = 3 * d
x_out = 3 * d

# Optimisation
learning_rate = 5e-5
N_epoch = 1000  # number of training epochs

# Repetitions
K = 10          # number of trials
N = 100         # number of test sets per trial
N_f = float(N)  # same count as a float, used as the divisor for test power

In [7]:
# Mixture parameters for the two sample distributions.
Num_clusters = 2

# Component means: the first stays at the origin, the second is shifted by 0.5
# in every coordinate.
mu_mx = np.zeros([Num_clusters, d])
mu_mx[1] += 0.5

# Covariance used for s1: identity for every component.
sigma_mx_1 = np.eye(d)

# Covariances used for s2: identity except for the (0, 1)/(1, 0) entries, which
# are +0.5 for the first component and -0.5 for the second.
sigma_mx_2 = [np.eye(d) for _ in range(2)]
sigma_mx_2[0][[0, 1], [1, 0]] = 0.5
sigma_mx_2[1][[0, 1], [1, 0]] = -0.5

# Sample buffers, filled in place with n rows per component.
s1 = np.zeros([n * Num_clusters, d])
s2 = np.zeros([n * Num_clusters, d])

In [8]:
# Pre-allocate result buffers.
Results = np.zeros((1, K))        # test power of each of the K trials
J_star_u = np.zeros(N_epoch)      # training objective recorded at every epoch
J_star_adp = np.zeros(N_epoch)    # same shape as J_star_u
# Per-trial values of epsilon, sigma and sigma_0 used for testing.
ep_OPT, s_OPT, s0_OPT = (np.zeros(K) for _ in range(3))

In [9]:
from torch.cuda.amp import GradScaler, autocast
scaler = GradScaler()

# Repeat the experiment K times and report the average test power (rejection rate).
#
# Each trial:
#   1. draws a training set S = [s1; s2] from the two mixtures,
#   2. trains the deep kernel (network + epsilon, sigma, sigma_0) by minimising
#      -MMD / std(MMD) with Adam under mixed precision,
#   3. runs the permutation test TST_MMD_u on N freshly drawn test sets and
#      records the fraction of rejections in Results[0, kk].
#
# NOTE(review): the recorded run of this cell ended in a CUDA out-of-memory
# error after the first training log line. S has 2 * Num_clusters * n rows and
# MMDu presumably materialises pairwise matrices over them — confirm, and lower
# n if the GPU is shared or small.
for kk in tqdm(range(K), desc="Experiment"):
    torch.manual_seed(kk * 19 + n)
    torch.cuda.manual_seed(kk * 19 + n)

    # Initialize the feature network and the three scalar kernel parameters.
    if is_cuda:
        model_u = ModelLatentF(x_in, H, x_out).cuda()
    else:
        model_u = ModelLatentF(x_in, H, x_out)
    epsilonOPT = torch.log(MatConvert(np.random.rand(1) * 10 ** (-10), device, dtype))
    epsilonOPT.requires_grad = True
    sigmaOPT = MatConvert(np.ones(1) * np.sqrt(2 * d), device, dtype)
    sigmaOPT.requires_grad = True
    sigma0OPT = MatConvert(np.ones(1) * np.sqrt(0.1), device, dtype)
    sigma0OPT.requires_grad = True

    # Setup optimizer for training deep kernel
    optimizer_u = torch.optim.Adam(list(model_u.parameters()) + [epsilonOPT] + [sigmaOPT] + [sigma0OPT],
                                   lr=learning_rate)

    # Generate HDGM-D training data (s1 and s2 are filled in place).
    for i in range(Num_clusters):
        np.random.seed(seed=1102*kk + i + n)
        s1[n * (i):n * (i + 1), :] = np.random.multivariate_normal(mu_mx[i], sigma_mx_1, n)
    for i in range(Num_clusters):
        np.random.seed(seed=819*kk + 1 + i + n)
        s2[n * (i):n * (i + 1), :] = np.random.multivariate_normal(mu_mx[i], sigma_mx_2[i], n)
        # REPLACE above line with
        # s2[n * (i):n * (i + 1), :] = np.random.multivariate_normal(mu_mx[i], sigma_mx_1, n)
        # for validating type-I error (s1 and s2 are from the same distribution)
    if kk == 0:
        # Keep an independent snapshot of the first training set: s1 and s2 are
        # overwritten in place below, so a plain alias would not preserve it.
        s1_o = s1.copy()
        s2_o = s2.copy()
    S = np.concatenate((s1, s2), axis=0)
    S = MatConvert(S, device, dtype)
    N1 = Num_clusters*n
    N2 = Num_clusters*n

    # Train deep kernel to maximize test power
    np.random.seed(seed=1102)
    torch.manual_seed(1102)
    torch.cuda.manual_seed(1102)
    for t in range(N_epoch):
        # Compute epsilon (sigmoid of epsilonOPT), sigma and sigma_0 (squares,
        # hence non-negative) from the unconstrained parameters.
        ep = torch.exp(epsilonOPT) / (1 + torch.exp(epsilonOPT))
        sigma = sigmaOPT ** 2
        sigma0_u = sigma0OPT ** 2

        optimizer_u.zero_grad()

        with autocast():
            # Compute output of the deep network
            modelu_output = model_u(S)
            # Compute J (STAT_u): negative MMD estimate over its regularised std
            TEMP = MMDu(modelu_output, N1, S, sigma, sigma0_u, ep)
            mmd_value_temp = -1 * (TEMP[0])
            mmd_std_temp = torch.sqrt(TEMP[1]+10**(-5))
            STAT_u = torch.div(mmd_value_temp, mmd_std_temp)

        # Read each scalar from the device once and reuse the Python floats.
        mmd_value_f = mmd_value_temp.item()
        mmd_std_f = mmd_std_temp.item()
        stat_f = STAT_u.item()
        if mmd_std_f == 0:
            print('error!!')
        if np.isnan(mmd_std_f):
            print('error!!')

        J_star_u[t] = stat_f
        scaler.scale(STAT_u).backward()
        scaler.step(optimizer_u)
        scaler.update()

        # Drop references to this epoch's forward results so their memory can be
        # reused before the next forward pass allocates new tensors.
        del TEMP, modelu_output

        # Print MMD, std of MMD and J
        if t % 100 == 0:
            print("mmd_value: ", -1 * mmd_value_f, "mmd_std: ", mmd_std_f, "Statistic: ",
                  -1 * stat_f)

    # Evaluation needs no gradients; disabling autograd avoids building a graph
    # for the forward pass and the kernel computation.
    with torch.no_grad():
        h_u, threshold_u, mmd_value_u = TST_MMD_u(model_u(S), N_per, N1, S, sigma, sigma0_u, ep, alpha, device, dtype)
    print("h:", h_u, "Threshold:", threshold_u, "MMD_value:", mmd_value_u)
    ep_OPT[kk] = ep.item()
    s_OPT[kk] = sigma.item()
    s0_OPT[kk] = sigma0_u.item()

    # Compute test power of deep kernel based MMD
    H_u = np.zeros(N)
    T_u = np.zeros(N)
    M_u = np.zeros(N)
    np.random.seed(1102)
    count_u = 0
    for k in tqdm(range(N), desc="Testing"):
        # Generate a fresh HDGM-D test set
        for i in range(Num_clusters):
            np.random.seed(seed=1102 * (k+2) + 2*kk + i + n)
            s1[n * (i):n * (i + 1), :] = np.random.multivariate_normal(mu_mx[i], sigma_mx_1, n)
        for i in range(Num_clusters):
            np.random.seed(seed=819 * (k + 1) + 2*kk + i + n)
            s2[n * (i):n * (i + 1), :] = np.random.multivariate_normal(mu_mx[i], sigma_mx_2[i], n)
            # REPLACE above line with
            # s2[n * (i):n * (i + 1), :] = np.random.multivariate_normal(mu_mx[i], sigma_mx_1, n)
            # for validating type-I error (s1 and s2 are from the same distribution)
        S = np.concatenate((s1, s2), axis=0)
        S = MatConvert(S, device, dtype)
        # Run two sample test (deep kernel) on generated data, without autograd
        with torch.no_grad():
            h_u, threshold_u, mmd_value_u = TST_MMD_u(model_u(S), N_per, N1, S, sigma, sigma0_u, ep, alpha, device, dtype)
        # Gather results
        count_u = count_u + h_u
        print("MMD-DK:", count_u, "Threshold:", threshold_u, "MMD_value:", mmd_value_u)
        H_u[k] = h_u
        T_u[k] = threshold_u
        M_u[k] = mmd_value_u
    # Print test power of MMD-D
    print("Test Power of MMD-D: ", H_u.sum() / N_f)
    Results[0, kk] = H_u.sum() / N_f
    print("Test Power of MMD-D (K times): ", Results[0])
    print("Average Test Power of MMD-D: ", Results[0].sum() / (kk + 1))
np.save('./Results_HDGM_n'+str(n)+'_d'+str(d)+'_H1_MMD-D', Results)

Experiment:   0%|          | 0/10 [00:00<?, ?it/s]

mmd_value:  3.55839729309082e-05 mmd_std:  0.003162210127555641 Statistic:  0.011252880578943147


OutOfMemoryError: CUDA out of memory. Tried to allocate 246.00 MiB. GPU 0 has a total capacty of 23.68 GiB of which 129.00 MiB is free. Process 760606 has 602.00 MiB memory in use. Including non-PyTorch memory, this process has 22.96 GiB memory in use. Of the allocated memory 8.79 GiB is allocated by PyTorch, and 115.95 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF