<a href="https://colab.research.google.com/github/rjin1/NICE_Experiments_Notebook/blob/main/NICE_Experiment_Main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [85]:
import numpy as np
import torch.optim as optim
import torch
import torch.utils.data as data
import torch.autograd as autograd

from scipy.io import loadmat
from scipy.io import savemat
import matplotlib.pyplot as plt

import sys
import os

import argparse

# np.set_printoptions(threshold=sys.maxsize)


In [86]:
def Gen_Source(loc_filename = './drive/MyDrive/NICE_JupyterNotebook/DistributionParams/GumbelLocatParams.mat', loc_varname = 'locat_param',
               scale_filename = './drive/MyDrive/NICE_JupyterNotebook/DistributionParams/GumbelScaleParams.mat', sacle_varname = 'scale_param',
               N_sample = 7668, seed_gs = 1):
  """Draw Gumbel-distributed source signals from parameters stored in .mat files.

  Args:
    loc_filename: path of the .mat file holding the location parameters.
    loc_varname: name of the variable to read from ``loc_filename``.
    scale_filename: path of the .mat file holding the scale parameters.
    sacle_varname: name of the variable to read from ``scale_filename``
      (the spelling is part of the public signature and is kept as is).
    N_sample: number of samples drawn for every source.
    seed_gs: seed given to the global NumPy RNG before sampling.

  Returns:
    A float64 array of shape (N_source, N_sample); row ``i`` is sampled with
    location ``loc[i, 0]`` and scale ``scale[i, 0]``.

  Side effects:
    Re-seeds the global ``np.random`` state with ``seed_gs``.
  """
  location = loadmat(loc_filename)[loc_varname].astype(np.float64)
  spread = loadmat(scale_filename)[sacle_varname].astype(np.float64)

  # Both parameter arrays are expected to be column vectors (N_source x 1);
  # the number of sources is taken from the location array.
  n_rows = location.shape[0]
  sources = np.zeros((n_rows, N_sample), np.float64)

  # Fix the RNG state so the generated sources are reproducible.
  np.random.seed(seed_gs)
  for row in range(n_rows):
    mu = location[row, 0]
    beta = spread[row, 0]
    # One draw of N_sample values per source, filled row by row so the
    # random stream is consumed in source order.
    sources[row, :] = np.random.gumbel(mu, beta, N_sample)

  return sources

In [87]:
def Gen_Mixture(S, ratio=1):
  """Mix the sources sample-by-sample with a quadratic nonlinearity.

  For every sample (column) ``i`` and source (row) ``j`` the mixture is

      X[j, i] = S[j, i] + ratio * ( S[j, i] * (sum_k S[k, i] - S[j, i])
                                    + sum_k S[k, i]**2 - S[j, i]**2 )

  i.e. each source plus ``ratio`` times its products with all other sources
  and the squares of all other sources of the same sample.

  Args:
    S: array-like of shape (N_source, N_sample); converted to float64.
    ratio: scalar weight of the nonlinear term (0 returns a copy of S).

  Returns:
    A new float64 array X with the same shape as S. The input is not modified.

  Raises:
    ValueError: if S is not two-dimensional.
  """
  S = np.asarray(S, dtype=np.float64)
  if S.ndim != 2:
    raise ValueError("S must be a 2-D array of shape (N_source, N_sample), got ndim=%d" % S.ndim)

  # Per-sample totals are computed once and broadcast over the sources,
  # instead of being recomputed for every (source, sample) pair.
  squares = np.square(S)
  column_sum = S.sum(axis=0, keepdims=True)
  column_sq_sum = squares.sum(axis=0, keepdims=True)

  cross_terms = S * (column_sum - S)      # S[j] * sum of the other sources
  other_squares = column_sq_sum - squares  # sum of squares of the other sources

  return S + ratio * (cross_terms + other_squares)

In [88]:
def Train_Valid_Split(Data, N_train, N_valid, seed_tvs):
  """Randomly split the samples (columns) of ``Data`` into two disjoint sets.

  Args:
    Data: np.array of shape (N_source, N_sample).
    N_train: number of columns put in the training set.
    N_valid: number of columns put in the validation set.
    seed_tvs: seed given to the global NumPy RNG before shuffling.

  Returns:
    (Data_train, Data_valid): the first ``N_train`` shuffled columns and the
    next ``N_valid`` shuffled columns, respectively.

  Side effects:
    Re-seeds the global ``np.random`` state with ``seed_tvs``.
  """
  # Shuffle the column indices reproducibly.
  np.random.seed(seed_tvs)
  shuffled = np.random.permutation(Data.shape[1])

  # Consecutive, non-overlapping windows of the shuffled index list.
  valid_end = N_train + N_valid
  train_cols = shuffled[:N_train]
  valid_cols = shuffled[N_train:valid_end]

  return Data[:, train_cols], Data[:, valid_cols]

In [89]:
def build_arg_parser():
    """Build the command-line parser holding every training setting.

    Option strings, destinations, types and default values are the ones the
    notebook has always used. Two things differ from the previous inline
    parser:

    * ``'fMRI'`` (the default of ``--dataset``) is now one of the accepted
      choices, so passing ``--dataset fMRI`` explicitly no longer fails.
    * Help texts show the real default through ``%(default)s`` instead of
      hand-written bracketed values that had drifted from the defaults.

    Returns:
        argparse.ArgumentParser: the configured parser (nothing is parsed yet).
    """
    parser = argparse.ArgumentParser(description="Train a fresh NICE model and save.")
    # configuration settings:
    parser.add_argument("--dataset", default='fMRI', dest='dataset',
                        choices=('fMRI', 'tfd', 'cifar10', 'svhn', 'mnist'),
                        help="Dataset to train the NICE model on. [%(default)s]")
    parser.add_argument("--epochs", dest='num_epochs', default=40000, type=int,
                        help="Number of epochs to train on. [%(default)s]")
    parser.add_argument("--batch_size", dest="batch_size_train", default=10, type=int,
                        help="Number of training examples per batch. [%(default)s]")
    parser.add_argument("--Train_size", dest="size_train", default=5110, type=int,
                        help="Number of training examples. [%(default)s]")
    parser.add_argument("--Validation_size", dest="size_valid", default=2558, type=int,
                        help="Number of validation examples. [%(default)s]")
    parser.add_argument("--savedir", dest='savedir',
                        default="./drive/MyDrive/NICE_JupyterNotebook/Results/",
                        help="Where to save the results and trained models.")
    parser.add_argument("--initialization_path", dest='init_filepath',
                        default="./fMRI_data/",
                        help="Where to load the pretrained model parameters.")

    # validation and test:
    parser.add_argument("--val_batch_size", dest="batch_size_val", default=2558, type=int,
                        help="Number of validation examples per batch. [%(default)s]")
    parser.add_argument("--test_batch_size", dest="batch_size_test", default=7668, type=int,
                        help="Number of test examples per batch. [%(default)s]")
    parser.add_argument("--early_stop_iteration", dest="early_stop_iter", default=20, type=int,
                        help="Number of iterations for early stopping. [%(default)s]")

    # model settings:
    parser.add_argument("--nonlinearity_hiddens", dest='nhidden', default=2, type=int,
                        help="Hidden size of inner layers of nonlinearity. [%(default)s]")
    parser.add_argument("--nonlinearity_dropout", dest='dropout_p', default=0.0, type=float,
                        help="The dropout probability in each layer (except scaling layer). [%(default)s]")
    parser.add_argument("--prior", choices=('logistic', 'Gumbel', 'prior', 'regression'), default='Gumbel',
                        help="Prior distribution of latent space components. [%(default)s]")
    parser.add_argument("--model_path", dest='model_path', default=None, type=str,
                        help="Continue from pretrained model. [%(default)s]")
    parser.add_argument("--uniform_init_interval", dest='init_interval', default=1e-2, type=float,
                        help="The interval of uniform initialization. [%(default)s]")

    # optimization settings:
    parser.add_argument("--lr", default=1e-3, dest='lr', type=float,
                        help="Learning rate for ADAM optimizer. [%(default)s]")
    parser.add_argument("--beta1", default=0.9, dest='beta1', type=float,
                        help="Momentum for ADAM optimizer. [%(default)s]")
    parser.add_argument("--beta2", default=0.999, dest='beta2', type=float,
                        help="Beta2 for ADAM optimizer. [%(default)s]")
    parser.add_argument("--eps", default=1e-8, dest='eps', type=float,
                        help="Epsilon for ADAM optimizer. [%(default)s]")
    parser.add_argument("--lambda", default=0.0, dest='lmbda', type=float,
                        help="L1 weight decay coefficient. [%(default)s]")
    parser.add_argument("--DataRNGseed", dest="DataRNG_seed", default=1, type=int,
                        help="Data random number generator seed. [%(default)s]")
    parser.add_argument("--ModelRNGseed", dest="ModelRNG_seed", default=1, type=int,
                        help="Model random number generator seed. [%(default)s]")
    # NOTE(review): presumably here to absorb the "-f <connection file>"
    # argument a Jupyter/Colab kernel is started with, so parse_args() does
    # not abort inside the notebook — confirm before removing.
    parser.add_argument('-f')
    return parser


if __name__ == '__main__':
    # ----- parse training settings:
    parser = build_arg_parser()
    args = parser.parse_args()

In [93]:
    # ----- run training loop over several epochs & save models for each epoch:
    # RNG control (disabled): the global seeding below is commented out; the
    # data seed is instead passed explicitly to Gen_Source / Train_Valid_Split.
    # NOTE(review): the parser defines DataRNG_seed and ModelRNG_seed but no
    # RNG_seed, so two of these lines would fail if re-enabled as written.
    # torch.manual_seed(args.RNG_seed)
    # random.seed(args.DataRNG_seed)
    # np.random.seed(args.RNG_seed)
    # Make float64 the default floating-point dtype for torch.
    torch.set_default_dtype(torch.double)
    
    # Generate the sources (parameter files are the defaults of Gen_Source),
    # mix them, then split the mixtures into training and validation sets.
    # NOTE(review): N_sample is hard-coded to 7668, which equals the default
    # size_train + size_valid (5110 + 2558) — keep in sync if those change.
    Sources = Gen_Source(N_sample=7668, seed_gs=args.DataRNG_seed)
    Data = Gen_Mixture(Sources, ratio=1)
    Data_train, Data_valid = Train_Valid_Split(Data, args.size_train, args.size_valid, args.DataRNG_seed)

    # NOTE(review): train() is not defined in the cells visible here —
    # presumably defined elsewhere in the notebook; verify it is run first.
    # The complete mixture array is passed in addition to the two splits.
    train(args, Data_train, Data_valid, Data)

(16, 2558)