In [2]:
%run dihiggs_dataset.ipynb

import argparse
import os
import numpy as np
import math
import sys

import torchvision.transforms as transforms
from torchvision.utils import save_image

from torch.utils.data import DataLoader
from torchvision import datasets
from torch.autograd import Variable

import torch.nn as nn
import torch.nn.functional as F   # NOTE: I don't think this is used
import torch.autograd as autograd
import torch

from IPython import display
from matplotlib import pyplot as plt

#
print(torch.cuda.is_available())

False


In [6]:
#os.makedirs("images", exist_ok=True)

parser = argparse.ArgumentParser()

parser.add_argument("--n_epochs", type=int, help="number of epochs of training")
parser.add_argument("--batchSize", type=int, help="size of the batches")
parser.add_argument("--learningRate", type=float, help="adam: learning rate")
parser.add_argument("--beta1", type=float, help="adam: decay of first order momentum of gradient")
parser.add_argument("--beta2", type=float, help="adam: decay of first order momentum of gradient")
parser.add_argument("--latentSpaceSize", type=int, help="dimensionality of the latent space")
#parser.add_argument("--n_critic", type=int, default=5, help="number of training steps for discriminator per iter")
parser.add_argument("--lrDecayRate", type=float, help="Learning rate decay rate")
parser.add_argument("--width", type=int, help="Width of network")
parser.add_argument("--depth", type=int, help="Depth of network")
parser.add_argument("--activationFunction", type=str, help="Activation function used")
parser.add_argument("--alpha", type=float, help="Alpha value for LeakyRelu (if used)")
parser.add_argument("--batch_norm", type=bool, help="True or false value for using batch normalization")
parser.add_argument("--configDir", type=str, help="Directory to save data to")

opt = parser.parse_args()
print(opt)

class opt_old():   # Class used for optimizers in the future. Defines all variables and stuff needed.
    n_epochs = 20000   # an epoch is the number of times it works through the entire training set
    batch_size = 1000   # the training set is broken up into batches, 
                        # and the average loss is used from a given batch for back propagation
    lr =  0.0002 # 0.001   # learning rate (how much to change based on error)
    b1 = 0     # 0.9 # Used for Adam. Exponential decay rate for the first moment. 
    b2 = 0.9   # 0.999 # Used for Adam. Exponential decay rate for the second moment estimates (gradient squared)
    #NOTE: The default epsilon for torch.optim.adam is 1e-8, so I will just leave it that way
    
    #n_cpu = 2   # not used rn
    latent_dim = 100 #size of noise input to generator (latent space) 
    #img_size = 28
    # channels = 1   # Only used for img_shape right below, and img_shape isn't needed
    n_critic = 5   # The generator is trained after this many critic steps
    #   clip_value = 0.01   # No other usages rn. 
    sample_interval = 400   # Determines when a to save the image(s?) generated
    
    Xi = 10;   # multiplier for recursively finding r_adversarial
    
    # Loss weight for alp penalty
    lambda_alp = 100

# img_shape = (opt.channels, opt.img_size, opt.img_size)   # Not used rn

cuda = True if torch.cuda.is_available() else False

sample_interval = 400
n_critic = 5

try:
    os.makedirs(opt.configDir)
except OSError:
    logging.warning("Output folders already exist. May overwrite some output files.")

class Generator(nn.Module):
    """
    Create hidden layers. Apply normalization. Apply leaky relu. 
    """
    def __init__(self):
        super(Generator, self).__init__()   

        def block(in_feat, out_feat, normalize=True):   # This function creates the hidden layers
            layers = [nn.Linear(in_feat, out_feat)]   # layer is a hidden layer. Takes input
                                                      # (batch_size,in_feat) and give an output (batch_size,out_feat)
            if normalize:
                layers.append(nn.BatchNorm1d(out_feat, 0.8))   # adds normalization to what Layers does to input and comes out in
                                                               # size (batch_size,out_feat). I think this does bn1d(linear(input))
            layers.append(nn.LeakyReLU(0.2, inplace=True))   # inplace means just modify input, don't allocate more memory
            return layers



        """
        stores layers and functions applied to layers
        """   
        if os.path.exists(opt.configDir + "/generatorModel.pt"): 
            self.model = torch.load(opt.configDir + "/generatorModel.pt")
        else:
            self.model = nn.Sequential(   
                *block(opt.latent_dim, 128, normalize=False),   # first layer
                *block(128, 256),   # second layer
                *block(256, 512),   # 3rd layer
                *block(512, 1024),   # 4th layer
                nn.Linear(1024, 25),   # final layer. Output is size 25
                nn.Tanh()   # Using tanh for final output (why tanh vs leaky relu?)
            )

    def forward(self, z):
        """
        applies layers to input to get img
        """
        img = self.model(z)   # applies model (layers and functions on layers) to z
        #img = img.view(img.shape[0], *img_shape)
        return img


class Discriminator(nn.Module):
    """
    Discriminator/critic layers
    """
    def __init__(self):
        super(Discriminator, self).__init__()   # Just uses the module constructor with name Discriminator 

        if os.path.exists(opt.configDir + "/discriminatorModel.pt"): 
            self.model = torch.load(opt.configDir + "/discriminatorModel.pt")
        else:
            self.model = nn.Sequential(
                nn.Linear(25, 512),   # first layer
                nn.LeakyReLU(0.2, inplace=True),   # apply leaky relu to layer
                nn.Linear(512, 256),   # 2nd layer
                nn.LeakyReLU(0.2, inplace=True),   # apply leaky relu to layer
                nn.Linear(256, 1),   # Final layer to give output. Output is size 1 (validity score)
                                     # NOTE: weird to end with comma
            )

    def forward(self, img):
        """
        applies model to image and gives validity score
        """
        img_flat = img.view(img.shape[0], -1)   # TODO: Figure out what this does 
        validity = self.model(img_flat)   # calculates validity score
        #print("forward validity from discriminator: " + str((np.max(np.abs(validity.detach().numpy())))))
        return validity


# ******* OUT OF CLASSES NOW ************

# Initialize generator and discriminator
generator = Generator()
discriminator = Discriminator()

if cuda:
    generator.cuda()
    discriminator.cuda()
    
    
# Configure data loader - CHANGE
os.makedirs("./data/", exist_ok=True)
dataloader = torch.utils.data.DataLoader(
   DiHiggsSignalMCDataset('./DiHiggs Data', generator_level = False),
   batch_size=opt.batch_size,
   shuffle=True,
)
print('done')

usage: ipykernel_launcher.py [-h] [--n_epochs N_EPOCHS]
                             [--batchSize BATCHSIZE]
                             [--learningRate LEARNINGRATE] [--beta1 BETA1]
                             [--beta2 BETA2]
                             [--latentSpaceSize LATENTSPACESIZE]
                             [--lrDecayRate LRDECAYRATE] [--width WIDTH]
ipykernel_launcher.py: error: unrecognized arguments: -f /home/mcnama20/.local/share/jupyter/runtime/kernel-94a4bc80-bdd1-4c69-a965-1fb39dc52a69.json


SystemExit: 2

In [5]:
%tb

SystemExit: 2

In [4]:
def compute_ALP(D, real_samples, fake_samples):   # TODO: Find out why these are .data
    """
    Calculates the gradient penalty loss for WGAN GP
    D input will be discrimantor function
    real_samples and fake_samples are from reality and generator. Both are sent in via memory location of buffer
    
    """
    
    # Random weight term for interpolation between real and fake samples (how much of each)
    alpha = Tensor(np.random.random((real_samples.size(0),1)))   # This is a tensor designating which to use where
    #print(alpha)
  #  print(alpha.shape)
    # Get random interpolation between real and fake samples
   # print(real_samples.shape)
    
    # Gets some of real and some of fake samples for gradient penalty calculation
    interpolates = (alpha * real_samples + ((1 - alpha) * fake_samples)).requires_grad_(True)
    # .requires grad is something attached to all tensors and can be used to speed up (by making false I think)
    # It is automatically false, but if you need gradient then set to be true
    # TODO: Understand how this statement works
    
    
    ################## CALCULATE R ADVERSARIAL ###############################################
    # start with random unit vector r0
    r0 = np.random.rand(interpolates.shape[0], interpolates.shape[1])
    r0 = Tensor(r0/r0.max(axis = 0)).requires_grad_(True)
    #print(r[0])
    
    #  add this initial r to our random data points
    interpol_y0 = (interpolates + opt.Xi * r0).requires_grad_(True)   #.requires_grad_(True)
    # run the discriminator on both of these
    d_interpolates = D(interpolates)   # Run discriminator on interpolates to get validity scores
    d_interpol_y0 = D(interpol_y0)   # do the same for the adjusted interpolates to find r adversarial

    
    # find gradient(d(f(x) - f(x+r)))
    difference = (d_interpolates - d_interpol_y0).requires_grad_(True)  #.requires_grad_(True)
    #print("d interpolates: " + str(d_interpolates.shape) + " " + str(d_interpolates.type))
    #print("difference: " + str(difference.shape) + " " + str(difference.type))
    fake = Variable(Tensor(real_samples.shape[0], 1).fill_(1.0), requires_grad=False) 
    gradient_r0 = autograd.grad(
        outputs=difference,
        inputs=r0,
        grad_outputs=fake,
        create_graph=True,
        retain_graph=True,
        only_inputs=True
    )[0]
    
    # finally, find r_adversarial!
    epsilon_r = np.random.uniform(0.1,10)
    r_adv = epsilon_r * gradient_r0/np.linalg.norm(gradient_r0.cpu().detach().numpy())
    #print(np.max(np.linalg.norm(r_adv.cpu().detach().numpy())))
###########################################################################################################

######### Now find the loss ###########################
    
    interpol_adversarial = (interpolates + r_adv).requires_grad_(True)
    d_interpol_adv = D(interpol_adversarial)
    abs_difference = np.abs((d_interpolates - d_interpol_adv).cpu().detach().numpy())/ \
    (np.linalg.norm(r_adv.cpu().detach().numpy())) - 1
    squared = np.square(np.maximum(abs_difference,np.zeros(100)))
    #print("Max of alp before mean: " + str(np.max(np.abs(squared))))
    
    alp_penalty = squared.mean()
   # print("ALP final: " + str(alp_penalty))
    
    return alp_penalty


In [None]:
batches_done = 0   # Counter for batches
for epoch in range(opt.n_epochs):   # Loop through all epochs
    for i, x in enumerate(dataloader): # x is in dataloader (a batch I think). i
                                       # is the index of x (number of times critic is trained this epoch)

        # Configure input
        real_imgs = Variable(x.type(Tensor))   # Variable is a wrapper for the Tensor x was just made into

        # ---------------------
        #  Train Discriminator
        # ---------------------

        optimizer_D.zero_grad()   # Make gradients zero so they don't accumulate

        # Sample noise (latent space) to make generator input
        z = Variable(Tensor(np.random.normal(0, 1, (x.shape[0], opt.latent_dim))))   # Once again Variable wraps the Tensor
#         print(type(x))
#         print(x.shape)
#         print(x[0].shape())
#         print(z.shape)

        # Generate a batch of images from the latent space sampled
        fake_imgs = generator(z)

        #print(fake_imgs[0])

        # Calculate validity score for real images
        real_validity = discriminator(real_imgs)

        # Calculate validity score for fake images
        fake_validity = discriminator(fake_imgs)

        # Calculate gradient penalty
        alp = compute_ALP(discriminator, real_imgs.data, fake_imgs.data)
        # TODO: figure out why .data is used

        # Calculate loss for critic (Adversarial loss)
        d_loss = -torch.mean(real_validity) + torch.mean(fake_validity) + opt.lambda_alp * alp

        d_loss.backward()   # Do back propagation 
        optimizer_D.step()   # Update parameters based on gradients for individuals

        optimizer_G.zero_grad()   # Resets gradients for generator to be zero to avoid accumulation

        # Train the generator every n_critic steps
        if i % opt.n_critic == 0:

            # -----------------
            #  Train Generator
            # -----------------

            # Generate a batch of images
            fake_imgs = generator(z)

            # Loss measures generator's ability to fool the discriminator
            # Train on fake images
            fake_validity = discriminator(fake_imgs)
            g_loss = -torch.mean(fake_validity)

            g_loss.backward()
            optimizer_G.step()


            # ----------------------------
            # Save stuff when time is right
            # ----------------------------
            if batches_done % 10 == 0:
                print(
                    "[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f]"
                    % (epoch, opt.n_epochs, i, len(dataloader), d_loss.item(), g_loss.item())
                )

            if batches_done % sample_interval == 0:
                save_image(fake_imgs.data[:25], "images/%d.png" % batches_done, nrow=5, normalize=True)

            batches_done += n_critic
    if epoch % 10 == 0:
        z = Variable(Tensor(np.random.normal(0, 1, (300000, opt.latent_dim))))
        fake_data = generator(z)
        np.save('./gen_data_alp/{num_batches}.npy'.format(num_batches=batches_done), fake_data.cpu().detach().numpy())

torch.save(generator.model, opt.configDir + "/generatorModel.pt")
torch.save(discriminator.model, opt.configDir + "/discriminatorModel.pt")

[Epoch 0/20000] [Batch 0/192] [D loss: 0.604934] [G loss: -6.642289]
[Epoch 0/20000] [Batch 10/192] [D loss: 0.578657] [G loss: -8.013991]
[Epoch 0/20000] [Batch 20/192] [D loss: 0.500111] [G loss: -7.837383]
[Epoch 0/20000] [Batch 30/192] [D loss: 0.296897] [G loss: -6.350225]
[Epoch 0/20000] [Batch 40/192] [D loss: -0.018437] [G loss: -4.221060]
[Epoch 0/20000] [Batch 50/192] [D loss: -0.230345] [G loss: -3.197325]
[Epoch 0/20000] [Batch 60/192] [D loss: -0.189514] [G loss: -3.848126]
[Epoch 0/20000] [Batch 70/192] [D loss: -0.048276] [G loss: -4.697019]
[Epoch 0/20000] [Batch 80/192] [D loss: 0.117612] [G loss: -3.215794]
[Epoch 0/20000] [Batch 90/192] [D loss: 0.079117] [G loss: -1.266546]
[Epoch 0/20000] [Batch 100/192] [D loss: -0.128019] [G loss: -0.325409]
[Epoch 0/20000] [Batch 110/192] [D loss: -0.297198] [G loss: 0.785536]
[Epoch 0/20000] [Batch 120/192] [D loss: -0.397040] [G loss: 2.727832]
[Epoch 0/20000] [Batch 130/192] [D loss: -0.391507] [G loss: 3.362292]
[Epoch 0/200

[Epoch 6/20000] [Batch 0/192] [D loss: -0.005962] [G loss: 10.589444]
[Epoch 6/20000] [Batch 10/192] [D loss: -0.104293] [G loss: 11.610182]
[Epoch 6/20000] [Batch 20/192] [D loss: -0.140729] [G loss: 8.688994]
[Epoch 6/20000] [Batch 30/192] [D loss: -0.157740] [G loss: 10.643454]
[Epoch 6/20000] [Batch 40/192] [D loss: -0.174800] [G loss: 9.535302]
[Epoch 6/20000] [Batch 50/192] [D loss: -0.170429] [G loss: 10.055379]
[Epoch 6/20000] [Batch 60/192] [D loss: -0.200838] [G loss: 11.210100]
[Epoch 6/20000] [Batch 70/192] [D loss: -0.194701] [G loss: 9.948377]
[Epoch 6/20000] [Batch 80/192] [D loss: -0.199996] [G loss: 11.272081]
[Epoch 6/20000] [Batch 90/192] [D loss: -0.240033] [G loss: 11.380981]
[Epoch 6/20000] [Batch 100/192] [D loss: -0.274364] [G loss: 10.467807]
[Epoch 6/20000] [Batch 110/192] [D loss: -0.272298] [G loss: 11.119071]
[Epoch 6/20000] [Batch 120/192] [D loss: -0.238013] [G loss: 12.129158]
[Epoch 6/20000] [Batch 130/192] [D loss: -0.206676] [G loss: 11.555389]
[Epoch

[Epoch 11/20000] [Batch 175/192] [D loss: -0.190287] [G loss: 17.639114]
[Epoch 11/20000] [Batch 185/192] [D loss: -0.273409] [G loss: 18.597532]
[Epoch 12/20000] [Batch 0/192] [D loss: -0.041637] [G loss: 16.088005]
[Epoch 12/20000] [Batch 10/192] [D loss: -0.228434] [G loss: 17.940094]
[Epoch 12/20000] [Batch 20/192] [D loss: -0.220020] [G loss: 19.304050]
[Epoch 12/20000] [Batch 30/192] [D loss: -0.248777] [G loss: 17.114403]
[Epoch 12/20000] [Batch 40/192] [D loss: -0.198954] [G loss: 16.356569]
[Epoch 12/20000] [Batch 50/192] [D loss: -0.255960] [G loss: 19.133614]
[Epoch 12/20000] [Batch 60/192] [D loss: -0.173138] [G loss: 17.787251]
[Epoch 12/20000] [Batch 70/192] [D loss: -0.227362] [G loss: 18.206762]
[Epoch 12/20000] [Batch 80/192] [D loss: -0.313465] [G loss: 20.368313]
[Epoch 12/20000] [Batch 90/192] [D loss: -0.304316] [G loss: 18.738594]
[Epoch 12/20000] [Batch 100/192] [D loss: -0.431522] [G loss: 14.824829]
[Epoch 12/20000] [Batch 110/192] [D loss: -0.233015] [G loss: 

[Epoch 17/20000] [Batch 145/192] [D loss: -0.319660] [G loss: 24.496155]
[Epoch 17/20000] [Batch 155/192] [D loss: -0.264248] [G loss: 24.460323]
[Epoch 17/20000] [Batch 165/192] [D loss: -0.310659] [G loss: 21.880833]
[Epoch 17/20000] [Batch 175/192] [D loss: -0.238657] [G loss: 23.847227]
[Epoch 17/20000] [Batch 185/192] [D loss: -0.316849] [G loss: 25.053463]
[Epoch 18/20000] [Batch 0/192] [D loss: -0.098419] [G loss: 24.104284]
[Epoch 18/20000] [Batch 10/192] [D loss: -0.197563] [G loss: 24.857315]
[Epoch 18/20000] [Batch 20/192] [D loss: -0.208632] [G loss: 26.766588]
[Epoch 18/20000] [Batch 30/192] [D loss: -0.302195] [G loss: 22.011942]
[Epoch 18/20000] [Batch 40/192] [D loss: -0.298580] [G loss: 25.388029]
[Epoch 18/20000] [Batch 50/192] [D loss: -0.292238] [G loss: 24.419790]
[Epoch 18/20000] [Batch 60/192] [D loss: -0.359352] [G loss: 24.761837]
[Epoch 18/20000] [Batch 70/192] [D loss: -0.359865] [G loss: 23.527470]
[Epoch 18/20000] [Batch 80/192] [D loss: -0.254747] [G loss:

[Epoch 23/20000] [Batch 115/192] [D loss: -0.616680] [G loss: 36.407173]
[Epoch 23/20000] [Batch 125/192] [D loss: -0.434864] [G loss: 33.589176]
[Epoch 23/20000] [Batch 135/192] [D loss: -0.614326] [G loss: 38.279881]
[Epoch 23/20000] [Batch 145/192] [D loss: -0.388538] [G loss: 31.803207]
[Epoch 23/20000] [Batch 155/192] [D loss: -0.499565] [G loss: 31.685579]
[Epoch 23/20000] [Batch 165/192] [D loss: -0.433514] [G loss: 35.964622]
[Epoch 23/20000] [Batch 175/192] [D loss: -0.523594] [G loss: 39.317886]
[Epoch 23/20000] [Batch 185/192] [D loss: -0.416794] [G loss: 32.944157]
[Epoch 24/20000] [Batch 0/192] [D loss: -0.253567] [G loss: 36.204845]
[Epoch 24/20000] [Batch 10/192] [D loss: -0.654800] [G loss: 31.187334]
[Epoch 24/20000] [Batch 20/192] [D loss: -0.582165] [G loss: 35.440910]
[Epoch 24/20000] [Batch 30/192] [D loss: -0.501610] [G loss: 37.089844]
[Epoch 24/20000] [Batch 40/192] [D loss: -0.551468] [G loss: 32.319363]
[Epoch 24/20000] [Batch 50/192] [D loss: -0.577774] [G lo