In [None]:
jupyter nbextension enable beakerx --py --sys-prefix

In [1]:
%classpath add mvn org.nd4j nd4j-native-platform 0.7.2

In [2]:
%classpath add mvn org.nd4j nd4s_2.11 0.7.2


In [3]:
%classpath add mvn org.vegas-viz vegas_2.11 0.3.11

In [4]:
%classpath add jar target/scala-2.11/scala-miniflow_2.11-0.1.0-SNAPSHOT.jar

# Generative Adversarial Networks

The objective today is to use a neural network library written from scratch to create a generative adversarial network from which we will create data that resembles the ever so popular MNIST data set.  

First let's get a few questions out of the way

## Why?

1.  Scala - because it is a great language
2.  ND4J - it is the main linear algebra library for DL4J (Deep learning 4 Java).  The author's objective was to shorten the gap between JVM languages and Numpy or Matlab
3.  From Scratch - it is challenging a.f. and a lot of fun.  

##  Feed Forward Neural networks

Here we will begin to get into how we will model our neural network framework. Per Deep Learning by Goodfellow, the feedforward neural network is called such because

* (feedforward) information flows through the function being evaluated from the input $x$, through intermediate computations used to define $f$, and finally to the output $y$. We are not considering any feedback connections
* (network) They are typically represented by composing together many different functions. The model is associated with a directed acyclic graph describing how the functions are composed together.
* (neural) The models are loosely inspired by neuroscience.  

Albert did such a great job on from scratch that I'm not going to bother.  

Our Node will
* take incoming nodes as arguments
* have a method which captures outbound nodes
* have a forward method (feed forward)
* have a backward method (for back propagation)

We'll demonstrate a simple example of using the framework to estimate a linear regression on the boston dataset, and then we'll jump into the GAN

In [194]:
import org.nd4j.linalg.factory.Nd4j
import org.nd4s.Implicits._
import org.nd4j.linalg.api.ndarray.INDArray
import org.nd4s.Implicits._

import com.github.timsetsfire.nn.node._
import com.github.timsetsfire.nn.activation._
import com.github.timsetsfire.nn.costfunctions._
import com.github.timsetsfire.nn.regularization._
import com.github.timsetsfire.nn.optimize._
import com.github.timsetsfire.nn.graph._

import org.nd4j.linalg.factory.Nd4j
import org.nd4s.Implicits._
import org.nd4j.linalg.api.ndarray.INDArray
import org.nd4s.Implicits._
import com.github.timsetsfire.nn.node._
import com.github.timsetsfire.nn.activation._
import com.github.timsetsfire.nn.costfunctions._
import com.github.timsetsfire.nn.regularization._
import com.github.timsetsfire.nn.optimize._
import com.github.timsetsfire.nn.graph._


In [9]:
// Placeholders that receive data at training time.
val x = new Input()
x.setName("features")
val y = new Input()
y.setName("target")

// Parameters of the linear model.
val w = new Variable()
w.setName("weights")
val b = new Variable()
b.setName("bias")

// Linear prediction node; operator form of the same node: val yhat = (x * w) + b
val yhat = new Linear(x, w, b)
yhat.setName("prediction")

// Cost node comparing the target with the prediction.
val mse = new MSE(y, yhat)

// Build the graph that ends in the cost node, then sort its nodes into
// evaluation order.
val regressionGraph = buildGraph(mse)
val regression = topologicalSort(regressionGraph)

// Print the nodes in the order they will be evaluated.
for (node <- regression) println(node)
""

{weights}@44b16a2e
{features}@387742e3
{bias}@208dc62a
{target}@3cbb1c7e
{prediction}@70e79f7a
MSE@7910b546




In [10]:
// Load the Boston features and target from comma-separated files.
// The paths are relative to the notebook's working directory.
val x_ = Nd4j.readNumpy("resources/boston_x.csv", ",")
val y_ = Nd4j.readNumpy("resources/boston_y.csv", ",")
// Presumably here so the cell echoes an empty string instead of the arrays.
""



In [11]:
// Destructure the shape into row and column counts. The pattern only matches
// a two-entry shape; anything else raises a MatchError.
val Array(xrows,nfeatures) = x_.shape
print(s"rows: $xrows, columns: $nfeatures")
""

rows: 506, columns: 13



## Variable Initialization

We need to initialize the bias and the weights.  For our purposes, we'll just set them equal to zero

In [12]:
// Start both parameters at zero. The weight matrix needs one row per feature
// column, so its height comes from `nfeatures` (read from the data's shape)
// rather than a hard-coded 13; the cell keeps working if the feature set changes.
b.forward(Nd4j.zeros(1, 1))
w.forward(Nd4j.zeros(nfeatures, 1))

null

In [13]:
// Standardize the features: subtract each column's mean, then divide by its
// standard deviation.
val featureMean = x_.mean(0)
val featureStd = x_.std(0)
val xs_ = x_.subRowVector(featureMean).divRowVector(featureStd)

// Same treatment for the target.
val targetMean = y_.mean(0)
val targetStd = y_.std(0)
val ys_ = y_.subRowVector(targetMean).divRowVector(targetStd)

// One combined matrix: target in column 0, features in the columns after it.
val data = Nd4j.concat(1, ys_, xs_)

// Training schedule. Integer division: leftover rows do not add a step.
val epochs = 500
val batchSize = 100
val stepsPerEpoch = xrows / batchSize
""



In [14]:
// Mini-batch gradient descent over the sorted regression graph.
val sgd = new GradientDescent(regression, learningRate = 0.1)

// Loop-invariant selections: the leading `batchSize` rows, and the feature
// columns (column 0 of `data` is the target, columns 1..nfeatures the features).
val batchRows = 0 until batchSize
val featureColumns = 1 to nfeatures

for (epoch <- 0 to epochs) {
  var epochLoss = 0d
  for (step <- 0 until stepsPerEpoch) {

    // Shuffle in place, then use the leading rows as this step's batch.
    Nd4j.shuffle(data, 1)

    val batchFeatures = data.getColumns(featureColumns: _*).getRows(batchRows: _*)
    val batchTarget = data.getColumn(0).getRows(batchRows: _*)
    val feedDict: Map[Node, Any] = Map(x -> batchFeatures, y -> batchTarget)

    sgd.optimize(feedDict)

    epochLoss += mse.value(0,0)
  }
  // Report the mean batch loss every 50th epoch.
  if (epoch % 50 == 0) {
    val meanLoss = epochLoss / stepsPerEpoch.toDouble
    println(s"Epoch: ${epoch}, Loss: ${meanLoss}")
  }
}

Epoch: 0, Loss: 0.5307063281536102
Epoch: 50, Loss: 0.27538190484046937
Epoch: 100, Loss: 0.2489509642124176
Epoch: 150, Loss: 0.3016616731882095
Epoch: 200, Loss: 0.29843970835208894
Epoch: 250, Loss: 0.280102527141571
Epoch: 300, Loss: 0.21600520610809326
Epoch: 350, Loss: 0.2709260553121567
Epoch: 400, Loss: 0.2555361956357956
Epoch: 450, Loss: 0.2606124997138977
Epoch: 500, Loss: 0.29919455349445345


null

In [15]:
// Feed the complete standardized data set (not a mini-batch) into the inputs,
// then run a forward pass over every node in sorted order.
x.forward(xs_)
y.forward(ys_)
regression.foreach( _.forward())

null

In [16]:
// Print the value held by the cost node after the forward pass above.
println(s"final cost: ${mse.value}")
""

final cost: 0.27




## GAN

In [82]:
import com.github.timsetsfire.nn.node._
import com.github.timsetsfire.nn.activation._
import com.github.timsetsfire.nn.costfunctions._
import com.github.timsetsfire.nn.batchnormalization._
import com.github.timsetsfire.nn.regularization.Dropout
import com.github.timsetsfire.nn.optimize._
import com.github.timsetsfire.nn.graph._

import scala.util.Try
import org.nd4j.linalg.factory.Nd4j
import org.nd4j.linalg.api.ndarray.INDArray
import org.nd4j.linalg.ops.transforms.Transforms.{sigmoid,exp,log,pow,sqrt}
import org.nd4s.Implicits._



import com.github.timsetsfire.nn.node._
import com.github.timsetsfire.nn.activation._
import com.github.timsetsfire.nn.costfunctions._
import com.github.timsetsfire.nn.batchnormalization._
import com.github.timsetsfire.nn.regularization.Dropout
import com.github.timsetsfire.nn.optimize._
import com.github.timsetsfire.nn.graph._
import scala.util.Try
import org.nd4j.linalg.factory.Nd4j
import org.nd4j.linalg.api.ndarray.INDArray
import org.nd4j.linalg.ops.transforms.Transforms.{sigmoid, exp, log, pow, sqrt}
import org.nd4s.Implicits._


## Get and Import Data 



Run `curl -s https://pjreddie.com/media/files/mnist_test.csv > resources/mnist_test.csv` in terminal

In [85]:
// Read the MNIST CSV, keep columns 1..784 and divide every value by 255.
// NOTE(review): column 0 is presumably the digit label and the remaining 784
// columns the 28x28 pixel intensities in 0..255 — confirm against the file.
val x_ = Nd4j.readNumpy("resources/mnist_test.csv", ",").getColumns( (1 until 785):_*).div(255d)

""



## Inputs

For inputs, we'll have fake data, real data, and labels.  

In [195]:
// Placeholders fed on every training step.
// `images` and `labels` are what the discriminator consumes.
val images = new Input()
images.setName("images")
val labels = new Input()
labels.setName("labels")
val noise = new Input()  // noise is used to generate fake images
noise.setName("noise")
// Labels attached to generated images during the generator step.
val fakeLabels = new Input()
fakeLabels.setName("fake_labels")

null

## Generator Network

This is the network that will generate fake data.  

In [196]:
// Generator: noise -> three LeakyReLU hidden layers -> Sigmoid output.
// Each tuple's second entry equals the next layer's first entry: 100 -> 128 -> 256 -> 512 -> 784.
// NOTE(review): the tuple is presumably (inputs, outputs) and 0.2 the
// leaky slope — confirm against the LeakyReLU factory.
val h1Generator= LeakyReLU(noise, (100,128), 0.2)
h1Generator.setName("generator_hidden1")

val h2Generator= LeakyReLU(h1Generator, (128, 256), 0.2)
h2Generator.setName("generator_hidden2")

val h3Generator= LeakyReLU(h2Generator, (256, 512), 0.2)
h3Generator.setName("generator_hidden3")

// 784 outputs per row, the same width as the image columns kept from the CSV.
val fakeImages = Sigmoid(h3Generator, (512,784))
fakeImages.setName("fake_images")

null

In [197]:
// Build the graph that ends in the generator output and sort its nodes.
val generatorNetwork = buildGraph(fakeImages)
val generator = topologicalSort(generatorNetwork)
// Trainable parameters are picked by exact runtime class name "Variable".
val generatorTrainables = generator.filter{ _.getClass.getSimpleName == "Variable" }

[[Variable@1df7920a, Variable@1c0880f1, Variable@10fbbcca, Variable@45a08d47, Variable@6db8eb0e, Variable@223a12bd, Variable@2a629dd0, Variable@8707d6f]]

## Discriminator Network

In [198]:
// Discriminator: images -> three LeakyReLU hidden layers, with a dropout node
// after each of the first two, -> one linear output.
// Each tuple's second entry equals the next layer's first entry: 784 -> 256 -> 64 -> 16 -> 1.
val h1Discrim = LeakyReLU(images, (784,256), 0.1)
h1Discrim.setName("discriminator_hidden_layer1")

// NOTE(review): 0.20 is presumably the dropout rate — confirm against Dropout.
val d1 = new Dropout(h1Discrim, 0.20)
d1.setName("dropout_h1_layer")

val h2Discrim = LeakyReLU(d1, (256,64), 0.1)
h2Discrim.setName("discriminator_hidden_layer2")

val d2 = new Dropout(h2Discrim, 0.20)
d2.setName("dropout_h2_layer")

val h3Discrim = LeakyReLU(d2, (64,16), 0.1)
h3Discrim.setName("discriminator_hidden_layer3")

// Single output per row, passed to the cost node as logits.
val logits = Linear(h3Discrim, (16, 1))
logits.setName("discriminator_logits")

// Cost node built from the labels input and the logits.
val cost = new BceWithLogits(labels, logits)
cost.setName("cost_function")

null

In [199]:
// Build the graph that ends in the cost node and sort its nodes.
val discriminatorNetwork = buildGraph(cost)
val discriminator = topologicalSort(discriminatorNetwork)
// Trainable parameters are picked by exact runtime class name "Variable".
val discriminatorTrainables = discriminator.filter{ _.getClass.getSimpleName == "Variable" }

[[Variable@45628127, Variable@707c13f8, Variable@7b81667, Variable@32cc16f8, Variable@4b5b1bad, Variable@c97378f, Variable@5591bbbe, Variable@53dbbd83]]

In [200]:
// Give every trainable parameter its starting value, discriminator first and
// generator second: standard-normal draws scaled by sqrt(3 / (rows + cols)).
Seq(discriminatorTrainables, generatorTrainables).foreach { trainables =>
  trainables.foreach { node =>
    val (m, n) = node.size
    val rows = m.asInstanceOf[Int]
    val cols = n.asInstanceOf[Int]
    val scale = math.sqrt(3 / (rows.toDouble + cols.toDouble))
    node.value = Nd4j.randn(rows, cols) * scale
  }
}


null

## Setting up first and second moment maps for Adam Optimizer

In [201]:
// Shape of the MNIST matrix; the pattern requires exactly two dimensions.
val Array(xrows, xcols) = x_.shape
val batchSize = 128
// Integer division: leftover rows do not add a step.
val stepsPerEpoch = xrows / batchSize

// Adam state: one zero array per trainable, with the same shape as its value,
// for the first and second moment of each network.
val firstMomentGenerator = generatorTrainables.map{ i => (i, Nd4j.zerosLike(i.value))}.toMap
val secondMomentGenerator = generatorTrainables.map{ i => (i, Nd4j.zerosLike(i.value))}.toMap
val firstMomentDiscriminator = discriminatorTrainables.map{ i => (i, Nd4j.zerosLike(i.value))}.toMap
val secondMomentDiscriminator = discriminatorTrainables.map{ i => (i, Nd4j.zerosLike(i.value))}.toMap
// Step counter, starting at 0; the training loop reads it for bias correction.
val t = new java.util.concurrent.atomic.AtomicInteger

0

In [202]:
/** Sets the `train` flag of a dropout node.
  *
  * @param n        node to update; it is cast to `Dropout[Node]`, so a node of
  *                 any other class fails with a `ClassCastException`
  * @param training value assigned to the node's `train` flag (default `false`)
  */
def setDropoutTraining(n: Node, training: Boolean = false): Unit = {
  val dropout = n.asInstanceOf[Dropout[Node]]
  dropout.train = training
}

setDropoutTraining: (n: com.github.timsetsfire.nn.node.Node, training: Boolean)Unit


In [203]:
// Adam hyper-parameters used by both update loops in the training cell.
// `stepSize` is a var; the only reassignment in view is the commented-out
// halving inside the training loop.
var stepSize: Double = 0.002 // 0.001 default
val beta1: Double = 0.2  // 0.9 default
val beta2: Double = 0.999  // 0.999 default
// Added to the square root of the second moment in the update's denominator.
val delta: Double = 1e-8

1.0E-8

In [205]:
// Fixed 16 x 100 noise sample (Nd4j.rand output times 2, minus 1). Drawn once
// here, outside the training loop, and fed to the generator when rendering.
val noiseDataForPicture = Nd4j.rand(16,100).mul(2).sub(1)
OutputCell.HIDDEN

In [210]:
// The discriminator's dropout nodes do not change once the graph is built, so
// collect them once instead of re-filtering the graph twice per step.
val dropoutNodes = discriminator.filter{ _.getClass.getSimpleName == "Dropout" }

// Alternating GAN training: each step updates the generator first (through
// the discriminator, with dropout off) and then the discriminator (on a batch
// of fake plus real images, with dropout on). Both updates are Adam steps.
for (epoch <- 0 to 20) {

  var loss = 0d         // discriminator cost, weighted by rows fed per step
  var genCost = 0d      // generator cost, weighted by batchSize per step
  var samplesSeen = 0d  // rows fed to the discriminator during this epoch

  // `until`, not `to`: `to` would run stepsPerEpoch + 1 steps per epoch.
  for (_ <- 0 until stepsPerEpoch) {

    // Shared step counter used for bias correction in both Adam updates.
    t.addAndGet(1)

    val noiseData = Nd4j.rand(batchSize,100).mul(2).sub(1)
    val fakeLabelData = Nd4j.ones(batchSize, 1)

    val generatorFeedDict: Map[Node, INDArray] = Map(
      noise -> noiseData,
      fakeLabels -> fakeLabelData
    )

    // ---- generator step ----
    dropoutNodes.foreach(d => setDropoutTraining(d, false))
    generatorFeedDict.foreach{ case (node, data) => node.forward(data) }
    generator.foreach(_.forward())
    // Score the generated images with the discriminator, labelled as ones.
    images.forward(fakeImages.value)
    labels.forward(fakeLabels.value)
    discriminator.foreach(_.forward())
    discriminator.reverse.foreach(_.backward())
    // Hand the gradient at the discriminator's image input to the generator
    // output, then back-propagate through the rest of the generator.
    //fakeImages.gradients(fakeImages) = images.gradients(images)
    fakeImages.backward(images.gradients(images).dup)
    generator.reverse.tail.foreach(_.backward())
    genCost += (cost.value.sumT*batchSize)

    // Adam update of the generator parameters.
    for (param <- generatorTrainables) {
      firstMomentGenerator(param).muli(beta1).addi(param.gradients(param).mul(1 - beta1))
      secondMomentGenerator(param).muli(beta2).addi( pow(param.gradients(param),2).mul(1 - beta2))
      val fhat = firstMomentGenerator(param).div(1 - math.pow(beta1, t.get))
      val shat = secondMomentGenerator(param).div(1 - math.pow(beta2, t.get))
      param.value.addi( fhat.mul(-stepSize).div(sqrt(shat).add(delta)))
    }

    // ---- discriminator step ----
    // Regenerate the fake images with the just-updated generator.
    generator.foreach(_.forward())
    val fakeImageData = fakeImages.value
    // Shuffle the real images in place and take the leading rows as the batch.
    Nd4j.shuffle(x_,1)
    val realImageData = x_.getRows((0 until batchSize):_*)
    val realLabelData = Nd4j.ones(batchSize, 1)
    val fakeLabelData0 = Nd4j.zeros(batchSize, 1)

    // Fake rows first (label 0), real rows second (label 1).
    val labelData = Nd4j.concat(0, fakeLabelData0, realLabelData)
    val imageData = Nd4j.concat(0, fakeImageData, realImageData)
    val discriminatorFeedDict: Map[Node, INDArray] = Map(
      images -> imageData,
      labels -> labelData
    )

    dropoutNodes.foreach(d => setDropoutTraining(d, true))
    discriminatorFeedDict.foreach{ case (node, data) => node.forward(data) }
    discriminator.foreach(_.forward())
    discriminator.reverse.foreach(_.backward())

    // Adam update of the discriminator parameters.
    for (param <- discriminatorTrainables) {
      firstMomentDiscriminator(param).muli(beta1).addi(param.gradients(param).mul(1d - beta1))
      secondMomentDiscriminator(param).muli(beta2).addi( pow(param.gradients(param),2).mul(1d - beta2))
      val fhat = firstMomentDiscriminator(param).div(1 - math.pow(beta1, t.get))
      val shat = secondMomentDiscriminator(param).div(1 - math.pow(beta2, t.get))
      param.value.addi( fhat.mul(-stepSize).div(sqrt(shat).add(delta)))
    }

    val rowsFed = images.value.shape.apply(0)
    loss += ((cost.value(0,0)) * rowsFed)
    samplesSeen += rowsFed
  }
  // if(epoch % 1000 == 0) stepSize /= 2d
  if (epoch % 10 == 0) {
    // The generator cost was accumulated over half as many rows as the
    // discriminator cost, hence the division by samplesSeen / 2.
    print(f"epoch: ${epoch}")
    print(f"\tdiscriminator -> loss: ${loss / samplesSeen.toDouble}%2.3f")
    println(f"\tgenerator -> loss: ${genCost / (samplesSeen.toDouble/2d)}%2.3f")
  }
}

epoch: 0	discriminator -> loss: 0.703	generator -> loss: 0.791
epoch: 10	discriminator -> loss: 0.280	generator -> loss: 2.824
epoch: 20	discriminator -> loss: 0.061	generator -> loss: 3.083
epoch: 30	discriminator -> loss: 0.466	generator -> loss: 1.243
epoch: 40	discriminator -> loss: 0.537	generator -> loss: 0.976
epoch: 50	discriminator -> loss: 0.570	generator -> loss: 0.908
epoch: 60	discriminator -> loss: 0.565	generator -> loss: 0.961
epoch: 70	discriminator -> loss: 0.538	generator -> loss: 1.193
epoch: 80	discriminator -> loss: 0.422	generator -> loss: 1.655
epoch: 90	discriminator -> loss: 0.270	generator -> loss: 2.307
epoch: 100	discriminator -> loss: 0.319	generator -> loss: 2.895


null

In [211]:
// Generate 16 images from the fixed noise sample and tile them into one
// 4 x 4 heat map.
noise.forward(noiseDataForPicture)
generator.foreach(_.forward())

val side = 28          // pixels per image edge (784 = 28 * 28)
val items = 4 * side   // pixel rows in the tiled picture

// One flat pixel buffer per grid column, each built from four generated rows.
// The buffers are kept in a collection rather than in vals named d1..d4,
// because d1 and d2 already name the discriminator's dropout nodes.
val rowGroups = Seq(Seq(1,2,3,4), Seq(5,6,7,8), Seq(9,10,11,12), Seq(13,14,15,0))
val gridColumns = rowGroups.map(rows => fakeImages.value.getRows(rows:_*).data.asDouble)

// Picture row i is the i-th 28-pixel slice of every column buffer, side by side.
val da = (0 until items).map { i =>
  Array.concat(gridColumns.map(_.slice(i * side, (i + 1) * side)): _*)
}

val hm = new HeatMap
// The rows are passed in reverse order, as the original cell did.
hm.data_=( da.reverse )
hm.color_=(GradientColor.GREEN_YELLOW_WHITE )

hm



First Epoch
![epoch](resources/fig0.png)

Epoch 10
![epoch 10](resources/fig10.png)

Epoch 50
![epoch 50](resources/fig50.png)

Epoch 80
![epoch 80](resources/fig80.png)

Epoch 100
![epoch 100](resources/fig100.png)