In [None]:
// (copied from http://lamastex.org/lmse/mep/src/GraphXShortestWeightedPaths.html)

import scala.util.Random
import scala.math.max

import scala.reflect.ClassTag
import org.apache.spark.graphx._
import org.apache.spark.graphx.{Graph, VertexId}
import org.apache.spark.graphx.util.GraphGenerators


// A random directed graph whose edge attributes are distances.
// Alternative seed kept for reference:
//val graphSeed = 123L
val graphSeed = 3L
val numVertices = 50
// The topology comes from GraphGenerators.logNormalGraph; the generated edge attributes are replaced
// by random distances drawn with Random.nextDouble(), and self-loops are pinned to distance 0.
// NOTE(review): the distances use the unseeded global Random, so graphSeed only fixes the topology.
// groupEdges only merges parallel edges that live in the same partition, so the graph is
// repartitioned with partitionBy before parallel edges are collapsed to their maximum distance.
val graph: Graph[Long, Double] = GraphGenerators.logNormalGraph(sc, numVertices = numVertices, seed = graphSeed).mapEdges { e =>
  // to make things nicer we assign 0 distance to itself
  if (e.srcId == e.dstId) 0.0 else Random.nextDouble()
}.partitionBy(PartitionStrategy.EdgePartition2D).groupEdges((attr1, attr2) => max(attr1, attr2))

  

>     import scala.util.Random
>     import scala.math.max
>     import scala.reflect.ClassTag
>     import org.apache.spark.graphx._
>     import org.apache.spark.graphx.{Graph, VertexId}
>     import org.apache.spark.graphx.util.GraphGenerators
>     graphSeed: Long = 3
>     numVertices: Int = 50
>     graph: org.apache.spark.graphx.Graph[Long,Double] = org.apache.spark.graphx.impl.GraphImpl@4d95da76

  

The Shortest Path algorithm
---------------------------

The implemented shortest path algorithm uses the distributed Pregel
algorithm and is divided into two parts.

The first part is based on the code in
`998_EX_01_GraphXShortestWeightedPaths`. The original code did not
have all the desired functionality: it found the shortest distance but
did not keep track of the path itself, so the algorithm was extended
with this functionality.

The first part takes a graph, where the edges are double values
representing the cost of travelling between its connected nodes, and an
array of the ids of each goal node. As output, it provides a graph where
each node contains a Map object keyed by the different landmarks/goal nodes.
When a lookup is made in the map of a specific node, it returns a tuple containing
the shortest distance, the id of the next node in the path and the id of
the current node. The last element serves no purpose in the final results
but is used as a form of stopping criterion in the algorithm.

The second part transforms the output of the first part to a "path
graph" where each edge is marked with either a 1 or a 0 depending on if
it is used in a path between a starting node and a goal node. Although
this recursion can be performed on a single machine for small examples,
this procedure is also implemented using the Pregel algorithm to handle
situations of millions of edges.

The input of the second part is the graph created in the first part as
well as the id of a single goal node and a start node. The goal node has
to be in the set of goal nodes used in the first part. This part outputs
a "path graph" where each edge is given the value 1 or 0 depending on if
it is on the shortest path or not.

In [None]:
import scala.reflect.ClassTag
import org.apache.spark.graphx._

/**
 * Computes shortest weighted paths to the given set of goal nodes (landmarks), returning a graph
 * where each vertex attribute is a map from every reachable landmark to the shortest-path distance
 * and the next node on that path.
 * Currently supports only Graph of [VD, Double], where VD is an arbitrary vertex type.
 *
 * The object also includes a function which transforms the resulting graph into a path graph
 * between a specific starting node and goal node. Each edge in the path graph is either 1 or 0
 * depending on whether it is on the shortest path or not.
 */
object ShortestPath extends Serializable {

  // When finding the shortest path each node stores a map from each goal node to a tuple of
  // ('total distance to the goal node', 'next node on the shortest path', 'the node's own id').
  // The last value is only kept for computational convenience: it is what a neighbour copies
  // into its own 'next node' slot when it adopts a path through this node.
  type SPMap = Map[VertexId, Tuple3[Double, VertexId, VertexId]]

  // PN holds the information of the path nodes which are used for creating a path graph
  // PN = ('Distance left to goal node', 'Next path node id', 'Goal node', 'Is on path')
  type PN = Tuple4[Double, VertexId, VertexId, Boolean]

  private val INITIAL_DIST = 0.0
  private val DEFAULT_ID = -1L
  private val INFINITY = Int.MaxValue.toDouble

  // Entry assumed for a landmark that a map has no record of.
  private val UNREACHED: Tuple3[Double, VertexId, VertexId] = Tuple3(INFINITY, DEFAULT_ID, DEFAULT_ID)

  private def makeMap(x: (VertexId, Tuple3[Double, VertexId, VertexId])*): SPMap = Map(x: _*)

  // Re-expresses a neighbour's map from the point of view of vertex `id`: every distance grows by
  // `delta`, the neighbour's own id becomes the next hop, and `id` becomes the new own id.
  private def incrementMap(spmap: SPMap, delta: Double, id: VertexId): SPMap = {
    spmap.map { case (v, d) => v -> (Tuple3(d._1 + delta, d._3, id)) }
  }

  // Key-wise minimum by distance. The entry of spmap1 wins only when it is strictly shorter;
  // on ties (or when both are missing) the entry of spmap2 is used.
  private def addMaps(spmap1: SPMap, spmap2: SPMap): SPMap = {
    (spmap1.keySet ++ spmap2.keySet).map { k =>
      val first = spmap1.getOrElse(k, UNREACHED)
      val second = spmap2.getOrElse(k, UNREACHED)
      k -> (if (first._1 < second._1) first else second)
    }.toMap
  }

  /**
   * Runs Pregel to compute, for every vertex, the shortest distance and next hop to each landmark.
   *
   * @param graph     input graph whose edge attributes are the edge costs
   * @param landmarks ids of the goal nodes
   * @return a graph with the same edges whose vertex attributes are [[SPMap]]s
   */
  // at this point it does not really matter what vertex type is
  def run[VD](graph: Graph[VD, Double], landmarks: Seq[VertexId]): Graph[SPMap, Double] = {
    val landmarkIds = landmarks.toSet
    val spGraph = graph.mapVertices { (vid, attr) =>
      // A landmark starts at distance 0.0 from itself. Its third slot must hold its own id:
      // incrementMap copies that slot into the 'next node' of its neighbours, and
      // create_path_graph recognises the goal node by it.
      if (landmarkIds.contains(vid)) makeMap(vid -> Tuple3(INITIAL_DIST, DEFAULT_ID, vid)) else makeMap()
    }

    val initMaps = makeMap()

    def vProg(id: VertexId, attr: SPMap, msg: SPMap): SPMap = {
      addMaps(attr, msg)
    }

    def sendMsg(edge: EdgeTriplet[SPMap, Double]): Iterator[(VertexId, SPMap)] = {
      // What the source would know if it routed through this edge's destination.
      val newAttr = incrementMap(edge.dstAttr, edge.attr, edge.srcId)
      // Only notify the source when that would actually change its map. addMaps keeps the
      // source's entry unless the new one is strictly shorter, so this also stops the iteration.
      if (edge.srcAttr != addMaps(newAttr, edge.srcAttr)) Iterator((edge.srcId, newAttr))
      else Iterator.empty
    }

    Pregel(spGraph, initMaps)(vProg, sendMsg, addMaps)
  }

  /**
   * Marks the edges of the shortest path from `startId` to `goalId`.
   *
   * @param graph   output of [[run]]; `goalId` has to be one of the landmarks used there
   * @param goalId  id of the goal node
   * @param startId id of the start node
   * @return a graph whose edge attribute is 1 for edges on the path and 0 otherwise
   */
  def create_path_graph[VD](graph: Graph[SPMap, Double], goalId: VertexId, startId: VertexId): Graph[PN, Int] = {
    // For a given goal node we drop the lookup map and turn the state into a PN (Tuple4) that
    // also carries the goal id and an 'is on path' flag.
    val path = graph.mapEdges(e => 0)
              .mapVertices((vertexId, attr) => {
                if (attr.contains(goalId)) {
                  val path_step = attr(goalId)
                  if (vertexId == path_step._3 && path_step._2 == DEFAULT_ID)
                    (path_step._1, goalId, goalId, false) // the goal node itself: point it at itself
                  else
                    (path_step._1, path_step._2, goalId, false)
                } else // If the vertex does not have a map entry for our goal we add a default value
                    (INFINITY, DEFAULT_ID, DEFAULT_ID, false)
              })

      def mergeMsg(msg1: VertexId, msg2: VertexId): VertexId = { // we should only get one msg
          msg2
      }

      def vprog(id: VertexId, attr: PN, msg: VertexId): PN = {
        // Check that the current node is the one addressed in the message
        if (id == msg)
          (attr._1, attr._2, attr._3, true)
        else // Not addressed to this node (happens for the initial message, which is startId): keep the old value
          attr
      }

      def sendMsg(triplet: EdgeTriplet[PN, Int]): Iterator[(VertexId, VertexId)] = {
        // If dstId is the next node on the path and has not yet been activated
        if (triplet.srcAttr._2 == triplet.dstId && triplet.srcAttr._4 && !triplet.dstAttr._4)
          Iterator((triplet.dstId, triplet.dstId)) // Send next msg
        else
          Iterator.empty // Do nothing
      }

      Pregel(path, startId)(vprog, sendMsg, mergeMsg).mapTriplets(triplet => {
        if (triplet.srcAttr._2 == triplet.dstId && triplet.srcAttr._4)
          1
        else
          0
      })
  }
}

// Usage hint for the object defined in this cell (the object is named ShortestPath).
println("Usage: val result = ShortestPath.run(graph, Seq(4L, 0L, 9L))")

  

>     Usage: val result = GraphXShortestWeightedPaths.run(graph, Seq(4L, 0L, 9L))
>     import scala.reflect.ClassTag
>     import org.apache.spark.graphx._
>     defined object ShortestPath

  

To make the code somewhat more accessible, we wrap the execution of the
two parts above in a new function called `shortestPath`. This new
function takes the id of the start node and a single goal node as well
as the input graph as input. The function then outputs the path graph
mentioned above.

In [None]:
import scala.util.Random

// Returns a copy of `graph` whose edge attribute is 1.0 for edges on the shortest path from srcId
// to dstId and 0.0 otherwise; all vertex attributes are reset to 0L.
// With placeholder = true no path is computed: every edge gets a random 0/1 attribute instead.
def shortestPath(srcId : Long, dstId : Long, graph : Graph[Long, Double], placeholder: Boolean) : Graph[Long, Double] = {
  if (!placeholder) {
    val distances = ShortestPath.run(graph, Seq(dstId))
    val marked = ShortestPath.create_path_graph(distances, dstId, srcId)
    marked.mapVertices((_, _) => 0L).mapEdges(edge => edge.attr.toDouble)
  } else {
    graph.mapEdges(_ => Random.nextInt(2).toDouble)
  }
}

// Convenience overload: always computes the real shortest path (placeholder disabled).
def shortestPath(srcId : Long, dstId : Long, graph : Graph[Long, Double]) : Graph[Long, Double] =
  shortestPath(srcId, dstId, graph, placeholder = false)

  

>     import scala.util.Random
>     shortestPath: (srcId: Long, dstId: Long, graph: org.apache.spark.graphx.Graph[Long,Double], placeholder: Boolean)org.apache.spark.graphx.Graph[Long,Double] <and> (srcId: Long, dstId: Long, graph: org.apache.spark.graphx.Graph[Long,Double])org.apache.spark.graphx.Graph[Long,Double]
>     shortestPath: (srcId: Long, dstId: Long, graph: org.apache.spark.graphx.Graph[Long,Double], placeholder: Boolean)org.apache.spark.graphx.Graph[Long,Double] <and> (srcId: Long, dstId: Long, graph: org.apache.spark.graphx.Graph[Long,Double])org.apache.spark.graphx.Graph[Long,Double]

In [None]:
import scala.reflect.ClassTag

// # Merge edge attributes of two (identical in structure) graphs
// The result keeps the vertices of firstGraph; each edge carries the pair
// (attribute in firstGraph, attribute in secondGraph).
// NOTE(review): EdgeRDD.innerJoin presumably requires both graphs to share the same edge
// partitioning - confirm against the GraphX documentation.
def mergeEdgeAttributes[ED1 : ClassTag, ED2 : ClassTag](firstGraph : Graph[Long, ED1], secondGraph : Graph[Long, ED2]) : Graph[Long, (ED1, ED2)] = {
  val pairedEdges = firstGraph.edges.innerJoin(secondGraph.edges) { (_, _, left, right) => (left, right) }
  Graph(firstGraph.vertices, pairedEdges)
}

  

>     import scala.reflect.ClassTag
>     mergeEdgeAttributes: [ED1, ED2](firstGraph: org.apache.spark.graphx.Graph[Long,ED1], secondGraph: org.apache.spark.graphx.Graph[Long,ED2])(implicit evidence$1: scala.reflect.ClassTag[ED1], implicit evidence$2: scala.reflect.ClassTag[ED2])org.apache.spark.graphx.Graph[Long,(ED1, ED2)]

In [None]:
// Test graph: vertex 0 fans out to every intermediate vertex 1 .. numVertices-2, and each of
// those has one edge into the last vertex (numVertices-1). All edge weights are 0.0 and every
// vertex gets the default attribute 0L.
val graph: Graph[Long, Double] = {
  val sinkId = numVertices - 1
  val intermediateIds = spark.sparkContext.parallelize(1 until sinkId)
  val twoHopEdges = intermediateIds.flatMap { vid =>
    List(Edge(0L, vid.toLong, 0.0), Edge(vid.toLong, sinkId, 0.0))
  }
  Graph.fromEdges(twoHopEdges, 0L)
}

  

>     graph: org.apache.spark.graphx.Graph[Long,Double] = org.apache.spark.graphx.impl.GraphImpl@6c5928bd

In [None]:
import scala.util.Random

// Samples one Gaussian value per edge: every edge attribute (mean, variance) is replaced by
// max(eps, mean + sqrt(variance) * z) with z drawn from a standard normal distribution.
//   graph          - graph whose edge attributes are (mean, variance) pairs
//   seed           - seed of the random draws (honoured by both code paths)
//   eps            - lower bound applied to every sampled value
//   sparkSqlRandom - true: draw with Spark SQL's randn(seed) on the edge DataFrame;
//                    false: draw with a scala.util.Random seeded per edge partition
// Calling this twice with the same seed on identically partitioned edges reuses the same noise, so
// callers that need independent samples must pass different seeds.
def graphRandomGaussian(graph : Graph[Long, (Double, Double)], seed : Int, eps : Double, sparkSqlRandom : Boolean) : Graph[Long, Double] = {
  if (sparkSqlRandom) {
    import org.apache.spark.sql.functions.{randn, sqrt => sqlSqrt}
    val sampledEdges = graph.edges.toDF
      .select($"srcId", $"dstId", $"attr._1" + sqlSqrt($"attr._2") * randn(seed))
      .rdd
      .map(r => Edge(r.getLong(0), r.getLong(1), r.getDouble(2)))
    Graph(graph.vertices, sampledEdges).mapEdges(e => scala.math.max(eps, e.attr))
  } else {
    // One generator per edge partition, seeded from (seed, partition id), so that `seed` is no
    // longer ignored on this path. The pair is hashed because nearby raw seeds can give
    // correlated first draws.
    graph.mapEdges { (pid: Int, edges: Iterator[Edge[(Double, Double)]]) =>
      val rng = new Random(scala.util.hashing.MurmurHash3.productHash((seed, pid)))
      edges.map(e => scala.math.max(eps, e.attr._1 + rng.nextGaussian() * scala.math.sqrt(e.attr._2)))
    }
  }
}

// Convenience overload: samples with the Spark SQL random generator (sparkSqlRandom enabled).
def graphRandomGaussian(graph : Graph[Long, (Double, Double)], seed : Int, eps : Double) : Graph[Long, Double] =
  graphRandomGaussian(graph, seed, eps, sparkSqlRandom = true)

  

>     import scala.util.Random
>     graphRandomGaussian: (graph: org.apache.spark.graphx.Graph[Long,(Double, Double)], seed: Int, eps: Double, sparkSqlRandom: Boolean)org.apache.spark.graphx.Graph[Long,Double] <and> (graph: org.apache.spark.graphx.Graph[Long,(Double, Double)], seed: Int, eps: Double)org.apache.spark.graphx.Graph[Long,Double]
>     graphRandomGaussian: (graph: org.apache.spark.graphx.Graph[Long,(Double, Double)], seed: Int, eps: Double, sparkSqlRandom: Boolean)org.apache.spark.graphx.Graph[Long,Double] <and> (graph: org.apache.spark.graphx.Graph[Long,(Double, Double)], seed: Int, eps: Double)org.apache.spark.graphx.Graph[Long,Double]

In [None]:
import scala.math.sqrt
import scala.math.max

// Posterior-sampling experiment on the shortest-path problem: in every iteration edge costs are
// sampled from the posterior, the shortest path under that sample is played, the realized costs of
// the played edges are observed, and the Gaussian posterior of those edges is updated.
println("Starting experiment!")
val startTime = System.currentTimeMillis()

val seed = 1000
val eps = 0.00001

// # Horizon N
val N = 30

// # Source and destination node IDs
val srcId = 0L
val dstId = numVertices - 1L

// # Assumption: Gaussian rewards with known variance
// # Prior graph (map weight to prior mean and variance)
// TODO: Prior mean from real graph
val priorMean = 100.0
val priorVar = 10.0
val prior = graph.mapEdges(e => (priorMean, priorVar))
var posterior = prior

// # Environment (sample true environment from prior)
val trueVar = 10.0
val env = graphRandomGaussian(prior, seed, eps).mapEdges(e => (e.attr, trueVar))

// # For regret calculations
val optimal = shortestPath(srcId, dstId, env.mapEdges(e => e.attr._1))
val optimalExpectedCost = mergeEdgeAttributes(optimal, env).edges.map(e => e.attr._1 * e.attr._2._1).reduce(_ + _)
val optimalPathEdges = optimal.edges.filter(e => e.attr == 1).map(e => (e.srcId, e.dstId)).collect()
printf("Optimal path edges: [%s]\n", optimalPathEdges.mkString(","))

// # Array with instant regret values
// NOTE: stays all zeros while the regret calculation at the end of the loop is commented out.
val instantRegretValues = new Array[Double](N)
var lastAction = optimal

// # Run experiment for N iterations
for (t <- 0 until N) {
  printf("Iteration %d, elapsed time: %d ms", t, System.currentTimeMillis() - startTime)

  // The random draws are a deterministic function of their seed, so every draw needs its own seed.
  // Reusing `seed` would repeat the same noise in every iteration and tie the posterior sample to
  // the realized costs.
  val samplingSeed = seed + 2 * t + 1
  val realizationSeed = seed + 2 * t + 2

  // # Find action (super arm) using posterior sampling
  val sampledParameters = graphRandomGaussian(posterior, samplingSeed, eps)
  val action = shortestPath(srcId, dstId, sampledParameters)
  lastAction = action

  // # Apply action on environments (assuming path is indicated by 1-valued edge attributes) and observe realized costs
  val realizedEnv = graphRandomGaussian(env, realizationSeed, eps)
  // Observed cost = path indicator * realized cost, i.e. 0.0 for every edge that was not played.
  val observation = mergeEdgeAttributes(action, realizedEnv).mapEdges(e => e.attr._1 * e.attr._2)

  // # Update posterior
  posterior = mergeEdgeAttributes(observation, posterior).mapEdges(e => {
    val obs = e.attr._1
    val pMean = e.attr._2._1
    val pVar = e.attr._2._2
    // Realized costs are at least eps > 0, so obs > 0 exactly for the played edges. Only those
    // were observed; an unplayed edge keeps its posterior instead of being updated with a fake 0.
    if (obs > 0.0)
      ((1/(1/trueVar + 1/pVar))*(obs/trueVar + pMean/pVar), (1/(1/trueVar + 1/pVar)))
    else
      (pMean, pVar)
  })

  // # Calculate regret
  //val actionExpectedCost = mergeEdgeAttributes(action, env).edges.map(e => e.attr._1 * e.attr._2._1).reduce(_ + _)
  //val instantRegret = actionExpectedCost - optimalExpectedCost
  //instantRegretValues(t) = instantRegret
  //val actionPathEdges = action.edges.filter(e => e.attr == 1).map(e => (e.srcId, e.dstId)).collect()
  //printf(", instant regret: %.2f", instantRegret)
  //printf(", action path edges: [%s]", actionPathEdges.mkString(","))
  printf("\n")
}

val endTime = System.currentTimeMillis()
printf("Finished experiment! Elapsed time:%d\n", endTime - startTime)

  

>     Starting experiment!
>     Optimal path edges: [(0,4),(4,9)]
>     Iteration 0, elapsed time: 124927 ms
>     Iteration 1, elapsed time: 218948 ms
>     Iteration 2, elapsed time: 254865 ms
>     Iteration 3, elapsed time: 300192 ms
>     Iteration 4, elapsed time: 384399 ms
>     Iteration 5, elapsed time: 415356 ms
>     Iteration 6, elapsed time: 473626 ms
>     Iteration 7, elapsed time: 603751 ms
>     Iteration 8, elapsed time: 649864 ms
>     Iteration 9, elapsed time: 657970 ms
>     Iteration 10, elapsed time: 711303 ms
>     Iteration 11, elapsed time: 733911 ms
>     Iteration 12, elapsed time: 768825 ms
>     Iteration 13, elapsed time: 864619 ms
>     Iteration 14, elapsed time: 908692 ms
>     Iteration 15, elapsed time: 969860 ms
>     Iteration 16, elapsed time: 1034139 ms
>     Iteration 17, elapsed time: 1089197 ms
>     Iteration 18, elapsed time: 1184161 ms
>     Iteration 19, elapsed time: 1264011 ms
>     Iteration 20, elapsed time: 1334011 ms
>     Iteration 21, elapsed time: 1395009 ms
>     Iteration 22, elapsed time: 1493679 ms
>     Iteration 23, elapsed time: 1573276 ms
>     Iteration 24, elapsed time: 1634326 ms
>     Iteration 25, elapsed time: 1733819 ms
>     Iteration 26, elapsed time: 1820353 ms
>     Iteration 27, elapsed time: 1875346 ms
>     Iteration 28, elapsed time: 1948843 ms
>     Iteration 29, elapsed time: 2048672 ms
>     Finished experiment! Elapsed time:2119322
>     import scala.math.sqrt
>     import scala.math.max
>     startTime: Long = 1609958630541
>     seed: Int = 1000
>     eps: Double = 1.0E-5
>     N: Int = 30
>     srcId: Long = 0
>     dstId: Long = 9
>     priorMean: Double = 100.0
>     priorVar: Double = 10.0
>     prior: org.apache.spark.graphx.Graph[Long,(Double, Double)] = org.apache.spark.graphx.impl.GraphImpl@5e483f07
>     posterior: org.apache.spark.graphx.Graph[Long,(Double, Double)] = org.apache.spark.graphx.impl.GraphImpl@4a4132b1
>     trueVar: Double = 10.0
>     env: org.apache.spark.graphx.Graph[Long,(Double, Double)] = org.apache.spark.graphx.impl.GraphImpl@636784b2
>     optimal: org.apache.spark.graphx.Graph[Long,Double] = org.apache.spark.graphx.impl.GraphImpl@77c0d44c
>     optimalExpectedCost: Double = 190.72770275035734
>     optimalPathEdges: Array[(org.apache.spark.graphx.VertexId, org.apache.spark.graphx.VertexId)] = Array((0,4), (4,9))
>     instantRegretValues: Array[Double] = Array(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
>     lastAction: org.apache.spark.graphx.Graph[Long,Double] = org.apache.spark.graphx.impl.GraphImpl@138e7325
>     endTime: Long = 1609960749863

In [None]:
import scala.util.Random
import scala.math.sqrt
import scala.math.max

// Single-step version of the experiment: sample the environment, compute the optimal path, then
// draw one posterior sample and compute the corresponding action.
println("Starting experiment!")
val startTime = System.currentTimeMillis()

val seed = 1000
// Driver-side generator. It must not be used inside mapEdges closures: every task deserialises
// its own copy in the same state, so separate graphs would be built from the same noise.
val rng = new Random(seed)
val eps = 0.00001

// # Horizon N
val N = 30

// # Source and destination node IDs
val srcId = 0L
val dstId = numVertices - 1L

// # Assumption: Gaussian rewards with known variance
// # Prior graph (map weight to prior mean and variance)
// TODO: Prior mean from real graph
val priorMean = 100.0
val priorVar = 10.0
val prior = graph.mapEdges(e => (priorMean, priorVar))
var posterior = prior

// # Environment (sample true environment from prior)
val trueVar = 10.0
val env = graphRandomGaussian(prior, seed, eps).mapEdges(e => (e.attr, trueVar))

// # For regret calculations
val optimal = shortestPath(srcId, dstId, env.mapEdges(e => e.attr._1))
val optimalExpectedCost = mergeEdgeAttributes(optimal, env).edges.map(e => e.attr._1 * e.attr._2._1).reduce(_ + _)
val optimalPathEdges = optimal.edges.filter(e => e.attr == 1).map(e => (e.srcId, e.dstId)).collect()
printf("Optimal path edges: [%s]\n", optimalPathEdges.mkString(","))

// A different seed than the environment's, so the posterior sample is an independent draw.
val sampledParameters = graphRandomGaussian(posterior, seed + 1, eps)
val action = shortestPath(srcId, dstId, sampledParameters)

  

>     Starting experiment!
>     Optimal path edges: [(0,5),(5,9)]
>     import scala.util.Random
>     import scala.math.sqrt
>     import scala.math.max
>     startTime: Long = 1609932752437
>     seed: Int = 1000
>     rng: scala.util.Random = scala.util.Random@55ffb303
>     eps: Double = 1.0E-5
>     N: Int = 30
>     srcId: Long = 0
>     dstId: Long = 9
>     priorMean: Double = 100.0
>     priorVar: Double = 10.0
>     prior: org.apache.spark.graphx.Graph[Long,(Double, Double)] = org.apache.spark.graphx.impl.GraphImpl@660b2c60
>     posterior: org.apache.spark.graphx.Graph[Long,(Double, Double)] = org.apache.spark.graphx.impl.GraphImpl@660b2c60
>     trueVar: Double = 10.0
>     env: org.apache.spark.graphx.Graph[Long,(Double, Double)] = org.apache.spark.graphx.impl.GraphImpl@15febbf8
>     optimal: org.apache.spark.graphx.Graph[Long,Double] = org.apache.spark.graphx.impl.GraphImpl@2f6f564e
>     optimalExpectedCost: Double = 207.25786634313445
>     optimalPathEdges: Array[(org.apache.spark.graphx.VertexId, org.apache.spark.graphx.VertexId)] = Array((0,5), (5,9))
>     sampledParameters: org.apache.spark.graphx.Graph[Long,Double] = org.apache.spark.graphx.impl.GraphImpl@258e9bf1
>     action: org.apache.spark.graphx.Graph[Long,Double] = org.apache.spark.graphx.impl.GraphImpl@6c700943

In [None]:
// Show the edges of the optimal path graph as a table (edge attribute 1 = on the path, 0 = not).
display(optimal.edges.toDF)

  

[TABLE]

In [None]:
// Show the environment edges as a table; each attribute is the pair (sampled mean cost, variance).
display(env.edges.toDF)

In [None]:
// Per-edge cost of the optimal path: path indicator times the environment's mean cost
// (0 for edges that are not on the path).
display(mergeEdgeAttributes(optimal, env).edges.map(e => e.attr._1 * e.attr._2._1).toDF)

  

[TABLE]

In [None]:
// Per-edge cost of the sampled action: path indicator times the environment's mean cost
// (0 for edges that are not on the chosen path).
display(mergeEdgeAttributes(action, env).edges.map(e => e.attr._1 * e.attr._2._1).toDF)

  

[TABLE]

  

Graph visualization
-------------------

what do we want to visualize??

shortest path for small algorithm???

But first, we need to initialize the d3 package.

In [None]:
package d3


// We use a package object so that we can define top level classes like Edge that need to be used in other cells
// This was modified by Ivan Sadikov to make sure it is compatible the latest databricks notebook

import org.apache.spark.sql._
import com.databricks.backend.daemon.driver.EnhancedRDDFunctions.displayHTML

// Input row of graphs.force: a link from src to dest together with a count.
case class Edge(src: String, dest: String, count: Long)

// Shapes that are serialised to JSON for the d3 force layout: a named node, ...
case class Node(name: String)
// ... a link whose source/target are positions in Graph.nodes, ...
case class Link(source: Int, target: Int, value: Long)
// ... and the complete node/link structure.
case class Graph(nodes: Seq[Node], links: Seq[Link])

object graphs {
// val sqlContext = SQLContext.getOrCreate(org.apache.spark.SparkContext.getOrCreate())  /// fix
val sqlContext = SparkSession.builder().getOrCreate().sqlContext
import sqlContext.implicits._
  
/**
 * Collects the given edges to the driver and renders them as a d3 force-directed graph.
 * Underscores in node names are shown as spaces; the drawn link value is count / 20 + 1.
 *
 * @param clicks edges to draw (collected in full, so keep the dataset small)
 * @param height height of the drawing in pixels
 * @param width  width of the drawing in pixels
 */
def force(clicks: Dataset[Edge], height: Int = 100, width: Int = 960): Unit = {
  val data = clicks.collect()
  def label(raw: String): String = raw.replaceAll("_", " ")
  val names = (data.map(_.src) ++ data.map(_.dest)).map(label).distinct
  val nodes = names.toSeq.map(Node)
  // Position of every node name, computed once instead of scanning `nodes` for each link end.
  val indexOf = names.zipWithIndex.toMap
  val links = data.map { t =>
    Link(indexOf(label(t.src)), indexOf(label(t.dest)), t.count / 20 + 1)
  }.toSeq
  showGraph(height, width, Seq(Graph(nodes, links)).toDF().toJSON.first())
}

/**
 * Displays a force directed graph using d3
 * input: {"nodes": [{"name": "..."}], "links": [{"source": 1, "target": 2, "value": 0}]}
 */
def showGraph(height: Int, width: Int, graph: String): Unit = {

displayHTML(s"""
<style>

.node_circle {
  stroke: #777;
  stroke-width: 1.3px;
}

.node_label {
  pointer-events: none;
}

.link {
  stroke: #777;
  stroke-opacity: .2;
}

.node_count {
  stroke: #777;
  stroke-width: 1.0px;
  fill: #999;
}

text.legend {
  font-family: Verdana;
  font-size: 13px;
  fill: #000;
}

.node text {
  font-family: "Helvetica Neue","Helvetica","Arial",sans-serif;
  font-size: 17px;
  font-weight: 200;
}

</style>

<div id="clicks-graph">
<script src="//d3js.org/d3.v3.min.js"></script>
<script>

var graph = $graph;

var width = $width,
    height = $height;

var color = d3.scale.category20();

var force = d3.layout.force()
    .charge(-700)
    .linkDistance(180)
    .size([width, height]);

var svg = d3.select("#clicks-graph").append("svg")
    .attr("width", width)
    .attr("height", height);
    
force
    .nodes(graph.nodes)
    .links(graph.links)
    .start();

var link = svg.selectAll(".link")
    .data(graph.links)
    .enter().append("line")
    .attr("class", "link")
    .style("stroke-width", function(d) { return Math.sqrt(d.value); });

var node = svg.selectAll(".node")
    .data(graph.nodes)
    .enter().append("g")
    .attr("class", "node")
    .call(force.drag);

node.append("circle")
    .attr("r", 10)
    .style("fill", function (d) {
    if (d.name.startsWith("other")) { return color(1); } else { return color(2); };
})

node.append("text")
      .attr("dx", 10)
      .attr("dy", ".35em")
      .text(function(d) { return d.name });
      
//Now we are giving the SVGs co-ordinates - the force layout is generating the co-ordinates which this code is using to update the attributes of the SVG elements
force.on("tick", function () {
    link.attr("x1", function (d) {
        return d.source.x;
    })
        .attr("y1", function (d) {
        return d.source.y;
    })
        .attr("x2", function (d) {
        return d.target.x;
    })
        .attr("y2", function (d) {
        return d.target.y;
    });
    d3.selectAll("circle").attr("cx", function (d) {
        return d.x;
    })
        .attr("cy", function (d) {
        return d.y;
    });
    d3.selectAll("text").attr("x", function (d) {
        return d.x;
    })
        .attr("y", function (d) {
        return d.y;
    });
});
</script>
</div>
""")
}
  
  /** Displays a short HTML usage note for `force`. */
  def help(): Unit = {
displayHTML("""
<p>
Produces a force-directed graph given a collection of edges of the following form:<br/>
<tt><font color="#a71d5d">case class</font> <font color="#795da3">Edge</font>(<font color="#ed6a43">src</font>: <font color="#a71d5d">String</font>, <font color="#ed6a43">dest</font>: <font color="#a71d5d">String</font>, <font color="#ed6a43">count</font>: <font color="#a71d5d">Long</font>)</tt>
</p>
<p>Usage:<br/>
<tt><font color="#a71d5d">import</font> <font color="#ed6a43">d3._</font></tt><br/>
<tt><font color="#795da3">graphs.force</font>(<br/>
&nbsp;&nbsp;<font color="#ed6a43">height</font> = <font color="#795da3">500</font>,<br/>
&nbsp;&nbsp;<font color="#ed6a43">width</font> = <font color="#795da3">500</font>,<br/>
&nbsp;&nbsp;<font color="#ed6a43">clicks</font>: <font color="#795da3">Dataset</font>[<font color="#795da3">Edge</font>])</tt>
</p>""")
  }
}

  

>     Warning: classes defined within packages cannot be redefined without a cluster restart.
>     Compilation successful.

In [None]:
import org.graphframes.GraphFrame
import d3._
import org.apache.spark.sql.functions.lit // import the lit function in sql
// Draw the current `graph` with d3: edge weights are truncated to Int and scaled by 10000 to
// serve as the link count, and the GraphFrame edge columns are renamed to match d3.Edge.
val priorShortestPath = GraphFrame.fromGraphX(graph.mapEdges(edge => edge.attr.toInt * 10000))

d3.graphs.force(
  clicks = priorShortestPath.edges
    .select($"src", $"dst".as("dest"), $"attr".as("count"))
    .as[d3.Edge],
  height = 500,
  width = 1000)


In [None]:

// Draw the path chosen in the last iteration of the experiment: edges on the path (attribute 1)
// get count 10000, all other edges get count 0.
val posteriorShortestPath = GraphFrame.fromGraphX(lastAction.mapEdges(edge => edge.attr.toInt * 10000))

d3.graphs.force(
  clicks = posteriorShortestPath.edges
    .select($"src", $"dst".as("dest"), $"attr".as("count"))
    .as[d3.Edge],
  height = 500,
  width = 1000)

In [None]:
// Show the same edge selection that is passed to d3.graphs.force as a table.
display(posteriorShortestPath.edges.select($"src", $"dst".as("dest"), $"attr".as("count")).as[d3.Edge])

  

[TABLE]

Truncated to 30 rows

  

We can also show the cumulative regret. As the algorithm reaches a final
solution, the instantaneous regret should decrease and the cumulative
regret should reach a plateau.

In [None]:
// Running sum of the instant regret. scanLeft prepends its start value, so cumulativeRegret has
// N + 1 entries and entry 0 is the 0.0 from before the first iteration.
val cumulativeRegret = instantRegretValues.scanLeft(0.0)(_ + _)
// Skip that leading 0.0 so that iteration t is paired with the regret accumulated through
// iteration t (not through iteration t - 1).
val df = spark.sparkContext.parallelize((1 to N) zip cumulativeRegret.tail).toDF("Iteration (t)","Cumulative regret")
display(df)


  

[TABLE]

In [None]:
// Display the cumulative-regret DataFrame `df` again.
display(df)

  

[TABLE]

In [None]:
// This is just to be able to debug the graph

import scala.util.Random
import scala.math.sqrt
import scala.math.max

println("Graph export!")

// Lower bound for every sampled edge length.
val eps = 0.00001

// # Assumption: Gaussian rewards with known variance
// # Prior graph (map weight to prior mean and variance)
// TODO: Prior mean from real graph
val priorMean = 100.0
val priorVar = 10.0
val prior = graph.mapEdges(_ => (priorMean, priorVar))

// # Environment (sample true environment from prior)
// The second component is a constant 0.0; it only feeds the coordinate columns below.
val env = prior.mapEdges { edge =>
  val (mean, variance) = edge.attr
  (max(eps, mean + Random.nextGaussian() * sqrt(variance)), 0.0)
}

// # Display DF
// All four coordinate columns are filled from attr._2, i.e. with 0.0.
display(
  env.edges.toDF.select(
    $"srcId".as("nodeFrom"),
    $"dstId".as("nodeTo"),
    $"attr._1".as("Length"),
    $"attr._2".as("fromLat"),
    $"attr._2".as("fromLon"),
    $"attr._2".as("toLat"),
    $"attr._2".as("toLon")))

  

[TABLE]