In [None]:
// (copied from http://lamastex.org/lmse/mep/src/GraphXShortestWeightedPaths.html)

import scala.util.Random
import scala.math.max

import org.apache.spark.graphx.{Graph, VertexId}
import org.apache.spark.graphx.util.GraphGenerators

// A graph with edge attributes containing distances.
// Vertices and edge structure come from GraphX's log-normal generator with a fixed seed;
// edge weights are drawn from the (unseeded) global Random.
val numVertices = 100000
val graph: Graph[Long, Double] = GraphGenerators.logNormalGraph(sc, numVertices = numVertices, seed = 123L).mapEdges { e =>
  // to make things nicer we assign 0 distance to itself
  if (e.srcId == e.dstId) 0.0 else Random.nextDouble()
  // groupEdges only merges parallel edges that live in the same partition, so the graph is
  // repartitioned first (RandomVertexCut colocates all same-direction edges between two vertices).
}.partitionBy(org.apache.spark.graphx.PartitionStrategy.RandomVertexCut).groupEdges((attr1, attr2) => max(attr1, attr2))

  

>     import scala.util.Random
>     import scala.math.max
>     import org.apache.spark.graphx.{Graph, VertexId}
>     import org.apache.spark.graphx.util.GraphGenerators
>     numVertices: Int = 100000
>     graph: org.apache.spark.graphx.Graph[Long,Double] = org.apache.spark.graphx.impl.GraphImpl@2aab3532

In [None]:
// # High-level overview of the structure (pseudo code)

// # Horizon N

// # Environment
// val true_mean = 1
// val true_var = 1
// val env = graph.map(attr -> (true_mean, true_var))

// # Assumption: Gaussian rewards with known variance
// # Prior graph (map weight to prior mean and variance)
// val prior_mean = 1
// val prior_var = 1
// var prior = graph.map(attr -> (prior_mean, prior_var))

// # For regret calculations
// val optimal = ShortestPath.apply(env.map(attr -> attr._1))
// val optimal_expected_cost = optimal.join(env).map((o, e) -> o.attr * e.attr._1).reduce(_ + _)

// # List with instant regret values
// val instant_regret_values = List(N)

// For t = 1,..,N:

    // # Find action (super arm)
    // If agent is greedy:
        // val action = ShortestPath.apply(prior.map(attr -> attr._1))
    // Else If agent is Thompson Sampling:
        // val sampled = prior.map(attr -> Gaussian.random(attr._1, attr._2))
        // val action = ShortestPath.apply(sampled)
    
    // # Apply action on environments (assuming path is indicated by 1-valued edge attributes)
    // val observation = action.join(env.map(attr -> Gaussian.random(attr._1, attr._2))).map((a, e) -> a.attr * e.attr)
    
    // # Update posterior
    // val posterior = observation.join(prior).map((o, p) -> ((1/(1/true_var + 1/p.attr._2))*(o.attr/true_var + p.attr._1/p.attr._2), (1/(1/true_var + 1/p.attr._2))))

    // # Set next prior to current posterior
    // prior = posterior

    // # Calculate regret
    // val action_expected_cost = action.join(env).map((a, e) -> a.attr * e.attr._1).reduce(_ + _)
    // val instant_regret = action_expected_cost - optimal_expected_cost
    // instant_regret_values(t) = instant_regret


    

In [None]:
import scala.reflect.ClassTag
import org.apache.spark.graphx._

/**
 * Computes shortest weighted paths to the given set of landmark vertices, returning a graph where each
 * vertex attribute is a map containing the shortest-path distance to each reachable landmark.
 * Currently supports only Graph of [VD, Double], where VD is an arbitrary vertex type.
 *
 * The object also includes a function which transforms the resulting graph into a path graph between a
 * specific start node and goal node. Each edge in the path graph is 1 if it lies on the shortest path
 * and 0 otherwise.
 */
object ShortestPath extends Serializable {

  // While searching, every node stores a map from goal (landmark) id to a triple:
  //   _1: total distance from this node to the goal node,
  //   _2: id of the next node on the shortest path towards the goal node,
  //   _3: id of the node that owns the entry. It is bookkeeping only: incrementMap copies it
  //       into the "next node" slot when the entry is propagated to a neighbour.
  type SPMap = Map[VertexId, Tuple3[Double, VertexId, VertexId]]

  // PN holds the information of the path nodes which are used for creating a path graph
  // PN = ('Distance left to goal node', 'Next path node id', 'Goal node', 'Is on path')
  type PN = Tuple4[Double, VertexId, VertexId, Boolean]

  private val INITIAL_DIST = 0.0
  private val DEFAULT_ID = -1L
  private val INFINITY = Int.MaxValue.toDouble

  // Entry assumed for a goal that a map has no record of.
  private val UNREACHABLE: Tuple3[Double, VertexId, VertexId] = Tuple3(INFINITY, DEFAULT_ID, DEFAULT_ID)

  private def makeMap(x: (VertexId, Tuple3[Double, VertexId, VertexId])*) = Map(x: _*)

  /**
   * Propagates a neighbour's map across an edge: every distance grows by `delta`, the neighbour's
   * own id (third slot) becomes the next hop, and `id` (the receiving node) becomes the new owner.
   */
  private def incrementMap(spmap: SPMap, delta: Double, id: VertexId): SPMap = {
    spmap.map { case (goal, (dist, _, owner)) => goal -> Tuple3(dist + delta, owner, id) }
  }

  /**
   * Merges two maps goal by goal, keeping the entry with the strictly smaller distance.
   * On a tie (or when the goal is missing from `spmap1`) the entry of `spmap2` is kept.
   */
  private def addMaps(spmap1: SPMap, spmap2: SPMap): SPMap = {
    (spmap1.keySet ++ spmap2.keySet).iterator.map { k =>
      // Look each side up once instead of once per tuple component.
      val first = spmap1.getOrElse(k, UNREACHABLE)
      val second = spmap2.getOrElse(k, UNREACHABLE)
      k -> (if (first._1 < second._1) first else second)
    }.toMap
  }

  /**
   * Runs a Pregel computation that gives every vertex a map from each reachable landmark to
   * (distance, next hop, own id). Messages travel against edge direction (from dst to src), so
   * the distances are those of paths following the edges towards the landmark.
   *
   * @param graph     graph whose edge attributes are the edge lengths; vertex attributes are ignored
   * @param landmarks ids of the goal vertices
   * @return the same structure with each vertex attribute replaced by its SPMap
   */
  def run[VD](graph: Graph[VD, Double], landmarks: Seq[VertexId]): Graph[SPMap, Double] = {
    // Set membership instead of a linear Seq scan for every vertex.
    val landmarkSet = landmarks.toSet
    val spGraph = graph.mapVertices { (vid, attr) =>
      // a landmark starts with distance 0.0 to itself, every other vertex with an empty map
      if (landmarkSet.contains(vid)) makeMap(vid -> Tuple3(INITIAL_DIST, DEFAULT_ID, DEFAULT_ID)) else makeMap()
    }

    val initMaps = makeMap()

    def vProg(id: VertexId, attr: SPMap, msg: SPMap): SPMap = {
      addMaps(attr, msg)
    }

    def sendMsg(edge: EdgeTriplet[SPMap, Double]): Iterator[(VertexId, SPMap)] = {
      val newAttr = incrementMap(edge.dstAttr, edge.attr, edge.srcId)
      // Only notify the source when going through dst would actually change its map.
      if (edge.srcAttr != addMaps(newAttr, edge.srcAttr)) Iterator((edge.srcId, newAttr))
      else Iterator.empty
    }

    Pregel(spGraph, initMaps)(vProg, sendMsg, addMaps)
  }

  /**
   * Turns the output of [[run]] into a path graph for one start/goal pair.
   *
   * @param graph   graph produced by `run`, whose landmarks should include `goalId`
   * @param goalId  id of the goal vertex
   * @param startId id of the vertex the path starts from
   * @return a graph whose vertices carry a PN and whose edge attribute is 1 when the edge's source
   *         is marked as on the path and its destination is that source's next hop, otherwise 0
   */
  def create_path_graph[VD](graph: Graph[SPMap, Double], goalId: VertexId, startId: VertexId): Graph[PN, Int] = {
    // For the given goal node we drop the lookup map and keep a Tuple4 (PN) with the goal id and an
    // "on path" flag that starts out false.
    val path = graph.mapEdges(e => 0).mapVertices { (vertexId, attr) =>
      attr.get(goalId) match {
        case Some((dist, next, owner)) =>
          // A direct predecessor of the goal inherited the goal's placeholder next-hop id;
          // while we are at it, we replace it with the goal id itself.
          if (vertexId == owner && next == DEFAULT_ID) (dist, goalId, goalId, false)
          else (dist, next, goalId, false)
        case None =>
          // If the vertex does not have a map entry for our goal we give it a default value
          (INFINITY, DEFAULT_ID, DEFAULT_ID, false)
      }
    }

    def mergeMsg(msg1: VertexId, msg2: VertexId): VertexId = { // we should only get one msg
      msg2
    }

    def vprog(id: VertexId, attr: PN, msg: VertexId): PN = {
      // Mark the node as on the path only if the message is addressed to it. The initial message
      // (startId) reaches every vertex, so all others keep their old value.
      if (id == msg) (attr._1, attr._2, attr._3, true)
      else attr
    }

    def sendMsg(triplet: EdgeTriplet[PN, Int]): Iterator[(VertexId, VertexId)] = {
      // If dstId is the next node on the path and has not yet been activated
      if (triplet.srcAttr._2 == triplet.dstId && triplet.srcAttr._4 && !triplet.dstAttr._4)
        Iterator((triplet.dstId, triplet.dstId)) // Send next msg
      else
        Iterator.empty // Do nothing
    }

    Pregel(path, startId)(vprog, sendMsg, mergeMsg).mapTriplets(triplet =>
      if (triplet.srcAttr._2 == triplet.dstId && triplet.srcAttr._4) 1 else 0
    )
  }
}

// Usage hint for the object defined in this cell (it is named ShortestPath).
println("Usage: val result = ShortestPath.run(graph, Seq(4L, 0L, 9L))")

  

>     Usage: val result = GraphXShortestWeightedPaths.run(graph, Seq(4L, 0L, 9L))
>     import scala.reflect.ClassTag
>     import org.apache.spark.graphx._
>     defined object ShortestPath

In [None]:
import scala.util.Random

/**
 * Returns a graph with the structure of `graph` whose edge attributes mark a path from `srcId`
 * to `dstId`: edges on the shortest path are 1.0, all others 0.0. Vertex attributes are reset to 0L.
 *
 * @param srcId       id of the start vertex
 * @param dstId       id of the goal vertex
 * @param graph       graph whose edge attributes are the edge lengths
 * @param placeholder when true, skips the path search and labels every edge with a random 0 or 1
 */
def shortestPath(srcId : Long, dstId : Long, graph : Graph[Long, Double], placeholder: Boolean) : Graph[Long, Double] = {
  if (placeholder) {
    // Stand-in result: an independent coin flip per edge.
    graph.mapEdges(_ => Random.nextInt(2).toDouble)
  } else {
    // Distances towards dstId first, then the marked path starting at srcId.
    val distances = ShortestPath.run(graph, Seq(dstId))
    val marked = ShortestPath.create_path_graph(distances, dstId, srcId)
    marked.mapVertices((_, _) => 0L).mapEdges(edge => edge.attr.toDouble)
  }
}

/** Shortest path from `srcId` to `dstId` using the real path search (placeholder disabled). */
def shortestPath(srcId : Long, dstId : Long, graph : Graph[Long, Double]) : Graph[Long, Double] =
  shortestPath(srcId, dstId, graph, placeholder = false)

  

>     import scala.util.Random
>     shortestPath: (srcId: Long, dstId: Long, graph: org.apache.spark.graphx.Graph[Long,Double], placeholder: Boolean)org.apache.spark.graphx.Graph[Long,Double] <and> (srcId: Long, dstId: Long, graph: org.apache.spark.graphx.Graph[Long,Double])org.apache.spark.graphx.Graph[Long,Double]
>     shortestPath: (srcId: Long, dstId: Long, graph: org.apache.spark.graphx.Graph[Long,Double], placeholder: Boolean)org.apache.spark.graphx.Graph[Long,Double] <and> (srcId: Long, dstId: Long, graph: org.apache.spark.graphx.Graph[Long,Double])org.apache.spark.graphx.Graph[Long,Double]

In [None]:
//val wrapperPath = shortestPath(0L, 99L, graph)
//display(wrapperPath.edges.toDF.filter('attr === 1))

In [None]:
//val srcId = 0L
//val dstId = numVertices - 1L
//val distanceGraph = ShortestPath.run(graph, Seq(dstId))
//val pathGraph = ShortestPath.create_path_graph(distanceGraph, dstId, srcId)

In [None]:
//import org.apache.spark.sql.functions._ 
//display(distanceGraph.vertices.toDF.orderBy(asc("_1")))

In [None]:
//display(pathGraph.edges.toDF.filter('attr === 1))

In [None]:
//display(graph.edges.toDF)

In [None]:
import scala.reflect.ClassTag

/**
 * Merges the edge attributes of two graphs that are identical in structure.
 *
 * Edges are matched with an inner join on the edge RDDs and each resulting edge carries the pair
 * (attribute from `firstGraph`, attribute from `secondGraph`). Vertices are taken from `firstGraph`.
 * NOTE(review): the join presumably relies on both graphs sharing the same edge partitioning — confirm.
 */
def mergeEdgeAttributes[ED1 : ClassTag, ED2 : ClassTag](firstGraph : Graph[Long, ED1], secondGraph : Graph[Long, ED2]) : Graph[Long, (ED1, ED2)] = {
  val pairedEdges = firstGraph.edges.innerJoin(secondGraph.edges) { (_, _, left, right) => (left, right) }
  Graph(firstGraph.vertices, pairedEdges)
}

  

>     import scala.reflect.ClassTag
>     mergeEdgeAttributes: [ED1, ED2](firstGraph: org.apache.spark.graphx.Graph[Long,ED1], secondGraph: org.apache.spark.graphx.Graph[Long,ED2])(implicit evidence$1: scala.reflect.ClassTag[ED1], implicit evidence$2: scala.reflect.ClassTag[ED2])org.apache.spark.graphx.Graph[Long,(ED1, ED2)]

In [None]:
import scala.util.Random
import scala.math.sqrt
import scala.math.max

// Thompson-sampling experiment for the shortest-path problem with Gaussian edge costs.
// Each iteration samples edge costs from the posterior, plays the shortest path under the sample,
// observes the realized cost of the traversed edges, updates the posterior and records the regret.
println("Starting experiment!")
val startTime = System.currentTimeMillis()

// Lower bound applied to sampled and realized edge costs so that every weight stays positive.
val eps = 0.00001

// # Horizon N
val N = 10

// # Source and destination node IDs
val srcId = 0L
val dstId = numVertices - 1L

// # Environment
val trueMean = 1.0
val trueVar = 1.0
// TODO: Real environment (either random or from road map)
val env = graph.mapEdges(e => (trueMean, trueVar))

// # Assumption: Gaussian rewards with known variance
// # Prior graph (map weight to prior mean and variance)
val priorMean = 1.0
val priorVar = 1.0
val prior = graph.mapEdges(e => (priorMean, priorVar))
var posterior = prior

// # For regret calculations
val optimal = shortestPath(srcId, dstId, env.mapEdges(e => e.attr._1))
val optimalExpectedCost = mergeEdgeAttributes(optimal, env).edges.map(e => e.attr._1 * e.attr._2._1).reduce(_ + _)

// # Array with instant regret values
val instantRegretValues = new Array[Double](N)

// # Run experiment for N iterations
for (t <- 0 until N) {
  printf("Iteration %d, elapsed time: %d ms\n", t, System.currentTimeMillis() - startTime)

  // # Find action (super arm) using posterior sampling
  val sampledParameters = posterior.mapEdges(e => max(eps, e.attr._1 + Random.nextGaussian() * sqrt(e.attr._2)))
  // The action is consumed twice below (posterior update and regret), so keep it around.
  val action = shortestPath(srcId, dstId, sampledParameters).cache()

  // # Apply action on environments (path is indicated by 1-valued edge attributes) and observe realized costs
  val realizedEnv = env.mapEdges(e => max(eps, e.attr._1 + Random.nextGaussian() * sqrt(e.attr._2)))
  // Per edge: (path indicator, realized cost). The indicator is kept next to the cost so that the
  // update below can tell "not traversed" apart from a genuine observation.
  val observation = mergeEdgeAttributes(action, realizedEnv)

  // # Update posterior (Gaussian likelihood with known variance), only for edges that were traversed
  posterior = mergeEdgeAttributes(observation, posterior).mapEdges { e =>
    val ((onPath, cost), (pMean, pVar)) = e.attr
    if (onPath > 0.0) {
      val postVar = 1 / (1 / trueVar + 1 / pVar)
      (postVar * (cost / trueVar + pMean / pVar), postVar)
    } else {
      // No feedback for edges off the chosen path: the belief stays as it was.
      (pMean, pVar)
    }
  }.cache() // The lineage contains random draws and grows every iteration; cache so later reads reuse one realization.

  // # Calculate regret
  val actionExpectedCost = mergeEdgeAttributes(action, env).edges.map(e => e.attr._1 * e.attr._2._1).reduce(_ + _)
  val instantRegret = actionExpectedCost - optimalExpectedCost
  instantRegretValues(t) = instantRegret
}

val endTime = System.currentTimeMillis()
printf("Finished experiment! Elapsed time:%d\n", endTime - startTime)

  

>     Starting experiment!
>     Iteration 0, elapsed time: 30437 ms
>     Iteration 1, elapsed time: 83241 ms
>     Iteration 2, elapsed time: 203896 ms
>     Iteration 3, elapsed time: 268759 ms
>     Iteration 4, elapsed time: 324692 ms
>     Iteration 5, elapsed time: 452711 ms
>     Iteration 6, elapsed time: 512567 ms
>     Iteration 7, elapsed time: 583455 ms
>     Iteration 8, elapsed time: 675289 ms
>     Iteration 9, elapsed time: 721292 ms
>     Finished experiment! Elapsed time:800570
>     import scala.util.Random
>     import scala.math.sqrt
>     import scala.math.max
>     startTime: Long = 1608632014009
>     eps: Double = 1.0E-5
>     N: Int = 10
>     srcId: Long = 0
>     dstId: Long = 99999
>     trueMean: Double = 1.0
>     trueVar: Double = 1.0
>     env: org.apache.spark.graphx.Graph[Long,(Double, Double)] = org.apache.spark.graphx.impl.GraphImpl@45b8191e
>     priorMean: Double = 1.0
>     priorVar: Double = 1.0
>     prior: org.apache.spark.graphx.Graph[Long,(Double, Double)] = org.apache.spark.graphx.impl.GraphImpl@2f453bc2
>     posterior: org.apache.spark.graphx.Graph[Long,(Double, Double)] = org.apache.spark.graphx.impl.GraphImpl@7f828d32
>     optimal: org.apache.spark.graphx.Graph[Long,Double] = org.apache.spark.graphx.impl.GraphImpl@3bf78954
>     optimalExpectedCost: Double = 3.0
>     instantRegretValues: Array[Double] = Array(2.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0)
>     endTime: Long = 1608632814579

In [None]:
// Visualization: one row per iteration (numbered from 1) with the instant regret recorded for it.
val df = spark.sparkContext.parallelize(
  (1 to N).zip(instantRegretValues)
).toDF("iteration (t)", "instant regret")
df.show()

  

>     +-------------+--------------+
>     |iteration (t)|instant regret|
>     +-------------+--------------+
>     |            1|           1.0|
>     |            2|           1.0|
>     |            3|           1.0|
>     |            4|           0.0|
>     |            5|           1.0|
>     |            6|           0.0|
>     |            7|           1.0|
>     |            8|           1.0|
>     |            9|           1.0|
>     |           10|           0.0|
>     +-------------+--------------+
>
>     df: org.apache.spark.sql.DataFrame = [iteration (t): int, instant regret: double]

In [None]:
display(df)

  

[TABLE]

In [None]:
// Number of edges in the generated graph.
graph.edges.count()

  

>     res34: Long = 12654999

In [None]:
// Small scratch array; printed via mkString because println on an Array only shows its JVM identity.
val testArray = Array(0,1,2,3,4,5,6,7,8,9)
println(testArray.mkString("Array(", ", ", ")"))

  

>     [I@4cb3c696
>     testArray: Array[Int] = Array(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)

In [None]:
// Consecutive pairs of testArray as two-element arrays (window size 2, default step 1).
// Tuple-based alternative: testArray.slice(0,testArray.size-1) zip testArray.slice(1,testArray.size)
val testZipped = testArray.sliding(2).toArray

  

>     testZipped: Array[Array[Int]] = Array(Array(0, 1), Array(1, 2), Array(2, 3), Array(3, 4), Array(4, 5), Array(5, 6), Array(6, 7), Array(7, 8), Array(8, 9))