// Captured from a GitHub file view (branch: master; file 293 lines, 234 sloc, 9.426 kb).
// The page navigation text and the line-number gutter that preceded the code are not part of the source.
package upenn.junto.graph

import akka.actor._
import scala.collection.JavaConversions._

import gnu.trove.map.hash.TObjectDoubleHashMap

import upenn.junto.algorithm._
import upenn.junto.util.Constants


/**
 * An implementation of MAD using Akka actors. Still preliminary -- some things
 * remain to be added and rounded out, but the core algorithm is working properly.
 * There are many things that can surely be improved, especially reducing the
 * amount of blocking employed here.
 *
 * @author Jason Baldridge
 */
object MadGraphRunner {

  // Messages exchanged between the graph controller (MadGraph), the vertex
  // actors (MadVertex) and the Clock.
  sealed trait MadMessage
  // Received by MadGraph: start an iteration by sending PushLabels to every vertex.
  case object NextStep extends MadMessage
  // Received by MadGraph: send PoisonPill to every vertex and shut the actor system down.
  case object Stop extends MadMessage
  // Sent by MadGraph to a vertex: its neighbors, as a map from neighbor actor to edge weight.
  case class SetNeighbors(neighbors: Map[ActorRef, Double]) extends MadMessage
  // Sent by MadGraph to a vertex: send a NeighborInfo to each of your neighbors.
  case object PushLabels extends MadMessage
  // Sent by a vertex to MadGraph once it has finished updating its estimated
  // labels: the change from the previous distribution (delta) and the
  // reciprocal rank of the gold label (mrr; 0.0 for non-test nodes).
  case class DoneUpdating(delta: Double, mrr: Double) extends MadMessage
  // Sent by MadGraph to the Clock when all vertices are done; `worker` is the
  // actor the Clock answers with either NextStep or Stop.
  case class Advance(worker: ActorRef) extends MadMessage

  // What a vertex sends to each of its neighbors in response to PushLabels.
  // Note that this message does not extend MadMessage.
  case class NeighborInfo(
    // The vertex actor that sent this message.
    neighbor: ActorRef,
    // The sender's pcontinue value.
    nPcontinue: Double,
    // The weight the sender stores for the recipient in its own neighbor map.
    nWeightOfRecipent: Double,
    // The sender's estimated label distribution at the time of sending.
    labelDist: Map[String, Double]
  )

  /**
* Entry point into actor-based MAD: create the actors for the graph and its
* vertices and start the work.
*/
  def apply (graph: Graph, mu1: Double, mu2: Double, mu3: Double, maxIters: Int) {
    val system = ActorSystem("MadRunner")
    val clock = system.actorOf(Props(new Clock(maxIters)), name="clock")
    val madGraph = system.actorOf(Props(new MadGraph(clock, graph, mu1, mu2, mu3)), name="graph")
    madGraph ! NextStep
  }

  /**
* An actor that controls the entire graph. Stores all the vertex actors
* and dispatches work for them to do, and communicates with the Clock to
* keep each iteration of work separate.
*/
  class MadGraph (clock: ActorRef, graph: Graph, mu1: Double, mu2: Double, mu3: Double)
  extends Actor {

    val normalizationConstants =
      MadHelper.computeNormalizationConstants(graph, mu1, mu2, mu3)

    AdsorptionHelper.prepareGraph(graph)

    val namesToActorVertices: Map[String, ActorRef] =
      graph
        .vertices
        .values
        .toIndexedSeq
        .zipWithIndex
        .map { case(v, index) => {
          v.SetInjectedLabelScore(Constants.GetDummyLabel, 0.0)
          val vertexActorRef = context.actorOf(
            Props(new MadVertex(self, v.name, v.pinject, v.pcontinue, v.pabandon,
                          mu1, mu2, mu3, v.neighbors.size,
                          normalizationConstants.get(v.name),
                          TroveToScalaMap(v.injectedLabels),
                          TroveToScalaMap(v.estimatedLabels),
                          v.isTestNode,
                          TroveToScalaMap(v.goldLabels))), name = "vertex"+index)
          (v.name, vertexActorRef)
        }}
        .toMap

    namesToActorVertices.foreach {
      case(vName, vertexActorRef) => {
        val neighborsAsStrings = graph.vertices.get(vName).neighbors
        val neighborsAsActorRefs = neighborsAsStrings.keySet.map {
          neighName => (namesToActorVertices(neighName) -> neighborsAsStrings.get(neighName))
        } toMap

        vertexActorRef ! SetNeighbors(neighborsAsActorRefs)
      }
    }
    
    val vertices = namesToActorVertices.values.toIndexedSeq
    val numTestNodes = graph.vertices.values.count(_.isTestNode).toDouble

    var numBusyVertices: Int = _
    var totalDeltaLabelDiff = 0.0
    var correctNodeCount = 0.0

    def receive = {

      // Broadcast to all the vertices that they should push their
      // labels to their neighbors
      case NextStep =>
        numBusyVertices = vertices.length
        vertices.foreach(vertex => vertex ! PushLabels)

      // Receive a message from a vertex about the results of its
      // updating its previous iteration estimated label distribution
      // to the new one.
      case DoneUpdating(delta, mrr) => {
        numBusyVertices -= 1
        totalDeltaLabelDiff += delta
        correctNodeCount += (if (mrr==1.0) 1.0 else 0.0)

        if (numBusyVertices == 0) {
          println("Delta: " + totalDeltaLabelDiff)
          println("Acc: " + correctNodeCount/numTestNodes)
          totalDeltaLabelDiff = 0.0
          correctNodeCount = 0.0
          clock ! Advance(self)
        }
      }

      // Tell all the vertices to stop and then stop itself.
      case Stop =>
        vertices.foreach(vertex => vertex ! PoisonPill)
        context.system.shutdown

    }

  }


  /**
* An actor for a vertex. Knows how to send its fellow vertices the information
* they need to perform MAD updates, and -- of course -- what to do with the
* information it receives from fellow vertices.
*/
  class MadVertex (
    controller: ActorRef,
    name: String, pinject: Double, pcontinue: Double, pabandon: Double,
    mu1: Double, mu2: Double, mu3: Double, numNeighbors: Int,
    miiNormalization: Double,
    injectedLabels: Map[String, Double],
    var estimatedLabels: Map[String, Double],
    isTestNode: Boolean,
    goldLabels: Map[String, Double]
  ) extends Actor {

    import upenn.junto.util._
    import java.util.ArrayList

    var neighbors: Map[ActorRef, Double] = _
    val newLabelDist = new collection.mutable.HashMap[String, Double]

    var numNeighborMessagesReceived = 0

    def receive = {

      // Set the neighbors of this node, as a map from ActorRefs for
      // the neighbors to their weights.
      case SetNeighbors (neighborActorRefs) =>
        neighbors = neighborActorRefs

      // Push the label distribution and relevant other factors so
      // that one's neighbors can compute their updates.
      case PushLabels =>
        for (neighRef <- neighbors.keySet)
          neighRef ! NeighborInfo(self, pcontinue, neighbors(neighRef), estimatedLabels)

      // Receive a message from a neighbor and perform the relevant computation.
      case NeighborInfo(neighbor: ActorRef, nPcontinue, nWeightOfRecipient, nLabelDist) => {
        val mult = pcontinue * neighbors(neighbor) + nPcontinue * nWeightOfRecipient
        DistUtil.addScores(newLabelDist, mult*mu2, nLabelDist)

numNeighborMessagesReceived += 1

// When all neighbors have reported their distributions, wrap
// up the update and report back to the controller.
if (numNeighborMessagesReceived == numNeighbors) {
val (deltaLabelDiff, mrr) = updateEstimatedLabels
          numNeighborMessagesReceived = 0
controller ! DoneUpdating(deltaLabelDiff, mrr)
}
      }
        
    }

    // Once all labels have been pushed, finalizes the update, which
    // involves vertex-internal computations with the injected
    // labels and the prior distribution (the dummy distribution).
    def updateEstimatedLabels = {

      // Add in the injected label contribution
      DistUtil.addScores(newLabelDist, pinject*mu1, injectedLabels)

      // Add in the dummy label contribution
      DistUtil.addScores(newLabelDist, pabandon*mu3, DistUtil.DummyLabelDist)

      // Normalize by M_ii
      newLabelDist.foreach {
        case(k,v) => newLabelDist += (k -> v/miiNormalization)
      }

      // Calculate the delta from the previous estimated label distribution
      val deltaLabelDiff =
        DistUtil.getDifferenceNorm2Squared(estimatedLabels, 1.0, newLabelDist, 1.0)

      // Swap in the new distribution and clear newLabelDist for the next round
      estimatedLabels = newLabelDist.toMap
      newLabelDist.clear


      val mrr =
        if (isTestNode) {
          val sortedMap: List[(String,Double)] =
            estimatedLabels.toList.sortBy(_._2).reverse.filter(_._1 != Constants.GetDummyLabel)

          val goldRank = sortedMap.indexWhere(pair => goldLabels.containsKey(pair._1))

          if (goldRank > -1) 1.0/(goldRank + 1.0) else 0.0
        } else {
          0.0
        }
      (deltaLabelDiff,mrr)
    }


  }

  /**
* A time keeper that makes sure that each iteration is distinct from the next, by
* ensuring that the graph controller (MadGraph actor) doesn't broadcast the next
* label pushing event until all vertices are done updating.
*/
  class Clock (maxSteps: Int) extends Actor {
    var currentStep = 0

    def receive = {
      case Advance(worker) =>
        currentStep += 1
        println("Step: " + currentStep)
        if (currentStep == maxSteps) {
          worker ! Stop
          context.stop(self)
        } else {
          worker ! NextStep
        }
    }

  }

}

/**
 * Copies a Trove object-to-double map into an immutable Scala map with the
 * same keys and values.
 */
object TroveToScalaMap {
  def apply[T] (tmap: TObjectDoubleHashMap[T]): Map[T,Double] = {
    // The key set is a Java collection; mapping over it relies on the
    // implicit Java-to-Scala conversions imported at the top of the file.
    val entries = tmap.keySet.map { key => (key, tmap.get(key)) }
    entries.toMap
  }
}

/**
* Utilities for working with probability distributions stored
* as Map[String, Double] objects.
*/
object DistUtil {

  // A distribution with a single entry: the dummy label, with score 1.0.
  val DummyLabelDist = Map(Constants.GetDummyLabel -> 1.0)

  def addScores (accumulator: collection.mutable.Map[String, Double],
                 multiplier: Double,
                 addDist: Map[String,Double]) {
    addDist.mapValues(_*multiplier).foreach {
      case(k,v) =>
        val updatedValue = v + accumulator.getOrElse(k, 0.0)
        accumulator += (k -> updatedValue)
    }
  }


  def getDifferenceNorm2Squared(
    m1: Map[String, Double],
    m1Mult: Double,
    m2: collection.mutable.Map[String, Double],
    m2Mult: Double
  ) = {
    val differences = (m1.keys ++ m2.keys).map(k => m1.getOrElse(k, 0.0) - m2.getOrElse(k, 0.0))
    math.sqrt(differences.map(x => x*x).sum)
  }

}
// (The captured GitHub page ended with an error banner: "Something went wrong with that request. Please try again.")