Use InMemoryPersistenceStorage for persisting pod records #80

Merged — 28 commits, merged May 8, 2019
The changes shown below are from 10 of the 28 commits.

Commits
19681f1
Update core module to use persistence module and use PodRecordRepository
takirala Mar 29, 2019
eb28b79
Update RecordRepository interface
takirala Mar 29, 2019
0e5c5c6
Add an integration test
takirala Mar 29, 2019
eccd7ff
Revert unintentional changes
takirala Mar 29, 2019
19cbb44
Emit StateSnapshot on crash-recovery
takirala Mar 30, 2019
9ac896d
Draft future-less persistence storage flow
takirala Apr 8, 2019
92c1c08
Use async call back for thread safe access
takirala Apr 8, 2019
f71b932
Add a unit test case to validate persistence graph stage
takirala Apr 11, 2019
3772c77
Make all methods in RecordRepository to be stream based
takirala Apr 11, 2019
d895c68
Cleanup tests
takirala Apr 11, 2019
3fd9cb3
Cleanup code
takirala Apr 12, 2019
5baccdd
Merge master and resolve conflicts
takirala Apr 12, 2019
21810fc
Address review comments
takirala Apr 14, 2019
6d2c943
Address review comments
takirala Apr 16, 2019
6d8faf8
Merge master and resolve conflicts
takirala Apr 16, 2019
88959c6
Use akka.pattern.after instead of Thread.sleep
takirala Apr 16, 2019
adcdd60
Change sleep max time from 1000 to 100
takirala Apr 16, 2019
fa19488
Revert unintentional formatting changes
takirala Apr 17, 2019
3a0e7a2
Address review comments
takirala Apr 18, 2019
4b6af6d
Add code comments
takirala Apr 18, 2019
1f6ec8e
changes from pairing
Apr 19, 2019
e30961e
Enforce pipelining limit across multiple SchedulerEvents
takirala Apr 19, 2019
265e7fd
Code reformat
takirala Apr 23, 2019
b41c1c8
Remove unused method - fromSnapshot
takirala Apr 24, 2019
827923a
Merge master and resolve conflicts
takirala Apr 24, 2019
f836891
Load the snapshot before a graph component is built
takirala Apr 26, 2019
059e0a4
Move InMemoryPodRecordRepository to test-utils
takirala May 2, 2019
a9389b5
Add comment on location of RepositoryBehavior
takirala May 8, 2019
Files changed
2 changes: 1 addition & 1 deletion .travis.yml
@@ -7,7 +7,7 @@ language: scala
install:
- true
scala:
- 2.12.7
- 2.12.7
script:
- ./gradlew checkScalaFmtAll && sudo ./gradlew provision && ./gradlew ci --info
before_cache:
1 change: 1 addition & 0 deletions core/build.gradle
@@ -1,5 +1,6 @@
dependencies {
compile project(':core-models')
compile project(':persistence')
compile project(':mesos-client')
testCompile project(':test-utils')
}
@@ -0,0 +1,11 @@
package com.mesosphere.usi.core

import java.util.concurrent.Executor
import scala.concurrent.ExecutionContext
import scala.concurrent.ExecutionContextExecutor

private[usi] object CallerThreadExecutionContext {
val executor: Executor = (command: Runnable) => command.run()

implicit val context: ExecutionContextExecutor = ExecutionContext.fromExecutor(executor)
}
@@ -27,12 +27,14 @@ case class FrameResultBuilder(
if (schedulerEvents == SchedulerEvents.empty)
this
else {
val newDirty = dirtyPodIds ++ schedulerEvents.stateEvents.iterator.collect {
case podEvent: PodStateEvent => podEvent.id
case _: StateSnapshot =>
// We need to handle status snapshots and create a mechanism to signal that all cache should be recomputed
???
}
val newDirty: Set[PodId] = dirtyPodIds ++ schedulerEvents.stateEvents.iterator
.collect[Set[PodId]] {
case podEvent: PodStateEvent => Set(podEvent.id)
// We need to handle status snapshots with a mechanism to signal that all cache should be recomputed
case snapshot: StateSnapshot => snapshot.podRecords.map(_.podId).toSet
}
.flatten

copy(
state = state.applyStateIntents(schedulerEvents.stateEvents),
dirtyPodIds = newDirty,
105 changes: 80 additions & 25 deletions core/src/main/scala/com/mesosphere/usi/core/Scheduler.scala
@@ -1,13 +1,24 @@
package com.mesosphere.usi.core

import akka.Done
import akka.NotUsed
import akka.stream.scaladsl.{BidiFlow, Broadcast, Flow, GraphDSL, Source}
import akka.stream.{BidiShape, FlowShape}
import akka.stream.{BidiShape, FlowShape, Materializer}
import akka.stream.scaladsl.{BidiFlow, Broadcast, Flow, GraphDSL, Sink, Source}
import com.mesosphere.mesos.client.{MesosCalls, MesosClient}
import com.mesosphere.usi.core.models.{SpecEvent, SpecUpdated, SpecsSnapshot, StateEvent, StateSnapshot, StateUpdated}
import com.mesosphere.usi.core.models.{
PodRecordUpdated,
SpecEvent,
SpecUpdated,
SpecsSnapshot,
StateEvent,
StateSnapshot,
StateUpdated
}
import com.mesosphere.usi.repository.PodRecordRepository
import org.apache.mesos.v1.Protos.FrameworkInfo
import org.apache.mesos.v1.scheduler.Protos.{Call => MesosCall, Event => MesosEvent}
import scala.collection.JavaConverters._
import scala.concurrent.Future

/*
* Provides the scheduler graph component. The component has two inputs, and two outputs:
@@ -51,30 +62,34 @@ object Scheduler {

type StateOutput = (StateSnapshot, Source[StateEvent, Any])

def fromClient(client: MesosClient): Flow[SpecInput, StateOutput, NotUsed] = {
def fromClient(
client: MesosClient,
podRecordRepository: PodRecordRepository
)(implicit materializer: Materializer): Flow[SpecInput, StateOutput, NotUsed] = {
if (!isMultiRoleFramework(client.frameworkInfo)) {
throw new IllegalArgumentException(
"USI scheduler provides support for MULTI_ROLE frameworks only. Please provide create MesosClient with FrameworkInfo that has capability MULTI_ROLE")
"USI scheduler provides support for MULTI_ROLE frameworks only. " +
"Please provide a MesosClient with FrameworkInfo that has capability MULTI_ROLE")
}
fromFlow(client.calls, Flow.fromSinkAndSource(client.mesosSink, client.mesosSource))
fromFlow(client.calls, podRecordRepository, Flow.fromSinkAndSource(client.mesosSink, client.mesosSource))
}

private def isMultiRoleFramework(frameworkInfo: FrameworkInfo): Boolean =
frameworkInfo.getCapabilitiesList.asScala.exists(_.getType == FrameworkInfo.Capability.Type.MULTI_ROLE)

def fromFlow(
mesosCallFactory: MesosCalls,
mesosFlow: Flow[MesosCall, MesosEvent, Any]): Flow[SpecInput, StateOutput, NotUsed] = {
podRecordRepository: PodRecordRepository,
mesosFlow: Flow[MesosCall, MesosEvent, Any]
)(implicit materializer: Materializer): Flow[SpecInput, StateOutput, NotUsed] = {
Flow.fromGraph {
GraphDSL.create(unconnectedGraph(mesosCallFactory), mesosFlow)((_, _) => NotUsed) { implicit builder =>
{ (graph, mesos) =>
import GraphDSL.Implicits._
GraphDSL.create(unconnectedGraph(mesosCallFactory, podRecordRepository), mesosFlow)((_, _) => NotUsed) {
implicit builder =>
{ (graph, mesos) =>
import GraphDSL.Implicits._

mesos ~> graph.in2
graph.out2 ~> mesos
mesos ~> graph.in2
graph.out2 ~> mesos

FlowShape(graph.in1, graph.out1)
}
FlowShape(graph.in1, graph.out1)
}
}
}
}
@@ -91,30 +106,38 @@ object Scheduler {
rest.prepend(Source.single(snapshot))
}

// TODO (DCOS-47476) use actual prefixAndTail and expect first event to be a Snapshot; change the prefixAndTail param from 0 value to 1, let fail, etc.
private val stateOutputBreakoutFlow: Flow[StateEvent, StateOutput, NotUsed] = Flow[StateEvent].prefixAndTail(0).map {
case (_, stateEvents) =>
private val stateOutputBreakoutFlow: Flow[StateEvent, StateOutput, NotUsed] = Flow[StateEvent].prefixAndTail(1).map {
case (Seq(snapshot), stateEvents) =>
val stateSnapshot = snapshot match {
case x: StateSnapshot => x
case _ => throw new IllegalStateException("First event is allowed to be only a state snapshot")
}
val stateUpdates = stateEvents.map {
case c: StateUpdated => c
case _: StateSnapshot =>
throw new IllegalStateException("Only the first event is allowed to be a state snapshot")
}
(StateSnapshot.empty, stateUpdates)
(stateSnapshot, stateUpdates)
}

def unconnectedGraph(
mesosCallFactory: MesosCalls): BidiFlow[SpecInput, StateOutput, MesosEvent, MesosCall, NotUsed] = {
private[core] def unconnectedGraph(
mesosCallFactory: MesosCalls,
podRecordRepository: PodRecordRepository
)(implicit materializer: Materializer): BidiFlow[SpecInput, StateOutput, MesosEvent, MesosCall, NotUsed] = {
BidiFlow.fromGraph {
GraphDSL.create(new SchedulerLogicGraph(mesosCallFactory)) { implicit builder => (schedulerLogic) =>
GraphDSL.create(
new SchedulerLogicGraph(mesosCallFactory, podRecordRepository.readAll().runWith(Sink.head)),
) { implicit builder => (schedulerLogic) =>
{
import GraphDSL.Implicits._

val broadcast = builder.add(Broadcast[SchedulerEvents](2, eagerCancel = true))
val specInputFlattening = builder.add(specInputFlatteningFlow)
val stateOutputBreakout = builder.add(stateOutputBreakoutFlow)

val persistenceStorageFlow = builder.add(persistenceFlow(podRecordRepository))
specInputFlattening ~> schedulerLogic.in0
schedulerLogic.out ~> broadcast.in
schedulerLogic.out ~> persistenceStorageFlow ~> broadcast.in

val mesosCalls = broadcast.out(0).mapConcat { frameResult =>
frameResult.mesosCalls
@@ -130,4 +153,36 @@
}
}
}

private[core] def persistenceFlow(
podRecordRepository: PodRecordRepository
)(implicit materializer: Materializer): Flow[SchedulerEvents, SchedulerEvents, NotUsed] = {
Flow[SchedulerEvents].mapAsync(1) { events =>
val (storeRecords, deleteRecords) = events.stateEvents.collect { case x: PodRecordUpdated => x }
.partition(_.newRecord.isDefined)
if (storeRecords.isEmpty && deleteRecords.isEmpty) {
Future.successful(events)
} else {
val storeResult = if (storeRecords.nonEmpty) {
Source(storeRecords).collect { case PodRecordUpdated(_, Some(record)) => record }
.via(podRecordRepository.storeFlow)
.grouped(storeRecords.size)
.map(_ => Done)
} else Source.single(Done)
val deleteResult = if (deleteRecords.nonEmpty) {
Source(deleteRecords)
.map(_.id)
.via(podRecordRepository.deleteFlow)
.grouped(deleteRecords.size)
.map(_ => Done)
} else Source.single(Done)
Source
.zipWithN[Done, SchedulerEvents](_ => events)(List(storeResult, deleteResult))
.runWith(Sink.head) // This materialization is cheap compared to the IO operation.
}
}
}

private def isMultiRoleFramework(frameworkInfo: FrameworkInfo): Boolean =
frameworkInfo.getCapabilitiesList.asScala.exists(_.getType == FrameworkInfo.Capability.Type.MULTI_ROLE)
}
@@ -3,10 +3,11 @@ package com.mesosphere.usi.core
import akka.stream.stage.{GraphStage, GraphStageLogic, InHandler, OutHandler}
import akka.stream.{Attributes, FanInShape2, Inlet, Outlet}
import com.mesosphere.mesos.client.MesosCalls
import com.mesosphere.usi.core.models.SpecEvent
import com.mesosphere.usi.core.models.{PodId, PodRecord, SpecEvent}
import org.apache.mesos.v1.scheduler.Protos.{Event => MesosEvent}

import scala.collection.mutable
import scala.concurrent.Future
import scala.util.{Failure, Success, Try}

object SchedulerLogicGraph {
val BUFFER_SIZE = 32
@@ -41,13 +42,16 @@
* It's existence is only warranted by forecasted future needs. It's kept as a graph with an internal buffer as we will
* likely need timers, other callbacks, and additional output ports (such as an offer event stream?).
*/
class SchedulerLogicGraph(mesosCallFactory: MesosCalls)
private[core] class SchedulerLogicGraph(
mesosCallFactory: MesosCalls,
initialPodRecords: => Future[Map[PodId, PodRecord]])
extends GraphStage[FanInShape2[SpecEvent, MesosEvent, SchedulerEvents]] {
import SchedulerLogicGraph.BUFFER_SIZE

val mesosEventsInlet = Inlet[MesosEvent]("mesos-events")
val specEventsInlet = Inlet[SpecEvent]("specs")
val frameResultOutlet = Outlet[SchedulerEvents]("effects")
private val mesosEventsInlet = Inlet[MesosEvent]("mesos-events")
private val specEventsInlet = Inlet[SpecEvent]("specs")
private val frameResultOutlet = Outlet[SchedulerEvents]("effects")

// Define the shape of this stage, which is SourceShape with the port we defined above
override val shape: FanInShape2[SpecEvent, MesosEvent, SchedulerEvents] =
new FanInShape2(specEventsInlet, mesosEventsInlet, frameResultOutlet)
@@ -82,12 +86,17 @@
}
})

override def preStart(): Unit = {
// Start the stream
pull(specEventsInlet)
pull(mesosEventsInlet)
val startGraph = this.getAsyncCallback[Try[Map[PodId, PodRecord]]] {
case Success(initialSnapshot) =>
pushOrQueueIntents(handler.handlePodRecordSnapshot(initialSnapshot))
maybePull()
case Failure(ex) =>
this.failStage(ex)
}

override def preStart(): Unit =
initialPodRecords.onComplete(startGraph.invoke)(CallerThreadExecutionContext.context)
Contributor:

It is kind of odd. For the user we ask them to deal with Source[(Snapshot, Source[StateEvent])], but internally we are not using that. Why are we not passing the state snapshot via a stream? Are we not losing backpressure here? Recovering state was slowing down Marathon in the past, so I would assume that we leverage backpressure right here so that the USI scheduler flow is not overloaded.

Contributor Author (takirala):

At least in the scope of this PR, I am not trying to handle ad-hoc snapshots. The assumption in this PR is that we receive the snapshot only once during bootstrap (or crash recovery), and we intentionally block all inlets until we can load the snapshot and be ready for processing. I initially had an approach using FanInShape3 but decided this is simpler and cleaner for the initial snapshot loading.
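To make the pattern being discussed concrete, here is a minimal, hypothetical sketch (not the PR's SchedulerLogicGraph): an identity stage that registers an async callback in preStart and pulls nothing from upstream until a precondition Future succeeds. All names and the Unit precondition type are placeholders.

import akka.stream.{Attributes, FlowShape, Inlet, Outlet}
import akka.stream.stage.{GraphStage, GraphStageLogic, InHandler, OutHandler}
import scala.concurrent.Future
import scala.util.{Failure, Success, Try}

// Placeholder stage, not the PR's SchedulerLogicGraph: it forwards elements unchanged,
// but refuses to pull anything from upstream until `precondition` completes successfully.
class GatedStage[T](precondition: () => Future[Unit]) extends GraphStage[FlowShape[T, T]] {
  private val in = Inlet[T]("gated.in")
  private val out = Outlet[T]("gated.out")
  override val shape: FlowShape[T, T] = FlowShape(in, out)

  override def createLogic(attrs: Attributes): GraphStageLogic = new GraphStageLogic(shape) {
    private var ready = false

    // Invoked on the stage's own thread once the future completes.
    private val onPrecondition = getAsyncCallback[Try[Unit]] {
      case Success(_) =>
        ready = true
        if (isAvailable(out) && !hasBeenPulled(in)) pull(in) // start pulling now
      case Failure(ex) =>
        failStage(ex) // precondition failed, so the stage fails instead of starting
    }

    override def preStart(): Unit =
      // Register the callback instead of pulling eagerly: no demand reaches
      // upstream until the precondition future has completed.
      precondition().onComplete(onPrecondition.invoke)(materializer.executionContext)

    setHandler(in, new InHandler {
      override def onPush(): Unit = push(out, grab(in))
    })
    setHandler(out, new OutHandler {
      override def onPull(): Unit = if (ready && !hasBeenPulled(in)) pull(in)
    })
  }
}

The PR applies the same idea with a FanInShape2 and a Future[Map[PodId, PodRecord]] precondition, as shown in the SchedulerLogicGraph diff above.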

Contributor:

I think @timcharper has to explain the interface once more to me then. We do not expect ad-hoc snapshots, but it is odd that our internal storage uses plain futures when we prescribe the nested sources to users. Why would we not use streams here as well?

Contributor:

In the future, we will not be sending a podSpec snapshot. This is going away.

Contributor:

Cool. Could we add a longer comment above?

Contributor Author (takirala):

I'm not sure if/how I can pass an extra parameter to override def createLogic. One thought I had was to set an attribute on the graph component, but that feels like overkill. Do you have any other thoughts?

On the other hand, I don't understand why loading the pod records should not be part of the logic: we are creating a graph component which should not start pulling elements from upstream until a certain precondition is satisfied. In our case, the precondition is that the given future completes with an expected (Success) result. Since this future completion is non-deterministic (it can even fail), I think it is fair to register the callback in the preStart method. Ultimately, this callback decides whether (if at all) to start pulling elements from upstream.

Contributor:

I missed that createLogic is predefined. However, initialPodRecords could simply be the loaded record set. Why should it be a future?

Contributor Author (takirala):

By making the pod records a future, we have the option to either succeed at loading the snapshot and then start the graph, or mark the graph stage as failed to start. If it's not a future, then we have to handle the failed future before creating this graph component (and that raises the question: if we fail to make the persistence call, should we fail to create the blueprint of the graph?) cc @meln1k

Contributor Author (takirala):

Also, note that the SchedulerLogicHandler has a Map instead of a Future[Map] - https://github.com/mesosphere/usi/pull/80/files#diff-704496199082f7d74e9d62d315f0a7abR66 - and that makes sense to me, because if we have a "handler", it can be expected to have a valid snapshot. But in the case of SchedulerLogicGraph, where we are "building" the graph, we can choose to verify a precondition (in this case, successful completion of the given Future) before we choose to start the graph.

I have made initialPodRecords an anonymous function to delay the pod record loading. If you still think that having a Future as the graph precondition is not ideal, I can reformat the code to complete the future in Scheduler and fail fast before a graph blueprint is generated. wdyt?

Contributor:

I'm just wondering if loading the snapshot in the scheduler factory would be simpler. We can always handle errors there. Ideally the factory method would fail if the snapshot loading fails; the caller would not even have a failed graph stage. How should the user handle a failed graph stage anyway?
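For comparison, a rough, self-contained sketch of that alternative under placeholder types (this is not the PR's API, and the Future-returning factory signature is an assumption): the factory resolves the snapshot first, so a failed read fails the returned Future and the caller never materializes a graph that is doomed to fail.

import akka.NotUsed
import akka.stream.Materializer
import akka.stream.scaladsl.{Flow, Sink, Source}
import scala.concurrent.{ExecutionContext, Future}

object FailFastFactorySketch {
  // Placeholder aliases standing in for the PR's PodId/PodRecord and flow element types.
  type PodId = String
  type PodRecord = String
  type SpecInput = String
  type StateOutput = String

  // Stand-in for PodRecordRepository.readAll(): a Source that emits the persisted snapshot once.
  trait Repository {
    def readAll(): Source[Map[PodId, PodRecord], NotUsed]
  }

  // Resolve the snapshot eagerly; only if that succeeds is the flow built from the
  // already-loaded records, so callers handle persistence failures as a failed Future.
  def scheduler(
      repo: Repository,
      buildFlow: Map[PodId, PodRecord] => Flow[SpecInput, StateOutput, NotUsed]
  )(implicit mat: Materializer, ec: ExecutionContext): Future[Flow[SpecInput, StateOutput, NotUsed]] =
    repo.readAll().runWith(Sink.head).map(buildFlow)
}

The trade-off is that callers receive a Future[Flow[...]] instead of a plain Flow.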


def pushOrQueueIntents(effects: SchedulerEvents): Unit = {
if (isAvailable(frameResultOutlet)) {
if (pendingEffects.nonEmpty) {
@@ -2,6 +2,8 @@ package com.mesosphere.usi.core

import com.mesosphere.mesos.client.MesosCalls
import com.mesosphere.usi.core.logic.{MesosEventsLogic, SpecLogic}
import com.mesosphere.usi.core.models.PodRecord
import com.mesosphere.usi.core.models.StateSnapshot
import com.mesosphere.usi.core.models.{PodId, SpecEvent}
import org.apache.mesos.v1.scheduler.Protos.{Event => MesosEvent}

@@ -95,6 +97,33 @@ private[core] class SchedulerLogicHandler(mesosCallFactory: MesosCalls) {
}
}

/**
* Process a Mesos event and update internal state.
*
* @param event
* @return The events describing state changes as Mesos call intents
*/
def handleMesosEvent(event: MesosEvent): SchedulerEvents = {
handleFrame { builder =>
builder.process { (specs, state, _) =>
mesosEventsLogic.processEvent(specs, state, cachedPendingLaunch.pendingLaunch)(event)
}
}
}

def handlePodRecordSnapshot(podRecords: Map[PodId, PodRecord]): SchedulerEvents = {
handleFrame { builder =>
builder.process { (specs, state, _) =>
if (state.podRecords.nonEmpty || specs.podSpecs.nonEmpty) {
throw new IllegalStateException(
s"Expected initial Scheduler state to be empty." +
s" Found ${state.podRecords.size} records and ${specs.podSpecs.size} statuses")
}
SchedulerEvents(stateEvents = List(StateSnapshot.empty.copy(podRecords = podRecords.values.toSeq)))
}
}
}

/**
* Instantiate a frameResultBuilder instance, call the handler, then follow up with housekeeping:
*
@@ -129,20 +158,6 @@ private[core] class SchedulerLogicHandler(mesosCallFactory: MesosCalls) {
SchedulerEvents.empty
}

/**
* Process a Mesos event and update internal state.
*
* @param event
* @return The events describing state changes as Mesos call intents
*/
def handleMesosEvent(event: MesosEvent): SchedulerEvents = {
handleFrame { builder =>
builder.process { (specs, state, _) =>
mesosEventsLogic.processEvent(specs, state, cachedPendingLaunch.pendingLaunch)(event)
}
}
}

/**
* We remove a task if it is not reachable and running, and it has no podSpec defined
*
@@ -37,7 +37,10 @@ case class SchedulerState(podRecords: Map[PodId, PodRecord], podStatuses: Map[Po
}
case agentRecordChange: AgentRecordUpdated => ???
case reservationStatusChange: ReservationStatusUpdated => ???
case statusSnapshot: StateSnapshot => ???
case statusSnapshot: StateSnapshot =>
// TODO (DCOS-47476) Implement cache invalidation and handle snapshot fully
newPodRecords = statusSnapshot.podRecords
.foldLeft(newPodRecords)((acc, record) => acc.updated(record.podId, record))
}

copy(podRecords = newPodRecords, podStatuses = newPodStatuses)
16 changes: 16 additions & 0 deletions core/src/test/resources/application.conf
@@ -0,0 +1,16 @@
akka {
stream {
materializer {
debug {
# Enables the fuzzing mode which increases the chance of race conditions
# by aggressively reordering events and making certain operations more
# concurrent than usual.
# This setting is for testing purposes, NEVER enable this in a production
# environment!
# To get the best results, try combining this setting with a throughput
# of 1 on the corresponding dispatchers.
fuzzing-mode = on
Contributor Author (takirala):

I found this useful when I was playing around with a custom GraphStage approach I had at some point. I think retaining this is useful and makes testing more aggressive; I can revert this if there is something I missed.

}
}
}
}