Skip to content

Commit

Permalink
a
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexGilleran committed Jan 8, 2017
1 parent ddb74d0 commit cc83c20
Show file tree
Hide file tree
Showing 29 changed files with 416 additions and 232 deletions.
22 changes: 10 additions & 12 deletions .project
@@ -1,13 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>magda-metadata</name>
<buildSpec>
<buildCommand>
<name>org.scala-ide.sdt.core.scalabuilder</name>
</buildCommand>
</buildSpec>
<natures>
<nature>org.scala-ide.sdt.core.scalanature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
<linkedResources> </linkedResources>
</projectDescription>
<name>magda-metadata</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
</buildSpec>
<natures>
</natures>
</projectDescription>
9 changes: 5 additions & 4 deletions build.sbt
Expand Up @@ -7,14 +7,15 @@ lazy val commonSettings = Seq(
scalaVersion := "2.11.8"
)

//lazy val root = (project in file("."))
// .aggregate(common, searchApi, indexer)
// .settings(commonSettings: _*)
lazy val root = (project in file("."))
.aggregate(common, searchApi, indexer)
.settings(commonSettings: _*)
lazy val common = (project in file("common"))
.settings(commonSettings: _*)
lazy val searchApi = (project in file("search-api"))
.settings(commonSettings: _*)
.dependsOn(common)
.dependsOn(indexer % "test")
.enablePlugins(sbtdocker.DockerPlugin, JavaServerAppPackaging)
lazy val indexer = (project in file("indexer"))
.settings(commonSettings: _*)
Expand All @@ -41,4 +42,4 @@ variables in EditSource += ("homeDir", homeDir)
variables in EditSource += ("version", version.value)

Revolver.settings
Revolver.enableDebugging(port = 8000, suspend = false)
Revolver.enableDebugging(port = 8000, suspend = false)
1 change: 1 addition & 0 deletions common/.gitignore
@@ -0,0 +1 @@
/bin/
13 changes: 13 additions & 0 deletions common/.project
@@ -0,0 +1,13 @@
<projectDescription>
<name>magda-metadata-common</name>
<buildSpec>
<buildCommand>
<name>org.scala-ide.sdt.core.scalabuilder</name>
</buildCommand>
</buildSpec>
<natures>
<nature>org.scala-ide.sdt.core.scalanature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
<linkedResources> </linkedResources>
</projectDescription>
4 changes: 3 additions & 1 deletion common/build.sbt
Expand Up @@ -15,6 +15,8 @@ libraryDependencies ++= {
"com.monsanto.labs" %% "mwundo" % "0.1.0" exclude("xerces", "xercesImpl"),
"org.scalaz" %% "scalaz-core" % "7.2.8",

"org.scalatest" %% "scalatest" % scalaTestV % "test"
"org.scalatest" %% "scalatest" % scalaTestV % "test",
"org.scalamock" %% "scalamock-scalatest-support" % "3.2.2" % "test",
"com.sksamuel.elastic4s" %% "elastic4s-testkit" % "2.3.0" % "test"
)
}
24 changes: 17 additions & 7 deletions common/src/main/scala/au/csiro/data61/magda/AppConfig.scala
Expand Up @@ -3,13 +3,23 @@ package au.csiro.data61.magda
import com.typesafe.config._

object AppConfig {
val env: String = if (System.getenv("SCALA_ENV") == null) "local" else System.getenv("SCALA_ENV")

private val commonGeneralConf = ConfigFactory.load("common.conf", ConfigParseOptions.defaults().setAllowMissing(false), ConfigResolveOptions.defaults())
private val commonEnvConf = ConfigFactory.load("common-env-specific-config/" + env, ConfigParseOptions.defaults().setAllowMissing(false), ConfigResolveOptions.defaults())
private val appConf = ConfigFactory.load("application.conf", ConfigParseOptions.defaults().setAllowMissing(false), ConfigResolveOptions.defaults())
private val envConf = ConfigFactory.load("env-specific-config/" + env, ConfigParseOptions.defaults().setAllowMissing(false), ConfigResolveOptions.defaults())
def getEnv = if (System.getenv("SCALA_ENV") == null) "local" else System.getenv("SCALA_ENV")

/** The global config, potentially including env-custom settings for libraries like akka */
val conf = envConf.withFallback(appConf).withFallback(commonEnvConf).withFallback(commonGeneralConf)
def conf(envOption: Option[String] = None, allowUnresolved: Boolean = false) = {
val env = envOption match {
case Some(env) => env
case None => getEnv
}

val parseOptions = ConfigParseOptions.defaults().setAllowMissing(false)
val resolveOptions = ConfigResolveOptions.defaults().setAllowUnresolved(allowUnresolved)

val commonGeneralConf = ConfigFactory.load("common.conf", parseOptions, resolveOptions)
val commonEnvConf = ConfigFactory.load("common-env-specific-config/" + env, parseOptions, resolveOptions)
val appConf = ConfigFactory.load("application.conf", parseOptions, resolveOptions)
val envConf = ConfigFactory.load("env-specific-config/" + env, parseOptions, resolveOptions)

envConf.withFallback(appConf).withFallback(commonEnvConf).withFallback(commonGeneralConf)
}
}
Expand Up @@ -3,7 +3,7 @@ package au.csiro.data61.magda.spatial
import java.net.URL

import au.csiro.data61.magda.AppConfig
import com.typesafe.config.{Config, ConfigObject}
import com.typesafe.config.{ Config, ConfigObject }

import scala.collection.JavaConversions._

Expand All @@ -18,11 +18,13 @@ case class RegionSource(
order: Int)

object RegionSource {
val sources = loadFromConfig(AppConfig.conf.getConfig("regionSources"))
def generateRegionId(regionType: String, id: String) = s"${regionType}/$id".toLowerCase
}

private lazy val lookup = sources.groupBy(_.name.toLowerCase).mapValues(_.head)
class RegionSources(config: Config) {
val sources = loadFromConfig(config)

def generateRegionId(regionType: String, id: String) = s"${regionType}/$id".toLowerCase
private lazy val lookup = sources.groupBy(_.name.toLowerCase).mapValues(_.head)

def forName(name: String): Option[RegionSource] = lookup.get(name.toLowerCase)

Expand All @@ -41,4 +43,5 @@ object RegionSource {
order = regionSourceConfig.getInt("order"))
}.toSeq.filterNot(_.disabled)
}
}
}

1 change: 1 addition & 0 deletions indexer/.gitignore
@@ -0,0 +1 @@
/bin/
13 changes: 13 additions & 0 deletions indexer/.project
@@ -0,0 +1,13 @@
<projectDescription>
<name>magda-metadata-indexer</name>
<buildSpec>
<buildCommand>
<name>org.scala-ide.sdt.core.scalabuilder</name>
</buildCommand>
</buildSpec>
<natures>
<nature>org.scala-ide.sdt.core.scalanature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
<linkedResources> </linkedResources>
</projectDescription>
2 changes: 1 addition & 1 deletion indexer/src/main/resources/application.conf
Expand Up @@ -3,9 +3,9 @@ akka {
}

indexer {
readSnapshots = true
alwaysReindex = false
makeSnapshots = true
readSnapshots = true
}

indexedServices = {
Expand Down
12 changes: 5 additions & 7 deletions indexer/src/main/resources/env-specific-config/local.conf
@@ -1,5 +1,9 @@
crawler {
indexer {
maxResults = 100
alwaysReindex = false
makeSnapshots = false
readSnapshots = false
connectionRetries = 10
}

indexedServices {
Expand Down Expand Up @@ -59,10 +63,4 @@ regionSources {
STE {
disabled = false
}
}

indexer {
alwaysReindex = false
makeSnapshots = false
readSnapshots = false
}
22 changes: 15 additions & 7 deletions indexer/src/main/scala/au/csiro/data61/magda/MagdaApp.scala
@@ -1,16 +1,24 @@

package au.csiro.data61.magda

import akka.actor.{Actor, ActorLogging, ActorSystem, DeadLetter, Props}
import scala.collection.JavaConversions._
import scala.util.Failure
import scala.util.Success

import com.typesafe.config.ConfigObject
import com.typesafe.config.ConfigValue

import akka.actor.Actor
import akka.actor.ActorLogging
import akka.actor.ActorSystem
import akka.actor.DeadLetter
import akka.actor.Props
import akka.event.Logging
import akka.stream.ActorMaterializer
import au.csiro.data61.magda.crawler.Crawler
import au.csiro.data61.magda.external.InterfaceConfig
import au.csiro.data61.magda.search.SearchIndexer
import com.typesafe.config.{ConfigObject, ConfigValue}

import scala.collection.JavaConversions._
import scala.util.{Failure, Success}
import au.csiro.data61.magda.search.elasticsearch.DefaultClientProvider

object MagdaApp extends App {
implicit val system = ActorSystem()
Expand All @@ -34,8 +42,8 @@ object MagdaApp extends App {
// system.scheduler.schedule(0 millis, 1 days, supervisor, Start(List((ExternalInterfaceType.CKAN, new URL(config.getString("services.dga-api.baseUrl"))))))

logger.debug("Starting Crawler")
val indexer = SearchIndexer(system, system.dispatcher, materializer)
val crawler = new Crawler(system, config, interfaceConfigs, materializer, indexer)
val indexer = SearchIndexer(new DefaultClientProvider, config)
val crawler = new Crawler(system, config.getConfig("indexer"), interfaceConfigs, materializer, indexer)

crawler.crawl() onComplete {
case Success(_) =>
Expand Down
Expand Up @@ -28,7 +28,7 @@ class Crawler(system: ActorSystem, config: Config, val externalInterfaces: Seq[I
def crawl() = {
externalInterfaces
.map { interface =>
val needsIndexingFuture = if (AppConfig.conf.getBoolean("indexer.alwaysReindex")) {
val needsIndexingFuture = if (config.getBoolean("indexer.alwaysReindex")) {
log.info("Indexing {} because indexer.alwaysReindex is true", interface.name)
Future(true)
} else {
Expand Down Expand Up @@ -71,7 +71,7 @@ class Crawler(system: ActorSystem, config: Config, val externalInterfaces: Seq[I
.map { size =>
if (size > 0) {
log.info("Indexed {} datasets", size)
if (config.getBoolean("indexer.makeSnapshots")) {
if (config.getBoolean("crawler.makeSnapshots")) {
log.info("Snapshotting...")
indexer.snapshot()
}
Expand All @@ -91,7 +91,7 @@ class Crawler(system: ActorSystem, config: Config, val externalInterfaces: Seq[I
Source.fromFuture(interface.getTotalDataSetCount())
.mapConcat { count =>
log.info("{} has {} datasets", interfaceDef.baseUrl, count)
val maxFromConfig = if (AppConfig.conf.hasPath("crawler.maxResults")) AppConfig.conf.getLong("crawler.maxResults") else Long.MaxValue
val maxFromConfig = if (config.hasPath("maxResults")) config.getLong("maxResults") else Long.MaxValue
createBatches(interfaceDef, 0, Math.min(maxFromConfig, count))
}
.throttle(1, 1 second, 1, ThrottleMode.Shaping)
Expand Down
Expand Up @@ -2,11 +2,13 @@ package au.csiro.data61.magda.search

import au.csiro.data61.magda.model.misc._

import scala.concurrent.{ExecutionContext, Future}
import scala.concurrent.{ ExecutionContext, Future }
import akka.stream.Materializer
import akka.actor.ActorSystem
import au.csiro.data61.magda.external.InterfaceConfig
import au.csiro.data61.magda.search.elasticsearch.ElasticSearchIndexer
import au.csiro.data61.magda.search.elasticsearch.ClientProvider
import com.typesafe.config.Config

trait SearchIndexer {
def index(source: InterfaceConfig, dataSets: List[DataSet]): Future[Any]
Expand All @@ -15,5 +17,6 @@ trait SearchIndexer {
}

object SearchIndexer {
def apply(implicit system: ActorSystem, ec: ExecutionContext, materializer: Materializer) = new ElasticSearchIndexer()
def apply(clientProvider: ClientProvider, config: Config)(implicit system: ActorSystem, ec: ExecutionContext, materializer: Materializer) =
new ElasticSearchIndexer(clientProvider, config, system, ec, materializer)
}

0 comments on commit cc83c20

Please sign in to comment.