Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix utf8 handling when run in docker #980

Merged
merged 2 commits into from
Mar 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -86,16 +86,24 @@ lazy val root = (project in file("."))
val artifactTargetPath = s"/app/${artifact.name}"

new Dockerfile {
from(s"--platform=$PLATFORM ubuntu:jammy-20221020")
from(s"--platform=$PLATFORM ubuntu:jammy-20230308")
runRaw(
List(
"sed -i -e 's/archive\\.ubuntu\\.com/mirror\\.facebook\\.net/g' /etc/apt/sources.list",
"sed -i -e 's/security\\.ubuntu\\.com/mirror\\.facebook\\.net/g' /etc/apt/sources.list",
"apt-get update",
"apt-get install -y --no-install-recommends openjdk-17-jdk-headless htop procps curl inetutils-ping libgomp1",
"apt-get install -y --no-install-recommends openjdk-17-jdk-headless htop procps curl inetutils-ping libgomp1 locales",
"sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && locale-gen",
"rm -rf /var/lib/apt/lists/*"
).mkString(" && ")
)
// Point the container at the en_US.UTF-8 locale that the `locale-gen` step above enables.
// The values are written without trailing whitespace: a stray space is not part of any
// valid locale name, and it would end up inside the variable if the generated ENV
// instruction quotes its values (NOTE(review): confirm how sbt-docker renders `env(Map)`).
env(
  Map(
    "LANG"     -> "en_US.UTF-8",
    "LANGUAGE" -> "en_US:en",
    "LC_ALL"   -> "en_US.UTF-8"
  )
)
add(new File("deploy/metarank.sh"), "/metarank.sh")
add(artifact, artifactTargetPath)
entryPoint("/metarank.sh")
Expand Down
4 changes: 2 additions & 2 deletions src/main/scala/ai/metarank/config/BoosterConfig.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ object BoosterConfig {
learningRate: Double = 0.1,
ndcgCutoff: Int = 10,
maxDepth: Int = 8,
seed: Int = Random.nextInt(Int.MaxValue),
seed: Int = 0,
numLeaves: Int = 16,
sampling: Double = 0.8
) extends BoosterConfig
Expand All @@ -31,7 +31,7 @@ object BoosterConfig {
learningRate: Double = 0.1,
ndcgCutoff: Int = 10,
maxDepth: Int = 8,
seed: Int = Random.nextInt(Int.MaxValue),
seed: Int = 0,
sampling: Double = 0.8
) extends BoosterConfig

Expand Down
3 changes: 3 additions & 0 deletions src/test/resources/japanese.json
Git LFS file not shown
26 changes: 26 additions & 0 deletions src/test/scala/ai/metarank/main/AutofeatureTest.scala
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package ai.metarank.main

import ai.metarank.config.BoosterConfig.XGBoostConfig
import ai.metarank.config.Config
import ai.metarank.feature.NumberFeature.NumberFeatureSchema
import ai.metarank.feature.StringFeature.StringFeatureSchema
import ai.metarank.main.CliArgs.AutoFeatureArgs
import ai.metarank.main.command.AutoFeature
import ai.metarank.main.command.autofeature.ConfigMirror
Expand Down Expand Up @@ -53,4 +55,28 @@ class AutofeatureTest extends AnyFlatSpec with Matchers {
val x2 = AutoFeature.yamlFormat.pretty(conf.asJson)
x1 shouldBe x2
}

// Round trip: render a ConfigMirror holding Japanese string values to YAML, load it back
// through Config.load, and check that features and models are unchanged.
it should "correctly export japanese" in {
  val fooFeature = StringFeatureSchema(
    name = FeatureName("foo"),
    field = FieldName(Item, "foo"),
    scope = ItemScopeType,
    values = NonEmptyList.of("メイズ", "オリジナル")
  )
  val defaultModel = LambdaMARTConfig(
    backend = XGBoostConfig(iterations = 50),
    features = NonEmptyList.of(FeatureName("foo")),
    weights = Map("click" -> 1.0)
  )
  val conf = ConfigMirror(
    features = List(fooFeature),
    models = Map("default" -> defaultModel)
  )

  // Serialize to YAML text, then parse that text as a regular config.
  val yaml     = AutoFeature.yamlFormat.pretty(conf.asJson)
  val reloaded = Config.load(yaml, Map.empty).unsafeRunSync()

  reloaded.features shouldBe conf.features
  reloaded.models shouldBe conf.models
}
}
14 changes: 14 additions & 0 deletions src/test/scala/ai/metarank/model/EventJsonTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package ai.metarank.model
import ai.metarank.model.Event.{InteractionEvent, ItemEvent, RankItem, RankingEvent}
import ai.metarank.model.Field.{BooleanField, NumberField, StringField, StringListField}
import ai.metarank.model.Identifier.{ItemId, SessionId, UserId}
import better.files.Resource
import cats.data.NonEmptyList
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers
Expand Down Expand Up @@ -157,4 +158,17 @@ class EventJsonTest extends AnyFlatSpec with Matchers {
decode[Timestamp]("\"123\"") shouldBe Right(Timestamp(123L))
decode[Timestamp]("\"2022-06-22T11:21:39Z\"") shouldBe Right(Timestamp(1655896899000L))
}

// Decodes the /japanese.json test resource and checks that it yields an ItemEvent whose
// "maker" string field carries the Japanese text intact.
it should "decode japanese in field values" in {
  val expected = ItemEvent(
    id = EventId("beb21c70-b3ef-4fc8-9ded-e9e93707371a"),
    timestamp = Timestamp(1679007381000L),
    item = ItemId("a071m00000370MKAAY"),
    fields = List(StringField("maker", "メイズ オリジナル"))
  )

  val payload = Resource.my.getAsString("/japanese.json")
  decode[Event](payload) shouldBe Right(expected)
}
}