Skip to content

Commit 2bc7f66

Browse files
committed
doc: add readme and example usages
1 parent 8e0e688 commit 2bc7f66

File tree

9 files changed

+371
-17
lines changed

9 files changed

+371
-17
lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,11 +87,11 @@ jobs:
8787

8888
- name: Make target directories
8989
if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v'))
90-
run: mkdir -p testing/.jvm/target target .js/target prometheus/.jvm/target core/.jvm/target .jvm/target .native/target project/target
90+
run: mkdir -p testing/.jvm/target target .js/target site/target prometheus/.jvm/target core/.jvm/target .jvm/target .native/target project/target
9191

9292
- name: Compress target directories
9393
if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v'))
94-
run: tar cf targets.tar testing/.jvm/target target .js/target prometheus/.jvm/target core/.jvm/target .jvm/target .native/target project/target
94+
run: tar cf targets.tar testing/.jvm/target target .js/target site/target prometheus/.jvm/target core/.jvm/target .jvm/target .native/target project/target
9595

9696
- name: Upload target directories
9797
if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v'))

build.sbt

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ ThisBuild / tlBaseVersion := "0.4" // your current series x.y
55

66
ThisBuild / organization := "no.nrk.bigquery"
77
ThisBuild / organizationName := "NRK"
8+
ThisBuild / organizationHomepage := Some(new URL("https://nrk.no"))
89
ThisBuild / startYear := Some(2020)
910
ThisBuild / licenses := Seq(License.Apache2)
1011
ThisBuild / developers := List(
@@ -88,7 +89,7 @@ val commonSettings = Seq(
8889

8990
lazy val root = tlCrossRootProject
9091
.settings(name := "bigquery-scala")
91-
.aggregate(core, testing, prometheus)
92+
.aggregate(core, testing, prometheus, docs)
9293
.disablePlugins(TypelevelCiSigningPlugin, Sonatype, SbtGpg)
9394

9495
lazy val core = crossProject(JVMPlatform)
@@ -170,4 +171,16 @@ lazy val testing = crossProject(JVMPlatform)
170171
)
171172
.disablePlugins(TypelevelCiSigningPlugin, Sonatype, SbtGpg)
172173

173-
//lazy val docs = project.in(file("site")).enablePlugins(TypelevelSitePlugin)
174+
lazy val docs = project
175+
.in(file("site"))
176+
// .enablePlugins(TypelevelSitePlugin)
177+
.enablePlugins(MdocPlugin, NoPublishPlugin)
178+
.disablePlugins(TypelevelCiSigningPlugin, Sonatype, SbtGpg)
179+
.dependsOn(core.jvm, testing.jvm)
180+
.settings(
181+
compile := {
182+
val result = (Compile / compile).value
183+
mdoc.toTask("").value
184+
result
185+
}
186+
)

docs/example_query.md

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# Query
2+
3+
## Table Schemas
4+
5+
To start off we need to define some tables we can query. The schema DSL is inspired by the BigQuery table JSON definition.
6+
7+
Here we have two tables, `my-gcp-project.prod.user_log` and `my-gcp-project.prod.users`
8+
9+
```scala mdoc
10+
import com.google.cloud.bigquery.Field.Mode
11+
import com.google.cloud.bigquery.StandardSQLTypeName
12+
import no.nrk.bigquery._
13+
import no.nrk.bigquery.implicits._
14+
import java.time.LocalDate
15+
16+
object Schemas {
17+
18+
object UserEventSchema {
19+
private val timestamp: BQField = BQField("timestamp", StandardSQLTypeName.TIMESTAMP, Mode.REQUIRED)
20+
val tableDef: BQTableDef.Table[LocalDate] = BQTableDef.Table(
21+
BQTableId(BQDataset(ProjectId("my-gcp-project"), "prod", Some(LocationId("eu"))), "user_log"),
22+
BQSchema.of(
23+
BQField("eventId", StandardSQLTypeName.STRING, Mode.REQUIRED),
24+
timestamp,
25+
BQField("userId", StandardSQLTypeName.STRING, Mode.REQUIRED),
26+
BQField.struct("activity", Mode.REQUIRED)(
27+
BQField("type", StandardSQLTypeName.INT64, Mode.REQUIRED),
28+
BQField("value", StandardSQLTypeName.STRING, Mode.NULLABLE)
29+
)
30+
),
31+
BQPartitionType.DatePartitioned(timestamp.ident)
32+
)
33+
}
34+
35+
object UserSchema {
36+
private val namesStruct: BQField = BQField.struct("names", Mode.REQUIRED)(
37+
BQField("firstName", StandardSQLTypeName.INT64, Mode.REQUIRED),
38+
BQField("middleName", StandardSQLTypeName.STRING, Mode.NULLABLE),
39+
BQField("lastName", StandardSQLTypeName.STRING, Mode.REQUIRED)
40+
)
41+
val tableDef: BQTableDef.Table[Unit] = BQTableDef.Table(
42+
BQTableId(BQDataset(ProjectId("my-gcp-project"), "prod", Some(LocationId("eu"))), "users"),
43+
BQSchema.of(
44+
BQField("userId", StandardSQLTypeName.STRING, Mode.REQUIRED),
45+
namesStruct
46+
),
47+
BQPartitionType.NotPartitioned
48+
)
49+
50+
val fullNameUdf: UDF = UDF(
51+
Ident("toFullName"),
52+
UDF.Param.fromField(namesStruct) :: Nil,
53+
UDF.Body.Sql(
54+
bqfr"""(names.firstName || ' ' || coalesce(' ' || names.middleName) || ' ' || names.lastName)""".stripMargin
55+
),
56+
Some(BQType.STRING)
57+
)
58+
59+
}
60+
}
61+
```
62+
63+
Now we can use the schema definitions to write up a query.
64+
65+
## Construct a query
66+
67+
In this example we have one table that uses daily partitioning, which is joined with an unpartitioned table. Note that
68+
we do not need to do any escaping or formatting of the values. Even user-defined functions (UDFs) will be included if we
69+
reference them.
70+
71+
```scala mdoc
72+
import no.nrk.bigquery._
73+
import Schemas._
74+
import no.nrk.bigquery.implicits._
75+
import java.time.LocalDate
76+
77+
object UserEventQuery {
78+
79+
def daily(day: LocalDate): BQQuery[UserActivityRow] = BQQuery(
80+
bqfr"""|select
81+
| event.userId,
82+
| ${UserSchema.fullNameUdf(bqfr"user.names")} as name,
83+
| array_agg(event.activity) as activities
84+
|from ${UserEventSchema.tableDef.assertPartition(day)} event
85+
|join ${UserSchema.tableDef.unpartitioned} user on user.userId = event.userId
86+
|group by 1, 2
87+
|""".stripMargin
88+
)
89+
90+
case class Activity(
91+
tpe: Long,
92+
value: Option[String]
93+
)
94+
95+
object Activity {
96+
implicit val read: BQRead[Activity] = BQRead.derived
97+
}
98+
99+
case class UserActivityRow(
100+
userId: String,
101+
name: String,
102+
activities: List[Activity]
103+
)
104+
105+
object UserActivityRow {
106+
implicit val read: BQRead[UserActivityRow] = BQRead.derived
107+
}
108+
}
109+
```
110+
111+
## Testing
112+
113+
Given the schema definition and the SQL query above we can render the queries that BigQuery can validate for us. The result
114+
will be cached in a `generated` folder that should be checked into version control. The test framework checks the rendered
115+
version against the generated folder to determine the tests it needs to rerun using BigQuery. This makes it possible to quickly
116+
run all tests without getting into issues like API quotas or cost issues.
117+
118+
```scala mdoc
119+
import no.nrk.bigquery.testing.{BQSmokeTest, BigQueryTestClient}
120+
import java.time.LocalDate
121+
122+
class UserEventQueryTest extends BQSmokeTest(BigQueryTestClient.testClient) {
123+
124+
bqCheckTest("user-events-query") {
125+
UserEventQuery.daily(LocalDate.now())
126+
}
127+
}
128+
```

docs/example_udf.md

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# User defined functions (UDF)
2+
3+
## Defining UDF
4+
5+
BigQuery supports UDFs written in SQL and JavaScript.
6+
7+
**SQL**
8+
```scala mdoc
9+
import no.nrk.bigquery._
10+
import no.nrk.bigquery.implicits._
11+
12+
object MySQLUdfs {
13+
14+
val addOneUdf = UDF(
15+
Ident("addOneSqlUdf"),
16+
Seq(UDF.Param("n", BQType.FLOAT64)),
17+
UDF.Body.Sql(bqfr"""(n + 1)"""),
18+
Some(BQType.FLOAT64)
19+
)
20+
21+
}
22+
```
23+
24+
**JavaScript**
25+
```scala mdoc
26+
import no.nrk.bigquery._
27+
28+
object MyJsUdfs {
29+
30+
val addOneUdf = UDF(
31+
Ident("addOneJsUdf"),
32+
Seq(UDF.Param("n", BQType.FLOAT64)),
33+
UDF.Body.Js("return n + 1", None),
34+
Some(BQType.FLOAT64)
35+
)
36+
37+
}
38+
```
39+
40+
41+
## Calling UDF in queries
42+
43+
Like any other function we can call UDFs by passing in the required arguments. The library will inline the UDF as a
44+
temporary function if it's referenced in a query.
45+
46+
```scala mdoc
47+
import no.nrk.bigquery._
48+
import no.nrk.bigquery.implicits._
49+
50+
val myQuery: BQSqlFrag =
51+
bqfr"""|select
52+
| ${MySQLUdfs.addOneUdf(bqfr"n")} as sql,
53+
| ${MyJsUdfs.addOneUdf(bqfr"n")} as js
54+
|from unnest([1 ,2, 3]) as n
55+
|""".stripMargin
56+
```
57+
58+
## Testing
59+
60+
```scala mdoc
61+
import io.circe.Json
62+
import no.nrk.bigquery.testing.{BQUdfSmokeTest, BigQueryTestClient}
63+
64+
class ExampleUdfTest extends BQUdfSmokeTest(BigQueryTestClient.testClient) {
65+
66+
bqCheckCall("add one to SQL udf", MySQLUdfs.addOneUdf(1), Json.fromInt(2))
67+
bqCheckCall("add one to JS udf", MyJsUdfs.addOneUdf(1), Json.fromInt(2))
68+
69+
}
70+
```

docs/example_view.md

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
# View
2+
3+
## Table Schemas
4+
5+
To start off we need to define some tables we can query. The schema DSL is inspired by the BigQuery table JSON definition.
6+
7+
Here we have two tables, `my-gcp-project.prod.user_log` and `my-gcp-project.prod.users`
8+
9+
```scala mdoc
10+
import com.google.cloud.bigquery.Field.Mode
11+
import com.google.cloud.bigquery.StandardSQLTypeName
12+
import no.nrk.bigquery._
13+
import no.nrk.bigquery.implicits._
14+
import java.time.LocalDate
15+
16+
object Schemas {
17+
18+
object UserEventSchema {
19+
private val timestamp: BQField = BQField("timestamp", StandardSQLTypeName.TIMESTAMP, Mode.REQUIRED)
20+
val tableDef: BQTableDef.Table[LocalDate] = BQTableDef.Table(
21+
BQTableId(BQDataset(ProjectId("my-gcp-project"), "prod", Some(LocationId("eu"))), "user_log"),
22+
BQSchema.of(
23+
BQField("eventId", StandardSQLTypeName.STRING, Mode.REQUIRED),
24+
timestamp,
25+
BQField("userId", StandardSQLTypeName.STRING, Mode.REQUIRED),
26+
BQField.struct("activity", Mode.REQUIRED)(
27+
BQField("type", StandardSQLTypeName.INT64, Mode.REQUIRED),
28+
BQField("value", StandardSQLTypeName.STRING, Mode.NULLABLE)
29+
)
30+
),
31+
BQPartitionType.DatePartitioned(timestamp.ident)
32+
)
33+
}
34+
35+
object UserSchema {
36+
private val namesStruct: BQField = BQField.struct("names", Mode.REQUIRED)(
37+
BQField("firstName", StandardSQLTypeName.INT64, Mode.REQUIRED),
38+
BQField("middleName", StandardSQLTypeName.STRING, Mode.NULLABLE),
39+
BQField("lastName", StandardSQLTypeName.STRING, Mode.REQUIRED)
40+
)
41+
val tableDef: BQTableDef.Table[Unit] = BQTableDef.Table(
42+
BQTableId(BQDataset(ProjectId("my-gcp-project"), "prod", Some(LocationId("eu"))), "users"),
43+
BQSchema.of(
44+
BQField("userId", StandardSQLTypeName.STRING, Mode.REQUIRED),
45+
namesStruct
46+
),
47+
BQPartitionType.NotPartitioned
48+
)
49+
50+
val fullNameUdf: UDF = UDF(
51+
Ident("toFullName"),
52+
UDF.Param.fromField(namesStruct) :: Nil,
53+
UDF.Body.Sql(
54+
bqfr"""(names.firstName || ' ' || coalesce(' ' || names.middleName) || ' ' || names.lastName)""".stripMargin
55+
),
56+
Some(BQType.STRING)
57+
)
58+
59+
}
60+
}
61+
```
62+
63+
Now we can use the schema definitions to write up a query.
64+
65+
## Construct a view
66+
67+
In this example we join in the user names and normalize the struct values.
68+
69+
```scala mdoc
70+
import no.nrk.bigquery._
71+
import Schemas._
72+
import com.google.cloud.bigquery.Field.Mode
73+
import com.google.cloud.bigquery.StandardSQLTypeName
74+
import no.nrk.bigquery.implicits._
75+
76+
object UserEventView {
77+
78+
val query: BQSqlFrag =
79+
bqfr"""|select
80+
| event.timestamp,
81+
| event.userId,
82+
| (user.names.firstName || ' ' || user.names.lastName) as fullName,
83+
| event.activity.type as activityType,
84+
| event.activity.value as activityValue
85+
|from ${UserEventSchema.tableDef.unpartitioned} event
86+
|join ${UserSchema.tableDef.unpartitioned} user on user.userId = event.userId
87+
|where event.activity.value is not null
88+
|""".stripMargin
89+
90+
private val timestamp: BQField = BQField("timestamp", StandardSQLTypeName.TIMESTAMP, Mode.REQUIRED)
91+
92+
val viewDef: BQTableDef.View[LocalDate] = BQTableDef.View(
93+
BQTableId(BQDataset(ProjectId("my-gcp-project"), "prod", Some(LocationId("eu"))), "user_activity_view"),
94+
BQPartitionType.DatePartitioned(timestamp.ident),
95+
query,
96+
BQSchema.of(
97+
timestamp,
98+
BQField("userId", StandardSQLTypeName.STRING, Mode.REQUIRED),
99+
BQField("fullName", StandardSQLTypeName.STRING, Mode.REQUIRED),
100+
BQField("activityType", StandardSQLTypeName.INT64, Mode.REQUIRED),
101+
BQField("activityValue", StandardSQLTypeName.STRING, Mode.REQUIRED)
102+
)
103+
)
104+
105+
}
106+
107+
```
108+
109+
## Testing
110+
111+
Given the view definition and the SQL query above we can render the queries that BigQuery can validate for us. The result
112+
will be cached in a `generated` folder that should be checked into version control. The test framework checks the rendered
113+
version against the generated folder to determine the tests it needs to rerun using BigQuery. This makes it possible to quickly
114+
run all tests without getting into issues like API quotas or cost issues.
115+
116+
```scala mdoc
117+
import no.nrk.bigquery.testing.{BQSmokeTest, BigQueryTestClient}
118+
119+
class UserEventViewTest extends BQSmokeTest(BigQueryTestClient.testClient) {
120+
121+
bqCheckViewTest("user-event-view", UserEventView.viewDef)
122+
123+
}
124+
```

docs/index.md

Lines changed: 0 additions & 13 deletions
This file was deleted.

0 commit comments

Comments
 (0)