Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: added trino and spark test #3525

Merged
merged 12 commits into from
Jul 13, 2023
11 changes: 9 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -189,9 +189,11 @@ require (
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/hashicorp/go-uuid v1.0.3 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/heetch/avro v0.4.4 // indirect
github.com/imdario/mergo v0.3.12 // indirect
github.com/imdario/mergo v0.3.13 // indirect
github.com/jcmturner/gofork v1.7.6 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 // indirect
github.com/klauspost/asmfmt v1.3.2 // indirect
Expand Down Expand Up @@ -245,10 +247,11 @@ require (
github.com/tidwall/pretty v1.2.0 // indirect
github.com/tklauser/go-sysconf v0.3.11 // indirect
github.com/tklauser/numcpus v0.6.0 // indirect
github.com/trinodb/trino-go-client v0.312.0
github.com/xdg-go/pbkdf2 v1.0.0 // indirect
github.com/xdg-go/scram v1.1.2 // indirect
github.com/xdg-go/stringprep v1.0.4 // indirect
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
github.com/xeipuuv/gojsonschema v1.2.0 // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
Expand Down Expand Up @@ -283,6 +286,10 @@ require (
google.golang.org/genproto/googleapis/api v0.0.0-20230530153820-e85fd2cbaebc // indirect
gopkg.in/alexcesaro/statsd.v2 v2.0.0 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/jcmturner/aescts.v1 v1.0.1 // indirect
gopkg.in/jcmturner/dnsutils.v1 v1.0.1 // indirect
gopkg.in/jcmturner/gokrb5.v6 v6.1.1 // indirect
gopkg.in/jcmturner/rpc.v1 v1.1.0 // indirect
gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
Expand Down
18 changes: 16 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1335,6 +1335,8 @@ github.com/hashicorp/go-uuid v0.0.0-20180228145832-27454136f036/go.mod h1:6SBZvO
github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8=
github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/go-version v1.2.1/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
Expand All @@ -1360,8 +1362,9 @@ github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w=
github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
github.com/imdario/mergo v0.3.12 h1:b6R2BslTbIEToALKP7LxUvijTsNI9TAe80pLWN2g/HU=
github.com/imdario/mergo v0.3.12/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
github.com/imdario/mergo v0.3.13 h1:lFzP57bqS/wsqKssCGmtLAb8A0wKjLGrve2q3PPVcBk=
github.com/imdario/mergo v0.3.13/go.mod h1:4lJ1jqUDcsbIECGy0RUJAXNIhg+6ocWgb1ALK2O4oXg=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/intel/goresctrl v0.2.0/go.mod h1:+CZdzouYFn5EsxgqAQTEzMfwKwuc0fVdMrT9FCCAVRQ=
github.com/invopop/jsonschema v0.7.0/go.mod h1:O9uiLokuu0+MGFlyiaqtWxwqJm41/+8Nj0lD7A36YH0=
Expand Down Expand Up @@ -1409,6 +1412,8 @@ github.com/jackc/puddle v1.2.1/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dv
github.com/jawher/mow.cli v1.0.4/go.mod h1:5hQj2V8g+qYmLUVWqu4Wuja1pI57M83EChYLVZ0sMKk=
github.com/jawher/mow.cli v1.2.0/go.mod h1:y+pcA3jBAdo/GIZx/0rFjw/K2bVEODP9rfZOfaiq8Ko=
github.com/jcmturner/gofork v0.0.0-20180107083740-2aebee971930/go.mod h1:MK8+TM0La+2rjBD4jE12Kj1pCCxK7d2LK/UM3ncEo0o=
github.com/jcmturner/gofork v1.7.6 h1:QH0l3hzAU1tfT3rZCnW5zXl+orbkNMMRGJfdJjHVETg=
github.com/jcmturner/gofork v1.7.6/go.mod h1:1622LH6i/EZqLloHfE7IeZ0uEJwMSUyQ/nDd82IeqRo=
github.com/jeremywohl/flatten v1.0.1 h1:LrsxmB3hfwJuE+ptGOijix1PIfOoKLJ3Uee/mzbgtrs=
github.com/jeremywohl/flatten v1.0.1/go.mod h1:4AmD/VxjWcI5SRB0n6szE2A6s2fsNHDLO0nAlMHgfLQ=
github.com/jhump/gopoet v0.0.0-20190322174617-17282ff210b3/go.mod h1:me9yfT6IJSlOL3FCfrg+L6yzUEZ+5jW6WHt4Sk+UPUI=
Expand Down Expand Up @@ -1855,6 +1860,8 @@ github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7Am
github.com/tklauser/numcpus v0.6.0 h1:kebhY2Qt+3U6RNK7UqpYNA+tJ23IBEGKkB7JQBfDYms=
github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ5UVIcaL4=
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/trinodb/trino-go-client v0.312.0 h1:O7bfMFCrt8pWChx1NAJEYwTVlwKjwGDZ+ZaMHRzk0O0=
github.com/trinodb/trino-go-client v0.312.0/go.mod h1:yI2MwYvoFWr4Xb9BCBUEbth9eXrnqGw9CtpL6ae6Wns=
github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM=
github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw=
github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY=
Expand All @@ -1876,8 +1883,9 @@ github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY=
github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4=
github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8=
github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f h1:J9EGpcZtP0E/raorCMxlFGSTBrsSlaDGf3jU/qvAE2c=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74=
Expand Down Expand Up @@ -2673,10 +2681,16 @@ gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/ini.v1 v1.66.6/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/jcmturner/aescts.v1 v1.0.1 h1:cVVZBK2b1zY26haWB4vbBiZrfFQnfbTVrE3xZq6hrEw=
gopkg.in/jcmturner/aescts.v1 v1.0.1/go.mod h1:nsR8qBOg+OucoIW+WMhB3GspUQXq9XorLnQb9XtvcOo=
gopkg.in/jcmturner/dnsutils.v1 v1.0.1 h1:cIuC1OLRGZrld+16ZJvvZxVJeKPsvd5eUIvxfoN5hSM=
gopkg.in/jcmturner/dnsutils.v1 v1.0.1/go.mod h1:m3v+5svpVOhtFAP/wSz+yzh4Mc0Fg7eRhxkJMWSIz9Q=
gopkg.in/jcmturner/goidentity.v3 v3.0.0 h1:1duIyWiTaYvVx3YX2CYtpJbUFd7/UuPYCfgXtQ3VTbI=
gopkg.in/jcmturner/goidentity.v3 v3.0.0/go.mod h1:oG2kH0IvSYNIu80dVAyu/yoefjq1mNfM5bm88whjWx4=
gopkg.in/jcmturner/gokrb5.v6 v6.1.1 h1:n0KFjpbuM5pFMN38/Ay+Br3l91netGSVqHPHEXeWUqk=
gopkg.in/jcmturner/gokrb5.v6 v6.1.1/go.mod h1:NFjHNLrHQiruory+EmqDXCGv6CrjkeYeA+bR9mIfNFk=
gopkg.in/jcmturner/gokrb5.v7 v7.3.0/go.mod h1:l8VISx+WGYp+Fp7KRbsiUuXTTOnxIc3Tuvyavf11/WM=
gopkg.in/jcmturner/rpc.v1 v1.1.0 h1:QHIUxTX1ISuAv9dD2wJ9HWQVuWDX/Zc0PfeC2tjc4rU=
gopkg.in/jcmturner/rpc.v1 v1.1.0/go.mod h1:YIdkC4XfD6GXbzje11McwsDuOlZQSb9W4vfLvuNnlv8=
gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA=
gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k=
Expand Down
153 changes: 152 additions & 1 deletion warehouse/integrations/datalake/datalake_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package datalake_test

import (
"context"
"database/sql"
"encoding/json"
"fmt"
"os"
Expand Down Expand Up @@ -33,6 +34,8 @@ import (
backendconfig "github.com/rudderlabs/rudder-server/backend-config"

warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils"

_ "github.com/trinodb/trino-go-client/trino"
)

type gcsTestCredentials struct {
Expand Down Expand Up @@ -67,7 +70,13 @@ func TestIntegration(t *testing.T) {
t.Skip("Skipping tests. Add 'SLOW=1' env var to run test.")
}

c := testcompose.New(t, compose.FilePaths([]string{"testdata/docker-compose.yml", "../testdata/docker-compose.jobsdb.yml", "../testdata/docker-compose.minio.yml"}))
c := testcompose.New(t, compose.FilePaths([]string{
"testdata/docker-compose.yml",
"testdata/docker-compose.trino.yml",
"testdata/docker-compose.spark.yml",
"../testdata/docker-compose.jobsdb.yml",
"../testdata/docker-compose.minio.yml",
}))
c.Start(context.Background())

misc.Init()
Expand All @@ -78,6 +87,7 @@ func TestIntegration(t *testing.T) {
jobsDBPort := c.Port("jobsDb", 5432)
minioPort := c.Port("minio", 9000)
azurePort := c.Port("azure", 10000)
trinoPort := c.Port("trino", 8080)

httpPort, err := kithelper.GetFreePort()
require.NoError(t, err)
Expand Down Expand Up @@ -407,4 +417,145 @@ func TestIntegration(t *testing.T) {
}
testhelper.VerifyConfigurationTest(t, dest)
})

t.Run("Trino", func(t *testing.T) {
dsn := fmt.Sprintf("http://user@localhost:%d?catalog=minio&schema=default&session_properties=minio.parquet_use_column_index=true",
trinoPort,
)
db, err := sql.Open("trino", dsn)
require.NoError(t, err)

require.Eventually(t, func() bool {
_, err := db.ExecContext(ctx, "SELECT 1")
return err == nil
}, 60*time.Second, 1*time.Second)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could try more often, we don't even have log pollution here. Every 100ms for example could work.


require.NoError(t, testhelper.WithConstantRetries(func() error {
_, err = db.ExecContext(ctx, `
CREATE SCHEMA IF NOT EXISTS minio.rudderstack WITH (
location = 's3a://`+s3BucketName+`/')
`)
return err
}))

require.NoError(t, testhelper.WithConstantRetries(func() error {
_, err = db.ExecContext(ctx, `
CREATE TABLE IF NOT EXISTS minio.rudderstack.tracks (
"_timestamp" TIMESTAMP,
context_destination_id VARCHAR,
context_destination_type VARCHAR,
context_ip VARCHAR,
context_library_name VARCHAR,
context_passed_ip VARCHAR,
context_request_ip VARCHAR,
context_source_id VARCHAR,
context_source_type VARCHAR,
event VARCHAR,
event_text VARCHAR,
id VARCHAR,
original_timestamp TIMESTAMP,
received_at TIMESTAMP,
sent_at TIMESTAMP,
"timestamp" TIMESTAMP,
user_id VARCHAR,
uuid_ts TIMESTAMP
)
WITH (
external_location = 's3a://`+s3BucketName+`/some-prefix/rudder-datalake/s_3_datalake_integration/tracks/2023/05/12/04/',
format = 'PARQUET'
)
`)
return err
}))

var count int64

require.NoError(t, testhelper.WithConstantRetries(func() error {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest using require.Eventually with a short interval here as well, for sure one that is a fraction of a second. Same applies for others below.

return db.QueryRowContext(ctx, `
select
count(*)
from
minio.rudderstack.tracks
`).Scan(&count)
}))
require.Equal(t, int64(8), count)

require.NoError(t, testhelper.WithConstantRetries(func() error {
return db.QueryRowContext(ctx, `
select
count(*)
from
minio.rudderstack.tracks
where
context_destination_id = '`+s3DestinationID+`'
`).Scan(&count)
}))
require.Equal(t, int64(8), count)
})

t.Run("Spark", func(t *testing.T) {
_ = c.Exec(ctx,
"spark-master",
"spark-sql",
"-e",
`
CREATE EXTERNAL TABLE tracks (
_timestamp timestamp,
context_destination_id string,
context_destination_type string,
context_ip string,
context_library_name string,
context_passed_ip string,
context_request_ip string,
context_source_id string,
context_source_type string,
event string,
event_text string, id string,
original_timestamp timestamp,
received_at timestamp,
sent_at timestamp,
timestamp timestamp,
user_id string,
uuid_ts timestamp
)
STORED AS PARQUET
location "s3a://s3-datalake-test/some-prefix/rudder-datalake/s_3_datalake_integration/tracks/2023/05/12/04/";
`,
"-S",
)

countOutput := c.Exec(ctx,
"spark-master",
"spark-sql",
"-e",
`
select
count(*)
from
tracks;
`,
"-S",
)
countOutput = strings.ReplaceAll(strings.ReplaceAll(countOutput, "\n", ""), "\r", "") // remove trailing newline
require.NotEmpty(t, countOutput)
require.Equal(t, string(countOutput[len(countOutput)-1]), "8", countOutput) // last character is the count

filteredCountOutput := c.Exec(ctx,
"spark-master",
"spark-sql",
"-e",
`
select
count(*)
from
tracks
where
context_destination_id = '`+s3DestinationID+`';
`,
"-S",
)
filteredCountOutput = strings.ReplaceAll(strings.ReplaceAll(filteredCountOutput, "\n", ""), "\r", "") // remove trailing newline
require.NotEmpty(t, filteredCountOutput)
require.Equal(t, string(filteredCountOutput[len(filteredCountOutput)-1]), "8", filteredCountOutput) // last character is the count
})
}
46 changes: 46 additions & 0 deletions warehouse/integrations/datalake/testdata/conf/core-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<configuration>
<property>
<name>hive.metastore.uris</name>
<value>thrift://hive-metastore:9083</value>
</property>
<property>
<name>fs.s3.impl</name>
<value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
</property>
<property>
<name>fs.s3a.impl</name>
<value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
</property>
<property>
<name>fs.s3n.impl</name>
<value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
</property>
<property>
<name>s3.impl.disable.cache</name>
<value>true</value>
</property>
<property>
<name>s3a.impl.disable.cache</name>
<value>true</value>
</property>
<property>
<name>s3n.impl.disable.cache</name>
<value>true</value>
</property>
<property>
<name>fs.s3a.endpoint</name>
<value>http://minio:9000</value>
</property>
<property>
<name>fs.s3a.path.style.access</name>
<value>true</value>
</property>
<property>
<name>fs.s3a.access.key</name>
<value>MYACCESSKEY</value>
</property>
<property>
<name>fs.s3a.secret.key</name>
<value>MYSECRETKEY</value>
</property>
</configuration>
46 changes: 46 additions & 0 deletions warehouse/integrations/datalake/testdata/conf/hive-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<configuration>
<property>
<name>hive.metastore.uris</name>
<value>thrift://hive-metastore:9083</value>
</property>
<property>
<name>fs.s3.impl</name>
<value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
</property>
<property>
<name>fs.s3a.impl</name>
<value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
</property>
<property>
<name>fs.s3n.impl</name>
<value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
</property>
<property>
<name>s3.impl.disable.cache</name>
<value>true</value>
</property>
<property>
<name>s3a.impl.disable.cache</name>
<value>true</value>
</property>
<property>
<name>s3n.impl.disable.cache</name>
<value>true</value>
</property>
<property>
<name>fs.s3a.endpoint</name>
<value>http://minio:9000</value>
</property>
<property>
<name>fs.s3a.path.style.access</name>
<value>true</value>
</property>
<property>
<name>fs.s3a.access.key</name>
<value>MYACCESSKEY</value>
</property>
<property>
<name>fs.s3a.secret.key</name>
<value>MYSECRETKEY</value>
</property>
</configuration>
Loading