diff --git a/CHANGELOG.md b/CHANGELOG.md index bbd9e2c9c56..b5a5c57417d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,112 @@ # Changelog +## [1.10.0](https://github.com/rudderlabs/rudder-server/compare/v1.9.0...v1.10.0) (2023-06-23) + + +### Features + +* add config param for batch router for name customize ([#3461](https://github.com/rudderlabs/rudder-server/issues/3461)) ([c16e692](https://github.com/rudderlabs/rudder-server/commit/c16e692da2e13d6b9f141bb06c70b84bce0a069c)) +* error detail reporting ([#3265](https://github.com/rudderlabs/rudder-server/issues/3265)) ([34f4c0d](https://github.com/rudderlabs/rudder-server/commit/34f4c0d2a9fd14d2d436962020c50fc36e37c472)) +* **router:** support for isolation modes using limiters ([#3379](https://github.com/rudderlabs/rudder-server/issues/3379)) ([fbe109f](https://github.com/rudderlabs/rudder-server/commit/fbe109f682fb0eac8f2797236bd1497caaddcca0)) +* **warehouse:** staging file schema consolidation ([#3088](https://github.com/rudderlabs/rudder-server/issues/3088)) ([b245915](https://github.com/rudderlabs/rudder-server/commit/b245915a1c922856e37483ea751dc2c17705caf5)) + + +### Bug Fixes + +* **batchrouter:** batchrouter stops processing events for destinations where a destType-specific config option is set ([#3421](https://github.com/rudderlabs/rudder-server/issues/3421)) ([ee87636](https://github.com/rudderlabs/rudder-server/commit/ee87636c7ddf6672fad51c430d7bdd6b203bbfc1)) +* clickhouse flaky test ([#3392](https://github.com/rudderlabs/rudder-server/issues/3392)) ([a89ef27](https://github.com/rudderlabs/rudder-server/commit/a89ef2712e1e0272ee02b76b31f8e6b5c2c406db)) +* error extraction for errors in destination transformation ([#3499](https://github.com/rudderlabs/rudder-server/issues/3499)) ([5c43457](https://github.com/rudderlabs/rudder-server/commit/5c43457d8c2ef0776fd6677b3b8f28c87861b03e)) +* flag for warehouse altering ([#3460](https://github.com/rudderlabs/rudder-server/issues/3460)) ([c23ffb2](https://github.com/rudderlabs/rudder-server/commit/c23ffb2a8ed7dd2f1cfc82339db2d881ae16f064)) +* gateway flaky test ([#3356](https://github.com/rudderlabs/rudder-server/issues/3356)) ([b245915](https://github.com/rudderlabs/rudder-server/commit/b245915a1c922856e37483ea751dc2c17705caf5)) +* **gateway:** use get method for /fetch-tables ([#3528](https://github.com/rudderlabs/rudder-server/issues/3528)) ([08cd99c](https://github.com/rudderlabs/rudder-server/commit/08cd99c11fddb257e0123f9fbd3fb857e927ca43)) +* gw transient errors crash ([#3397](https://github.com/rudderlabs/rudder-server/issues/3397)) ([6ad51e8](https://github.com/rudderlabs/rudder-server/commit/6ad51e8e7411584d61a546b8645e4bc1b88c1fbb)) +* jobsforwarder panics with slice bounds out of range ([#3414](https://github.com/rudderlabs/rudder-server/issues/3414)) ([0bda13f](https://github.com/rudderlabs/rudder-server/commit/0bda13f3fb2ca7a7dd1efb3a029454118079e750)) +* kafka flaky batching test ([#3447](https://github.com/rudderlabs/rudder-server/issues/3447)) ([fcd49e4](https://github.com/rudderlabs/rudder-server/commit/fcd49e4d09edf612a2e3b1f1c84033d4fcfadb76)) +* migration fails with error pq: VACUUM cannot run inside a transaction block ([#3464](https://github.com/rudderlabs/rudder-server/issues/3464)) ([9e32802](https://github.com/rudderlabs/rudder-server/commit/9e328022f9adb6a8e31cccfa14cada97fad65f9e)) +* regulation-worker flaky test ([#3374](https://github.com/rudderlabs/rudder-server/issues/3374)) 
([b245915](https://github.com/rudderlabs/rudder-server/commit/b245915a1c922856e37483ea751dc2c17705caf5)) +* respect user schema modification by user in warehouse ([#3419](https://github.com/rudderlabs/rudder-server/issues/3419)) ([fda4baf](https://github.com/rudderlabs/rudder-server/commit/fda4baf400f1e401516e01ac9d6144a21c07d713)) +* router panics with limit for rt_pickup must be greater than 0 ([#3467](https://github.com/rudderlabs/rudder-server/issues/3467)) ([cc56b56](https://github.com/rudderlabs/rudder-server/commit/cc56b566b2f6180b5c73ab193be7653d2ad7139e)) +* **router:** using wrong partition tag in limiter stats ([#3518](https://github.com/rudderlabs/rudder-server/issues/3518)) ([893504f](https://github.com/rudderlabs/rudder-server/commit/893504f90439e956d92a53ddf49ee5a29961772b)) +* stats duplicated labels ([#3411](https://github.com/rudderlabs/rudder-server/issues/3411)) ([0bda13f](https://github.com/rudderlabs/rudder-server/commit/0bda13f3fb2ca7a7dd1efb3a029454118079e750)) +* warehouse flaky test ([#3402](https://github.com/rudderlabs/rudder-server/issues/3402)) ([3f88f50](https://github.com/rudderlabs/rudder-server/commit/3f88f50ca017238fd4878da594d8b9662785b31c)) +* warehouse proxy endpoints ([#3476](https://github.com/rudderlabs/rudder-server/issues/3476)) ([c23ffb2](https://github.com/rudderlabs/rudder-server/commit/c23ffb2a8ed7dd2f1cfc82339db2d881ae16f064)) + + +### Miscellaneous + +* add missing index warehouse load files table ([#3519](https://github.com/rudderlabs/rudder-server/issues/3519)) ([9dcea7b](https://github.com/rudderlabs/rudder-server/commit/9dcea7bd8db4a20ae1ee11f76ccc7e35c4324f0a)) +* add resp body status check ([#3446](https://github.com/rudderlabs/rudder-server/issues/3446)) ([e0d7b6d](https://github.com/rudderlabs/rudder-server/commit/e0d7b6d32a16b3f58c3d4fce0ac32297941a713e)) +* add support for detecting bot events in gateway ([#3504](https://github.com/rudderlabs/rudder-server/issues/3504)) ([51a4c45](https://github.com/rudderlabs/rudder-server/commit/51a4c459ab3fd7be5531ae1ae3acd75bf6559200)) +* applying 1.9.3 hotfixes to main branch ([#3423](https://github.com/rudderlabs/rudder-server/issues/3423)) ([ee1630d](https://github.com/rudderlabs/rudder-server/commit/ee1630de55d45e29ba81d6239bc0ff6d0d7f64af)) +* avoid querying a dataset in case AfterJobID falls after said dataset ([#3478](https://github.com/rudderlabs/rudder-server/issues/3478)) ([f612aae](https://github.com/rudderlabs/rudder-server/commit/f612aae091e65f1ccd1ae84dd0a3058685f6f26d)) +* batch event schema messages for faster processing ([#3406](https://github.com/rudderlabs/rudder-server/issues/3406)) ([0bda13f](https://github.com/rudderlabs/rudder-server/commit/0bda13f3fb2ca7a7dd1efb3a029454118079e750)) +* clean up warehouse indexes and queries ([#3448](https://github.com/rudderlabs/rudder-server/issues/3448)) ([7a3589a](https://github.com/rudderlabs/rudder-server/commit/7a3589afd411b73cf82fd0d46abcfe825a983298)) +* cleanup for integration tests for warehouse ([#3412](https://github.com/rudderlabs/rudder-server/issues/3412)) ([3323427](https://github.com/rudderlabs/rudder-server/commit/332342744b2f7e77ac78ac961628c5a065d2ca74)) +* **deps:** bump cloud.google.com/go/pubsub from 1.30.1 to 1.31.0 ([#3399](https://github.com/rudderlabs/rudder-server/issues/3399)) ([ef8e86c](https://github.com/rudderlabs/rudder-server/commit/ef8e86c784be5a31fdfe01684f53b64a79588e53)) +* **deps:** bump github.com/aws/aws-sdk-go from 1.44.264 to 1.44.265 
([#3361](https://github.com/rudderlabs/rudder-server/issues/3361)) ([b245915](https://github.com/rudderlabs/rudder-server/commit/b245915a1c922856e37483ea751dc2c17705caf5)) +* **deps:** bump github.com/aws/aws-sdk-go from 1.44.265 to 1.44.266 ([#3368](https://github.com/rudderlabs/rudder-server/issues/3368)) ([b245915](https://github.com/rudderlabs/rudder-server/commit/b245915a1c922856e37483ea751dc2c17705caf5)) +* **deps:** bump github.com/aws/aws-sdk-go from 1.44.266 to 1.44.271 ([#3409](https://github.com/rudderlabs/rudder-server/issues/3409)) ([fc48d93](https://github.com/rudderlabs/rudder-server/commit/fc48d93a2b3643cde4633a069b8b5b2f038ae418)) +* **deps:** bump github.com/aws/aws-sdk-go from 1.44.271 to 1.44.275 ([#3442](https://github.com/rudderlabs/rudder-server/issues/3442)) ([8b179b6](https://github.com/rudderlabs/rudder-server/commit/8b179b68d3444bb22ae97ad3f7d93a5fdafc0f57)) +* **deps:** bump github.com/aws/aws-sdk-go from 1.44.275 to 1.44.280 ([#3481](https://github.com/rudderlabs/rudder-server/issues/3481)) ([9c0a4e7](https://github.com/rudderlabs/rudder-server/commit/9c0a4e78d06f154933762b98cb1a07458326a016)) +* **deps:** bump github.com/aws/aws-sdk-go from 1.44.280 to 1.44.281 ([#3488](https://github.com/rudderlabs/rudder-server/issues/3488)) ([af72e90](https://github.com/rudderlabs/rudder-server/commit/af72e90dd4a3481bc755fc04659eb439245ec296)) +* **deps:** bump github.com/aws/aws-sdk-go from 1.44.281 to 1.44.282 ([#3494](https://github.com/rudderlabs/rudder-server/issues/3494)) ([aa6e194](https://github.com/rudderlabs/rudder-server/commit/aa6e194b0950bb14d72714a1e39d5459ab8589f0)) +* **deps:** bump github.com/databricks/databricks-sql-go from 1.2.0 to 1.3.0 ([#3466](https://github.com/rudderlabs/rudder-server/issues/3466)) ([3f187d9](https://github.com/rudderlabs/rudder-server/commit/3f187d9bc75b1dfbbfb96336e2f5e481e654762e)) +* **deps:** bump github.com/golang-migrate/migrate/v4 from 4.15.2 to 4.16.1 ([#3441](https://github.com/rudderlabs/rudder-server/issues/3441)) ([ef16663](https://github.com/rudderlabs/rudder-server/commit/ef166634df0109a0386a29e2f06d259adbaad563)) +* **deps:** bump github.com/golang-migrate/migrate/v4 from 4.16.1 to 4.16.2 ([#3480](https://github.com/rudderlabs/rudder-server/issues/3480)) ([7465dd4](https://github.com/rudderlabs/rudder-server/commit/7465dd4daa88104c1aef07b95b05cddd0fca639c)) +* **deps:** bump github.com/hashicorp/go-retryablehttp from 0.7.2 to 0.7.4 ([#3457](https://github.com/rudderlabs/rudder-server/issues/3457)) ([b3cd0f6](https://github.com/rudderlabs/rudder-server/commit/b3cd0f635a0c273a3050204c5b9538a46b0a3f18)) +* **deps:** bump github.com/hashicorp/golang-lru/v2 from 2.0.2 to 2.0.3 ([#3475](https://github.com/rudderlabs/rudder-server/issues/3475)) ([74b4163](https://github.com/rudderlabs/rudder-server/commit/74b4163e29999abd7d14802cc784fe387232b62d)) +* **deps:** bump github.com/minio/minio-go/v7 from 7.0.52 to 7.0.53 ([#3370](https://github.com/rudderlabs/rudder-server/issues/3370)) ([b245915](https://github.com/rudderlabs/rudder-server/commit/b245915a1c922856e37483ea751dc2c17705caf5)) +* **deps:** bump github.com/minio/minio-go/v7 from 7.0.53 to 7.0.56 ([#3437](https://github.com/rudderlabs/rudder-server/issues/3437)) ([dbd9bf1](https://github.com/rudderlabs/rudder-server/commit/dbd9bf19e75bf7ee5b6614dd26c3ee037262f341)) +* **deps:** bump github.com/onsi/ginkgo/v2 from 2.9.5 to 2.9.7 ([#3449](https://github.com/rudderlabs/rudder-server/issues/3449)) 
([1af6b90](https://github.com/rudderlabs/rudder-server/commit/1af6b90a01d1090dc061f0e34ebf3a017d789798)) +* **deps:** bump github.com/onsi/ginkgo/v2 from 2.9.7 to 2.10.0 ([#3458](https://github.com/rudderlabs/rudder-server/issues/3458)) ([dacfdc8](https://github.com/rudderlabs/rudder-server/commit/dacfdc845c9a8e3f5740558655c84afc7e1df955)) +* **deps:** bump github.com/onsi/gomega from 1.27.6 to 1.27.7 ([#3360](https://github.com/rudderlabs/rudder-server/issues/3360)) ([b245915](https://github.com/rudderlabs/rudder-server/commit/b245915a1c922856e37483ea751dc2c17705caf5)) +* **deps:** bump github.com/onsi/gomega from 1.27.7 to 1.27.8 ([#3459](https://github.com/rudderlabs/rudder-server/issues/3459)) ([d64c405](https://github.com/rudderlabs/rudder-server/commit/d64c405a52e5d970fb8c8ba93d511e96f08859d9)) +* **deps:** bump github.com/rudderlabs/rudder-go-kit from 0.14.3 to 0.15.0 ([#3473](https://github.com/rudderlabs/rudder-server/issues/3473)) ([6eea667](https://github.com/rudderlabs/rudder-server/commit/6eea667edcb1f291c4bb892ddcd88c2f672391d8)) +* **deps:** bump github.com/snowflakedb/gosnowflake from 1.6.20 to 1.6.21 ([#3385](https://github.com/rudderlabs/rudder-server/issues/3385)) ([4a06f44](https://github.com/rudderlabs/rudder-server/commit/4a06f445275b755eeb90f8bb857a1f9baaa25f6f)) +* **deps:** bump github.com/snowflakedb/gosnowflake from 1.6.21 to 1.6.22 ([#3503](https://github.com/rudderlabs/rudder-server/issues/3503)) ([0f9c816](https://github.com/rudderlabs/rudder-server/commit/0f9c816333eec0d386bf5dad0480db02c0b81c9b)) +* **deps:** bump github.com/stretchr/testify from 1.8.2 to 1.8.3 ([#3359](https://github.com/rudderlabs/rudder-server/issues/3359)) ([b245915](https://github.com/rudderlabs/rudder-server/commit/b245915a1c922856e37483ea751dc2c17705caf5)) +* **deps:** bump github.com/stretchr/testify from 1.8.3 to 1.8.4 ([#3450](https://github.com/rudderlabs/rudder-server/issues/3450)) ([f5367ed](https://github.com/rudderlabs/rudder-server/commit/f5367ed3aff0c8bcba985ce97390712df4be4068)) +* **deps:** bump github.com/urfave/cli/v2 from 2.25.3 to 2.25.5 ([#3418](https://github.com/rudderlabs/rudder-server/issues/3418)) ([63a24eb](https://github.com/rudderlabs/rudder-server/commit/63a24ebbd78a9fd7f492acd85f06b838c9090ff2)) +* **deps:** bump github.com/urfave/cli/v2 from 2.25.5 to 2.25.6 ([#3489](https://github.com/rudderlabs/rudder-server/issues/3489)) ([3a00ee6](https://github.com/rudderlabs/rudder-server/commit/3a00ee6b6372b7f43f5a9eb23170265c4fe6ec82)) +* **deps:** bump golang.org/x/oauth2 from 0.8.0 to 0.9.0 ([#3495](https://github.com/rudderlabs/rudder-server/issues/3495)) ([5021989](https://github.com/rudderlabs/rudder-server/commit/5021989ad115e948c437c5e32000eb6afc177ed2)) +* **deps:** bump golang.org/x/sync from 0.2.0 to 0.3.0 ([#3502](https://github.com/rudderlabs/rudder-server/issues/3502)) ([f3391d7](https://github.com/rudderlabs/rudder-server/commit/f3391d705ae9ab0c1e9c6254e4d669e4ae2b51b7)) +* **deps:** bump google.golang.org/api from 0.122.0 to 0.123.0 ([#3362](https://github.com/rudderlabs/rudder-server/issues/3362)) ([b245915](https://github.com/rudderlabs/rudder-server/commit/b245915a1c922856e37483ea751dc2c17705caf5)) +* **deps:** bump google.golang.org/api from 0.123.0 to 0.124.0 ([#3386](https://github.com/rudderlabs/rudder-server/issues/3386)) ([da6215c](https://github.com/rudderlabs/rudder-server/commit/da6215c495611444615ab18a8b7fa53adcdc3141)) +* **deps:** bump google.golang.org/api from 0.124.0 to 0.125.0 
([#3427](https://github.com/rudderlabs/rudder-server/issues/3427)) ([a81b684](https://github.com/rudderlabs/rudder-server/commit/a81b684587a5085bc09e5dbc0abc48b661767db7)) +* **deps:** bump google.golang.org/api from 0.125.0 to 0.126.0 ([#3472](https://github.com/rudderlabs/rudder-server/issues/3472)) ([0f573e4](https://github.com/rudderlabs/rudder-server/commit/0f573e49492b9bbee992826bacc80956bbefd640)) +* **deps:** bump google.golang.org/api from 0.126.0 to 0.127.0 ([#3487](https://github.com/rudderlabs/rudder-server/issues/3487)) ([b525949](https://github.com/rudderlabs/rudder-server/commit/b52594963e536b247cb4fe634e299c1fa4244e1f)) +* drop merged schema column ([#3493](https://github.com/rudderlabs/rudder-server/issues/3493)) ([ba797a4](https://github.com/rudderlabs/rudder-server/commit/ba797a42d3f837640ecd9795010be8cecc2d2ee1)) +* fix flaky dedup test ([#3417](https://github.com/rudderlabs/rudder-server/issues/3417)) ([2f0a684](https://github.com/rudderlabs/rudder-server/commit/2f0a68424d779a5d860fdfd8ebf9a2c26bdb71ba)) +* fix idle query ([#3430](https://github.com/rudderlabs/rudder-server/issues/3430)) ([3cf342d](https://github.com/rudderlabs/rudder-server/commit/3cf342d7360a5c1a0aa8a38653dff5903497e505)) +* gateway health endpoint should return 503 if db is down ([#3351](https://github.com/rudderlabs/rudder-server/issues/3351)) ([e22c790](https://github.com/rudderlabs/rudder-server/commit/e22c7901a530e1a401c7096c856c7e8ad7ee2a1b)) +* gateway stores singular event batches ([#3256](https://github.com/rudderlabs/rudder-server/issues/3256)) ([1ccec6e](https://github.com/rudderlabs/rudder-server/commit/1ccec6e6647213ecc0a98533931156080b6db9c6)) +* getUploadsToProcess error handling ([#3380](https://github.com/rudderlabs/rudder-server/issues/3380)) ([b245915](https://github.com/rudderlabs/rudder-server/commit/b245915a1c922856e37483ea751dc2c17705caf5)) +* jobsdb max age for jobs, cleanup routine ([#3420](https://github.com/rudderlabs/rudder-server/issues/3420)) ([5025a72](https://github.com/rudderlabs/rudder-server/commit/5025a72704bc469ed06023bbb27d351690f329a1)) +* kafka manager errors ([#3465](https://github.com/rudderlabs/rudder-server/issues/3465)) ([21487c2](https://github.com/rudderlabs/rudder-server/commit/21487c2a35f81ba72810199a286ca938985102fb)) +* make tests required for passing ([#3347](https://github.com/rudderlabs/rudder-server/issues/3347)) ([b245915](https://github.com/rudderlabs/rudder-server/commit/b245915a1c922856e37483ea751dc2c17705caf5)) +* move suppression-backup-service from rudderlabs to rudderstack directory in dockerhub ([#3505](https://github.com/rudderlabs/rudder-server/issues/3505)) ([6c9b25b](https://github.com/rudderlabs/rudder-server/commit/6c9b25bc724c48e650cf3c2fb3a0ad8b367e3e2a)) +* nil check for health handler ([#3500](https://github.com/rudderlabs/rudder-server/issues/3500)) ([195e2a0](https://github.com/rudderlabs/rudder-server/commit/195e2a029a5432e73713ce0f24757eb609c9f45f)) +* pass context ([#3326](https://github.com/rudderlabs/rudder-server/issues/3326)) ([990a405](https://github.com/rudderlabs/rudder-server/commit/990a40510c1ecf88467ccb858d981a1643d3e590)) +* periodically push zero output measurement for detecting stuck processing pipelines ([#3453](https://github.com/rudderlabs/rudder-server/issues/3453)) ([4116f37](https://github.com/rudderlabs/rudder-server/commit/4116f37881328fa019f398716503cd72b60896c8)) +* **processor:** support multiple jobsdb writers when source isolation is enabled 
([#3428](https://github.com/rudderlabs/rudder-server/issues/3428)) ([b25003d](https://github.com/rudderlabs/rudder-server/commit/b25003dd1b8715ff6dd6d3547bc0300a6d621c5e)) +* remove deprecated merged schema field ([#3482](https://github.com/rudderlabs/rudder-server/issues/3482)) ([39a0915](https://github.com/rudderlabs/rudder-server/commit/39a091528a23b108aeb7402e8b15a58355b10e54)) +* remove namespace tag from measurements ([#3468](https://github.com/rudderlabs/rudder-server/issues/3468)) ([a6ac7bd](https://github.com/rudderlabs/rudder-server/commit/a6ac7bd0c443d4e17d59faa24fc16c70e911bd5e)) +* replace announcement header with data learning centre link ([#3358](https://github.com/rudderlabs/rudder-server/issues/3358)) ([b245915](https://github.com/rudderlabs/rudder-server/commit/b245915a1c922856e37483ea751dc2c17705caf5)) +* revert drop merged schema column ([#3501](https://github.com/rudderlabs/rudder-server/issues/3501)) ([c8861ee](https://github.com/rudderlabs/rudder-server/commit/c8861eeecfb523436bbcf752d2bd654d10c5ae6e)) +* **router:** observability on job iterator discards & stop iteration if throttled with destination isolation enabled [#3491](https://github.com/rudderlabs/rudder-server/issues/3491) ([66e32ad](https://github.com/rudderlabs/rudder-server/commit/66e32ad68fdb501a9b7eb42fa3145643ab0306e6)) +* **router:** periodic flush during pickup ([#3497](https://github.com/rudderlabs/rudder-server/issues/3497)) ([1193acc](https://github.com/rudderlabs/rudder-server/commit/1193acc57cb3804351cbf76be2a352b6548461ec)) +* set limits for event schema messages and discard messages above these limits ([#3435](https://github.com/rudderlabs/rudder-server/issues/3435)) ([b70c075](https://github.com/rudderlabs/rudder-server/commit/b70c0759bad104de8ecbf44f6d9dfbfb365b8d6c)) +* source transformation failures stat ([#3524](https://github.com/rudderlabs/rudder-server/issues/3524)) ([1146217](https://github.com/rudderlabs/rudder-server/commit/1146217407938018655da13b14e06e7e5a5a5959)) +* source transformation failures stat tag correction ([#3541](https://github.com/rudderlabs/rudder-server/issues/3541)) ([b17dec9](https://github.com/rudderlabs/rudder-server/commit/b17dec9dc6b4169331fe5f4f9eb7ba97f6b41ce6)) +* split unit tests ([#3416](https://github.com/rudderlabs/rudder-server/issues/3416)) ([471a562](https://github.com/rudderlabs/rudder-server/commit/471a56230f8fe6ba8f8032c401d18f279dd4cd01)) +* split unit tests ([#3492](https://github.com/rudderlabs/rudder-server/issues/3492)) ([1d0c720](https://github.com/rudderlabs/rudder-server/commit/1d0c720bff3d7fcb6205eb1f5554dff3dbadfc5b)) +* tests coverage ([#3349](https://github.com/rudderlabs/rudder-server/issues/3349)) ([b245915](https://github.com/rudderlabs/rudder-server/commit/b245915a1c922856e37483ea751dc2c17705caf5)) +* timeout for warehouse sql queries ([#3433](https://github.com/rudderlabs/rudder-server/issues/3433)) ([ca512a0](https://github.com/rudderlabs/rudder-server/commit/ca512a06c3a66b0307932132609786ba589ec787)) +* upgrade golangci version and lint fixes ([#3443](https://github.com/rudderlabs/rudder-server/issues/3443)) ([3d03653](https://github.com/rudderlabs/rudder-server/commit/3d03653242b2a71b5309b5e33b02184a5a43fd7d)) +* upgrade to badgerV4 ([#3340](https://github.com/rudderlabs/rudder-server/issues/3340)) ([3ecea32](https://github.com/rudderlabs/rudder-server/commit/3ecea32bbd88e105c380d3753558db05bd3e5507)) +* using parquet-rudderlabs-go ([#3490](https://github.com/rudderlabs/rudder-server/issues/3490)) 
([fbbf101](https://github.com/rudderlabs/rudder-server/commit/fbbf101faa2c1598aba643a4f9931af7a58e5caa)) +* vacuum status tables if they cross threshold ([#3434](https://github.com/rudderlabs/rudder-server/issues/3434)) ([8d35882](https://github.com/rudderlabs/rudder-server/commit/8d35882d13f68bcc9ca3bc641fea96870c473096)) + ## [1.9.6](https://github.com/rudderlabs/rudder-server/compare/v1.9.5...v1.9.6) (2023-06-09) diff --git a/enterprise/replay/dumpsloader.go b/enterprise/replay/dumpsloader.go index c72463b3a42..fa6745f5231 100644 --- a/enterprise/replay/dumpsloader.go +++ b/enterprise/replay/dumpsloader.go @@ -9,7 +9,7 @@ import ( "time" "github.com/rudderlabs/rudder-go-kit/config" - "github.com/rudderlabs/rudder-server/services/filemanager" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/google/uuid" "github.com/tidwall/gjson" @@ -112,7 +112,7 @@ func (gwHandle *GWReplayRequestHandler) fetchDumpsList(ctx context.Context) { gwHandle.handle.prefix, gwHandle.handle.startAfterKey, maxItems, - &gwHandle.handle.uploader, + gwHandle.handle.uploader, ) for iter.Next() { object := iter.Get() @@ -177,7 +177,7 @@ func (procHandle *ProcErrorRequestHandler) fetchDumpsList(ctx context.Context) { procHandle.handle.prefix, procHandle.handle.startAfterKey, maxItems, - &procHandle.handle.uploader, + procHandle.handle.uploader, ) for iter.Next() { object := iter.Get() diff --git a/enterprise/replay/replay.go b/enterprise/replay/replay.go index 374de626ff7..a33f7bbfe02 100644 --- a/enterprise/replay/replay.go +++ b/enterprise/replay/replay.go @@ -7,10 +7,10 @@ import ( "time" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-server/jobsdb" "github.com/rudderlabs/rudder-server/processor/transformer" - "github.com/rudderlabs/rudder-server/services/filemanager" ) type Handler struct { diff --git a/enterprise/replay/setup.go b/enterprise/replay/setup.go index e6adf89ca81..f5a08e8df1b 100644 --- a/enterprise/replay/setup.go +++ b/enterprise/replay/setup.go @@ -5,9 +5,10 @@ import ( "strings" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-server/jobsdb" - "github.com/rudderlabs/rudder-server/services/filemanager" + "github.com/rudderlabs/rudder-server/utils/filemanagerutil" "github.com/rudderlabs/rudder-server/utils/misc" "github.com/rudderlabs/rudder-server/utils/types" ) @@ -27,10 +28,9 @@ func initFileManager(log logger.Logger) (filemanager.FileManager, string, error) } provider := config.GetString("JOBS_BACKUP_STORAGE_PROVIDER", "S3") - fileManagerFactory := filemanager.DefaultFileManagerFactory - configFromEnv := filemanager.GetProviderConfigForBackupsFromEnv(context.TODO()) - uploader, err := fileManagerFactory.New(&filemanager.SettingsT{ + configFromEnv := filemanagerutil.ProviderConfigOpts(context.TODO(), provider, config.Default) + uploader, err := filemanager.New(&filemanager.Settings{ Provider: provider, Config: misc.GetObjectStorageConfig(misc.ObjectStorageOptsT{ Provider: provider, @@ -39,6 +39,7 @@ func initFileManager(log logger.Logger) (filemanager.FileManager, string, error) // TODO: need to figure out how to bring workspaceID here // when we support IAM role here.
}), + Conf: config.Default, }) if err != nil { log.Errorf("[[ Replay ]] Error creating file manager: %s", err.Error()) diff --git a/enterprise/replay/sourceWorker.go b/enterprise/replay/sourceWorker.go index bcf10cd1957..7b7af44bd3f 100644 --- a/enterprise/replay/sourceWorker.go +++ b/enterprise/replay/sourceWorker.go @@ -18,11 +18,11 @@ import ( "github.com/tidwall/gjson" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" backendconfig "github.com/rudderlabs/rudder-server/backend-config" "github.com/rudderlabs/rudder-server/jobsdb" "github.com/rudderlabs/rudder-server/processor/integrations" "github.com/rudderlabs/rudder-server/processor/transformer" - "github.com/rudderlabs/rudder-server/services/filemanager" ) type SourceWorkerT struct { diff --git a/gateway/gateway.go b/gateway/gateway.go index 624560ec4c2..2c3e1672355 100644 --- a/gateway/gateway.go +++ b/gateway/gateway.go @@ -1310,6 +1310,7 @@ func (gateway *HandleT) StartWebHandler(ctx context.Context) error { ) srvMux.Route("/internal", func(r chi.Router) { r.Post("/v1/extract", gateway.webExtractHandler) + r.Get("/v1/warehouse/fetch-tables", gateway.whProxy.ServeHTTP) }) srvMux.Route("/v1", func(r chi.Router) { @@ -1331,6 +1332,7 @@ func (gateway *HandleT) StartWebHandler(ctx context.Context) error { r.Post("/pending-events", gateway.whProxy.ServeHTTP) r.Post("/trigger-upload", gateway.whProxy.ServeHTTP) r.Post("/jobs", gateway.whProxy.ServeHTTP) + // TODO: Remove this endpoint once sources change is released r.Get("/fetch-tables", gateway.whProxy.ServeHTTP) r.Get("/jobs/status", gateway.whProxy.ServeHTTP) diff --git a/gateway/gateway_test.go b/gateway/gateway_test.go index 2782a4d8e36..b721bbd437e 100644 --- a/gateway/gateway_test.go +++ b/gateway/gateway_test.go @@ -1250,7 +1250,9 @@ func endpointsToVerify() ([]string, []string, []string) { "/v1/job-status/123", "/v1/job-status/123/failed-records", "/v1/warehouse/jobs/status", + // TODO: Remove this endpoint once sources change is released "/v1/warehouse/fetch-tables", + "/internal/v1/warehouse/fetch-tables", } postEndpoints := []string{ diff --git a/gateway/webhook/webhook.go b/gateway/webhook/webhook.go index 7ea1cf2a185..7fb7fb396d9 100644 --- a/gateway/webhook/webhook.go +++ b/gateway/webhook/webhook.go @@ -16,6 +16,7 @@ import ( "time" "github.com/hashicorp/go-retryablehttp" + "github.com/samber/lo" "golang.org/x/exp/slices" "github.com/rudderlabs/rudder-go-kit/logger" @@ -370,17 +371,23 @@ func (bt *batchWebhookTransformerT) batchTransformLoop() { // stats bt.stats.sourceStats[breq.sourceType].sourceTransform.Since(transformStart) + var reason string if batchResponse.batchError == nil && len(batchResponse.responses) != len(payloadArr) { batchResponse.batchError = errors.New("webhook batch transform response events size does not equal sent events size") + reason = "in out mismatch" pkgLogger.Errorf("%w", batchResponse.batchError) } if batchResponse.batchError != nil { + if reason == "" { + reason = "batch response error" + } statusCode := http.StatusInternalServerError if batchResponse.statusCode != 0 { statusCode = batchResponse.statusCode } pkgLogger.Errorf("webhook %s source transformation failed with error: %w and status code: %s", breq.sourceType, batchResponse.batchError, statusCode) - countWebhookErrors(breq.sourceType, statusCode, len(breq.batchRequest)) + bt.webhook.recordWebhookErrors(breq.sourceType, reason, webRequests, statusCode) + for _, req := range breq.batchRequest { req.done <- transformerResponse{StatusCode: 
statusCode, Err: batchResponse.batchError.Error()} } @@ -392,22 +399,24 @@ func (bt *batchWebhookTransformerT) batchTransformLoop() { for idx, resp := range batchResponse.responses { webRequest := webRequests[idx] if resp.Err == "" && resp.Output != nil { - var errMessage string + var errMessage, reason string outputPayload, err := json.Marshal(resp.Output) if err != nil { errMessage = response.SourceTransformerInvalidOutputFormatInResponse + reason = "marshal error" } else { errMessage = bt.webhook.enqueueInGateway(webRequest, outputPayload) + reason = "enqueueInGateway failed" } if errMessage != "" { pkgLogger.Errorf("webhook %s source transformation failed: %s", breq.sourceType, errMessage) - countWebhookErrors(breq.sourceType, response.GetErrorStatusCode(errMessage), 1) + bt.webhook.countWebhookErrors(breq.sourceType, webRequest.writeKey, reason, response.GetErrorStatusCode(errMessage), 1) webRequest.done <- bt.markResponseFail(errMessage) continue } } else if resp.StatusCode != http.StatusOK { pkgLogger.Errorf("webhook %s source transformation failed with error: %s and status code: %s", breq.sourceType, resp.Err, resp.StatusCode) - countWebhookErrors(breq.sourceType, resp.StatusCode, 1) + bt.webhook.countWebhookErrors(breq.sourceType, webRequest.writeKey, "non 200 response", resp.StatusCode, 1) } webRequest.done <- resp @@ -457,13 +466,28 @@ func (webhook *HandleT) Shutdown() error { return webhook.backgroundWait() } -func countWebhookErrors(sourceType string, statusCode, count int) { - stats.Default.NewTaggedStat("webhook_num_errors", stats.CountType, stats.Tags{ - "sourceType": sourceType, - "statusCode": strconv.Itoa(statusCode), +func (webhook *HandleT) countWebhookErrors(sourceType, writeKey, reason string, statusCode, count int) { + stat := webhook.gwHandle.NewSourceStat(writeKey, "webhook") + webhook.stats.NewTaggedStat("webhook_num_errors", stats.CountType, stats.Tags{ + "writeKey": writeKey, + "workspaceId": stat.WorkspaceID, + "sourceID": stat.SourceID, + "statusCode": strconv.Itoa(statusCode), + "sourceType": sourceType, + "reason": reason, }).Count(count) } +func (webhook *HandleT) recordWebhookErrors(sourceType, reason string, reqs []*webhookT, statusCode int) { + reqsGroupedByWriteKey := lo.GroupBy(reqs, func(request *webhookT) string { + return request.writeKey + }) + + for writeKey, reqs := range reqsGroupedByWriteKey { + webhook.countWebhookErrors(sourceType, writeKey, reason, statusCode, len(reqs)) + } +} + // TODO: Check if correct func newWebhookStat(sourceType string) *webhookSourceStatT { tags := map[string]string{ diff --git a/gateway/webhook/webhook_test.go b/gateway/webhook/webhook_test.go index 371eda3685c..cb23b82f45b 100644 --- a/gateway/webhook/webhook_test.go +++ b/gateway/webhook/webhook_test.go @@ -12,10 +12,12 @@ import ( "github.com/golang/mock/gomock" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/rudderlabs/rudder-go-kit/config" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-go-kit/stats" + "github.com/rudderlabs/rudder-go-kit/stats/memstats" gwStats "github.com/rudderlabs/rudder-server/gateway/internal/stats" mockWebhook "github.com/rudderlabs/rudder-server/gateway/mocks" "github.com/rudderlabs/rudder-server/gateway/response" @@ -64,7 +66,7 @@ func TestWebhookRequestHandlerWithTransformerBatchGeneralError(t *testing.T) { mockGW.EXPECT().IncrementAckCount(gomock.Any()).Times(1) mockGW.EXPECT().GetWebhookSourceDefName(sampleWriteKey).Return(sourceDefName, true) 
mockGW.EXPECT().TrackRequestMetrics(gomock.Any()).Times(1) - mockGW.EXPECT().NewSourceStat(gomock.Any(), gomock.Any()).Return(&gwStats.SourceStat{}).Times(1) + mockGW.EXPECT().NewSourceStat(gomock.Any(), gomock.Any()).Return(&gwStats.SourceStat{}).Times(2) webhookHandler.Register(sourceDefName) req := httptest.NewRequest(http.MethodPost, "/v1/webhook?writeKey="+sampleWriteKey, bytes.NewBufferString(sampleJson)) @@ -105,7 +107,7 @@ func TestWebhookRequestHandlerWithTransformerBatchPayloadLengthMismatchError(t * mockGW.EXPECT().IncrementAckCount(gomock.Any()).Times(1) mockGW.EXPECT().GetWebhookSourceDefName(sampleWriteKey).Return(sourceDefName, true) mockGW.EXPECT().TrackRequestMetrics(gomock.Any()).Times(1) - mockGW.EXPECT().NewSourceStat(gomock.Any(), gomock.Any()).Return(&gwStats.SourceStat{}).Times(1) + mockGW.EXPECT().NewSourceStat(gomock.Any(), gomock.Any()).Return(&gwStats.SourceStat{}).Times(2) webhookHandler.Register(sourceDefName) req := httptest.NewRequest(http.MethodPost, "/v1/webhook?writeKey="+sampleWriteKey, bytes.NewBufferString(sampleJson)) @@ -145,7 +147,7 @@ func TestWebhookRequestHandlerWithTransformerRequestError(t *testing.T) { mockGW.EXPECT().IncrementAckCount(gomock.Any()).Times(1) mockGW.EXPECT().GetWebhookSourceDefName(sampleWriteKey).Return(sourceDefName, true) mockGW.EXPECT().TrackRequestMetrics(gomock.Any()).Times(1) - mockGW.EXPECT().NewSourceStat(gomock.Any(), gomock.Any()).Return(&gwStats.SourceStat{}).Times(1) + mockGW.EXPECT().NewSourceStat(gomock.Any(), gomock.Any()).Return(&gwStats.SourceStat{}).Times(2) webhookHandler.Register(sourceDefName) req := httptest.NewRequest(http.MethodPost, "/v1/webhook?writeKey="+sampleWriteKey, bytes.NewBufferString(sampleJson)) @@ -281,3 +283,76 @@ func TestWebhookRequestHandlerWithOutputToGatewayAndSource(t *testing.T) { assert.Equal(t, sampleJson, strings.TrimSpace(w.Body.String())) _ = webhookHandler.Shutdown() } + +func TestRecordWebhookErrors(t *testing.T) { + initWebhook() + ctrl := gomock.NewController(t) + mockGW := mockWebhook.NewMockGatewayI(ctrl) + statsStore := memstats.New() + webhookHandler := Setup(mockGW, statsStore) + reqs := []*webhookT{ + {writeKey: "w1"}, {writeKey: "w2"}, {writeKey: "w1"}, {writeKey: "w3"}, {writeKey: "w2"}, {writeKey: "w1"}, + } + mockGW.EXPECT().NewSourceStat(gomock.Any(), gomock.Any()).DoAndReturn(func(writeKey, reqType string) *gwStats.SourceStat { + switch writeKey { + case "w1": + return &gwStats.SourceStat{ + Source: "source1", + SourceID: "sourceID1", + WriteKey: writeKey, + ReqType: reqType, + WorkspaceID: "workspaceID1", + SourceType: "webhook1", + } + case "w2": + return &gwStats.SourceStat{ + Source: "source2", + SourceID: "sourceID2", + WriteKey: writeKey, + ReqType: reqType, + WorkspaceID: "workspaceID2", + SourceType: "webhook2", + } + case "w3": + return &gwStats.SourceStat{ + Source: "source3", + SourceID: "sourceID3", + WriteKey: writeKey, + ReqType: reqType, + WorkspaceID: "workspaceID3", + SourceType: "webhook3", + } + } + return nil + }).Times(3) + + webhookHandler.recordWebhookErrors("cio", "err1", reqs, 400) + + m := statsStore.Get("webhook_num_errors", stats.Tags{ + "writeKey": "w1", + "workspaceId": "workspaceID1", + "sourceID": "sourceID1", + "statusCode": "400", + "sourceType": "cio", + "reason": "err1", + }) + require.EqualValues(t, m.LastValue(), 3) + m = statsStore.Get("webhook_num_errors", stats.Tags{ + "writeKey": "w2", + "workspaceId": "workspaceID2", + "sourceID": "sourceID2", + "statusCode": "400", + "sourceType": "cio", + "reason": "err1", + }) + 
require.EqualValues(t, m.LastValue(), 2) + m = statsStore.Get("webhook_num_errors", stats.Tags{ + "writeKey": "w3", + "workspaceId": "workspaceID3", + "sourceID": "sourceID3", + "statusCode": "400", + "sourceType": "cio", + "reason": "err1", + }) + require.EqualValues(t, m.LastValue(), 1) +} diff --git a/go.mod b/go.mod index a0b61ce32d3..692d6eb4901 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,7 @@ replace ( github.com/prometheus/client_golang => github.com/prometheus/client_golang v1.15.0 github.com/satori/go.uuid => github.com/satori/go.uuid v1.1.0 github.com/spf13/viper => github.com/spf13/viper v1.15.0 + github.com/xitongsys/parquet-go => github.com/rudderlabs/parquet-go v0.0.2 go.mongodb.org/mongo-driver => go.mongodb.org/mongo-driver v1.11.4 golang.org/x/crypto => golang.org/x/crypto v0.8.0 golang.org/x/image => golang.org/x/image v0.5.0 @@ -21,20 +22,18 @@ replace ( gopkg.in/yaml.v2 => gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 => gopkg.in/yaml.v3 v3.0.1 k8s.io/kubernetes => k8s.io/kubernetes v1.22.2 - ) require ( cloud.google.com/go/bigquery v1.51.2 cloud.google.com/go/pubsub v1.31.0 - cloud.google.com/go/storage v1.30.1 github.com/Azure/azure-storage-blob-go v0.15.0 github.com/ClickHouse/clickhouse-go v1.5.4 github.com/alexeyco/simpletable v1.0.0 github.com/allisson/go-pglock/v2 v2.0.1 github.com/apache/pulsar-client-go v0.10.0 github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de - github.com/aws/aws-sdk-go v1.44.287 + github.com/aws/aws-sdk-go v1.44.289 github.com/bugsnag/bugsnag-go/v2 v2.2.0 github.com/cenkalti/backoff v2.2.1+incompatible github.com/cenkalti/backoff/v4 v4.2.1 @@ -60,8 +59,7 @@ require ( github.com/lib/pq v1.10.9 github.com/linkedin/goavro/v2 v2.12.0 github.com/minio/minio-go v6.0.14+incompatible - github.com/minio/minio-go/v6 v6.0.57 - github.com/minio/minio-go/v7 v7.0.57 + github.com/minio/minio-go/v7 v7.0.58 github.com/mitchellh/mapstructure v1.5.0 github.com/mkmik/multierror v0.3.0 github.com/onsi/ginkgo/v2 v2.11.0 @@ -71,6 +69,7 @@ require ( github.com/prometheus/client_model v0.4.0 github.com/rs/cors v1.9.0 github.com/rudderlabs/analytics-go v3.3.3+incompatible + github.com/rudderlabs/compose-test v0.1.2 github.com/rudderlabs/rudder-go-kit v0.15.1 github.com/rudderlabs/sql-tunnels v0.1.3 github.com/samber/lo v1.38.1 @@ -105,6 +104,7 @@ require ( cloud.google.com/go/compute v1.19.3 // indirect cloud.google.com/go/compute/metadata v0.2.3 // indirect cloud.google.com/go/iam v1.0.1 // indirect + cloud.google.com/go/storage v1.30.1 // indirect github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 // indirect github.com/99designs/keyring v1.2.2 // indirect github.com/AthenZ/athenz v1.10.39 // indirect @@ -233,7 +233,6 @@ require ( github.com/rivo/uniseg v0.1.0 // indirect github.com/rs/xid v1.5.0 // indirect github.com/rs/zerolog v1.28.0 // indirect - github.com/rudderlabs/compose-test v0.1.2 github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/segmentio/backo-go v1.0.1 // indirect github.com/shirou/gopsutil/v3 v3.23.4 // indirect @@ -296,5 +295,3 @@ require ( gopkg.in/yaml.v3 v3.0.1 // indirect gotest.tools/gotestsum v1.8.2 // indirect ) - -replace github.com/xitongsys/parquet-go => github.com/rudderlabs/parquet-go v0.0.2 diff --git a/go.sum b/go.sum index 21a754f454c..b6b17d0ef11 100644 --- a/go.sum +++ b/go.sum @@ -762,8 +762,8 @@ github.com/aws/aws-sdk-go v1.30.19/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZve github.com/aws/aws-sdk-go v1.32.6/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0= github.com/aws/aws-sdk-go 
v1.37.0/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro= github.com/aws/aws-sdk-go v1.43.31/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo= -github.com/aws/aws-sdk-go v1.44.287 h1:CUq2/h0gZ2LOCF61AgQSEMPMfas4gTiQfHBO88gGET0= -github.com/aws/aws-sdk-go v1.44.287/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= +github.com/aws/aws-sdk-go v1.44.289 h1:5CVEjiHFvdiVlKPBzv0rjG4zH/21W/onT18R5AH/qx0= +github.com/aws/aws-sdk-go v1.44.289/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= github.com/aws/aws-sdk-go-v2 v1.16.2/go.mod h1:ytwTPBG6fXTZLxxeeCCWj2/EMYp/xDUgX+OET6TLNNU= github.com/aws/aws-sdk-go-v2 v1.17.7 h1:CLSjnhJSTSogvqUGhIC6LqFKATMRexcxLZ0i/Nzk9Eg= github.com/aws/aws-sdk-go-v2 v1.17.7/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw= @@ -1289,7 +1289,6 @@ github.com/googleapis/gnostic v0.5.1/go.mod h1:6U4PtQXGIEt/Z3h5MAT7FNofLnw9vXk2c github.com/googleapis/gnostic v0.5.5/go.mod h1:7+EbHbldMins07ALC74bsA81Ovc97DwqyJO1AENw9kA= github.com/googleapis/go-type-adapters v1.0.0/go.mod h1:zHW75FOG2aur7gAO2B+MLby+cLsWGBF62rFAi7WjWO4= github.com/googleapis/google-cloud-go-testing v0.0.0-20200911160855-bcd43fbb19e8/go.mod h1:dvDLG8qkwmyD9a/MJJN3XJcT3xFxOKAvTZGvuZmac9g= -github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/mux v1.7.4/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= @@ -1444,7 +1443,6 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= -github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/juju/qthttptest v0.1.1/go.mod h1:aTlAv8TYaflIiTDIQYzxnl1QdPjAg8Q8qJMErpKy6A4= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= @@ -1465,7 +1463,6 @@ github.com/klauspost/compress v1.15.1/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47e github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= github.com/klauspost/compress v1.16.5 h1:IFV2oUNUzZaz+XyusxpLzpzS8Pt5rh0Z16For/djlyI= github.com/klauspost/compress v1.16.5/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= -github.com/klauspost/cpuid v1.2.3/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= @@ -1557,17 +1554,13 @@ github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpsp github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod 
h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= -github.com/minio/md5-simd v1.1.0/go.mod h1:XpBqgZULrMYD3R+M28PcmP0CkI7PEMzB3U77ZrKZ0Gw= github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= github.com/minio/minio-go v6.0.14+incompatible h1:fnV+GD28LeqdN6vT2XdGKW8Qe/IfjJDswNVuni6km9o= github.com/minio/minio-go v6.0.14+incompatible/go.mod h1:7guKYtitv8dktvNUGrhzmNlA5wrAABTQXCoesZdFQO8= -github.com/minio/minio-go/v6 v6.0.57 h1:ixPkbKkyD7IhnluRgQpGSpHdpvNVaW6OD5R9IAO/9Tw= -github.com/minio/minio-go/v6 v6.0.57/go.mod h1:5+R/nM9Pwrh0vqF+HbYYDQ84wdUFPyXHkrdT4AIkifM= github.com/minio/minio-go/v7 v7.0.34/go.mod h1:nCrRzjoSUQh8hgKKtu3Y708OLvRLtuASMg2/nvmbarw= -github.com/minio/minio-go/v7 v7.0.57 h1:xsFiOiWjpC1XAGbFEUOzj1/gMXGz7ljfxifwcb/5YXU= -github.com/minio/minio-go/v7 v7.0.57/go.mod h1:NUDy4A4oXPq1l2yK6LTSvCEzAMeIcoz9lcj5dbzSrRE= -github.com/minio/sha256-simd v0.1.1/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM= +github.com/minio/minio-go/v7 v7.0.58 h1:B9/8Az8Om/2kX8Ys2ai2PZbBTokRE5W6P5OaqnAs6po= +github.com/minio/minio-go/v7 v7.0.58/go.mod h1:NUDy4A4oXPq1l2yK6LTSvCEzAMeIcoz9lcj5dbzSrRE= github.com/minio/sha256-simd v1.0.0/go.mod h1:OuYzVNI5vcoYIAmbIvHPl3N3jUzVedXbKy5RFepssQM= github.com/minio/sha256-simd v1.0.1 h1:6kaan5IFmwTNynnKKpDHe6FWHohJOHhCPchzK49dzMM= github.com/minio/sha256-simd v1.0.1/go.mod h1:Pz6AKMiUdngCLpeTL/RJY1M9rUuPMYujV5xJjtbRSN8= @@ -1788,15 +1781,12 @@ github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5g github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= -github.com/sirupsen/logrus v1.5.0/go.mod h1:+F7Ogzej0PZc/94MaYx/nvG9jOFMD2osvC3s+Squfpo= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/sirupsen/logrus v1.9.2 h1:oxx1eChJGI6Uks2ZC4W1zpLlVgqB8ner4EuQwV4Ik1Y= github.com/sirupsen/logrus v1.9.2/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= -github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/snowflakedb/gosnowflake v1.6.22 h1:2crLpqmFVyV03NPAxxAtzQBMFn6wUPqOJ1uRl4ruOJ4= github.com/snowflakedb/gosnowflake v1.6.22/go.mod h1:P2fE/xiD2kQXpr48OdgnazkzPsKD6aVtnHD3WP8yD9c= github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0= @@ -2287,7 +2277,6 @@ golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3 golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod 
h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190422233926-fe54fb35175b/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190425163242-31fd60d6bfdc/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= @@ -2688,7 +2677,6 @@ gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMy gopkg.in/httprequest.v1 v1.2.1/go.mod h1:x2Otw96yda5+8+6ZeWwHIJTFkEHWP/qP8pJOzqEtWPM= gopkg.in/inconshreveable/log15.v2 v2.0.0-20180818164646-67afb5ed74ec/go.mod h1:aPpfJ7XW+gOuirDoZ8gHhLh3kZ1B08FtV2bbmy7Jv3s= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= -gopkg.in/ini.v1 v1.42.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/ini.v1 v1.66.6/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= diff --git a/jobsdb/backup.go b/jobsdb/backup.go index a4d4a00bcb3..648576c25f8 100644 --- a/jobsdb/backup.go +++ b/jobsdb/backup.go @@ -14,8 +14,8 @@ import ( "golang.org/x/sync/errgroup" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/stats" - "github.com/rudderlabs/rudder-server/services/filemanager" fileuploader "github.com/rudderlabs/rudder-server/services/fileuploader" "github.com/rudderlabs/rudder-server/utils/misc" ) @@ -616,7 +616,7 @@ func (jd *HandleT) uploadTableDump(ctx context.Context, workspaceID, path string pathPrefixes = append(pathPrefixes, config.GetString("INSTANCE_ID", "1")) } - var output filemanager.UploadOutput + var output filemanager.UploadedFile output, err = jd.backupUploadWithExponentialBackoff(ctx, file, workspaceID, pathPrefixes...) if err != nil { jd.logger.Errorf("[JobsDB] :: Failed to upload table dump for workspaceId %s. Error: %s", workspaceID, err.Error()) @@ -626,11 +626,11 @@ func (jd *HandleT) uploadTableDump(ctx context.Context, workspaceID, path string return nil } -func (jd *HandleT) backupUploadWithExponentialBackoff(ctx context.Context, file *os.File, workspaceID string, pathPrefixes ...string) (filemanager.UploadOutput, error) { +func (jd *HandleT) backupUploadWithExponentialBackoff(ctx context.Context, file *os.File, workspaceID string, pathPrefixes ...string) (filemanager.UploadedFile, error) { // get a file uploader fileUploader, err := jd.fileUploaderProvider.GetFileManager(workspaceID) if err != nil { - return filemanager.UploadOutput{}, err + return filemanager.UploadedFile{}, err } bo := backoff.NewExponentialBackOff() bo.MaxInterval = time.Minute @@ -638,7 +638,7 @@ func (jd *HandleT) backupUploadWithExponentialBackoff(ctx context.Context, file boRetries := backoff.WithMaxRetries(bo, uint64(config.GetInt64("MAX_BACKOFF_RETRIES", 3))) boCtx := backoff.WithContext(boRetries, ctx) - var output filemanager.UploadOutput + var output filemanager.UploadedFile backup := func() error { output, err = fileUploader.Upload(ctx, file, pathPrefixes...) 
return err diff --git a/jobsdb/jobsdb_backup_test.go b/jobsdb/jobsdb_backup_test.go index cb764b04ed0..a4e9a8522fe 100644 --- a/jobsdb/jobsdb_backup_test.go +++ b/jobsdb/jobsdb_backup_test.go @@ -19,11 +19,12 @@ import ( "github.com/tidwall/gjson" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" + "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-go-kit/testhelper/docker/resource" backendconfig "github.com/rudderlabs/rudder-server/backend-config" "github.com/rudderlabs/rudder-server/jobsdb/internal/lock" "github.com/rudderlabs/rudder-server/jobsdb/prebackup" - "github.com/rudderlabs/rudder-server/services/filemanager" "github.com/rudderlabs/rudder-server/services/fileuploader" "github.com/rudderlabs/rudder-server/testhelper" "github.com/rudderlabs/rudder-server/testhelper/destination" @@ -101,8 +102,7 @@ func TestBackupTable(t *testing.T) { require.NoError(t, err, "expected no error while inserting rt data") // create a filemanager instance - fmFactory := filemanager.FileManagerFactoryT{} - fm, err := fmFactory.New(&filemanager.SettingsT{ + fm, err := filemanager.New(&filemanager.Settings{ Provider: "MINIO", Config: map[string]interface{}{ "bucketName": minioResource.BucketName, @@ -116,13 +116,14 @@ func TestBackupTable(t *testing.T) { require.NoError(t, err, "expected no error while creating file manager") // wait for the backup to finish - var file []*filemanager.FileObject + var file []*filemanager.FileInfo require.Eventually(t, func() bool { - file, err = fm.ListFilesWithPrefix(context.Background(), "", prefix, 5) + file, err = fm.ListFilesWithPrefix(context.Background(), "", prefix, 5).Next() if len(file) != 3 { t.Log("file list: ", file, " err: ", err) - fm, _ = fmFactory.New(&filemanager.SettingsT{ + fm, _ = filemanager.New(&filemanager.Settings{ + Logger: logger.NOP, Provider: "MINIO", Config: map[string]interface{}{ "bucketName": minioResource.BucketName, @@ -311,9 +312,9 @@ func TestMultipleWorkspacesBackupTable(t *testing.T) { workspace := "defaultWorkspaceID-" + strconv.Itoa(i+1) fm, err := fileuploaderProvider.GetFileManager(workspace) require.NoError(t, err) - var file []*filemanager.FileObject + var file []*filemanager.FileInfo require.Eventually(t, func() bool { - file, err = fm.ListFilesWithPrefix(context.Background(), "", prefix, 10) + file, err = fm.ListFilesWithPrefix(context.Background(), "", prefix, 10).Next() if len(file) != 3 { t.Log("file list: ", file, " err: ", err, "len: ", len(file)) diff --git a/mocks/services/filemanager/mock_filemanager.go b/mocks/services/filemanager/mock_filemanager.go deleted file mode 100644 index c3f967dcca9..00000000000 --- a/mocks/services/filemanager/mock_filemanager.go +++ /dev/null @@ -1,194 +0,0 @@ -// Code generated by MockGen. DO NOT EDIT. -// Source: github.com/rudderlabs/rudder-server/services/filemanager (interfaces: FileManagerFactory,FileManager) - -// Package mock_filemanager is a generated GoMock package. -package mock_filemanager - -import ( - context "context" - os "os" - reflect "reflect" - time "time" - - gomock "github.com/golang/mock/gomock" - filemanager "github.com/rudderlabs/rudder-server/services/filemanager" -) - -// MockFileManagerFactory is a mock of FileManagerFactory interface. -type MockFileManagerFactory struct { - ctrl *gomock.Controller - recorder *MockFileManagerFactoryMockRecorder -} - -// MockFileManagerFactoryMockRecorder is the mock recorder for MockFileManagerFactory. 
-type MockFileManagerFactoryMockRecorder struct { - mock *MockFileManagerFactory -} - -// NewMockFileManagerFactory creates a new mock instance. -func NewMockFileManagerFactory(ctrl *gomock.Controller) *MockFileManagerFactory { - mock := &MockFileManagerFactory{ctrl: ctrl} - mock.recorder = &MockFileManagerFactoryMockRecorder{mock} - return mock -} - -// EXPECT returns an object that allows the caller to indicate expected use. -func (m *MockFileManagerFactory) EXPECT() *MockFileManagerFactoryMockRecorder { - return m.recorder -} - -// New mocks base method. -func (m *MockFileManagerFactory) New(arg0 *filemanager.SettingsT) (filemanager.FileManager, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "New", arg0) - ret0, _ := ret[0].(filemanager.FileManager) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// New indicates an expected call of New. -func (mr *MockFileManagerFactoryMockRecorder) New(arg0 interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "New", reflect.TypeOf((*MockFileManagerFactory)(nil).New), arg0) -} - -// MockFileManager is a mock of FileManager interface. -type MockFileManager struct { - ctrl *gomock.Controller - recorder *MockFileManagerMockRecorder -} - -// MockFileManagerMockRecorder is the mock recorder for MockFileManager. -type MockFileManagerMockRecorder struct { - mock *MockFileManager -} - -// NewMockFileManager creates a new mock instance. -func NewMockFileManager(ctrl *gomock.Controller) *MockFileManager { - mock := &MockFileManager{ctrl: ctrl} - mock.recorder = &MockFileManagerMockRecorder{mock} - return mock -} - -// EXPECT returns an object that allows the caller to indicate expected use. -func (m *MockFileManager) EXPECT() *MockFileManagerMockRecorder { - return m.recorder -} - -// DeleteObjects mocks base method. -func (m *MockFileManager) DeleteObjects(arg0 context.Context, arg1 []string) error { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "DeleteObjects", arg0, arg1) - ret0, _ := ret[0].(error) - return ret0 -} - -// DeleteObjects indicates an expected call of DeleteObjects. -func (mr *MockFileManagerMockRecorder) DeleteObjects(arg0, arg1 interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteObjects", reflect.TypeOf((*MockFileManager)(nil).DeleteObjects), arg0, arg1) -} - -// Download mocks base method. -func (m *MockFileManager) Download(arg0 context.Context, arg1 *os.File, arg2 string) error { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "Download", arg0, arg1, arg2) - ret0, _ := ret[0].(error) - return ret0 -} - -// Download indicates an expected call of Download. -func (mr *MockFileManagerMockRecorder) Download(arg0, arg1, arg2 interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Download", reflect.TypeOf((*MockFileManager)(nil).Download), arg0, arg1, arg2) -} - -// GetConfiguredPrefix mocks base method. -func (m *MockFileManager) GetConfiguredPrefix() string { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetConfiguredPrefix") - ret0, _ := ret[0].(string) - return ret0 -} - -// GetConfiguredPrefix indicates an expected call of GetConfiguredPrefix. -func (mr *MockFileManagerMockRecorder) GetConfiguredPrefix() *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetConfiguredPrefix", reflect.TypeOf((*MockFileManager)(nil).GetConfiguredPrefix)) -} - -// GetDownloadKeyFromFileLocation mocks base method. 
-func (m *MockFileManager) GetDownloadKeyFromFileLocation(arg0 string) string { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetDownloadKeyFromFileLocation", arg0) - ret0, _ := ret[0].(string) - return ret0 -} - -// GetDownloadKeyFromFileLocation indicates an expected call of GetDownloadKeyFromFileLocation. -func (mr *MockFileManagerMockRecorder) GetDownloadKeyFromFileLocation(arg0 interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetDownloadKeyFromFileLocation", reflect.TypeOf((*MockFileManager)(nil).GetDownloadKeyFromFileLocation), arg0) -} - -// GetObjectNameFromLocation mocks base method. -func (m *MockFileManager) GetObjectNameFromLocation(arg0 string) (string, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetObjectNameFromLocation", arg0) - ret0, _ := ret[0].(string) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// GetObjectNameFromLocation indicates an expected call of GetObjectNameFromLocation. -func (mr *MockFileManagerMockRecorder) GetObjectNameFromLocation(arg0 interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetObjectNameFromLocation", reflect.TypeOf((*MockFileManager)(nil).GetObjectNameFromLocation), arg0) -} - -// ListFilesWithPrefix mocks base method. -func (m *MockFileManager) ListFilesWithPrefix(arg0 context.Context, arg1, arg2 string, arg3 int64) ([]*filemanager.FileObject, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "ListFilesWithPrefix", arg0, arg1, arg2, arg3) - ret0, _ := ret[0].([]*filemanager.FileObject) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// ListFilesWithPrefix indicates an expected call of ListFilesWithPrefix. -func (mr *MockFileManagerMockRecorder) ListFilesWithPrefix(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListFilesWithPrefix", reflect.TypeOf((*MockFileManager)(nil).ListFilesWithPrefix), arg0, arg1, arg2, arg3) -} - -// SetTimeout mocks base method. -func (m *MockFileManager) SetTimeout(arg0 time.Duration) { - m.ctrl.T.Helper() - m.ctrl.Call(m, "SetTimeout", arg0) -} - -// SetTimeout indicates an expected call of SetTimeout. -func (mr *MockFileManagerMockRecorder) SetTimeout(arg0 interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetTimeout", reflect.TypeOf((*MockFileManager)(nil).SetTimeout), arg0) -} - -// Upload mocks base method. -func (m *MockFileManager) Upload(arg0 context.Context, arg1 *os.File, arg2 ...string) (filemanager.UploadOutput, error) { - m.ctrl.T.Helper() - varargs := []interface{}{arg0, arg1} - for _, a := range arg2 { - varargs = append(varargs, a) - } - ret := m.ctrl.Call(m, "Upload", varargs...) - ret0, _ := ret[0].(filemanager.UploadOutput) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// Upload indicates an expected call of Upload. -func (mr *MockFileManagerMockRecorder) Upload(arg0, arg1 interface{}, arg2 ...interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - varargs := append([]interface{}{arg0, arg1}, arg2...) - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Upload", reflect.TypeOf((*MockFileManager)(nil).Upload), varargs...) 
-} diff --git a/processor/processor.go b/processor/processor.go index 1171bea1ee5..e6ba5bee64d 100644 --- a/processor/processor.go +++ b/processor/processor.go @@ -1406,135 +1406,137 @@ func (proc *Handle) processJobsForDest(partition string, subJobs subJob) *transf for _, batchEvent := range jobList { - var singularEvents []types.SingularEventT - var ok bool - singularEvents, ok = misc.ParseRudderEventBatch(batchEvent.EventPayload) - writeKey := gjson.Get(string(batchEvent.EventPayload), "writeKey").Str - requestIP := gjson.Get(string(batchEvent.EventPayload), "requestIP").Str - receivedAt := gjson.Get(string(batchEvent.EventPayload), "receivedAt").Time() - - if ok { - // Iterate through all the events in the batch - for _, singularEvent := range singularEvents { - messageId := misc.GetStringifiedData(singularEvent["messageId"]) - source, sourceError := proc.getSourceByWriteKey(writeKey) - if sourceError != nil { - proc.logger.Error("Dropping Job since Source not found for writeKey : ", writeKey) - continue - } + var gatewayBatchEvent types.GatewayBatchRequest + err := jsonfast.Unmarshal(batchEvent.EventPayload, &gatewayBatchEvent) + if err != nil { + proc.logger.Warnf("json parsing of event payload for %s: %v", batchEvent.JobID, err) + gatewayBatchEvent.Batch = []types.SingularEventT{} + } - if proc.config.enableDedup { - payload, _ := jsonfast.Marshal(singularEvent) - messageSize := int64(len(payload)) - if ok, previousSize := proc.dedup.Set(dedup.KeyValue{Key: messageId, Value: messageSize}); !ok { - proc.logger.Debugf("Dropping event with duplicate messageId: %s", messageId) - sourceDupStats[dupStatKey{sourceID: source.ID, equalSize: messageSize == previousSize}] += 1 - continue - } - uniqueMessageIds[messageId] = struct{}{} - } + writeKey := gatewayBatchEvent.WriteKey + requestIP := gatewayBatchEvent.RequestIP + receivedAt := gatewayBatchEvent.ReceivedAt - proc.updateSourceEventStatsDetailed(singularEvent, writeKey) + // Iterate through all the events in the batch + for _, singularEvent := range gatewayBatchEvent.Batch { + messageId := misc.GetStringifiedData(singularEvent["messageId"]) + source, sourceError := proc.getSourceByWriteKey(writeKey) + if sourceError != nil { + proc.logger.Errorf("Dropping Job since Source not found for writeKey %q: %v", writeKey, sourceError) + continue + } - // We count this as one, not destination specific ones - totalEvents++ - eventsByMessageID[messageId] = types.SingularEventWithReceivedAt{ - SingularEvent: singularEvent, - ReceivedAt: receivedAt, + if proc.config.enableDedup { + payload, _ := jsonfast.Marshal(singularEvent) + messageSize := int64(len(payload)) + if ok, previousSize := proc.dedup.Set(dedup.KeyValue{Key: messageId, Value: messageSize}); !ok { + proc.logger.Debugf("Dropping event with duplicate messageId: %s", messageId) + sourceDupStats[dupStatKey{sourceID: source.ID, equalSize: messageSize == previousSize}] += 1 + continue } + uniqueMessageIds[messageId] = struct{}{} + } - commonMetadataFromSingularEvent := makeCommonMetadataFromSingularEvent( - singularEvent, - batchEvent, - receivedAt, - source, - ) + proc.updateSourceEventStatsDetailed(singularEvent, writeKey) - payloadFunc := ro.Memoize(func() json.RawMessage { - if proc.transientSources.Apply(source.ID) { - return nil - } - payloadBytes, err := jsonfast.Marshal(singularEvent) - if err != nil { - return nil - } - return payloadBytes - }, - ) - if proc.config.eventSchemaV2Enabled && // schemas enabled - // source has schemas enabled or if we override schemas for all sources - 
(source.EventSchemasEnabled || proc.config.eventSchemaV2AllSources) && - // TODO: could use source.SourceDefinition.Category instead? - commonMetadataFromSingularEvent.SourceJobRunID == "" { - if payload := payloadFunc(); payload != nil { - eventSchemaJobs = append(eventSchemaJobs, - &jobsdb.JobT{ - UUID: batchEvent.UUID, - UserID: batchEvent.UserID, - Parameters: batchEvent.Parameters, - CustomVal: batchEvent.CustomVal, - EventPayload: payload, - CreatedAt: time.Now(), - ExpireAt: time.Now(), - WorkspaceId: batchEvent.WorkspaceId, - }, - ) - } - } + // We count this as one, not destination specific ones + totalEvents++ + eventsByMessageID[messageId] = types.SingularEventWithReceivedAt{ + SingularEvent: singularEvent, + ReceivedAt: receivedAt, + } - // REPORTING - GATEWAY metrics - START - // dummy event for metrics purposes only - event := &transformer.TransformerResponseT{} - if proc.isReportingEnabled() { - event.Metadata = *commonMetadataFromSingularEvent - proc.updateMetricMaps( - inCountMetadataMap, - inCountMap, - connectionDetailsMap, - statusDetailsMap, - event, - jobsdb.Succeeded.State, - types.GATEWAY, - func() json.RawMessage { - if payload := payloadFunc(); payload != nil { - return payload - } - return []byte("{}") + commonMetadataFromSingularEvent := makeCommonMetadataFromSingularEvent( + singularEvent, + batchEvent, + receivedAt, + source, + ) + + payloadFunc := ro.Memoize(func() json.RawMessage { + if proc.transientSources.Apply(source.ID) { + return nil + } + payloadBytes, err := jsonfast.Marshal(singularEvent) + if err != nil { + return nil + } + return payloadBytes + }, + ) + if proc.config.eventSchemaV2Enabled && // schemas enabled + // source has schemas enabled or if we override schemas for all sources + (source.EventSchemasEnabled || proc.config.eventSchemaV2AllSources) && + // TODO: could use source.SourceDefinition.Category instead? 
+ commonMetadataFromSingularEvent.SourceJobRunID == "" { + if payload := payloadFunc(); payload != nil { + eventSchemaJobs = append(eventSchemaJobs, + &jobsdb.JobT{ + UUID: batchEvent.UUID, + UserID: batchEvent.UserID, + Parameters: batchEvent.Parameters, + CustomVal: batchEvent.CustomVal, + EventPayload: payload, + CreatedAt: time.Now(), + ExpireAt: time.Now(), + WorkspaceId: batchEvent.WorkspaceId, }, ) } - // REPORTING - GATEWAY metrics - END - - // Getting all the destinations which are enabled for this - // event - if !proc.isDestinationAvailable(singularEvent, writeKey) { - continue - } + } - if _, ok := groupedEventsByWriteKey[WriteKeyT(writeKey)]; !ok { - groupedEventsByWriteKey[WriteKeyT(writeKey)] = make([]transformer.TransformerEventT, 0) - } - shallowEventCopy := transformer.TransformerEventT{} - shallowEventCopy.Message = singularEvent - shallowEventCopy.Message["request_ip"] = requestIP - enhanceWithTimeFields(&shallowEventCopy, singularEvent, receivedAt) - enhanceWithMetadata( - commonMetadataFromSingularEvent, - &shallowEventCopy, - &backendconfig.DestinationT{}, + // REPORTING - GATEWAY metrics - START + // dummy event for metrics purposes only + event := &transformer.TransformerResponseT{} + if proc.isReportingEnabled() { + event.Metadata = *commonMetadataFromSingularEvent + proc.updateMetricMaps( + inCountMetadataMap, + inCountMap, + connectionDetailsMap, + statusDetailsMap, + event, + jobsdb.Succeeded.State, + types.GATEWAY, + func() json.RawMessage { + if payload := payloadFunc(); payload != nil { + return payload + } + return []byte("{}") + }, ) + } + // REPORTING - GATEWAY metrics - END + + // Getting all the destinations which are enabled for this + // event + if !proc.isDestinationAvailable(singularEvent, writeKey) { + continue + } + + if _, ok := groupedEventsByWriteKey[WriteKeyT(writeKey)]; !ok { + groupedEventsByWriteKey[WriteKeyT(writeKey)] = make([]transformer.TransformerEventT, 0) + } + shallowEventCopy := transformer.TransformerEventT{} + shallowEventCopy.Message = singularEvent + shallowEventCopy.Message["request_ip"] = requestIP + enhanceWithTimeFields(&shallowEventCopy, singularEvent, receivedAt) + enhanceWithMetadata( + commonMetadataFromSingularEvent, + &shallowEventCopy, + &backendconfig.DestinationT{}, + ) - // TODO: TP ID preference 1.event.context set by rudderTyper 2.From WorkSpaceConfig (currently being used) - shallowEventCopy.Metadata.TrackingPlanId = source.DgSourceTrackingPlanConfig.TrackingPlan.Id - shallowEventCopy.Metadata.TrackingPlanVersion = source.DgSourceTrackingPlanConfig.TrackingPlan.Version - shallowEventCopy.Metadata.SourceTpConfig = source.DgSourceTrackingPlanConfig.Config - shallowEventCopy.Metadata.MergedTpConfig = source.DgSourceTrackingPlanConfig.GetMergedConfig(commonMetadataFromSingularEvent.EventType) + // TODO: TP ID preference 1.event.context set by rudderTyper 2.From WorkSpaceConfig (currently being used) + shallowEventCopy.Metadata.TrackingPlanId = source.DgSourceTrackingPlanConfig.TrackingPlan.Id + shallowEventCopy.Metadata.TrackingPlanVersion = source.DgSourceTrackingPlanConfig.TrackingPlan.Version + shallowEventCopy.Metadata.SourceTpConfig = source.DgSourceTrackingPlanConfig.Config + shallowEventCopy.Metadata.MergedTpConfig = source.DgSourceTrackingPlanConfig.GetMergedConfig(commonMetadataFromSingularEvent.EventType) - groupedEventsByWriteKey[WriteKeyT(writeKey)] = append(groupedEventsByWriteKey[WriteKeyT(writeKey)], shallowEventCopy) + groupedEventsByWriteKey[WriteKeyT(writeKey)] = 
append(groupedEventsByWriteKey[WriteKeyT(writeKey)], shallowEventCopy) - if proc.isReportingEnabled() { - proc.updateMetricMaps(inCountMetadataMap, outCountMap, connectionDetailsMap, destFilterStatusDetailMap, event, jobsdb.Succeeded.State, types.DESTINATION_FILTER, func() json.RawMessage { return []byte(`{}`) }) - } + if proc.isReportingEnabled() { + proc.updateMetricMaps(inCountMetadataMap, outCountMap, connectionDetailsMap, destFilterStatusDetailMap, event, jobsdb.Succeeded.State, types.DESTINATION_FILTER, func() json.RawMessage { return []byte(`{}`) }) } } diff --git a/processor/stash/stash_test.go b/processor/stash/stash_test.go index 19106db6299..7a87e3da3c7 100644 --- a/processor/stash/stash_test.go +++ b/processor/stash/stash_test.go @@ -14,10 +14,10 @@ import ( "github.com/stretchr/testify/require" "github.com/tidwall/gjson" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" backendconfig "github.com/rudderlabs/rudder-server/backend-config" "github.com/rudderlabs/rudder-server/jobsdb" - "github.com/rudderlabs/rudder-server/services/filemanager" "github.com/rudderlabs/rudder-server/services/fileuploader" "github.com/rudderlabs/rudder-server/testhelper" "github.com/rudderlabs/rudder-server/testhelper/destination" @@ -148,9 +148,9 @@ func TestStoreErrorsToObjectStorage(t *testing.T) { workspace := "defaultWorkspaceID-" + strconv.Itoa(i+1) fm, err := st.fileuploader.GetFileManager(workspace) require.NoError(t, err) - var file []*filemanager.FileObject + var file []*filemanager.FileInfo require.Eventually(t, func() bool { - file, err = fm.ListFilesWithPrefix(context.Background(), "", "", 5) + file, err = fm.ListFilesWithPrefix(context.Background(), "", "", 5).Next() if !storageSettings[workspace].Preferences.ProcErrors { return true } diff --git a/regulation-worker/cmd/main.go b/regulation-worker/cmd/main.go index 1e1dfc24def..bd3f2392b32 100644 --- a/regulation-worker/cmd/main.go +++ b/regulation-worker/cmd/main.go @@ -11,6 +11,7 @@ import ( "time" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-go-kit/stats" svcMetric "github.com/rudderlabs/rudder-go-kit/stats/metric" @@ -25,7 +26,6 @@ import ( "github.com/rudderlabs/rudder-server/regulation-worker/internal/service" "github.com/rudderlabs/rudder-server/rruntime" "github.com/rudderlabs/rudder-server/services/diagnostics" - "github.com/rudderlabs/rudder-server/services/filemanager" "github.com/rudderlabs/rudder-server/services/oauth" "github.com/rudderlabs/rudder-server/utils/misc" "github.com/rudderlabs/rudder-server/utils/types/deployment" @@ -92,7 +92,7 @@ func Run(ctx context.Context) error { Deleter: delete.NewRouter( &kvstore.KVDeleteManager{}, &batch.BatchManager{ - FMFactory: &filemanager.FileManagerFactoryT{}, + FMFactory: filemanager.New, FilesLimit: config.GetInt("REGULATION_WORKER_FILES_LIMIT", 1000), }, &api.APIManager{ diff --git a/regulation-worker/cmd/main_test.go b/regulation-worker/cmd/main_test.go index 3eb058ce3f0..743c1698838 100644 --- a/regulation-worker/cmd/main_test.go +++ b/regulation-worker/cmd/main_test.go @@ -22,11 +22,12 @@ import ( "github.com/ory/dockertest/v3" "github.com/stretchr/testify/require" + "github.com/rudderlabs/rudder-go-kit/filemanager" + "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-go-kit/testhelper/docker/resource" backendconfig "github.com/rudderlabs/rudder-server/backend-config" main 
"github.com/rudderlabs/rudder-server/regulation-worker/cmd" "github.com/rudderlabs/rudder-server/regulation-worker/internal/model" - "github.com/rudderlabs/rudder-server/services/filemanager" "github.com/rudderlabs/rudder-server/services/kvstoremanager" "github.com/rudderlabs/rudder-server/testhelper/destination" ) @@ -42,7 +43,7 @@ var ( key string fields map[string]interface{} } - uploadOutputs []filemanager.UploadOutput + uploadOutputs []filemanager.UploadedFile fileList []string @@ -374,10 +375,10 @@ func verifyBatchDeletion(t *testing.T, minioConfig map[string]interface{}) { goldenFile, err := io.ReadAll(gzipReader) require.NoError(t, err, "batch verification failed") - var fmFactory filemanager.FileManagerFactoryT - fm, err := fmFactory.New(&filemanager.SettingsT{ + fm, err := filemanager.New(&filemanager.Settings{ Provider: "S3", Config: minioConfig, + Logger: logger.NOP, }) require.NoError(t, err, "batch verification failed") @@ -466,10 +467,10 @@ func insertMinioData(t *testing.T, minioConfig map[string]interface{}) { t.Fatal("File list empty, no data to test") } - var fmFactory filemanager.FileManagerFactoryT - fm, err := fmFactory.New(&filemanager.SettingsT{ + fm, err := filemanager.New(&filemanager.Settings{ Provider: "S3", Config: minioConfig, + Logger: logger.NOP, }) require.NoError(t, err) diff --git a/regulation-worker/internal/delete/batch/batch.go b/regulation-worker/internal/delete/batch/batch.go index edcea61dbae..55a440365fc 100644 --- a/regulation-worker/internal/delete/batch/batch.go +++ b/regulation-worker/internal/delete/batch/batch.go @@ -21,11 +21,11 @@ import ( "golang.org/x/sync/errgroup" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-go-kit/stats" "github.com/rudderlabs/rudder-server/regulation-worker/internal/delete/batch/filehandler" "github.com/rudderlabs/rudder-server/regulation-worker/internal/model" - "github.com/rudderlabs/rudder-server/services/filemanager" ) var ( @@ -37,23 +37,27 @@ var ( type Batch struct { mu sync.Mutex FM filemanager.FileManager + session filemanager.ListSession TmpDirPath string } // listFiles fetches the files from filemanager under prefix mentioned and for a // specified limit. -func (b *Batch) listFiles(ctx context.Context, prefix string, limit int) (fileObjects []*filemanager.FileObject, err error) { +func (b *Batch) listFiles(ctx context.Context, prefix string, limit int) (fileObjects []*filemanager.FileInfo, err error) { pkgLogger.Debugf("getting a list of files from destination under prefix: %s with limit: %d", prefix, limit) + if b.session == nil { + b.session = b.FM.ListFilesWithPrefix(ctx, "", prefix, int64(limit)) + } - if fileObjects, err = b.FM.ListFilesWithPrefix(ctx, "", prefix, int64(limit)); err != nil { - return []*filemanager.FileObject{}, fmt.Errorf("list files under prefix: %s and limit: %d from filemanager: %v", prefix, limit, err) + if fileObjects, err = b.session.Next(); err != nil { + return []*filemanager.FileInfo{}, fmt.Errorf("list files under prefix: %s and limit: %d from filemanager: %v", prefix, limit, err) } return } // two pointer algorithm implementation to remove all the files from which users are already deleted. 
-func removeCleanedFiles(files []*filemanager.FileObject, cleanedFiles []string) []*filemanager.FileObject { +func removeCleanedFiles(files []*filemanager.FileInfo, cleanedFiles []string) []*filemanager.FileInfo { pkgLogger.Debugf("removing already cleaned files") sort.Slice(files, func(i, j int) bool { return files[i].Key < files[j].Key @@ -79,7 +83,7 @@ func removeCleanedFiles(files []*filemanager.FileObject, cleanedFiles []string) } } j = 0 - finalFiles := make([]*filemanager.FileObject, len(files)-presentCount) + finalFiles := make([]*filemanager.FileInfo, len(files)-presentCount) for i := 0; i < len(files); i++ { if !present[i] { @@ -304,7 +308,7 @@ func (b *Batch) upload(_ context.Context, uploadFileAbsPath, actualFileName, abs type BatchManager struct { FilesLimit int - FMFactory filemanager.FileManagerFactory + FMFactory filemanager.Factory } func (*BatchManager) GetSupportedDestinations() []string { @@ -325,7 +329,7 @@ func (bm *BatchManager) Delete( pkgLogger.Debugf("deleting job: %v", job, "from batch destination: %v", destName) - fm, err := bm.FMFactory.New(&filemanager.SettingsT{Provider: destName, Config: destConfig}) + fm, err := bm.FMFactory(&filemanager.Settings{Provider: destName, Config: destConfig}) if err != nil { pkgLogger.Errorf("fetching file manager for destination: %s, %w", destName, err) return model.JobStatus{Status: model.JobStatusAborted, Error: err} @@ -503,7 +507,7 @@ func getFileSize(fileAbsPath string) int { func (b *Batch) cleanup(ctx context.Context, prefix string) { pkgLogger.Debugf("cleaning up temp files created during the operation") - err := b.FM.DeleteObjects( + err := b.FM.Delete( ctx, []string{filepath.Join(prefix, StatusTrackerFileName)}, ) diff --git a/regulation-worker/internal/delete/batch/batch_test.go b/regulation-worker/internal/delete/batch/batch_test.go index 402c31e74c5..53f5aa795d9 100644 --- a/regulation-worker/internal/delete/batch/batch_test.go +++ b/regulation-worker/internal/delete/batch/batch_test.go @@ -16,9 +16,9 @@ import ( "github.com/stretchr/testify/require" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-server/regulation-worker/internal/delete/batch" "github.com/rudderlabs/rudder-server/regulation-worker/internal/model" - "github.com/rudderlabs/rudder-server/services/filemanager" ) var ( @@ -79,7 +79,7 @@ func TestBatchDelete(t *testing.T) { }, } bm := batch.BatchManager{ - FMFactory: mockFileManagerFactory{}, + FMFactory: mockFileManagerFactory, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -143,10 +143,8 @@ func TestBatchDelete(t *testing.T) { } } -type mockFileManagerFactory struct{} - // creates a tmp directory & copy all the content of testData in it, to use it as mockBucket & store it in mockFileManager struct. -func (mockFileManagerFactory) New(_ *filemanager.SettingsT) (filemanager.FileManager, error) { +func mockFileManagerFactory(_ *filemanager.Settings) (filemanager.FileManager, error) { // create tmp directory // parent directory of all the temporary files created/downloaded in the process of deletion. tmpDirPath, err := os.MkdirTemp("", "") @@ -174,7 +172,7 @@ type mockFileManager struct { func (*mockFileManager) SetTimeout(_ time.Duration) {} // Given a file pointer with cleaned file content upload to the appropriate destination, with the same name as the original. 
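
Note: with the move to rudder-go-kit, `ListFilesWithPrefix` no longer returns a slice of file objects directly; it returns a `filemanager.ListSession` that is drained page by page with `Next()`, which is why the session is kept on the `Batch` struct in `listFiles` above and why the mock below implements `Next`. A small sketch of the calling pattern; the assumption that an exhausted session yields an empty page is taken from the mock in this diff, not from documented behaviour.

```go
package example

import (
	"context"
	"fmt"

	"github.com/rudderlabs/rudder-go-kit/filemanager"
)

// listAll drains a ListSession page by page and collects every FileInfo.
func listAll(ctx context.Context, fm filemanager.FileManager, prefix string, pageSize int64) ([]*filemanager.FileInfo, error) {
	var all []*filemanager.FileInfo
	session := fm.ListFilesWithPrefix(ctx, "" /* startAfter */, prefix, pageSize)
	for {
		page, err := session.Next()
		if err != nil {
			return nil, fmt.Errorf("listing files under prefix %q: %w", prefix, err)
		}
		if len(page) == 0 { // assumed: an exhausted session returns an empty page
			return all, nil
		}
		all = append(all, page...)
	}
}
```
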
-func (fm *mockFileManager) Upload(_ context.Context, file *os.File, prefixes ...string) (filemanager.UploadOutput, error) { +func (fm *mockFileManager) Upload(_ context.Context, file *os.File, prefixes ...string) (filemanager.UploadedFile, error) { splitFileName := strings.Split(file.Name(), "/") fileName := "" if len(prefixes) > 0 { @@ -185,15 +183,15 @@ func (fm *mockFileManager) Upload(_ context.Context, file *os.File, prefixes ... finalFileName := fmt.Sprintf("%s/%s", fm.mockBucketLocation, fileName) uploadFilePtr, err := os.OpenFile(finalFileName, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644) if err != nil { - return filemanager.UploadOutput{}, err + return filemanager.UploadedFile{}, err } defer uploadFilePtr.Close() _, err = io.Copy(uploadFilePtr, file) if err != nil { - return filemanager.UploadOutput{}, err + return filemanager.UploadedFile{}, err } - return filemanager.UploadOutput{ + return filemanager.UploadedFile{ Location: fm.mockBucketLocation + "/" + fileName, ObjectName: fileName, }, nil @@ -218,7 +216,7 @@ func (fm *mockFileManager) Download(_ context.Context, outputFilePtr *os.File, l } // Given a file name as key, delete if it is present in the bucket. -func (fm *mockFileManager) DeleteObjects(_ context.Context, keys []string) error { +func (fm *mockFileManager) Delete(_ context.Context, keys []string) error { for _, key := range keys { fileLocation := fmt.Sprint(fm.mockBucketLocation, "/", key) _, err := exec.Command("rm", "-rf", fileLocation).Output() @@ -230,9 +228,13 @@ func (fm *mockFileManager) DeleteObjects(_ context.Context, keys []string) error } // given prefix & maxItems, return with list of Fileobject in the bucket. -func (fm *mockFileManager) ListFilesWithPrefix(_ context.Context, _, _ string, _ int64) (fileObjects []*filemanager.FileObject, err error) { +func (fm *mockFileManager) ListFilesWithPrefix(_ context.Context, _, _ string, _ int64) filemanager.ListSession { + return fm +} + +func (fm *mockFileManager) Next() (fileObjects []*filemanager.FileInfo, err error) { if fm.listCalled { - return []*filemanager.FileObject{}, nil + return []*filemanager.FileInfo{}, nil } fm.listCalled = true searchDir := fm.mockBucketLocation @@ -240,7 +242,7 @@ func (fm *mockFileManager) ListFilesWithPrefix(_ context.Context, _, _ string, _ splitStr := strings.Split(path, mockBucket) finalStr := strings.TrimLeft(splitStr[len(splitStr)-1], "/") if finalStr != "" { - fileObjects = append(fileObjects, &filemanager.FileObject{Key: splitStr[len(splitStr)-1]}) + fileObjects = append(fileObjects, &filemanager.FileInfo{Key: splitStr[len(splitStr)-1]}) } return nil }) @@ -258,6 +260,6 @@ func (*mockFileManager) GetDownloadKeyFromFileLocation(_ string) string { return "" } -func (*mockFileManager) GetConfiguredPrefix() string { +func (*mockFileManager) Prefix() string { return "" } diff --git a/router/batchrouter/batchrouterBenchmark_test.go b/router/batchrouter/batchrouterBenchmark_test.go index 8826c3d47fd..bbdf525abfa 100644 --- a/router/batchrouter/batchrouterBenchmark_test.go +++ b/router/batchrouter/batchrouterBenchmark_test.go @@ -1,26 +1,26 @@ package batchrouter import ( + "context" jsonstd "encoding/json" + "os" "strings" "testing" + "time" - "github.com/golang/mock/gomock" "github.com/google/uuid" "golang.org/x/sync/errgroup" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" backendconfig "github.com/rudderlabs/rudder-server/backend-config" "github.com/rudderlabs/rudder-server/jobsdb" 
- mocksFileManager "github.com/rudderlabs/rudder-server/mocks/services/filemanager" ) func Benchmark_GetStorageDateFormat(b *testing.B) { config.Reset() - mockCtrl := gomock.NewController(b) - mockFileManager := mocksFileManager.NewMockFileManager(mockCtrl) destination := &Connection{ Source: backendconfig.SourceT{}, Destination: backendconfig.DestinationT{}, @@ -33,10 +33,7 @@ func Benchmark_GetStorageDateFormat(b *testing.B) { destination.Destination.ID = randomString() destination.Source.ID = randomString() - mockFileManager.EXPECT().GetConfiguredPrefix().AnyTimes() - mockFileManager.EXPECT().ListFilesWithPrefix(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes() - - _, _ = dfProvider.GetFormat(logger.NOP, mockFileManager, destination, folderName) + _, _ = dfProvider.GetFormat(logger.NOP, mockFileManager{}, destination, folderName) } }) } @@ -96,3 +93,41 @@ func Benchmark_JSONUnmarshal(b *testing.B) { _ = g.Wait() } } + +type mockFileManager struct{} + +func (m mockFileManager) ListFilesWithPrefix(ctx context.Context, startAfter, prefix string, maxItems int64) filemanager.ListSession { + return m +} + +func (mockFileManager) Next() (fileObjects []*filemanager.FileInfo, err error) { + return nil, nil +} + +func (mockFileManager) Upload(context.Context, *os.File, ...string) (filemanager.UploadedFile, error) { + return filemanager.UploadedFile{}, nil +} + +func (mockFileManager) Download(context.Context, *os.File, string) error { + return nil +} + +func (mockFileManager) Delete(ctx context.Context, keys []string) error { + return nil +} + +func (mockFileManager) Prefix() string { + return "" +} + +func (mockFileManager) SetTimeout(timeout time.Duration) { + // no-op +} + +func (mockFileManager) GetObjectNameFromLocation(string) (string, error) { + return "", nil +} + +func (mockFileManager) GetDownloadKeyFromFileLocation(string) string { + return "" +} diff --git a/router/batchrouter/batchrouter_isolation_test.go b/router/batchrouter/batchrouter_isolation_test.go index 69b4f409bd3..1c083661eb4 100644 --- a/router/batchrouter/batchrouter_isolation_test.go +++ b/router/batchrouter/batchrouter_isolation_test.go @@ -22,6 +22,7 @@ import ( "golang.org/x/sync/errgroup" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-go-kit/stats" kithelper "github.com/rudderlabs/rudder-go-kit/testhelper" @@ -30,7 +31,6 @@ import ( "github.com/rudderlabs/rudder-server/jobsdb" "github.com/rudderlabs/rudder-server/router/batchrouter/isolation" "github.com/rudderlabs/rudder-server/runner" - "github.com/rudderlabs/rudder-server/services/filemanager" "github.com/rudderlabs/rudder-server/testhelper/destination" "github.com/rudderlabs/rudder-server/testhelper/health" "github.com/rudderlabs/rudder-server/testhelper/workspaceConfig" @@ -294,7 +294,7 @@ func BatchrouterIsolationScenario(t testing.TB, spec *BrtIsolationScenarioSpec) if spec.verifyDestinations { t.Logf("Verifying the destinations") verify := func(prefix, workspaceID, destType string, count int) { - fm, err := filemanager.DefaultFileManagerFactory.New(&filemanager.SettingsT{ + fm, err := filemanager.New(&filemanager.Settings{ Provider: "MINIO", Config: misc.GetObjectStorageConfig(misc.ObjectStorageOptsT{ Provider: "MINIO", @@ -308,7 +308,7 @@ func BatchrouterIsolationScenario(t testing.TB, spec *BrtIsolationScenarioSpec) }), }) require.NoError(t, err, "it should be able to create a file manager") - fileObjects, err := 
fm.ListFilesWithPrefix(context.Background(), "", prefix+"/"+workspaceID+"/", int64(count)) + fileObjects, err := fm.ListFilesWithPrefix(context.Background(), "", prefix+"/"+workspaceID+"/", int64(count)).Next() require.NoError(t, err, "it should be able to list files") var eventCount int for _, fileObject := range fileObjects { diff --git a/router/batchrouter/batchrouter_test.go b/router/batchrouter/batchrouter_test.go index e8cc5931df8..9a28d6d69b0 100644 --- a/router/batchrouter/batchrouter_test.go +++ b/router/batchrouter/batchrouter_test.go @@ -21,16 +21,16 @@ import ( "github.com/stretchr/testify/require" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" + "github.com/rudderlabs/rudder-go-kit/filemanager/mock_filemanager" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-server/admin" backendconfig "github.com/rudderlabs/rudder-server/backend-config" "github.com/rudderlabs/rudder-server/jobsdb" mocksBackendConfig "github.com/rudderlabs/rudder-server/mocks/backend-config" mocksJobsDB "github.com/rudderlabs/rudder-server/mocks/jobsdb" - mocksFileManager "github.com/rudderlabs/rudder-server/mocks/services/filemanager" mocksMultitenant "github.com/rudderlabs/rudder-server/mocks/services/multitenant" router_utils "github.com/rudderlabs/rudder-server/router/utils" - "github.com/rudderlabs/rudder-server/services/filemanager" "github.com/rudderlabs/rudder-server/services/rsources" "github.com/rudderlabs/rudder-server/services/transientsource" "github.com/rudderlabs/rudder-server/utils/misc" @@ -67,7 +67,7 @@ var sampleBackendConfig = backendconfig.ConfigT{ var ( sampleConfigPrefix = "config_prefix" - sampleFileObjects = []*filemanager.FileObject{ + sampleFileObjects = []*filemanager.FileInfo{ { Key: fmt.Sprintf("%s/%s/%s/%s/%s", sampleConfigPrefix, SourceIDEnabled, WriteKeyEnabled, "01-02-2006", "tmp1.log"), LastModified: time.Now(), @@ -87,10 +87,10 @@ type testContext struct { mockBatchRouterJobsDB *mocksJobsDB.MockJobsDB mockProcErrorsDB *mocksJobsDB.MockJobsDB mockBackendConfig *mocksBackendConfig.MockBackendConfig - mockFileManagerFactory *mocksFileManager.MockFileManagerFactory - mockFileManager *mocksFileManager.MockFileManager + mockFileManagerFactory filemanager.Factory + mockFileManager *mock_filemanager.MockFileManager mockConfigPrefix string - mockFileObjects []*filemanager.FileObject + mockFileObjects []*filemanager.FileInfo mockMultitenantI *mocksMultitenant.MockMultiTenantI } @@ -101,8 +101,8 @@ func (c *testContext) Setup() { c.mockBatchRouterJobsDB = mocksJobsDB.NewMockJobsDB(c.mockCtrl) c.mockProcErrorsDB = mocksJobsDB.NewMockJobsDB(c.mockCtrl) c.mockBackendConfig = mocksBackendConfig.NewMockBackendConfig(c.mockCtrl) - c.mockFileManagerFactory = mocksFileManager.NewMockFileManagerFactory(c.mockCtrl) - c.mockFileManager = mocksFileManager.NewMockFileManager(c.mockCtrl) + c.mockFileManager = mock_filemanager.NewMockFileManager(c.mockCtrl) + c.mockFileManagerFactory = func(settings *filemanager.Settings) (filemanager.FileManager, error) { return c.mockFileManager, nil } c.mockMultitenantI = mocksMultitenant.NewMockMultiTenantI(c.mockCtrl) tFunc := c.asyncHelper.ExpectAndNotifyCallbackWithName("backend_config") @@ -183,10 +183,9 @@ var _ = Describe("BatchRouter", func() { batchrouter.readPerDestination = false batchrouter.fileManagerFactory = c.mockFileManagerFactory - c.mockFileManagerFactory.EXPECT().New(gomock.Any()).Times(1).Return(c.mockFileManager, nil) - c.mockFileManager.EXPECT().Upload(gomock.Any(), 
gomock.Any(), gomock.Any()).Return(filemanager.UploadOutput{Location: "local", ObjectName: "file"}, nil) - c.mockFileManager.EXPECT().GetConfiguredPrefix().Return(c.mockConfigPrefix) - c.mockFileManager.EXPECT().ListFilesWithPrefix(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(c.mockFileObjects, nil) + c.mockFileManager.EXPECT().Upload(gomock.Any(), gomock.Any(), gomock.Any()).Return(filemanager.UploadedFile{Location: "local", ObjectName: "file"}, nil) + c.mockFileManager.EXPECT().Prefix().Return(c.mockConfigPrefix) + c.mockFileManager.EXPECT().ListFilesWithPrefix(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(filemanager.MockListSession(c.mockFileObjects, nil)) s3Payload := `{ "userId": "identified user id", diff --git a/router/batchrouter/handle.go b/router/batchrouter/handle.go index b6583732dbb..45039d88a4f 100644 --- a/router/batchrouter/handle.go +++ b/router/batchrouter/handle.go @@ -23,6 +23,7 @@ import ( "golang.org/x/exp/slices" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-go-kit/stats" kitsync "github.com/rudderlabs/rudder-go-kit/sync" @@ -34,7 +35,6 @@ import ( router_utils "github.com/rudderlabs/rudder-server/router/utils" destinationdebugger "github.com/rudderlabs/rudder-server/services/debugger/destination" "github.com/rudderlabs/rudder-server/services/diagnostics" - "github.com/rudderlabs/rudder-server/services/filemanager" "github.com/rudderlabs/rudder-server/services/multitenant" "github.com/rudderlabs/rudder-server/services/rmetrics" "github.com/rudderlabs/rudder-server/services/rsources" @@ -58,7 +58,7 @@ type Handle struct { multitenantI multitenant.MultiTenantI reporting types.Reporting backendConfig backendconfig.BackendConfig - fileManagerFactory filemanager.FileManagerFactory + fileManagerFactory filemanager.Factory transientSources transientsource.Service rsourcesService rsources.JobService warehouseClient *client.Warehouse @@ -364,7 +364,7 @@ func (brt *Handle) upload(provider string, batchJobs *BatchedJobs, isWarehouse b brt.logger.Debugf("BRT: Logged to local file: %v", gzipFilePath) useRudderStorage := isWarehouse && misc.IsConfiguredToUseRudderObjectStorage(batchJobs.Connection.Destination.Config) - uploader, err := brt.fileManagerFactory.New(&filemanager.SettingsT{ + uploader, err := brt.fileManagerFactory(&filemanager.Settings{ Provider: provider, Config: misc.GetObjectStorageConfig(misc.ObjectStorageOptsT{ Provider: provider, @@ -581,7 +581,7 @@ func (brt *Handle) updateJobStatus(batchJobs *BatchedJobs, isWarehouse bool, err connectionDetailsMap := make(map[string]*types.ConnectionDetails) transformedAtMap := make(map[string]string) statusDetailsMap := make(map[string]*types.StatusDetail) - jobStateCounts := make(map[string]map[string]int) + jobStateCounts := make(map[string]int) for _, job := range batchJobs.Jobs { jobState := batchJobState var firstAttemptedAt time.Time @@ -647,10 +647,7 @@ func (brt *Handle) updateJobStatus(batchJobs *BatchedJobs, isWarehouse bool, err WorkspaceId: job.WorkspaceId, } statusList = append(statusList, &status) - if jobStateCounts[jobState] == nil { - jobStateCounts[jobState] = make(map[string]int) - } - jobStateCounts[jobState][strconv.Itoa(attemptNum)] = jobStateCounts[jobState][strconv.Itoa(attemptNum)] + 1 + jobStateCounts[jobState] = jobStateCounts[jobState] + 1 // REPORTING - START if brt.reporting != nil && brt.reportingEnabled { diff --git 
a/router/batchrouter/handle_lifecycle.go b/router/batchrouter/handle_lifecycle.go index df5a449ce3d..5001142e253 100644 --- a/router/batchrouter/handle_lifecycle.go +++ b/router/batchrouter/handle_lifecycle.go @@ -19,6 +19,7 @@ import ( "github.com/rudderlabs/rudder-go-kit/bytesize" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-go-kit/stats" kitsync "github.com/rudderlabs/rudder-go-kit/sync" @@ -29,7 +30,6 @@ import ( router_utils "github.com/rudderlabs/rudder-server/router/utils" destinationdebugger "github.com/rudderlabs/rudder-server/services/debugger/destination" "github.com/rudderlabs/rudder-server/services/diagnostics" - "github.com/rudderlabs/rudder-server/services/filemanager" "github.com/rudderlabs/rudder-server/services/multitenant" "github.com/rudderlabs/rudder-server/services/rsources" "github.com/rudderlabs/rudder-server/services/transientsource" @@ -62,7 +62,7 @@ func (brt *Handle) Setup( brt.multitenantI = multitenantStat brt.reporting = reporting brt.backendConfig = backendConfig - brt.fileManagerFactory = filemanager.DefaultFileManagerFactory + brt.fileManagerFactory = filemanager.New brt.transientSources = transientSources brt.rsourcesService = rsourcesService if brt.warehouseClient == nil { @@ -255,7 +255,7 @@ func (brt *Handle) crashRecover() { brt.jobsDB.JournalDeleteEntry(entry.OpID) continue } - downloader, err := brt.fileManagerFactory.New(&filemanager.SettingsT{ + downloader, err := brt.fileManagerFactory(&filemanager.Settings{ Provider: object.Provider, Config: object.Config, }) diff --git a/router/batchrouter/handle_observability.go b/router/batchrouter/handle_observability.go index 44716eaebc7..33636ed3f2d 100644 --- a/router/batchrouter/handle_observability.go +++ b/router/batchrouter/handle_observability.go @@ -52,7 +52,7 @@ func (brt *Handle) collectMetrics(ctx context.Context) { } } -func sendDestStatusStats(batchDestination *Connection, jobStateCounts map[string]map[string]int, destType string, isWarehouse bool) { +func sendDestStatusStats(batchDestination *Connection, jobStateCounts map[string]int, destType string, isWarehouse bool) { tags := map[string]string{ "module": "batch_router", "destType": destType, @@ -61,14 +61,9 @@ func sendDestStatusStats(batchDestination *Connection, jobStateCounts map[string "sourceId": misc.GetTagName(batchDestination.Source.ID, batchDestination.Source.Name), } - for jobState, countByAttemptMap := range jobStateCounts { + for jobState, count := range jobStateCounts { tags["job_state"] = jobState - for attempt, count := range countByAttemptMap { - tags["attempt_number"] = attempt - if count > 0 { - stats.Default.NewTaggedStat("event_status", stats.CountType, tags).Count(count) - } - } + stats.Default.NewTaggedStat("event_status", stats.CountType, tags).Count(count) } } diff --git a/router/batchrouter/util.go b/router/batchrouter/util.go index 783ebac117b..031d5611f5a 100644 --- a/router/batchrouter/util.go +++ b/router/batchrouter/util.go @@ -10,10 +10,10 @@ import ( "golang.org/x/exp/slices" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" backendconfig "github.com/rudderlabs/rudder-server/backend-config" "github.com/rudderlabs/rudder-server/jobsdb" - "github.com/rudderlabs/rudder-server/services/filemanager" warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) @@ -102,7 +102,7 @@ func (sdfp 
*storageDateFormatProvider) GetFormat(log logger.Logger, manager file getFullPrefix := func(manager filemanager.FileManager, prefix string) (fullPrefix string) { fullPrefix = prefix - configPrefix := manager.GetConfiguredPrefix() + configPrefix := manager.Prefix() if configPrefix != "" { if configPrefix[len(configPrefix)-1:] == "/" { @@ -114,7 +114,7 @@ func (sdfp *storageDateFormatProvider) GetFormat(log logger.Logger, manager file return } fullPrefix := getFullPrefix(manager, prefix) - fileObjects, err := manager.ListFilesWithPrefix(context.TODO(), "", fullPrefix, 5) + fileObjects, err := manager.ListFilesWithPrefix(context.TODO(), "", fullPrefix, 5).Next() if err != nil { log.Errorf("[BRT]: Failed to fetch fileObjects with connIdentifier: %s, prefix: %s, Err: %v", connIdentifier, fullPrefix, err) // Returning the earlier default as we might not able to fetch the list. diff --git a/router/handle_lifecycle.go b/router/handle_lifecycle.go index 186575ba98d..5d185b516e7 100644 --- a/router/handle_lifecycle.go +++ b/router/handle_lifecycle.go @@ -215,7 +215,7 @@ func (rt *Handle) Setup( for _, pstat := range allPStats { statTags := stats.Tags{ "destType": rt.destType, - "partition": key, + "partition": pstat.Partition, } stats.Default.NewTaggedStat("rt_"+key+"_limiter_stats_throughput", stats.GaugeType, statTags).Gauge(pstat.Throughput) stats.Default.NewTaggedStat("rt_"+key+"_limiter_stats_errors", stats.GaugeType, statTags).Gauge(pstat.Errors) diff --git a/router/worker.go b/router/worker.go index eb5c8048176..6752260726e 100644 --- a/router/worker.go +++ b/router/worker.go @@ -816,7 +816,6 @@ func (w *worker) sendRouterResponseCountStat(status *jobsdb.JobStatusT, destinat "respStatusCode": status.ErrorCode, "destination": destinationTag, "destId": destination.ID, - "attempt_number": strconv.Itoa(status.AttemptNum), "workspaceId": status.WorkspaceId, // To indicate if the failure should be alerted for router-aborted-count "alert": strconv.FormatBool(alert), @@ -830,13 +829,12 @@ func (w *worker) sendEventDeliveryStat(destinationJobMetadata *types.JobMetadata destinationTag := misc.GetTagName(destination.ID, destination.Name) if status.JobState == jobsdb.Succeeded.State { eventsDeliveredStat := stats.Default.NewTaggedStat("event_delivery", stats.CountType, stats.Tags{ - "module": "router", - "destType": w.rt.destType, - "destID": destination.ID, - "destination": destinationTag, - "attempt_number": strconv.Itoa(status.AttemptNum), - "workspaceId": status.WorkspaceId, - "source": destinationJobMetadata.SourceID, + "module": "router", + "destType": w.rt.destType, + "destID": destination.ID, + "destination": destinationTag, + "workspaceId": status.WorkspaceId, + "source": destinationJobMetadata.SourceID, }) eventsDeliveredStat.Count(1) if destinationJobMetadata.ReceivedAt != "" { @@ -844,12 +842,11 @@ func (w *worker) sendEventDeliveryStat(destinationJobMetadata *types.JobMetadata if err == nil { eventsDeliveryTimeStat := stats.Default.NewTaggedStat( "event_delivery_time", stats.TimerType, map[string]string{ - "module": "router", - "destType": w.rt.destType, - "destID": destination.ID, - "destination": destinationTag, - "attempt_number": strconv.Itoa(status.AttemptNum), - "workspaceId": status.WorkspaceId, + "module": "router", + "destType": w.rt.destType, + "destID": destination.ID, + "destination": destinationTag, + "workspaceId": status.WorkspaceId, }) eventsDeliveryTimeStat.SendTiming(time.Since(receivedTime)) diff --git a/services/archiver/archiver.go b/services/archiver/archiver.go index 
5f9e484f781..ce7924e3408 100644 --- a/services/archiver/archiver.go +++ b/services/archiver/archiver.go @@ -9,9 +9,10 @@ import ( "github.com/iancoleman/strcase" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-server/services/archiver/tablearchiver" - "github.com/rudderlabs/rudder-server/services/filemanager" + "github.com/rudderlabs/rudder-server/utils/filemanagerutil" "github.com/rudderlabs/rudder-server/utils/misc" "github.com/rudderlabs/rudder-server/utils/timeutil" ) @@ -63,9 +64,9 @@ func ArchiveOldRecords(tableName, tsColumn string, archivalTimeInDays int, dbHan ) defer os.Remove(path) - fManager, err := filemanager.DefaultFileManagerFactory.New(&filemanager.SettingsT{ + fManager, err := filemanager.New(&filemanager.Settings{ Provider: config.GetString("JOBS_BACKUP_STORAGE_PROVIDER", "S3"), - Config: filemanager.GetProviderConfigForBackupsFromEnv(context.TODO()), + Config: filemanagerutil.GetProviderConfigForBackupsFromEnv(context.TODO(), config.Default), }) if err != nil { pkgLogger.Errorf("[Archiver]: Error in creating a file manager for :%s: , %v", config.GetString("JOBS_BACKUP_STORAGE_PROVIDER", "S3"), err) diff --git a/services/archiver/tablearchiver/tablearchiver.go b/services/archiver/tablearchiver/tablearchiver.go index 46a60dda8d8..d54e66d538c 100644 --- a/services/archiver/tablearchiver/tablearchiver.go +++ b/services/archiver/tablearchiver/tablearchiver.go @@ -10,8 +10,8 @@ import ( "text/template" "text/template/parse" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" - "github.com/rudderlabs/rudder-server/services/filemanager" "github.com/rudderlabs/rudder-server/utils/misc" ) diff --git a/services/diagnostics/diagnostics.go b/services/diagnostics/diagnostics.go index 76e38aa67d0..dcf4aafed9a 100644 --- a/services/diagnostics/diagnostics.go +++ b/services/diagnostics/diagnostics.go @@ -26,7 +26,6 @@ const ( RouterSuccess = "router_success" RouterFailed = "router_failed" RouterDestination = "router_destination" - RouterAttemptNum = "router_attempt_num" RouterCompletedTime = "router_average_job_time" BatchRouterEvents = "batch_router_events" BatchRouterSuccess = "batch_router_success" diff --git a/services/filemanager/azureBlobStoragemanager.go b/services/filemanager/azureBlobStoragemanager.go deleted file mode 100644 index d5ab19fbb00..00000000000 --- a/services/filemanager/azureBlobStoragemanager.go +++ /dev/null @@ -1,353 +0,0 @@ -package filemanager - -import ( - "bytes" - "context" - "errors" - "fmt" - "net/url" - "os" - "path" - "strings" - "time" - - "github.com/Azure/azure-storage-blob-go/azblob" -) - -func suppressMinorErrors(err error) error { - if err != nil { - if storageError, ok := err.(azblob.StorageError); ok { // This error is a Service-specific - switch storageError.ServiceCode() { // Compare serviceCode to ServiceCodeXxx constants - case azblob.ServiceCodeContainerAlreadyExists: - pkgLogger.Debug("Received 409. 
Container already exists") - return nil - } - } - } - return err -} - -func (manager *AzureBlobStorageManager) getBaseURL() *url.URL { - protocol := "https" - if manager.Config.DisableSSL != nil && *manager.Config.DisableSSL { - protocol = "http" - } - - endpoint := "blob.core.windows.net" - if manager.Config.EndPoint != nil && *manager.Config.EndPoint != "" { - endpoint = *manager.Config.EndPoint - } - - baseURL := url.URL{ - Scheme: protocol, - Host: fmt.Sprintf("%s.%s", manager.Config.AccountName, endpoint), - } - - if manager.Config.UseSASTokens { - baseURL.RawQuery = manager.Config.SASToken - } - - if manager.Config.ForcePathStyle != nil && *manager.Config.ForcePathStyle { - baseURL.Host = endpoint - baseURL.Path = fmt.Sprintf("/%s/", manager.Config.AccountName) - } - - return &baseURL -} - -func (manager *AzureBlobStorageManager) getContainerURL() (azblob.ContainerURL, error) { - if manager.Config.Container == "" { - return azblob.ContainerURL{}, errors.New("no container configured") - } - - credential, err := manager.getCredentials() - if err != nil { - return azblob.ContainerURL{}, err - } - - p := azblob.NewPipeline(credential, azblob.PipelineOptions{}) - - // From the Azure portal, get your storage account blob service URL endpoint. - baseURL := manager.getBaseURL() - serviceURL := azblob.NewServiceURL(*baseURL, p) - containerURL := serviceURL.NewContainerURL(manager.Config.Container) - - return containerURL, nil -} - -func (manager *AzureBlobStorageManager) getCredentials() (azblob.Credential, error) { - if manager.Config.UseSASTokens { - return azblob.NewAnonymousCredential(), nil - } - - accountName, accountKey := manager.Config.AccountName, manager.Config.AccountKey - if accountName == "" || accountKey == "" { - return nil, errors.New("either accountName or accountKey is empty") - } - - // Create a default request pipeline using your storage account name and account key. - return azblob.NewSharedKeyCredential(accountName, accountKey) -} - -// Upload passed in file to Azure Blob Storage -func (manager *AzureBlobStorageManager) Upload(ctx context.Context, file *os.File, prefixes ...string) (UploadOutput, error) { - containerURL, err := manager.getContainerURL() - if err != nil { - return UploadOutput{}, err - } - - ctx, cancel := context.WithTimeout(ctx, manager.getTimeout()) - defer cancel() - - if manager.createContainer() { - _, err = containerURL.Create(ctx, azblob.Metadata{}, azblob.PublicAccessNone) - err = suppressMinorErrors(err) - if err != nil { - return UploadOutput{}, err - } - } - - fileName := path.Join(manager.Config.Prefix, path.Join(prefixes...), path.Base(file.Name())) - - // Here's how to upload a blob. 
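
Note: this provider-specific implementation is deleted because object storage now goes through the rudder-go-kit filemanager, so callers upload via the generic interface instead of constructing an `AzureBlobStorageManager` themselves. A hedged sketch of the equivalent call path follows; the `"AZURE_BLOB"` provider string and the prefix values are assumptions, while the `Settings` fields, the variadic `Upload(ctx, file, prefixes...)` shape and the `UploadedFile` result are taken from this diff.

```go
package example

import (
	"context"
	"os"

	"github.com/rudderlabs/rudder-go-kit/filemanager"
	"github.com/rudderlabs/rudder-go-kit/logger"
)

// uploadBlob uploads a local file through the provider-agnostic FileManager API.
func uploadBlob(ctx context.Context, file *os.File, cfg map[string]interface{}) (filemanager.UploadedFile, error) {
	fm, err := filemanager.New(&filemanager.Settings{
		Provider: "AZURE_BLOB", // assumed provider name; not stated in this diff
		Config:   cfg,          // account, container, SAS token etc. supplied by the caller
		Logger:   logger.NOP,
	})
	if err != nil {
		return filemanager.UploadedFile{}, err
	}
	// Prefix segments stay variadic, mirroring the deleted manager's prefixes... argument.
	return fm.Upload(ctx, file, "rudder", "backups")
}
```
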
- blobURL := containerURL.NewBlockBlobURL(fileName) - _, err = azblob.UploadFileToBlockBlob(ctx, file, blobURL, azblob.UploadToBlockBlobOptions{ - BlockSize: 4 * 1024 * 1024, - Parallelism: 16, - }) - if err != nil { - return UploadOutput{}, err - } - - return UploadOutput{Location: manager.blobLocation(&blobURL), ObjectName: fileName}, nil -} - -func (manager *AzureBlobStorageManager) createContainer() bool { - return !manager.Config.UseSASTokens -} - -func (manager *AzureBlobStorageManager) blobLocation(blobURL *azblob.BlockBlobURL) string { - if !manager.Config.UseSASTokens { - return blobURL.String() - } - - // Reset SAS Query parameters - blobURLParts := azblob.NewBlobURLParts(blobURL.URL()) - blobURLParts.SAS = azblob.SASQueryParameters{} - newBlobURL := blobURLParts.URL() - return newBlobURL.String() -} - -func (manager *AzureBlobStorageManager) ListFilesWithPrefix(ctx context.Context, startAfter, prefix string, maxItems int64) (fileObjects []*FileObject, err error) { - containerURL, err := manager.getContainerURL() - if err != nil { - return []*FileObject{}, err - } - - blobListingDetails := azblob.BlobListingDetails{ - Metadata: true, - } - segmentOptions := azblob.ListBlobsSegmentOptions{ - Details: blobListingDetails, - Prefix: prefix, - MaxResults: int32(maxItems), - } - - ctx, cancel := context.WithTimeout(ctx, manager.getTimeout()) - defer cancel() - - // List the blobs in the container - var response *azblob.ListBlobsFlatSegmentResponse - - // Checking if maxItems > 0 to avoid function calls which expect only maxItems to be returned and not more in the code - for maxItems > 0 && manager.Config.Marker.NotDone() { - response, err = containerURL.ListBlobsFlatSegment(ctx, manager.Config.Marker, segmentOptions) - if err != nil { - return - } - manager.Config.Marker = response.NextMarker - - fileObjects = make([]*FileObject, 0) - for idx := range response.Segment.BlobItems { - if strings.Compare(response.Segment.BlobItems[idx].Name, startAfter) > 0 { - fileObjects = append(fileObjects, &FileObject{response.Segment.BlobItems[idx].Name, response.Segment.BlobItems[idx].Properties.LastModified}) - maxItems-- - } - } - } - return -} - -func (manager *AzureBlobStorageManager) Download(ctx context.Context, output *os.File, key string) error { - containerURL, err := manager.getContainerURL() - if err != nil { - return err - } - - blobURL := containerURL.NewBlockBlobURL(key) - - ctx, cancel := context.WithTimeout(ctx, manager.getTimeout()) - defer cancel() - - // Here's how to download the blob - downloadResponse, err := blobURL.Download(ctx, 0, azblob.CountToEnd, azblob.BlobAccessConditions{}, false, azblob.ClientProvidedKeyOptions{}) - if err != nil { - return err - } - - // NOTE: automatically retries are performed if the connection fails - bodyStream := downloadResponse.Body(azblob.RetryReaderOptions{MaxRetryRequests: 20}) - - // read the body into a buffer - downloadedData := bytes.Buffer{} - _, err = downloadedData.ReadFrom(bodyStream) - if err != nil { - return err - } - - _, err = output.Write(downloadedData.Bytes()) - return err -} - -/* -GetObjectNameFromLocation gets the object name/key name from the object location url - - https://account-name.blob.core.windows.net/container-name/key - >> key -*/ -func (manager *AzureBlobStorageManager) GetObjectNameFromLocation(location string) (string, error) { - strToken := strings.Split(location, fmt.Sprintf("%s/", manager.Config.Container)) - return strToken[len(strToken)-1], nil -} - -func (manager *AzureBlobStorageManager) 
GetDownloadKeyFromFileLocation(location string) string { - str := strings.Split(location, fmt.Sprintf("%s/", manager.Config.Container)) - return str[len(str)-1] -} - -type AzureBlobStorageManager struct { - Config *AzureBlobStorageConfig - timeout time.Duration -} - -func (manager *AzureBlobStorageManager) SetTimeout(timeout time.Duration) { - manager.timeout = timeout -} - -func (manager *AzureBlobStorageManager) getTimeout() time.Duration { - if manager.timeout > 0 { - return manager.timeout - } - - return getBatchRouterTimeoutConfig("AZURE_BLOB") -} - -func GetAzureBlogStorageConfig(config map[string]interface{}) *AzureBlobStorageConfig { - var containerName, accountName, accountKey, sasToken, prefix string - var endPoint *string - var marker azblob.Marker - var forcePathStyle, disableSSL *bool - var useSASTokens bool - if config["containerName"] != nil { - tmp, ok := config["containerName"].(string) - if ok { - containerName = tmp - } - } - if config["prefix"] != nil { - tmp, ok := config["prefix"].(string) - if ok { - prefix = tmp - } - } - if config["accountName"] != nil { - tmp, ok := config["accountName"].(string) - if ok { - accountName = tmp - } - } - if config["useSASTokens"] != nil { - tmp, ok := config["useSASTokens"].(bool) - if ok { - useSASTokens = tmp - } - } - if config["sasToken"] != nil { - tmp, ok := config["sasToken"].(string) - if ok { - sasToken = strings.TrimPrefix(tmp, "?") - } - } - if config["accountKey"] != nil { - tmp, ok := config["accountKey"].(string) - if ok { - accountKey = tmp - } - } - if config["endPoint"] != nil { - tmp, ok := config["endPoint"].(string) - if ok { - endPoint = &tmp - } - } - if config["forcePathStyle"] != nil { - tmp, ok := config["forcePathStyle"].(bool) - if ok { - forcePathStyle = &tmp - } - } - if config["disableSSL"] != nil { - tmp, ok := config["disableSSL"].(bool) - if ok { - disableSSL = &tmp - } - } - return &AzureBlobStorageConfig{ - Container: containerName, - Prefix: prefix, - AccountName: accountName, - AccountKey: accountKey, - UseSASTokens: useSASTokens, - SASToken: sasToken, - EndPoint: endPoint, - ForcePathStyle: forcePathStyle, - DisableSSL: disableSSL, - Marker: marker, - } -} - -type AzureBlobStorageConfig struct { - Container string - Prefix string - AccountName string - AccountKey string - SASToken string - EndPoint *string - ForcePathStyle *bool - DisableSSL *bool - Marker azblob.Marker - UseSASTokens bool -} - -func (manager *AzureBlobStorageManager) DeleteObjects(ctx context.Context, keys []string) (err error) { - containerURL, err := manager.getContainerURL() - if err != nil { - return err - } - - for _, key := range keys { - blobURL := containerURL.NewBlockBlobURL(key) - - _ctx, cancel := context.WithTimeout(ctx, manager.getTimeout()) - _, err := blobURL.Delete(_ctx, azblob.DeleteSnapshotsOptionNone, azblob.BlobAccessConditions{}) - if err != nil { - cancel() - return err - } - cancel() - } - return -} - -func (manager *AzureBlobStorageManager) GetConfiguredPrefix() string { - return manager.Config.Prefix -} diff --git a/services/filemanager/digitalOceanSpacesManager.go b/services/filemanager/digitalOceanSpacesManager.go deleted file mode 100644 index d12bf6f0e3d..00000000000 --- a/services/filemanager/digitalOceanSpacesManager.go +++ /dev/null @@ -1,311 +0,0 @@ -package filemanager - -import ( - "context" - "errors" - "fmt" - "net/url" - "os" - "path" - "strings" - "time" - - "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/aws/awserr" - "github.com/aws/aws-sdk-go/aws/credentials" - 
"github.com/aws/aws-sdk-go/aws/session" - "github.com/aws/aws-sdk-go/service/s3" - "github.com/samber/lo" - - SpacesManager "github.com/aws/aws-sdk-go/service/s3/s3manager" - - "github.com/rudderlabs/rudder-server/utils/misc" -) - -func (manager *DOSpacesManager) getSession() (*session.Session, error) { - var region string - if manager.Config.Region != nil { - region = *manager.Config.Region - } else { - region = misc.GetSpacesLocation(manager.Config.EndPoint) - } - return session.NewSession(&aws.Config{ - Region: aws.String(region), - Credentials: credentials.NewStaticCredentials(manager.Config.AccessKeyID, manager.Config.AccessKey, ""), - Endpoint: aws.String(manager.Config.EndPoint), - DisableSSL: manager.Config.DisableSSL, - S3ForcePathStyle: manager.Config.ForcePathStyle, - }) -} - -// Upload passed in file to spaces -func (manager *DOSpacesManager) Upload(ctx context.Context, file *os.File, prefixes ...string) (UploadOutput, error) { - if manager.Config.Bucket == "" { - return UploadOutput{}, errors.New("no storage bucket configured to uploader") - } - - fileName := path.Join(manager.Config.Prefix, path.Join(prefixes...), path.Base(file.Name())) - - uploadInput := &SpacesManager.UploadInput{ - ACL: aws.String("bucket-owner-full-control"), - Bucket: aws.String(manager.Config.Bucket), - Key: aws.String(fileName), - Body: file, - } - uploadSession, err := manager.getSession() - if err != nil { - return UploadOutput{}, fmt.Errorf("error starting Digital Ocean Spaces session: %w", err) - } - DOmanager := SpacesManager.NewUploader(uploadSession) - - ctx, cancel := context.WithTimeout(ctx, manager.getTimeout()) - defer cancel() - - output, err := DOmanager.UploadWithContext(ctx, uploadInput) - if err != nil { - if awsError, ok := err.(awserr.Error); ok && awsError.Code() == "MissingRegion" { - err = fmt.Errorf(fmt.Sprintf(`Bucket '%s' not found.`, manager.Config.Bucket)) - } - return UploadOutput{}, err - } - - return UploadOutput{Location: output.Location, ObjectName: fileName}, err -} - -func (manager *DOSpacesManager) Download(ctx context.Context, output *os.File, key string) error { - downloadSession, err := manager.getSession() - if err != nil { - return fmt.Errorf("error starting Digital Ocean Spaces session: %w", err) - } - - ctx, cancel := context.WithTimeout(ctx, manager.getTimeout()) - defer cancel() - - downloader := SpacesManager.NewDownloader(downloadSession) - _, err = downloader.DownloadWithContext(ctx, output, - &s3.GetObjectInput{ - Bucket: aws.String(manager.Config.Bucket), - Key: aws.String(key), - }) - - return err -} - -func (manager *DOSpacesManager) GetDownloadKeyFromFileLocation(location string) string { - parsedUrl, err := url.Parse(location) - if err != nil { - fmt.Println("error while parsing location url: ", err) - } - trimedUrl := strings.TrimLeft(parsedUrl.Path, "/") - if (manager.Config.ForcePathStyle != nil && *manager.Config.ForcePathStyle) || (!strings.Contains(parsedUrl.Host, manager.Config.Bucket)) { - return strings.TrimPrefix(trimedUrl, fmt.Sprintf(`%s/`, manager.Config.Bucket)) - } - return trimedUrl -} - -/* -GetObjectNameFromLocation gets the object name/key name from the object location url - - https://rudder.sgp1.digitaloceanspaces.com/key - >> key -*/ -func (manager *DOSpacesManager) GetObjectNameFromLocation(location string) (string, error) { - parsedURL, err := url.Parse(location) - if err != nil { - return "", err - } - trimedUrl := strings.TrimLeft(parsedURL.Path, "/") - if (manager.Config.ForcePathStyle != nil && 
*manager.Config.ForcePathStyle) || (!strings.Contains(parsedURL.Host, manager.Config.Bucket)) { - return strings.TrimPrefix(trimedUrl, fmt.Sprintf(`%s/`, manager.Config.Bucket)), nil - } - return trimedUrl, nil -} - -func (manager *DOSpacesManager) ListFilesWithPrefix(ctx context.Context, startAfter, prefix string, maxItems int64) (fileObjects []*FileObject, err error) { - if !manager.Config.IsTruncated { - pkgLogger.Infof("Manager is truncated: %v so returning here", manager.Config.IsTruncated) - return - } - fileObjects = make([]*FileObject, 0) - - sess, err := manager.getSession() - if err != nil { - return []*FileObject{}, fmt.Errorf("error starting Digital Ocean Spaces session: %w", err) - } - - // Create S3 service client - svc := s3.New(sess) - - ctx, cancel := context.WithTimeout(ctx, manager.getTimeout()) - defer cancel() - - listObjectsV2Input := s3.ListObjectsV2Input{ - Bucket: aws.String(manager.Config.Bucket), - Prefix: aws.String(prefix), - MaxKeys: &maxItems, - // Delimiter: aws.String("/"), - } - // startAfter is to resume a paused task. - if startAfter != "" { - listObjectsV2Input.StartAfter = aws.String(startAfter) - } - if manager.Config.ContinuationToken != nil { - listObjectsV2Input.ContinuationToken = manager.Config.ContinuationToken - } - - // Get the list of items - resp, err := svc.ListObjectsV2WithContext(ctx, &listObjectsV2Input) - if err != nil { - pkgLogger.Errorf("Error while listing Digital Ocean Spaces objects: %v", err) - return - } - if resp.IsTruncated != nil { - manager.Config.IsTruncated = *resp.IsTruncated - } - manager.Config.ContinuationToken = resp.NextContinuationToken - for _, item := range resp.Contents { - fileObjects = append(fileObjects, &FileObject{*item.Key, *item.LastModified}) - } - return -} - -func (manager *DOSpacesManager) DeleteObjects(ctx context.Context, keys []string) error { - sess, err := manager.getSession() - if err != nil { - return fmt.Errorf("error starting Digital Ocean Spaces session: %w", err) - } - - objects := make([]*s3.ObjectIdentifier, len(keys)) - for i, key := range keys { - objects[i] = &s3.ObjectIdentifier{Key: aws.String(key)} - } - - svc := s3.New(sess) - - batchSize := 1000 // max accepted by DeleteObjects API - chunks := lo.Chunk(objects, batchSize) - for _, chunk := range chunks { - input := &s3.DeleteObjectsInput{ - Bucket: aws.String(manager.Config.Bucket), - Delete: &s3.Delete{ - Objects: chunk, - }, - } - - _ctx, cancel := context.WithTimeout(ctx, manager.getTimeout()) - _, err := svc.DeleteObjectsWithContext(_ctx, input) - if err != nil { - if aerr, ok := err.(awserr.Error); ok { - pkgLogger.Errorf(`Error while deleting digital ocean spaces objects: %v, error code: %v`, aerr.Error(), aerr.Code()) - } else { - // Print the error, cast err to awserr.Error to get the Code and - // Message from an error. 
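
Note: the per-provider `DeleteObjects` shown here is replaced by the interface's `Delete(ctx, keys)` method, which is the rename applied to `Batch.cleanup` earlier in this diff. The sketch below keeps the 1000-key batching the deleted Digital Ocean code applied; only that batch size and the `Delete` signature are carried over, the helper itself is illustrative.

```go
package example

import (
	"context"

	"github.com/rudderlabs/rudder-go-kit/filemanager"
)

// deleteInChunks removes keys in batches through the generic Delete method,
// using the same 1000-key limit the deleted DO Spaces manager enforced.
func deleteInChunks(ctx context.Context, fm filemanager.FileManager, keys []string) error {
	const batchSize = 1000
	for start := 0; start < len(keys); start += batchSize {
		end := start + batchSize
		if end > len(keys) {
			end = len(keys)
		}
		if err := fm.Delete(ctx, keys[start:end]); err != nil {
			return err
		}
	}
	return nil
}
```
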
- pkgLogger.Errorf(`Error while deleting digital ocean spaces objects: %v`, aerr.Error()) - } - cancel() - return err - } - cancel() - } - return nil -} - -type DOSpacesManager struct { - Config *DOSpacesConfig - timeout time.Duration -} - -func (manager *DOSpacesManager) SetTimeout(timeout time.Duration) { - manager.timeout = timeout -} - -func (manager *DOSpacesManager) getTimeout() time.Duration { - if manager.timeout > 0 { - return manager.timeout - } - - return getBatchRouterTimeoutConfig("DIGITAL_OCEAN_SPACES") -} - -func GetDOSpacesConfig(config map[string]interface{}) *DOSpacesConfig { - var bucketName, prefix, endPoint, accessKeyID, accessKey string - var continuationToken *string - var region *string - var forcePathStyle, disableSSL *bool - if config["bucketName"] != nil { - tmp, ok := config["bucketName"].(string) - if ok { - bucketName = tmp - } - } - if config["prefix"] != nil { - tmp, ok := config["prefix"].(string) - if ok { - prefix = tmp - } - } - if config["endPoint"] != nil { - tmp, ok := config["endPoint"].(string) - if ok { - endPoint = tmp - } - } - if config["accessKeyID"] != nil { - tmp, ok := config["accessKeyID"].(string) - if ok { - accessKeyID = tmp - } - } - if config["accessKey"] != nil { - tmp, ok := config["accessKey"].(string) - if ok { - accessKey = tmp - } - } - if config["region"] != nil { - tmp, ok := config["region"].(string) - if ok { - region = &tmp - } - } - if config["forcePathStyle"] != nil { - tmp, ok := config["forcePathStyle"].(bool) - if ok { - forcePathStyle = &tmp - } - } - if config["disableSSL"] != nil { - tmp, ok := config["disableSSL"].(bool) - if ok { - disableSSL = &tmp - } - } - return &DOSpacesConfig{ - Bucket: bucketName, - EndPoint: endPoint, - Prefix: prefix, - AccessKeyID: accessKeyID, - AccessKey: accessKey, - Region: region, - ForcePathStyle: forcePathStyle, - DisableSSL: disableSSL, - ContinuationToken: continuationToken, - IsTruncated: true, - } -} - -type DOSpacesConfig struct { - Bucket string - Prefix string - EndPoint string - AccessKeyID string - AccessKey string - Region *string - ForcePathStyle *bool - DisableSSL *bool - ContinuationToken *string - IsTruncated bool -} - -func (manager *DOSpacesManager) GetConfiguredPrefix() string { - return manager.Config.Prefix -} diff --git a/services/filemanager/fileManager_test.go b/services/filemanager/fileManager_test.go deleted file mode 100644 index 90d18e1b97d..00000000000 --- a/services/filemanager/fileManager_test.go +++ /dev/null @@ -1,546 +0,0 @@ -package filemanager_test - -import ( - "context" - "encoding/base64" - "flag" - "fmt" - "io" - "log" - "net/http" - "os" - "os/signal" - "path" - "path/filepath" - "regexp" - "strings" - "syscall" - "testing" - "time" - - "cloud.google.com/go/storage" - jsoniter "github.com/json-iterator/go" - "github.com/minio/minio-go/v6" - "github.com/ory/dockertest/v3" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "google.golang.org/api/option" - - "github.com/Azure/azure-storage-blob-go/azblob" - - "github.com/rudderlabs/rudder-go-kit/config" - "github.com/rudderlabs/rudder-go-kit/logger" - "github.com/rudderlabs/rudder-server/services/filemanager" - "github.com/rudderlabs/rudder-server/utils/httputil" -) - -var ( - AzuriteEndpoint, gcsURL, minioEndpoint, azureSASTokens string - base64Secret = base64.StdEncoding.EncodeToString([]byte(secretAccessKey)) - bucket = "filemanager-test-1" - region = "us-east-1" - accessKeyId = "MYACCESSKEY" - secretAccessKey = "MYSECRETKEY" - hold bool - regexRequiredSuffix = 
regexp.MustCompile(".json.gz$") - fileList []string -) - -func TestMain(m *testing.M) { - config.Reset() - logger.Reset() - - os.Exit(run(m)) -} - -// run minio server & store data in it. -func run(m *testing.M) int { - flag.BoolVar(&hold, "hold", false, "hold environment clean-up after test execution until Ctrl+C is provided") - flag.Parse() - - // docker pool setup - pool, err := dockertest.NewPool("") - if err != nil { - panic(fmt.Errorf("Could not connect to docker: %s", err)) - } - - // running minio container on docker - minioResource, err := pool.RunWithOptions(&dockertest.RunOptions{ - Repository: "minio/minio", - Tag: "latest", - Cmd: []string{"server", "/data"}, - Env: []string{ - fmt.Sprintf("MINIO_ACCESS_KEY=%s", accessKeyId), - fmt.Sprintf("MINIO_SECRET_KEY=%s", secretAccessKey), - fmt.Sprintf("MINIO_SITE_REGION=%s", region), - }, - }) - if err != nil { - panic(fmt.Errorf("Could not start resource: %s", err)) - } - defer func() { - if err := pool.Purge(minioResource); err != nil { - log.Printf("Could not purge resource: %s \n", err) - } - }() - - minioEndpoint = fmt.Sprintf("localhost:%s", minioResource.GetPort("9000/tcp")) - - // check if minio server is up & running. - if err := pool.Retry(func() error { - url := fmt.Sprintf("http://%s/minio/health/live", minioEndpoint) - resp, err := http.Get(url) - if err != nil { - return err - } - defer func() { httputil.CloseResponse(resp) }() - - if resp.StatusCode != http.StatusOK { - return fmt.Errorf("status code not OK") - } - return nil - }); err != nil { - log.Fatalf("Could not connect to docker: %s", err) - } - fmt.Println("minio is up & running properly") - - useSSL := false - minioClient, err := minio.New(minioEndpoint, accessKeyId, secretAccessKey, useSSL) - if err != nil { - panic(err) - } - fmt.Println("minioClient created successfully") - - // creating bucket inside minio where testing will happen. - err = minioClient.MakeBucket(bucket, "us-east-1") - if err != nil { - panic(err) - } - fmt.Println("bucket created successfully") - - // Running Azure emulator, Azurite. 
- AzuriteResource, err := pool.RunWithOptions(&dockertest.RunOptions{ - Repository: "mcr.microsoft.com/azure-storage/azurite", - Tag: "latest", - Env: []string{ - fmt.Sprintf("AZURITE_ACCOUNTS=%s:%s", accessKeyId, base64Secret), - fmt.Sprintf("DefaultEndpointsProtocol=%s", "http"), - }, - }) - if err != nil { - log.Fatalf("Could not start azure resource: %s", err) - } - defer func() { - if err := pool.Purge(AzuriteResource); err != nil { - log.Printf("Could not purge resource: %s \n", err) - } - }() - AzuriteEndpoint = fmt.Sprintf("localhost:%s", AzuriteResource.GetPort("10000/tcp")) - fmt.Println("Azurite endpoint", AzuriteEndpoint) - fmt.Println("azurite resource successfully created") - - azureSASTokens, err = createAzureSASTokens() - if err != nil { - log.Fatalf("Could not create azure sas tokens: %s", err) - } - - // Running GCS emulator - GCSResource, err := pool.RunWithOptions(&dockertest.RunOptions{ - Repository: "fsouza/fake-gcs-server", - Tag: "latest", - Cmd: []string{"-scheme", "http"}, - }) - if err != nil { - log.Fatalf("Could not start resource: %s", err) - } - defer func() { - if err := pool.Purge(GCSResource); err != nil { - log.Printf("Could not purge resource: %s \n", err) - } - }() - - GCSEndpoint := fmt.Sprintf("localhost:%s", GCSResource.GetPort("4443/tcp")) - fmt.Println("GCS test server successfully created with endpoint: ", GCSEndpoint) - gcsURL = fmt.Sprintf("http://%s/storage/v1/", GCSEndpoint) - os.Setenv("STORAGE_EMULATOR_HOST", fmt.Sprintf("%s/storage/v1/", GCSEndpoint)) - client, err := storage.NewClient(context.TODO(), option.WithEndpoint(gcsURL)) - if err != nil { - log.Fatalf("failed to create client: %v", err) - } - bkt := client.Bucket(bucket) - err = bkt.Create(context.Background(), "test", &storage.BucketAttrs{Name: bucket}) - if err != nil { - fmt.Println("error while creating bucket: ", err) - } - fmt.Println("bucket created successfully") - - // getting list of files in `testData` directory while will be used to testing filemanager. 
- searchDir := "./goldenDirectory" - err = filepath.Walk(searchDir, func(path string, f os.FileInfo, err error) error { - if regexRequiredSuffix.MatchString(path) { - fileList = append(fileList, path) - } - return nil - }) - if err != nil { - panic(err) - } - if len(fileList) == 0 { - panic("file list empty, no data to test.") - } - fmt.Println("files list: ", fileList) - - code := m.Run() - blockOnHold() - return code -} - -func createAzureSASTokens() (string, error) { - credential, err := azblob.NewSharedKeyCredential(accessKeyId, base64Secret) - if err != nil { - return "", err - } - - sasQueryParams, err := azblob.AccountSASSignatureValues{ - Protocol: azblob.SASProtocolHTTPSandHTTP, - ExpiryTime: time.Now().UTC().Add(1 * time.Hour), - Permissions: azblob.AccountSASPermissions{Read: true, List: true, Write: true, Delete: true}.String(), - Services: azblob.AccountSASServices{Blob: true}.String(), - ResourceTypes: azblob.AccountSASResourceTypes{Container: true, Object: true}.String(), - }.NewSASQueryParameters(credential) - if err != nil { - return "", err - } - - return sasQueryParams.Encode(), nil -} - -func TestFileManager(t *testing.T) { - tests := []struct { - name string - skip string - destName string - config map[string]interface{} - }{ - { - name: "testing s3manager functionality", - destName: "S3", - config: map[string]interface{}{ - "bucketName": bucket, - "accessKeyID": accessKeyId, - "accessKey": secretAccessKey, - "enableSSE": false, - "prefix": "some-prefix", - "endPoint": minioEndpoint, - "s3ForcePathStyle": true, - "disableSSL": true, - "region": region, - }, - }, - { - name: "testing minio functionality", - destName: "MINIO", - config: map[string]interface{}{ - "bucketName": bucket, - "accessKeyID": accessKeyId, - "secretAccessKey": secretAccessKey, - "enableSSE": false, - "prefix": "some-prefix", - "endPoint": minioEndpoint, - "s3ForcePathStyle": true, - "disableSSL": true, - "region": region, - }, - }, - { - name: "testing digital ocean functionality", - destName: "DIGITAL_OCEAN_SPACES", - config: map[string]interface{}{ - "bucketName": bucket, - "accessKeyID": accessKeyId, - "accessKey": secretAccessKey, - "prefix": "some-prefix", - "endPoint": minioEndpoint, - "forcePathStyle": true, - "disableSSL": true, - "region": region, - "enableSSE": false, - }, - }, - { - name: "testing Azure blob storage filemanager functionality with account keys configured", - destName: "AZURE_BLOB", - config: map[string]interface{}{ - "containerName": bucket, - "prefix": "some-prefix", - "accountName": accessKeyId, - "accountKey": base64Secret, - "endPoint": AzuriteEndpoint, - "forcePathStyle": true, - "disableSSL": true, - }, - }, - { - skip: "storage emulator is not stable", - name: "testing GCS filemanager functionality", - destName: "GCS", - config: map[string]interface{}{ - "bucketName": bucket, - "prefix": "some-prefix", - "endPoint": gcsURL, - "s3ForcePathStyle": true, - "disableSSL": true, - }, - }, - { - name: "testing Azure blob storage filemanager functionality with sas tokens configured", - destName: "AZURE_BLOB", - config: map[string]interface{}{ - "containerName": bucket, - "prefix": "some-prefix", - "accountName": accessKeyId, - "useSASTokens": true, - "sasToken": azureSASTokens, - "endPoint": AzuriteEndpoint, - "forcePathStyle": true, - "disableSSL": true, - }, - }, - } - - for _, tt := range tests { - - t.Run(tt.name, func(t *testing.T) { - if tt.skip != "" { - t.Skip(tt.skip) - } - fmFactory := filemanager.FileManagerFactoryT{} - fm, err := 
fmFactory.New(&filemanager.SettingsT{ - Provider: tt.destName, - Config: tt.config, - }) - if err != nil { - t.Fatal(err) - } - - // upload all files - uploadOutputs := make([]filemanager.UploadOutput, 0) - for _, file := range fileList { - filePtr, err := os.Open(file) - require.NoError(t, err, "error while opening testData file to upload") - uploadOutput, err := fm.Upload(context.TODO(), filePtr, "another-prefix1", "another-prefix2") - if err != nil { - t.Fatal(err) - } - require.Equal(t, path.Join("some-prefix/another-prefix1/another-prefix2/", path.Base(file)), - uploadOutput.ObjectName) - uploadOutputs = append(uploadOutputs, uploadOutput) - filePtr.Close() - } - // list files using ListFilesWithPrefix - originalFileObject := make([]*filemanager.FileObject, 0) - originalFileNames := make(map[string]int) - fileListNames := make(map[string]int) - for i := 0; i < len(fileList); i++ { - files, err := fm.ListFilesWithPrefix(context.TODO(), "some-prefix/another-prefix1/another-prefix2/", "", 1) - require.NoError(t, err, "expected no error while listing files") - require.Equal(t, 1, len(files), "number of files should be 1") - originalFileObject = append(originalFileObject, files[0]) - originalFileNames[files[0].Key]++ - fileListNames[path.Join("some-prefix/another-prefix1/another-prefix2/", path.Base(fileList[i]))]++ - } - require.Equal(t, len(originalFileObject), len(fileList), "actual number of files different than expected") - for fileListName, count := range fileListNames { - require.Equal(t, count, originalFileNames[fileListName], "files different than expected when listed") - } - - tempFm, err := fmFactory.New(&filemanager.SettingsT{ - Provider: tt.destName, - Config: tt.config, - }) - if err != nil { - t.Fatal(err) - } - - iteratorMap := make(map[string]int) - iteratorCount := 0 - iter := filemanager.IterateFilesWithPrefix(context.TODO(), "some-prefix/another-prefix1/another-prefix2/", "", int64(len(fileList)), &tempFm) - for iter.Next() { - iteratorFile := iter.Get().Key - iteratorMap[iteratorFile]++ - iteratorCount++ - } - require.NoError(t, iter.Err(), "no error expected while iterating files") - require.Equal(t, len(fileList), iteratorCount, "actual number of files different than expected") - for fileListName, count := range fileListNames { - require.Equal(t, count, iteratorMap[fileListName], "files different than expected when iterated") - } - - // based on the obtained location, get object name by calling GetObjectNameFromLocation - objectName, err := fm.GetObjectNameFromLocation(uploadOutputs[0].Location) - require.NoError(t, err, "no error expected") - require.Equal(t, uploadOutputs[0].ObjectName, objectName, "actual object name different than expected") - - // also get download key from file location by calling GetDownloadKeyFromFileLocation - expectedKey := uploadOutputs[0].ObjectName - key := fm.GetDownloadKeyFromFileLocation(uploadOutputs[0].Location) - require.Equal(t, expectedKey, key, "actual object key different than expected") - - // get prefix based on config - splitString := strings.Split(uploadOutputs[0].ObjectName, "/") - expectedPrefix := splitString[0] - prefix := fm.GetConfiguredPrefix() - require.Equal(t, expectedPrefix, prefix, "actual prefix different than expected") - - // download one of the files & assert if it matches the original one present locally. 
- filePtr, err := os.Open(fileList[0]) - if err != nil { - fmt.Printf("error: %s while opening file: %s ", err, fileList[0]) - } - originalFile, err := io.ReadAll(filePtr) - if err != nil { - fmt.Printf("error: %s, while reading file: %s", err, fileList[0]) - } - filePtr.Close() - - DownloadedFileName := "TmpDownloadedFile" - - // fail to download the file with cancelled context - filePtr, err = os.OpenFile(DownloadedFileName, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0o644) - if err != nil { - fmt.Println("error while Creating file to download data: ", err) - } - ctx, cancel := context.WithCancel(context.TODO()) - cancel() - err = fm.Download(ctx, filePtr, key) - require.Error(t, err, "expected error while downloading file") - filePtr.Close() - - filePtr, err = os.OpenFile(DownloadedFileName, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0o644) - if err != nil { - fmt.Println("error while Creating file to download data: ", err) - } - defer os.Remove(DownloadedFileName) - err = fm.Download(context.TODO(), filePtr, key) - - require.NoError(t, err, "expected no error") - filePtr.Close() - - filePtr, err = os.OpenFile(DownloadedFileName, os.O_RDWR, 0o644) - if err != nil { - fmt.Println("error while Creating file to download data: ", err) - } - downloadedFile, err := io.ReadAll(filePtr) - if err != nil { - fmt.Println("error while reading downloaded file: ", err) - } - filePtr.Close() - - ans := strings.Compare(string(originalFile), string(downloadedFile)) - require.Equal(t, 0, ans, "downloaded file different than actual file") - - // fail to delete the file with cancelled context - ctx, cancel = context.WithCancel(context.TODO()) - cancel() - err = fm.DeleteObjects(ctx, []string{key}) - require.Error(t, err, "expected error while deleting file") - - // delete that file - err = fm.DeleteObjects(context.TODO(), []string{key}) - require.NoError(t, err, "expected no error while deleting object") - // list files again & assert if that file is still present. 
- fmFactoryNew := filemanager.FileManagerFactoryT{} - fmNew, err := fmFactoryNew.New(&filemanager.SettingsT{ - Provider: tt.destName, - Config: tt.config, - }) - if err != nil { - panic(err) - } - newFileObject, err := fmNew.ListFilesWithPrefix(context.TODO(), "", "", 1000) - if err != nil { - fmt.Println("error while getting new file object: ", err) - } - require.Equal(t, len(originalFileObject)-1, len(newFileObject), "expected original file list length to be greater than new list by 1, but is different") - }) - - t.Run(tt.name, func(t *testing.T) { - if tt.skip != "" { - t.Skip(tt.skip) - } - fmFactory := filemanager.FileManagerFactoryT{} - fm, err := fmFactory.New(&filemanager.SettingsT{ - Provider: tt.destName, - Config: tt.config, - }) - if err != nil { - t.Fatal(err) - } - - // fail to upload file - file := fileList[0] - filePtr, err := os.Open(file) - require.NoError(t, err, "error while opening testData file to upload") - ctx, cancel := context.WithTimeout(context.TODO(), time.Second*5) - cancel() - _, err = fm.Upload(ctx, filePtr) - require.Error(t, err, "expected error while uploading file") - filePtr.Close() - - // MINIO doesn't support list files with context cancellation - if tt.destName != "MINIO" { - // fail to fetch file list - ctx1, cancel := context.WithTimeout(context.TODO(), time.Second*5) - cancel() - _, err = fm.ListFilesWithPrefix(ctx1, "", "", 1000) - require.Error(t, err, "expected error while listing files") - - iter := filemanager.IterateFilesWithPrefix(ctx1, "", "", 1000, &fm) - next := iter.Next() - require.Equal(t, false, next, "next should be false when context is cancelled") - err = iter.Err() - require.Error(t, err, "expected error while iterating files") - } - }) - - } -} - -func TestGCSManager_unsupported_credentials(t *testing.T) { - var config map[string]interface{} - err := jsoniter.Unmarshal( - []byte(`{ - "project": "my-project", - "location": "US", - "bucketName": "my-bucket", - "prefix": "rudder", - "namespace": "namespace", - "credentials":"{\"installed\":{\"client_id\":\"1234.apps.googleusercontent.com\",\"project_id\":\"project_id\",\"auth_uri\":\"https://accounts.google.com/o/oauth2/auth\",\"token_uri\":\"https://oauth2.googleapis.com/token\",\"auth_provider_x509_cert_url\":\"https://www.googleapis.com/oauth2/v1/certs\",\"client_secret\":\"client_secret\",\"redirect_uris\":[\"urn:ietf:wg:oauth:2.0:oob\",\"http://localhost\"]}}", - "syncFrequency": "1440", - "syncStartAt": "09:00" - }`), - &config, - ) - assert.NoError(t, err) - manager := &filemanager.GCSManager{ - Config: filemanager.GetGCSConfig(config), - } - _, err = manager.ListFilesWithPrefix(context.TODO(), "", "/tests", 100) - assert.NotNil(t, err) - assert.Contains(t, err.Error(), "client_credentials.json file is not supported") -} - -func blockOnHold() { - if !hold { - return - } - - log.Println("Test on hold, before cleanup") - log.Println("Press Ctrl+C to exit") - - c := make(chan os.Signal, 1) - signal.Notify(c, os.Interrupt, syscall.SIGTERM) - - <-c - close(c) -} diff --git a/services/filemanager/filemanager.go b/services/filemanager/filemanager.go deleted file mode 100644 index 89252e7b6cb..00000000000 --- a/services/filemanager/filemanager.go +++ /dev/null @@ -1,210 +0,0 @@ -//go:generate mockgen -destination=../../mocks/services/filemanager/mock_filemanager.go -package mock_filemanager github.com/rudderlabs/rudder-server/services/filemanager FileManagerFactory,FileManager - -package filemanager - -import ( - "context" - "errors" - "fmt" - "os" - "time" - - 
"github.com/rudderlabs/rudder-go-kit/config" - "github.com/rudderlabs/rudder-go-kit/logger" - backendconfig "github.com/rudderlabs/rudder-server/backend-config" - "github.com/rudderlabs/rudder-server/router/rterror" -) - -var ( - pkgLogger logger.Logger - DefaultFileManagerFactory FileManagerFactory - ErrKeyNotFound = errors.New("NoSuchKey") -) - -type FileManagerFactoryT struct{} - -type UploadOutput struct { - Location string - ObjectName string -} - -type FileManagerFactory interface { - New(settings *SettingsT) (FileManager, error) -} - -type FileObject struct { - Key string - LastModified time.Time -} - -// FileManager implements all upload methods -type FileManager interface { - Upload(context.Context, *os.File, ...string) (UploadOutput, error) - Download(context.Context, *os.File, string) error - GetObjectNameFromLocation(string) (string, error) - GetDownloadKeyFromFileLocation(location string) string - DeleteObjects(ctx context.Context, keys []string) error - ListFilesWithPrefix(ctx context.Context, startAfter, prefix string, maxItems int64) (fileObjects []*FileObject, err error) - GetConfiguredPrefix() string - SetTimeout(timeout time.Duration) -} - -// SettingsT sets configuration for FileManager -type SettingsT struct { - Provider string - Config map[string]interface{} -} - -func init() { - DefaultFileManagerFactory = &FileManagerFactoryT{} - pkgLogger = logger.NewLogger().Child("filemanager") -} - -// New returns FileManager backed by configured provider -func (*FileManagerFactoryT) New(settings *SettingsT) (FileManager, error) { - switch settings.Provider { - case "S3_DATALAKE": - return NewS3Manager(settings.Config) - case "S3": - return NewS3Manager(settings.Config) - case "GCS": - return &GCSManager{ - Config: GetGCSConfig(settings.Config), - }, nil - case "AZURE_BLOB": - return &AzureBlobStorageManager{ - Config: GetAzureBlogStorageConfig(settings.Config), - }, nil - case "MINIO": - return &MinioManager{ - Config: GetMinioConfig(settings.Config), - }, nil - case "DIGITAL_OCEAN_SPACES": - return &DOSpacesManager{ - Config: GetDOSpacesConfig(settings.Config), - }, nil - } - return nil, fmt.Errorf("%w: %s", rterror.InvalidServiceProvider, settings.Provider) -} - -func GetProviderConfigFromEnv(ctx context.Context, provider string) map[string]interface{} { - providerConfig := make(map[string]interface{}) - switch provider { - - case "S3": - providerConfig["bucketName"] = config.GetString("JOBS_BACKUP_BUCKET", "rudder-saas") - providerConfig["prefix"] = config.GetString("JOBS_BACKUP_PREFIX", "") - providerConfig["accessKeyID"] = config.GetString("AWS_ACCESS_KEY_ID", "") - providerConfig["accessKey"] = config.GetString("AWS_SECRET_ACCESS_KEY", "") - providerConfig["enableSSE"] = config.GetBool("AWS_ENABLE_SSE", false) - providerConfig["regionHint"] = config.GetString("AWS_S3_REGION_HINT", "us-east-1") - providerConfig["iamRoleArn"] = config.GetString("BACKUP_IAM_ROLE_ARN", "") - if providerConfig["iamRoleArn"] != "" { - backendconfig.DefaultBackendConfig.WaitForConfig(ctx) - providerConfig["externalID"] = backendconfig.DefaultBackendConfig.Identity().ID() - } - - case "GCS": - providerConfig["bucketName"] = config.GetString("JOBS_BACKUP_BUCKET", "rudder-saas") - providerConfig["prefix"] = config.GetString("JOBS_BACKUP_PREFIX", "") - credentials, err := os.ReadFile(config.GetString("GOOGLE_APPLICATION_CREDENTIALS", "")) - if err == nil { - providerConfig["credentials"] = string(credentials) - } - - case "AZURE_BLOB": - providerConfig["containerName"] = 
config.GetString("JOBS_BACKUP_BUCKET", "rudder-saas") - providerConfig["prefix"] = config.GetString("JOBS_BACKUP_PREFIX", "") - providerConfig["accountName"] = config.GetString("AZURE_STORAGE_ACCOUNT", "") - providerConfig["accountKey"] = config.GetString("AZURE_STORAGE_ACCESS_KEY", "") - - case "MINIO": - providerConfig["bucketName"] = config.GetString("JOBS_BACKUP_BUCKET", "rudder-saas") - providerConfig["prefix"] = config.GetString("JOBS_BACKUP_PREFIX", "") - providerConfig["endPoint"] = config.GetString("MINIO_ENDPOINT", "localhost:9000") - providerConfig["accessKeyID"] = config.GetString("MINIO_ACCESS_KEY_ID", "minioadmin") - providerConfig["secretAccessKey"] = config.GetString("MINIO_SECRET_ACCESS_KEY", "minioadmin") - providerConfig["useSSL"] = config.GetBool("MINIO_SSL", false) - - case "DIGITAL_OCEAN_SPACES": - providerConfig["bucketName"] = config.GetString("JOBS_BACKUP_BUCKET", "rudder-saas") - providerConfig["prefix"] = config.GetString("JOBS_BACKUP_PREFIX", "") - providerConfig["endPoint"] = config.GetString("DO_SPACES_ENDPOINT", "") - providerConfig["accessKeyID"] = config.GetString("DO_SPACES_ACCESS_KEY_ID", "") - providerConfig["accessKey"] = config.GetString("DO_SPACES_SECRET_ACCESS_KEY", "") - } - - return providerConfig -} - -// GetProviderConfigForBackupsFromEnv returns the provider config -func GetProviderConfigForBackupsFromEnv(ctx context.Context) map[string]interface{} { - return GetProviderConfigFromEnv( - ctx, - config.GetString("JOBS_BACKUP_STORAGE_PROVIDER", "S3")) -} - -func getBatchRouterTimeoutConfig(destType string) time.Duration { - key := "timeout" - defaultValueInTimescaleUnits := int64(120) - timeScale := time.Second - - destOverrideFound := config.IsSet("BatchRouter." + destType + "." + key) - if destOverrideFound { - return config.GetDuration("BatchRouter."+destType+"."+key, defaultValueInTimescaleUnits, timeScale) - } else { - return config.GetDuration("BatchRouter."+key, defaultValueInTimescaleUnits, timeScale) - } -} - -func IterateFilesWithPrefix(ctx context.Context, prefix, startAfter string, maxItems int64, manager *FileManager) *ObjectIterator { - it := &ObjectIterator{ - ctx: ctx, - startAfter: startAfter, - maxItems: maxItems, - prefix: prefix, - manager: manager, - } - return it -} - -type ObjectIterator struct { - ctx context.Context - err error - item *FileObject - items []*FileObject - manager *FileManager - maxItems int64 - startAfter string - prefix string -} - -func (it *ObjectIterator) Next() bool { - var err error - if len(it.items) == 0 { - mn := *it.manager - it.items, err = mn.ListFilesWithPrefix(it.ctx, it.startAfter, it.prefix, it.maxItems) - if err != nil { - it.err = err - return false - } - if len(it.items) > 0 { - pkgLogger.Infof(`Fetched files list from %v (lastModifiedAt: %v) to %v (lastModifiedAt: %v)`, it.items[0].Key, it.items[0].LastModified, it.items[len(it.items)-1].Key, it.items[len(it.items)-1].LastModified) - } - } - - if len(it.items) > 0 { - it.item = it.items[0] - it.items = it.items[1:] - return true - } - return false -} - -func (it *ObjectIterator) Get() *FileObject { - return it.item -} - -func (it *ObjectIterator) Err() error { - return it.err -} diff --git a/services/filemanager/gcsmanager.go b/services/filemanager/gcsmanager.go deleted file mode 100644 index 54f385106e9..00000000000 --- a/services/filemanager/gcsmanager.go +++ /dev/null @@ -1,242 +0,0 @@ -package filemanager - -import ( - "context" - "fmt" - "io" - "os" - "path" - "strings" - "time" - - "google.golang.org/api/iterator" - - 
"github.com/rudderlabs/rudder-server/utils/googleutils" - - "cloud.google.com/go/storage" - "google.golang.org/api/option" -) - -func (manager *GCSManager) objectURL(objAttrs *storage.ObjectAttrs) string { - if manager.Config.EndPoint != nil && *manager.Config.EndPoint != "" { - return fmt.Sprintf("%s/%s/%s", *manager.Config.EndPoint, objAttrs.Bucket, objAttrs.Name) - } - return fmt.Sprintf("https://storage.googleapis.com/%s/%s", objAttrs.Bucket, objAttrs.Name) -} - -func (manager *GCSManager) Upload(ctx context.Context, file *os.File, prefixes ...string) (UploadOutput, error) { - fileName := path.Join(manager.Config.Prefix, path.Join(prefixes...), path.Base(file.Name())) - - client, err := manager.getClient(ctx) - if err != nil { - return UploadOutput{}, err - } - - ctx, cancel := context.WithTimeout(ctx, manager.getTimeout()) - defer cancel() - - obj := client.Bucket(manager.Config.Bucket).Object(fileName) - w := obj.NewWriter(ctx) - if _, err := io.Copy(w, file); err != nil { - err = fmt.Errorf("copying file to GCS: %v", err) - if closeErr := w.Close(); closeErr != nil { - return UploadOutput{}, fmt.Errorf("closing writer: %q, while: %w", closeErr, err) - } - - return UploadOutput{}, err - } - err = w.Close() - if err != nil { - return UploadOutput{}, fmt.Errorf("closing writer: %w", err) - } - - attrs, err := obj.Attrs(ctx) - if err != nil { - return UploadOutput{}, err - } - - return UploadOutput{Location: manager.objectURL(attrs), ObjectName: fileName}, err -} - -func (manager *GCSManager) ListFilesWithPrefix(ctx context.Context, startAfter, prefix string, maxItems int64) (fileObjects []*FileObject, err error) { - fileObjects = make([]*FileObject, 0) - - // Create GCS storage client - client, err := manager.getClient(ctx) - if err != nil { - return - } - - // Create GCS Bucket handle - if manager.Config.Iterator == nil { - manager.Config.Iterator = client.Bucket(manager.Config.Bucket).Objects(ctx, &storage.Query{ - Prefix: prefix, - Delimiter: "", - StartOffset: startAfter, - }) - } - var attrs *storage.ObjectAttrs - for { - if maxItems <= 0 { - break - } - attrs, err = manager.Config.Iterator.Next() - if err == iterator.Done || err != nil { - if err == iterator.Done { - err = nil - } - break - } - fileObjects = append(fileObjects, &FileObject{attrs.Name, attrs.Updated}) - maxItems-- - } - return -} - -func (manager *GCSManager) getClient(ctx context.Context) (*storage.Client, error) { - var err error - - ctx, cancel := context.WithTimeout(ctx, manager.getTimeout()) - defer cancel() - if manager.client != nil { - return manager.client, err - } - options := []option.ClientOption{} - - if manager.Config.EndPoint != nil && *manager.Config.EndPoint != "" { - options = append(options, option.WithEndpoint(*manager.Config.EndPoint)) - } - if !googleutils.ShouldSkipCredentialsInit(manager.Config.Credentials) { - if err = googleutils.CompatibleGoogleCredentialsJSON([]byte(manager.Config.Credentials)); err != nil { - return manager.client, err - } - options = append(options, option.WithCredentialsJSON([]byte(manager.Config.Credentials))) - } - - manager.client, err = storage.NewClient(ctx, options...) 
- return manager.client, err -} - -func (manager *GCSManager) Download(ctx context.Context, output *os.File, key string) error { - client, err := manager.getClient(ctx) - if err != nil { - return err - } - - ctx, cancel := context.WithTimeout(ctx, manager.getTimeout()) - defer cancel() - - rc, err := client.Bucket(manager.Config.Bucket).Object(key).NewReader(ctx) - if err != nil { - return err - } - defer rc.Close() - - _, err = io.Copy(output, rc) - return err -} - -/* -GetObjectNameFromLocation gets the object name/key name from the object location url - - https://storage.googleapis.com/bucket-name/key - >> key -*/ -func (manager *GCSManager) GetObjectNameFromLocation(location string) (string, error) { - splitStr := strings.Split(location, manager.Config.Bucket) - object := strings.TrimLeft(splitStr[len(splitStr)-1], "/") - return object, nil -} - -// TODO complete this -func (manager *GCSManager) GetDownloadKeyFromFileLocation(location string) string { - splitStr := strings.Split(location, manager.Config.Bucket) - key := strings.TrimLeft(splitStr[len(splitStr)-1], "/") - return key -} - -type GCSManager struct { - Config *GCSConfig - client *storage.Client - timeout time.Duration -} - -func (manager *GCSManager) SetTimeout(timeout time.Duration) { - manager.timeout = timeout -} - -func (manager *GCSManager) getTimeout() time.Duration { - if manager.timeout > 0 { - return manager.timeout - } - - return getBatchRouterTimeoutConfig("GCS") -} - -func GetGCSConfig(config map[string]interface{}) *GCSConfig { - var bucketName, prefix, credentials string - var endPoint *string - var forcePathStyle, disableSSL *bool - - if config["bucketName"] != nil { - tmp, ok := config["bucketName"].(string) - if ok { - bucketName = tmp - } - } - if config["prefix"] != nil { - tmp, ok := config["prefix"].(string) - if ok { - prefix = tmp - } - } - if config["credentials"] != nil { - tmp, ok := config["credentials"].(string) - if ok { - credentials = tmp - } - } - if config["endPoint"] != nil { - tmp, ok := config["endPoint"].(string) - if ok { - endPoint = &tmp - } - } - if config["forcePathStyle"] != nil { - tmp, ok := config["forcePathStyle"].(bool) - if ok { - forcePathStyle = &tmp - } - } - if config["disableSSL"] != nil { - tmp, ok := config["disableSSL"].(bool) - if ok { - disableSSL = &tmp - } - } - return &GCSConfig{ - Bucket: bucketName, - Prefix: prefix, - Credentials: credentials, - EndPoint: endPoint, - ForcePathStyle: forcePathStyle, - DisableSSL: disableSSL, - } -} - -type GCSConfig struct { - Bucket string - Prefix string - Credentials string - EndPoint *string - ForcePathStyle *bool - DisableSSL *bool - Iterator *storage.ObjectIterator -} - -func (*GCSManager) DeleteObjects(_ context.Context, _ []string) (err error) { - return -} - -func (manager *GCSManager) GetConfiguredPrefix() string { - return manager.Config.Prefix -} diff --git a/services/filemanager/goldenDirectory/original.json.gz b/services/filemanager/goldenDirectory/original.json.gz deleted file mode 100644 index 90f590d5294..00000000000 Binary files a/services/filemanager/goldenDirectory/original.json.gz and /dev/null differ diff --git a/services/filemanager/goldenDirectory/original100.json.gz b/services/filemanager/goldenDirectory/original100.json.gz deleted file mode 100644 index f65b0496323..00000000000 Binary files a/services/filemanager/goldenDirectory/original100.json.gz and /dev/null differ diff --git a/services/filemanager/miniomanager.go b/services/filemanager/miniomanager.go deleted file mode 100644 index 
c2cc5734e47..00000000000 --- a/services/filemanager/miniomanager.go +++ /dev/null @@ -1,243 +0,0 @@ -package filemanager - -import ( - "context" - "errors" - "fmt" - "net/url" - "os" - "path" - "strings" - "time" - - "github.com/minio/minio-go/v7" - "github.com/minio/minio-go/v7/pkg/credentials" -) - -func (manager *MinioManager) ObjectUrl(objectName string) string { - protocol := "http" - if manager.Config.UseSSL { - protocol = "https" - } - return protocol + "://" + manager.Config.EndPoint + "/" + manager.Config.Bucket + "/" + objectName -} - -func (manager *MinioManager) Upload(ctx context.Context, file *os.File, prefixes ...string) (UploadOutput, error) { - if manager.Config.Bucket == "" { - return UploadOutput{}, errors.New("no storage bucket configured to uploader") - } - - minioClient, err := manager.getClient() - if err != nil { - return UploadOutput{}, err - } - - ctx, cancel := context.WithTimeout(ctx, manager.getTimeout()) - defer cancel() - - exists, err := minioClient.BucketExists(ctx, manager.Config.Bucket) - if err != nil { - return UploadOutput{}, fmt.Errorf("checking bucket: %w", err) - } - if !exists { - if err = minioClient.MakeBucket(ctx, manager.Config.Bucket, minio.MakeBucketOptions{Region: "us-east-1"}); err != nil { - return UploadOutput{}, fmt.Errorf("creating bucket: %w", err) - } - } - - fileName := path.Join(manager.Config.Prefix, path.Join(prefixes...), path.Base(file.Name())) - - _, err = minioClient.FPutObject(ctx, manager.Config.Bucket, fileName, file.Name(), minio.PutObjectOptions{}) - if err != nil { - return UploadOutput{}, err - } - - return UploadOutput{Location: manager.ObjectUrl(fileName), ObjectName: fileName}, nil -} - -func (manager *MinioManager) Download(ctx context.Context, file *os.File, key string) error { - minioClient, err := manager.getClient() - if err != nil { - return err - } - - ctx, cancel := context.WithTimeout(ctx, manager.getTimeout()) - defer cancel() - - err = minioClient.FGetObject(ctx, manager.Config.Bucket, key, file.Name(), minio.GetObjectOptions{}) - return err -} - -/* -GetObjectNameFromLocation gets the object name/key name from the object location url - - https://minio-endpoint/bucket-name/key1 - >> key1 - http://minio-endpoint/bucket-name/key2 - >> key2 -*/ -func (manager *MinioManager) GetObjectNameFromLocation(location string) (string, error) { - var baseURL string - if manager.Config.UseSSL { - baseURL += "https://" - } else { - baseURL += "http://" - } - baseURL += manager.Config.EndPoint + "/" - baseURL += manager.Config.Bucket + "/" - return location[len(baseURL):], nil -} - -func (manager *MinioManager) GetDownloadKeyFromFileLocation(location string) string { - parsedUrl, err := url.Parse(location) - if err != nil { - fmt.Println("error while parsing location url: ", err) - } - trimedUrl := strings.TrimLeft(parsedUrl.Path, "/") - return strings.TrimPrefix(trimedUrl, fmt.Sprintf(`%s/`, manager.Config.Bucket)) -} - -func (manager *MinioManager) DeleteObjects(ctx context.Context, keys []string) (err error) { - objectChannel := make(chan minio.ObjectInfo, len(keys)) - for _, key := range keys { - objectChannel <- minio.ObjectInfo{Key: key} - } - close(objectChannel) - - minioClient, err := manager.getClient() - if err != nil { - return err - } - - ctx, cancel := context.WithTimeout(ctx, manager.getTimeout()) - defer cancel() - - tmp := <-minioClient.RemoveObjects(ctx, manager.Config.Bucket, objectChannel, minio.RemoveObjectsOptions{}) - return tmp.Err -} - -func (manager *MinioManager) ListFilesWithPrefix(_ 
context.Context, startAfter, prefix string, maxItems int64) (fileObjects []*FileObject, err error) { - if !manager.Config.IsTruncated { - pkgLogger.Infof("Manager is truncated: %v so returning here", manager.Config.IsTruncated) - return - } - fileObjects = make([]*FileObject, 0) - - // Created minio core - core, err := minio.NewCore(manager.Config.EndPoint, &minio.Options{ - Creds: credentials.NewStaticV4(manager.Config.AccessKeyID, manager.Config.SecretAccessKey, ""), - Secure: manager.Config.UseSSL, - }) - if err != nil { - return - } - - // List the Objects in the bucket - result, err := core.ListObjectsV2(manager.Config.Bucket, prefix, startAfter, manager.Config.ContinuationToken, "", int(maxItems)) - if err != nil { - return - } - - for idx := range result.Contents { - fileObjects = append(fileObjects, &FileObject{result.Contents[idx].Key, result.Contents[idx].LastModified}) - } - manager.Config.IsTruncated = result.IsTruncated - manager.Config.ContinuationToken = result.NextContinuationToken - return -} - -func (manager *MinioManager) getClient() (*minio.Client, error) { - var err error - if manager.client == nil { - manager.client, err = minio.New(manager.Config.EndPoint, &minio.Options{ - Creds: credentials.NewStaticV4(manager.Config.AccessKeyID, manager.Config.SecretAccessKey, ""), - Secure: manager.Config.UseSSL, - }) - if err != nil { - return &minio.Client{}, err - } - } - return manager.client, nil -} - -func (manager *MinioManager) GetConfiguredPrefix() string { - return manager.Config.Prefix -} - -func GetMinioConfig(config map[string]interface{}) *MinioConfig { - var bucketName, prefix, endPoint, accessKeyID, secretAccessKey, continuationToken string - var useSSL, ok bool - if config["bucketName"] != nil { - tmp, ok := config["bucketName"].(string) - if ok { - bucketName = tmp - } - } - if config["prefix"] != nil { - tmp, ok := config["prefix"].(string) - if ok { - prefix = tmp - } - } - if config["endPoint"] != nil { - tmp, ok := config["endPoint"].(string) - if ok { - endPoint = tmp - } - } - if config["accessKeyID"] != nil { - tmp, ok := config["accessKeyID"].(string) - if ok { - accessKeyID = tmp - } - } - if config["secretAccessKey"] != nil { - tmp, ok := config["secretAccessKey"].(string) - if ok { - secretAccessKey = tmp - } - } - if config["useSSL"] != nil { - if useSSL, ok = config["useSSL"].(bool); !ok { - useSSL = false - } - } - - return &MinioConfig{ - Bucket: bucketName, - Prefix: prefix, - EndPoint: endPoint, - AccessKeyID: accessKeyID, - SecretAccessKey: secretAccessKey, - UseSSL: useSSL, - ContinuationToken: continuationToken, - IsTruncated: true, - } -} - -type MinioManager struct { - Config *MinioConfig - client *minio.Client - timeout time.Duration -} - -func (manager *MinioManager) SetTimeout(timeout time.Duration) { - manager.timeout = timeout -} - -func (manager *MinioManager) getTimeout() time.Duration { - if manager.timeout > 0 { - return manager.timeout - } - - return getBatchRouterTimeoutConfig("MINIO") -} - -type MinioConfig struct { - Bucket string - Prefix string - EndPoint string - AccessKeyID string - SecretAccessKey string - UseSSL bool - ContinuationToken string - IsTruncated bool -} diff --git a/services/filemanager/s3manager.go b/services/filemanager/s3manager.go deleted file mode 100644 index f6616c6842e..00000000000 --- a/services/filemanager/s3manager.go +++ /dev/null @@ -1,290 +0,0 @@ -package filemanager - -import ( - "context" - "errors" - "fmt" - "net/url" - "os" - "path" - "strings" - "time" - - "github.com/aws/aws-sdk-go/aws" - 
"github.com/aws/aws-sdk-go/aws/awserr" - "github.com/aws/aws-sdk-go/aws/session" - "github.com/aws/aws-sdk-go/service/s3" - awsS3Manager "github.com/aws/aws-sdk-go/service/s3/s3manager" - "github.com/mitchellh/mapstructure" - "github.com/samber/lo" - - appConfig "github.com/rudderlabs/rudder-go-kit/config" - "github.com/rudderlabs/rudder-server/utils/awsutils" -) - -// Upload passed in file to s3 -func (manager *S3Manager) Upload(ctx context.Context, file *os.File, prefixes ...string) (UploadOutput, error) { - fileName := path.Join(manager.Config.Prefix, path.Join(prefixes...), path.Base(file.Name())) - - uploadInput := &awsS3Manager.UploadInput{ - ACL: aws.String("bucket-owner-full-control"), - Bucket: aws.String(manager.Config.Bucket), - Key: aws.String(fileName), - Body: file, - } - if manager.Config.EnableSSE { - uploadInput.ServerSideEncryption = aws.String("AES256") - } - - uploadSession, err := manager.getSession(ctx) - if err != nil { - return UploadOutput{}, fmt.Errorf("error starting S3 session: %w", err) - } - s3manager := awsS3Manager.NewUploader(uploadSession) - - ctx, cancel := context.WithTimeout(ctx, manager.getTimeout()) - defer cancel() - - output, err := s3manager.UploadWithContext(ctx, uploadInput) - if err != nil { - if awsError, ok := err.(awserr.Error); ok && awsError.Code() == "MissingRegion" { - err = fmt.Errorf(fmt.Sprintf(`Bucket '%s' not found.`, manager.Config.Bucket)) - } - return UploadOutput{}, err - } - - return UploadOutput{Location: output.Location, ObjectName: fileName}, err -} - -func (manager *S3Manager) Download(ctx context.Context, output *os.File, key string) error { - sess, err := manager.getSession(ctx) - if err != nil { - return fmt.Errorf("error starting S3 session: %w", err) - } - - downloader := awsS3Manager.NewDownloader(sess) - - ctx, cancel := context.WithTimeout(ctx, manager.getTimeout()) - defer cancel() - - _, err = downloader.DownloadWithContext(ctx, output, - &s3.GetObjectInput{ - Bucket: aws.String(manager.Config.Bucket), - Key: aws.String(key), - }) - if err != nil { - if aerr, ok := err.(awserr.Error); ok && aerr.Code() == ErrKeyNotFound.Error() { - return ErrKeyNotFound - } - return err - } - return nil -} - -/* -GetObjectNameFromLocation gets the object name/key name from the object location url - - https://bucket-name.s3.amazonaws.com/key - >> key -*/ -func (manager *S3Manager) GetObjectNameFromLocation(location string) (string, error) { - parsedUrl, err := url.Parse(location) - if err != nil { - return "", err - } - trimedUrl := strings.TrimLeft(parsedUrl.Path, "/") - if (manager.Config.S3ForcePathStyle != nil && *manager.Config.S3ForcePathStyle) || (!strings.Contains(parsedUrl.Host, manager.Config.Bucket)) { - return strings.TrimPrefix(trimedUrl, fmt.Sprintf(`%s/`, manager.Config.Bucket)), nil - } - return trimedUrl, nil -} - -func (manager *S3Manager) GetDownloadKeyFromFileLocation(location string) string { - parsedURL, err := url.Parse(location) - if err != nil { - fmt.Println("error while parsing location url: ", err) - } - trimmedURL := strings.TrimLeft(parsedURL.Path, "/") - if (manager.Config.S3ForcePathStyle != nil && *manager.Config.S3ForcePathStyle) || (!strings.Contains(parsedURL.Host, manager.Config.Bucket)) { - return strings.TrimPrefix(trimmedURL, fmt.Sprintf(`%s/`, manager.Config.Bucket)) - } - return trimmedURL -} - -func (manager *S3Manager) DeleteObjects(ctx context.Context, keys []string) (err error) { - sess, err := manager.getSession(ctx) - if err != nil { - return fmt.Errorf("error starting S3 session: %w", 
err) - } - - var objects []*s3.ObjectIdentifier - for _, key := range keys { - objects = append(objects, &s3.ObjectIdentifier{Key: aws.String(key)}) - } - - svc := s3.New(sess) - - batchSize := 1000 // max accepted by DeleteObjects API - chunks := lo.Chunk(objects, batchSize) - for _, chunk := range chunks { - input := &s3.DeleteObjectsInput{ - Bucket: aws.String(manager.Config.Bucket), - Delete: &s3.Delete{ - Objects: chunk, - }, - } - - _ctx, cancel := context.WithTimeout(ctx, manager.getTimeout()) - defer cancel() - - _, err := svc.DeleteObjectsWithContext(_ctx, input) - if err != nil { - if aerr, ok := err.(awserr.Error); ok { - pkgLogger.Errorf(`Error while deleting S3 objects: %v, error code: %v`, aerr.Error(), aerr.Code()) - } else { - // Print the error, cast err to awserr.Error to get the Code and - // Message from an error. - pkgLogger.Errorf(`Error while deleting S3 objects: %v`, aerr.Error()) - } - return err - } - } - return nil -} - -func (manager *S3Manager) getSession(ctx context.Context) (*session.Session, error) { - if manager.session != nil { - return manager.session, nil - } - - if manager.Config.Bucket == "" { - return nil, errors.New("no storage bucket configured to downloader") - } - if !manager.Config.UseGlue || manager.Config.Region == nil { - getRegionSession, err := session.NewSession() - if err != nil { - return nil, err - } - - ctx, cancel := context.WithTimeout(ctx, manager.getTimeout()) - defer cancel() - - region, err := awsS3Manager.GetBucketRegion(ctx, getRegionSession, manager.Config.Bucket, manager.Config.RegionHint) - if err != nil { - pkgLogger.Errorf("Failed to fetch AWS region for bucket %s. Error %v", manager.Config.Bucket, err) - /// Failed to Get Region probably due to VPC restrictions, Will proceed to try with AccessKeyID and AccessKey - } - manager.Config.Region = aws.String(region) - manager.SessionConfig.Region = region - } - - var err error - manager.session, err = awsutils.CreateSession(manager.SessionConfig) - if err != nil { - return nil, err - } - return manager.session, err -} - -// IMPT NOTE: `ListFilesWithPrefix` support Continuation Token. So, if you want same set of files (says 1st 1000 again) -// then create a new S3Manager & not use the existing one. Since, using the existing one will by default return next 1000 files. -func (manager *S3Manager) ListFilesWithPrefix(ctx context.Context, startAfter, prefix string, maxItems int64) (fileObjects []*FileObject, err error) { - if !manager.Config.IsTruncated { - pkgLogger.Infof("Manager is truncated: %v so returning here", manager.Config.IsTruncated) - return - } - fileObjects = make([]*FileObject, 0) - - sess, err := manager.getSession(ctx) - if err != nil { - return []*FileObject{}, fmt.Errorf("error starting S3 session: %w", err) - } - // Create S3 service client - svc := s3.New(sess) - listObjectsV2Input := s3.ListObjectsV2Input{ - Bucket: aws.String(manager.Config.Bucket), - Prefix: aws.String(prefix), - MaxKeys: &maxItems, - // Delimiter: aws.String("/"), - } - // startAfter is to resume a paused task. 
- if startAfter != "" { - listObjectsV2Input.StartAfter = aws.String(startAfter) - } - - if manager.Config.ContinuationToken != nil { - listObjectsV2Input.ContinuationToken = manager.Config.ContinuationToken - } - - ctx, cancel := context.WithTimeout(ctx, manager.getTimeout()) - defer cancel() - - // Get the list of items - resp, err := svc.ListObjectsV2WithContext(ctx, &listObjectsV2Input) - if err != nil { - pkgLogger.Errorf("Error while listing S3 objects: %v", err) - return - } - if resp.IsTruncated != nil { - manager.Config.IsTruncated = *resp.IsTruncated - } - manager.Config.ContinuationToken = resp.NextContinuationToken - for _, item := range resp.Contents { - fileObjects = append(fileObjects, &FileObject{*item.Key, *item.LastModified}) - } - return -} - -func (manager *S3Manager) GetConfiguredPrefix() string { - return manager.Config.Prefix -} - -type S3Manager struct { - Config *S3Config - SessionConfig *awsutils.SessionConfig - session *session.Session - timeout time.Duration -} - -func (manager *S3Manager) SetTimeout(timeout time.Duration) { - manager.timeout = timeout -} - -func (manager *S3Manager) getTimeout() time.Duration { - if manager.timeout > 0 { - return manager.timeout - } - - return getBatchRouterTimeoutConfig("S3") -} - -func NewS3Manager(config map[string]interface{}) (*S3Manager, error) { - var s3Config S3Config - if err := mapstructure.Decode(config, &s3Config); err != nil { - return nil, err - } - regionHint := appConfig.GetString("AWS_S3_REGION_HINT", "us-east-1") - s3Config.RegionHint = regionHint - s3Config.IsTruncated = true - sessionConfig, err := awsutils.NewSimpleSessionConfig(config, s3.ServiceName) - if err != nil { - return nil, err - } - return &S3Manager{ - Config: &s3Config, - SessionConfig: sessionConfig, - }, nil -} - -type S3Config struct { - Bucket string `mapstructure:"bucketName"` - Prefix string `mapstructure:"Prefix"` - Region *string `mapstructure:"region"` - Endpoint *string `mapstructure:"endpoint"` - S3ForcePathStyle *bool `mapstructure:"s3ForcePathStyle"` - DisableSSL *bool `mapstructure:"disableSSL"` - EnableSSE bool `mapstructure:"enableSSE"` - RegionHint string `mapstructure:"regionHint"` - ContinuationToken *string `mapstructure:"continuationToken"` - IsTruncated bool `mapstructure:"isTruncated"` - UseGlue bool `mapstructure:"useGlue"` -} diff --git a/services/filemanager/s3manager_test.go b/services/filemanager/s3manager_test.go deleted file mode 100644 index da62ffaebae..00000000000 --- a/services/filemanager/s3manager_test.go +++ /dev/null @@ -1,126 +0,0 @@ -package filemanager - -import ( - "context" - "testing" - - "github.com/aws/aws-sdk-go/aws" - "github.com/stretchr/testify/assert" - - "github.com/rudderlabs/rudder-server/utils/awsutils" -) - -func TestNewS3ManagerWithNil(t *testing.T) { - s3Manager, err := NewS3Manager(nil) - assert.EqualError(t, err, "config should not be nil") - assert.Nil(t, s3Manager) -} - -func TestNewS3ManagerWithAccessKeys(t *testing.T) { - s3Manager, err := NewS3Manager(map[string]interface{}{ - "bucketName": "someBucket", - "region": "someRegion", - "accessKeyID": "someAccessKeyId", - "accessKey": "someSecretAccessKey", - }) - assert.Nil(t, err) - assert.NotNil(t, s3Manager) - assert.Equal(t, "someBucket", s3Manager.Config.Bucket) - assert.Equal(t, aws.String("someRegion"), s3Manager.Config.Region) - assert.Equal(t, "someAccessKeyId", s3Manager.SessionConfig.AccessKeyID) - assert.Equal(t, "someSecretAccessKey", s3Manager.SessionConfig.AccessKey) - assert.Equal(t, false, 
s3Manager.SessionConfig.RoleBasedAuth) -} - -func TestNewS3ManagerWithRole(t *testing.T) { - s3Manager, err := NewS3Manager(map[string]interface{}{ - "bucketName": "someBucket", - "region": "someRegion", - "iamRoleARN": "someIAMRole", - "externalID": "someExternalID", - }) - assert.Nil(t, err) - assert.NotNil(t, s3Manager) - assert.Equal(t, "someBucket", s3Manager.Config.Bucket) - assert.Equal(t, aws.String("someRegion"), s3Manager.Config.Region) - assert.Equal(t, "someIAMRole", s3Manager.SessionConfig.IAMRoleARN) - assert.Equal(t, "someExternalID", s3Manager.SessionConfig.ExternalID) - assert.Equal(t, true, s3Manager.SessionConfig.RoleBasedAuth) -} - -func TestNewS3ManagerWithBothAccessKeysAndRole(t *testing.T) { - s3Manager, err := NewS3Manager(map[string]interface{}{ - "bucketName": "someBucket", - "region": "someRegion", - "iamRoleARN": "someIAMRole", - "externalID": "someExternalID", - "accessKeyID": "someAccessKeyId", - "accessKey": "someSecretAccessKey", - }) - assert.Nil(t, err) - assert.NotNil(t, s3Manager) - assert.Equal(t, "someBucket", s3Manager.Config.Bucket) - assert.Equal(t, aws.String("someRegion"), s3Manager.Config.Region) - assert.Equal(t, "someAccessKeyId", s3Manager.SessionConfig.AccessKeyID) - assert.Equal(t, "someSecretAccessKey", s3Manager.SessionConfig.AccessKey) - assert.Equal(t, "someIAMRole", s3Manager.SessionConfig.IAMRoleARN) - assert.Equal(t, "someExternalID", s3Manager.SessionConfig.ExternalID) - assert.Equal(t, true, s3Manager.SessionConfig.RoleBasedAuth) -} - -func TestNewS3ManagerWithBothAccessKeysAndRoleButRoleBasedAuthFalse(t *testing.T) { - s3Manager, err := NewS3Manager(map[string]interface{}{ - "bucketName": "someBucket", - "region": "someRegion", - "iamRoleARN": "someIAMRole", - "externalID": "someExternalID", - "accessKeyID": "someAccessKeyId", - "accessKey": "someSecretAccessKey", - "roleBasedAuth": false, - }) - assert.Nil(t, err) - assert.NotNil(t, s3Manager) - assert.Equal(t, "someBucket", s3Manager.Config.Bucket) - assert.Equal(t, aws.String("someRegion"), s3Manager.Config.Region) - assert.Equal(t, "someAccessKeyId", s3Manager.SessionConfig.AccessKeyID) - assert.Equal(t, "someSecretAccessKey", s3Manager.SessionConfig.AccessKey) - assert.Equal(t, "someIAMRole", s3Manager.SessionConfig.IAMRoleARN) - assert.Equal(t, "someExternalID", s3Manager.SessionConfig.ExternalID) - assert.Equal(t, false, s3Manager.SessionConfig.RoleBasedAuth) -} - -func TestGetSessionWithAccessKeys(t *testing.T) { - s3Manager := S3Manager{ - Config: &S3Config{ - Bucket: "someBucket", - Region: aws.String("someRegion"), - }, - SessionConfig: &awsutils.SessionConfig{ - AccessKeyID: "someAccessKeyId", - AccessKey: "someSecretAccessKey", - Region: "someRegion", - }, - } - awsSession, err := s3Manager.getSession(context.TODO()) - assert.Nil(t, err) - assert.NotNil(t, awsSession) - assert.NotNil(t, s3Manager.session) -} - -func TestGetSessionWithIAMRole(t *testing.T) { - s3Manager := S3Manager{ - Config: &S3Config{ - Bucket: "someBucket", - Region: aws.String("someRegion"), - }, - SessionConfig: &awsutils.SessionConfig{ - IAMRoleARN: "someIAMRole", - ExternalID: "someExternalID", - Region: "someRegion", - }, - } - awsSession, err := s3Manager.getSession(context.TODO()) - assert.Nil(t, err) - assert.NotNil(t, awsSession) - assert.NotNil(t, s3Manager.session) -} diff --git a/services/fileuploader/fileuploader.go b/services/fileuploader/fileuploader.go index e2bfd73e0bb..7ddf0c7eeca 100644 --- a/services/fileuploader/fileuploader.go +++ b/services/fileuploader/fileuploader.go @@ -6,8 
+6,9 @@ import ( "sync" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" backendconfig "github.com/rudderlabs/rudder-server/backend-config" - "github.com/rudderlabs/rudder-server/services/filemanager" + "github.com/rudderlabs/rudder-server/utils/filemanagerutil" ) type StorageSettings struct { @@ -76,7 +77,7 @@ func (p *provider) GetFileManager(workspaceID string) (filemanager.FileManager, if err != nil { return nil, err } - return filemanager.DefaultFileManagerFactory.New(&filemanager.SettingsT{ + return filemanager.New(&filemanager.Settings{ Provider: settings.Bucket.Type, Config: settings.Bucket.Config, }) @@ -142,7 +143,7 @@ type defaultProvider struct{} func (*defaultProvider) GetFileManager(_ string) (filemanager.FileManager, error) { defaultConfig := getDefaultBucket(context.Background(), config.GetString("JOBS_BACKUP_STORAGE_PROVIDER", "S3")) - return filemanager.DefaultFileManagerFactory.New(&filemanager.SettingsT{ + return filemanager.New(&filemanager.Settings{ Provider: defaultConfig.Type, Config: defaultConfig.Config, }) @@ -161,7 +162,7 @@ func (*defaultProvider) GetStoragePreferences(_ string) (backendconfig.StoragePr func getDefaultBucket(ctx context.Context, provider string) backendconfig.StorageBucket { return backendconfig.StorageBucket{ Type: provider, - Config: filemanager.GetProviderConfigFromEnv(ctx, provider), + Config: filemanager.GetProviderConfigFromEnv(filemanagerutil.ProviderConfigOpts(ctx, provider, config.Default)), } } diff --git a/services/fileuploader/fileuploader_test.go b/services/fileuploader/fileuploader_test.go index 4175911537a..47a639157ae 100644 --- a/services/fileuploader/fileuploader_test.go +++ b/services/fileuploader/fileuploader_test.go @@ -118,11 +118,11 @@ func TestFileUploaderUpdatingWithConfigBackend(t *testing.T) { fm1, err := fileUploaderProvider.GetFileManager("testWorkspaceId-1") Expect(err).To(BeNil()) - Expect(fm1.GetConfiguredPrefix()).To(Equal("fullStoragePrefixWithNoTTL")) + Expect(fm1.Prefix()).To(Equal("fullStoragePrefixWithNoTTL")) fm3, err := fileUploaderProvider.GetFileManager("testWorkspaceId-3") Expect(err).To(BeNil()) - Expect(fm3.GetConfiguredPrefix()).To(Equal("defaultPrefixWithStorageTTL")) + Expect(fm3.Prefix()).To(Equal("defaultPrefixWithStorageTTL")) fm0, err := fileUploaderProvider.GetFileManager("testWorkspaceId-0") Expect(err).To(Equal(fmt.Errorf(noStorageForWorkspaceErrorString, "testWorkspaceId-0"))) diff --git a/services/pgnotifier/pgnotifier.go b/services/pgnotifier/pgnotifier.go index d7fbabb2504..609b9f1b8a9 100644 --- a/services/pgnotifier/pgnotifier.go +++ b/services/pgnotifier/pgnotifier.go @@ -18,6 +18,7 @@ import ( "github.com/rudderlabs/rudder-server/rruntime" migrator "github.com/rudderlabs/rudder-server/services/sql-migrator" "github.com/rudderlabs/rudder-server/utils/misc" + sqlmiddleware "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" whUtils "github.com/rudderlabs/rudder-server/warehouse/utils" ) @@ -58,7 +59,7 @@ func Init() { type PGNotifier struct { URI string - db *sql.DB + db *sqlmiddleware.DB workspaceIdentifier string } @@ -141,7 +142,10 @@ func New(workspaceIdentifier, fallbackConnectionInfo string) (notifier PGNotifie pgNotifierClaimUpdateFailed = whUtils.NewCounterStat("pgnotifier_claim_update_failed", pgNotifierModuleTag) notifier = PGNotifier{ - db: dbHandle, + db: sqlmiddleware.New( + dbHandle, + sqlmiddleware.WithQueryTimeout(config.GetDuration("Warehouse.pgNotifierQueryTimeout", 5, time.Minute)), 
+ ), URI: connectionInfo, workspaceIdentifier: workspaceIdentifier, } @@ -150,7 +154,7 @@ func New(workspaceIdentifier, fallbackConnectionInfo string) (notifier PGNotifie } func (notifier *PGNotifier) GetDBHandle() *sql.DB { - return notifier.db + return notifier.db.DB } func (notifier *PGNotifier) ClearJobs(ctx context.Context) (err error) { @@ -626,7 +630,7 @@ func (notifier *PGNotifier) setupQueue() (err error) { pkgLogger.Infof("PgNotifier: Creating Job Queue Tables ") m := &migrator.Migrator{ - Handle: notifier.db, + Handle: notifier.GetDBHandle(), MigrationsTable: "pg_notifier_queue_migrations", ShouldForceSetLowerVersion: config.GetBool("SQLMigrator.forceSetLowerVersion", true), } @@ -648,7 +652,7 @@ func GetCurrentSQLTimestamp() string { // which were left behind by dead workers in executing state func (notifier *PGNotifier) RunMaintenanceWorker(ctx context.Context) error { maintenanceWorkerLockID := murmur3.Sum64([]byte(queueName)) - maintenanceWorkerLock, err := pglock.NewLock(ctx, int64(maintenanceWorkerLockID), notifier.db) + maintenanceWorkerLock, err := pglock.NewLock(ctx, int64(maintenanceWorkerLockID), notifier.GetDBHandle()) if err != nil { return err } diff --git a/services/streammanager/bqstream/bqstreammanager.go b/services/streammanager/bqstream/bqstreammanager.go index 2d8f3e21536..26a13e8afdd 100644 --- a/services/streammanager/bqstream/bqstreammanager.go +++ b/services/streammanager/bqstream/bqstreammanager.go @@ -15,10 +15,10 @@ import ( gbq "google.golang.org/api/bigquery/v2" "google.golang.org/api/option" + "github.com/rudderlabs/rudder-go-kit/googleutil" "github.com/rudderlabs/rudder-go-kit/logger" backendconfig "github.com/rudderlabs/rudder-server/backend-config" "github.com/rudderlabs/rudder-server/services/streammanager/common" - "github.com/rudderlabs/rudder-server/utils/googleutils" ) type Config struct { @@ -88,9 +88,9 @@ func NewProducer(destination *backendconfig.DestinationT, o common.Opts) (*BQStr gbq.BigqueryInsertdataScope, }...), } - if !googleutils.ShouldSkipCredentialsInit(config.Credentials) { + if !googleutil.ShouldSkipCredentialsInit(config.Credentials) { confCreds := []byte(config.Credentials) - if err = googleutils.CompatibleGoogleCredentialsJSON(confCreds); err != nil { + if err = googleutil.CompatibleGoogleCredentialsJSON(confCreds); err != nil { return nil, createErr(err, "incompatible credentials") } opts = append(opts, option.WithCredentialsJSON(confCreds)) diff --git a/services/streammanager/eventbridge/eventbridgemanager.go b/services/streammanager/eventbridge/eventbridgemanager.go index 455965e6fd3..a61d4dd4069 100644 --- a/services/streammanager/eventbridge/eventbridgemanager.go +++ b/services/streammanager/eventbridge/eventbridgemanager.go @@ -9,6 +9,7 @@ import ( "github.com/aws/aws-sdk-go/service/eventbridge" + "github.com/rudderlabs/rudder-go-kit/awsutil" "github.com/rudderlabs/rudder-go-kit/logger" backendconfig "github.com/rudderlabs/rudder-server/backend-config" "github.com/rudderlabs/rudder-server/services/streammanager/common" @@ -35,7 +36,7 @@ func NewProducer(destination *backendconfig.DestinationT, o common.Opts) (*Event if err != nil { return nil, err } - awsSession, err := awsutils.CreateSession(sessionConfig) + awsSession, err := awsutil.CreateSession(sessionConfig) if err != nil { return nil, err } diff --git a/services/streammanager/firehose/firehosemanager.go b/services/streammanager/firehose/firehosemanager.go index be3ace7e8ba..d7957b4ce74 100644 --- a/services/streammanager/firehose/firehosemanager.go +++ 
b/services/streammanager/firehose/firehosemanager.go @@ -10,6 +10,7 @@ import ( "github.com/aws/aws-sdk-go/service/firehose" "github.com/tidwall/gjson" + "github.com/rudderlabs/rudder-go-kit/awsutil" "github.com/rudderlabs/rudder-go-kit/logger" backendconfig "github.com/rudderlabs/rudder-server/backend-config" "github.com/rudderlabs/rudder-server/services/streammanager/common" @@ -36,7 +37,7 @@ func NewProducer(destination *backendconfig.DestinationT, o common.Opts) (*FireH if err != nil { return nil, err } - awsSession, err := awsutils.CreateSession(sessionConfig) + awsSession, err := awsutil.CreateSession(sessionConfig) if err != nil { return nil, err } diff --git a/services/streammanager/googlepubsub/googlepubsubmanager.go b/services/streammanager/googlepubsub/googlepubsubmanager.go index 245bd1deb76..35f1fea5619 100644 --- a/services/streammanager/googlepubsub/googlepubsubmanager.go +++ b/services/streammanager/googlepubsub/googlepubsubmanager.go @@ -14,10 +14,10 @@ import ( "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/status" + "github.com/rudderlabs/rudder-go-kit/googleutil" "github.com/rudderlabs/rudder-go-kit/logger" backendconfig "github.com/rudderlabs/rudder-server/backend-config" "github.com/rudderlabs/rudder-server/services/streammanager/common" - "github.com/rudderlabs/rudder-server/utils/googleutils" ) type Config struct { @@ -70,9 +70,9 @@ func NewProducer(destination *backendconfig.DestinationT, o common.Opts) (*Googl option.WithEndpoint(config.TestConfig.Endpoint), } options = append(options, opts...) - } else if !googleutils.ShouldSkipCredentialsInit(config.Credentials) { // Test configuration requires a custom endpoint + } else if !googleutil.ShouldSkipCredentialsInit(config.Credentials) { // Test configuration requires a custom endpoint credsBytes := []byte(config.Credentials) - if err = googleutils.CompatibleGoogleCredentialsJSON(credsBytes); err != nil { + if err = googleutil.CompatibleGoogleCredentialsJSON(credsBytes); err != nil { return nil, err } options = append(options, option.WithCredentialsJSON(credsBytes)) diff --git a/services/streammanager/kinesis/kinesismanager.go b/services/streammanager/kinesis/kinesismanager.go index 8e08ae0d05f..48c10bb71b4 100644 --- a/services/streammanager/kinesis/kinesismanager.go +++ b/services/streammanager/kinesis/kinesismanager.go @@ -10,6 +10,7 @@ import ( "github.com/aws/aws-sdk-go/service/kinesis" "github.com/tidwall/gjson" + "github.com/rudderlabs/rudder-go-kit/awsutil" "github.com/rudderlabs/rudder-go-kit/logger" backendconfig "github.com/rudderlabs/rudder-server/backend-config" "github.com/rudderlabs/rudder-server/services/streammanager/common" @@ -42,7 +43,7 @@ func NewProducer(destination *backendconfig.DestinationT, o common.Opts) (*Kines if err != nil { return nil, err } - awsSession, err := awsutils.CreateSession(sessionConfig) + awsSession, err := awsutil.CreateSession(sessionConfig) if err != nil { return nil, err } diff --git a/services/streammanager/lambda/lambdamanager.go b/services/streammanager/lambda/lambdamanager.go index b3af051b15e..17f957b0ad7 100644 --- a/services/streammanager/lambda/lambdamanager.go +++ b/services/streammanager/lambda/lambdamanager.go @@ -9,6 +9,7 @@ import ( jsoniter "github.com/json-iterator/go" "github.com/mitchellh/mapstructure" + "github.com/rudderlabs/rudder-go-kit/awsutil" "github.com/rudderlabs/rudder-go-kit/logger" backendconfig "github.com/rudderlabs/rudder-server/backend-config" "github.com/rudderlabs/rudder-server/services/streammanager/common" @@ 
-49,7 +50,7 @@ func NewProducer(destination *backendconfig.DestinationT, o common.Opts) (*Lambd if err != nil { return nil, err } - awsSession, err := awsutils.CreateSession(sessionConfig) + awsSession, err := awsutil.CreateSession(sessionConfig) if err != nil { return nil, err } diff --git a/services/streammanager/personalize/personalizemanager.go b/services/streammanager/personalize/personalizemanager.go index 185fb49bd67..984ea37e0c0 100644 --- a/services/streammanager/personalize/personalizemanager.go +++ b/services/streammanager/personalize/personalizemanager.go @@ -9,6 +9,7 @@ import ( "github.com/aws/aws-sdk-go/service/personalizeevents" "github.com/tidwall/gjson" + "github.com/rudderlabs/rudder-go-kit/awsutil" "github.com/rudderlabs/rudder-go-kit/logger" backendconfig "github.com/rudderlabs/rudder-server/backend-config" "github.com/rudderlabs/rudder-server/services/streammanager/common" @@ -36,7 +37,7 @@ func NewProducer(destination *backendconfig.DestinationT, o common.Opts) (*Perso if err != nil { return nil, err } - awsSession, err := awsutils.CreateSession(sessionConfig) + awsSession, err := awsutil.CreateSession(sessionConfig) if err != nil { return nil, err } diff --git a/sql/migrations/warehouse/000023_alter_wh_add_index_wh_load_files.up.sql b/sql/migrations/warehouse/000023_alter_wh_add_index_wh_load_files.up.sql new file mode 100644 index 00000000000..106b59215e5 --- /dev/null +++ b/sql/migrations/warehouse/000023_alter_wh_add_index_wh_load_files.up.sql @@ -0,0 +1 @@ +CREATE INDEX IF NOT EXISTS wh_load_files_staging_file_id_index ON wh_load_files (staging_file_id); diff --git a/utils/awsutils/session.go b/utils/awsutils/session.go index 8ad034a93cb..b980bfd72aa 100644 --- a/utils/awsutils/session.go +++ b/utils/awsutils/session.go @@ -2,132 +2,17 @@ package awsutils import ( "errors" - "fmt" - "net/http" - "strings" "time" - "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/aws/credentials" - "github.com/aws/aws-sdk-go/aws/credentials/stscreds" - "github.com/aws/aws-sdk-go/aws/session" - "github.com/mitchellh/mapstructure" - + "github.com/rudderlabs/rudder-go-kit/awsutil" backendconfig "github.com/rudderlabs/rudder-server/backend-config" ) -// Some AWS destinations are using SecretAccessKey instead of accessKey -type SessionConfig struct { - Region string `mapstructure:"region"` - AccessKeyID string `mapstructure:"accessKeyID"` - AccessKey string `mapstructure:"accessKey"` - SecretAccessKey string `mapstructure:"secretAccessKey"` - RoleBasedAuth bool `mapstructure:"roleBasedAuth"` - IAMRoleARN string `mapstructure:"iamRoleARN"` - ExternalID string `mapstructure:"externalID"` - WorkspaceID string `mapstructure:"workspaceID"` - Endpoint *string `mapstructure:"endpoint"` - S3ForcePathStyle *bool `mapstructure:"s3ForcePathStyle"` - DisableSSL *bool `mapstructure:"disableSSL"` - Service string `mapstructure:"service"` - Timeout *time.Duration `mapstructure:"timeout"` -} - -func createRoleSessionName(serviceName string) string { - return fmt.Sprintf("rudderstack-aws-%s-access", strings.ToLower(strings.ReplaceAll(serviceName, " ", "-"))) -} - -func getHttpClient(config *SessionConfig) *http.Client { - var httpClient *http.Client - if config.Timeout != nil { - httpClient = &http.Client{ - Timeout: *config.Timeout, - } - } - return httpClient -} - -func createDefaultSession(config *SessionConfig) (*session.Session, error) { - return session.NewSession(&aws.Config{ - HTTPClient: getHttpClient(config), - Region: aws.String(config.Region), - }) -} - -func 
createCredentialsForRole(config *SessionConfig) (*credentials.Credentials, error) { - if config.ExternalID == "" { - return nil, errors.New("externalID is required for IAM role") - } - hostSession, err := createDefaultSession(config) - if err != nil { - return nil, err - } - return stscreds.NewCredentials(hostSession, config.IAMRoleARN, - func(p *stscreds.AssumeRoleProvider) { - p.ExternalID = aws.String(config.ExternalID) - p.RoleSessionName = createRoleSessionName(config.Service) - }), err -} - -func CreateSession(config *SessionConfig) (*session.Session, error) { - var ( - awsCredentials *credentials.Credentials - err error - ) - if config.RoleBasedAuth { - awsCredentials, err = createCredentialsForRole(config) - } else if config.AccessKey != "" && config.AccessKeyID != "" { - awsCredentials, err = credentials.NewStaticCredentials(config.AccessKeyID, config.AccessKey, ""), nil - } - if err != nil { - return nil, err - } - return session.NewSession(&aws.Config{ - HTTPClient: getHttpClient(config), - Region: aws.String(config.Region), - CredentialsChainVerboseErrors: aws.Bool(true), - Credentials: awsCredentials, - Endpoint: config.Endpoint, - S3ForcePathStyle: config.S3ForcePathStyle, - DisableSSL: config.DisableSSL, - }) -} - -func isRoleBasedAuthFieldExist(config map[string]interface{}) bool { - _, ok := config["roleBasedAuth"].(bool) - return ok -} - -func NewSimpleSessionConfig(config map[string]interface{}, serviceName string) (*SessionConfig, error) { - if config == nil { - return nil, errors.New("config should not be nil") - } - sessionConfig := SessionConfig{} - if err := mapstructure.Decode(config, &sessionConfig); err != nil { - return nil, fmt.Errorf("unable to populate session config using destinationConfig: %w", err) - } - - if !isRoleBasedAuthFieldExist(config) { - sessionConfig.RoleBasedAuth = sessionConfig.IAMRoleARN != "" - } - - if sessionConfig.IAMRoleARN == "" { - sessionConfig.RoleBasedAuth = false - } - - // Some AWS destinations are using SecretAccessKey instead of accessKey - if sessionConfig.SecretAccessKey != "" { - sessionConfig.AccessKey = sessionConfig.SecretAccessKey - } - sessionConfig.Service = serviceName - return &sessionConfig, nil -} - -func NewSimpleSessionConfigForDestination(destination *backendconfig.DestinationT, serviceName string) (*SessionConfig, error) { +func NewSimpleSessionConfigForDestination(destination *backendconfig.DestinationT, serviceName string) (*awsutil.SessionConfig, error) { if destination == nil { return nil, errors.New("destination should not be nil") } - sessionConfig, err := NewSimpleSessionConfig(destination.Config, serviceName) + sessionConfig, err := awsutil.NewSimpleSessionConfig(destination.Config, serviceName) if err != nil { return nil, err } @@ -142,7 +27,7 @@ func NewSimpleSessionConfigForDestination(destination *backendconfig.Destination return sessionConfig, nil } -func NewSessionConfigForDestination(destination *backendconfig.DestinationT, timeout time.Duration, serviceName string) (*SessionConfig, error) { +func NewSessionConfigForDestination(destination *backendconfig.DestinationT, timeout time.Duration, serviceName string) (*awsutil.SessionConfig, error) { sessionConfig, err := NewSimpleSessionConfigForDestination(destination, serviceName) if err != nil { return nil, err diff --git a/utils/awsutils/session_test.go b/utils/awsutils/session_test.go index 81da5510453..1a3ba882af2 100644 --- a/utils/awsutils/session_test.go +++ b/utils/awsutils/session_test.go @@ -6,6 +6,7 @@ import ( 
"github.com/stretchr/testify/assert" + "github.com/rudderlabs/rudder-go-kit/awsutil" backendconfig "github.com/rudderlabs/rudder-server/backend-config" ) @@ -39,7 +40,7 @@ func TestNewSessionConfigWithAccessKey(t *testing.T) { sessionConfig, err := NewSessionConfigForDestination(&destinationWithAccessKey, httpTimeout, serviceName) assert.Nil(t, err) assert.NotNil(t, sessionConfig) - assert.Equal(t, *sessionConfig, SessionConfig{ + assert.Equal(t, *sessionConfig, awsutil.SessionConfig{ Region: someRegion, AccessKeyID: someAccessKeyID, AccessKey: someAccessKey, @@ -61,7 +62,7 @@ func TestNewSessionConfigWithSecretAccessKey(t *testing.T) { sessionConfig, err := NewSessionConfigForDestination(&destinationWithSecretAccessKey, httpTimeout, serviceName) assert.Nil(t, err) assert.NotNil(t, sessionConfig) - assert.Equal(t, *sessionConfig, SessionConfig{ + assert.Equal(t, *sessionConfig, awsutil.SessionConfig{ Region: someRegion, AccessKeyID: someAccessKeyID, AccessKey: someSecretAccessKey, @@ -84,7 +85,7 @@ func TestNewSessionConfigWithRole(t *testing.T) { sessionConfig, err := NewSessionConfigForDestination(&destinationWithRole, httpTimeout, serviceName) assert.Nil(t, err) assert.NotNil(t, sessionConfig) - assert.Equal(t, *sessionConfig, SessionConfig{ + assert.Equal(t, *sessionConfig, awsutil.SessionConfig{ Region: someRegion, RoleBasedAuth: true, IAMRoleARN: someIAMRoleARN, @@ -106,7 +107,7 @@ func TestNewSessionConfigWithRole(t *testing.T) { sessionConfig, err := NewSessionConfigForDestination(&destinationWithRole, httpTimeout, serviceName) assert.Nil(t, err) assert.NotNil(t, sessionConfig) - assert.Equal(t, *sessionConfig, SessionConfig{ + assert.Equal(t, *sessionConfig, awsutil.SessionConfig{ Region: someRegion, RoleBasedAuth: false, IAMRoleARN: someIAMRoleARN, @@ -144,7 +145,7 @@ func TestNewSessionConfigWithRoleBasedAuth(t *testing.T) { sessionConfig, err := NewSessionConfigForDestination(&destinationWithRole, httpTimeout, serviceName) assert.Nil(t, err) assert.NotNil(t, sessionConfig) - assert.Equal(t, *sessionConfig, SessionConfig{ + assert.Equal(t, *sessionConfig, awsutil.SessionConfig{ Region: someRegion, RoleBasedAuth: true, IAMRoleARN: someIAMRoleARN, @@ -199,52 +200,52 @@ func TestNewSessionConfigWithBadDestination(t *testing.T) { func TestCreateSessionWithRole(t *testing.T) { t.Run("With RoleBasedAuth but without ExternalID", func(t *testing.T) { - sessionConfig := SessionConfig{ + sessionConfig := awsutil.SessionConfig{ Region: someRegion, RoleBasedAuth: true, IAMRoleARN: someIAMRoleARN, Timeout: &httpTimeout, } - awsSession, err := CreateSession(&sessionConfig) + awsSession, err := awsutil.CreateSession(&sessionConfig) assert.NotNil(t, err) assert.Nil(t, awsSession) assert.EqualError(t, err, "externalID is required for IAM role") }) t.Run("With RoleBasedAuth false and without ExternalID", func(t *testing.T) { - sessionConfig := SessionConfig{ + sessionConfig := awsutil.SessionConfig{ Region: someRegion, RoleBasedAuth: false, IAMRoleARN: someIAMRoleARN, Timeout: &httpTimeout, } - awsSession, err := CreateSession(&sessionConfig) + awsSession, err := awsutil.CreateSession(&sessionConfig) assert.Nil(t, err) assert.NotNil(t, awsSession) }) t.Run("With RoleBasedAuth true auth and ExternalID", func(t *testing.T) { - sessionConfig := SessionConfig{ + sessionConfig := awsutil.SessionConfig{ Region: someRegion, RoleBasedAuth: true, ExternalID: someWorkspaceID, IAMRoleARN: someIAMRoleARN, Timeout: &httpTimeout, } - awsSession, err := CreateSession(&sessionConfig) + awsSession, err := 
awsutil.CreateSession(&sessionConfig) assert.Nil(t, err) assert.NotNil(t, awsSession) }) } func TestCreateSessionWithAccessKeys(t *testing.T) { - sessionConfig := SessionConfig{ + sessionConfig := awsutil.SessionConfig{ Region: destinationWithAccessKey.Config["region"].(string), AccessKeyID: destinationWithAccessKey.Config["accessKeyID"].(string), AccessKey: destinationWithAccessKey.Config["accessKey"].(string), Timeout: &httpTimeout, } - awsSession, err := CreateSession(&sessionConfig) + awsSession, err := awsutil.CreateSession(&sessionConfig) assert.Nil(t, err) assert.NotNil(t, awsSession) assert.NotNil(t, awsSession.Config.Credentials) @@ -253,11 +254,11 @@ func TestCreateSessionWithAccessKeys(t *testing.T) { } func TestCreateSessionWithoutAccessKeysOrRole(t *testing.T) { - sessionConfig := SessionConfig{ + sessionConfig := awsutil.SessionConfig{ Region: "someRegion", Timeout: &httpTimeout, } - awsSession, err := CreateSession(&sessionConfig) + awsSession, err := awsutil.CreateSession(&sessionConfig) assert.Nil(t, err) assert.NotNil(t, awsSession) assert.NotNil(t, awsSession.Config.Credentials) @@ -266,10 +267,10 @@ func TestCreateSessionWithoutAccessKeysOrRole(t *testing.T) { } func TestCreateSessionWithoutTimeout(t *testing.T) { - sessionConfig := SessionConfig{ + sessionConfig := awsutil.SessionConfig{ Region: "someRegion", } - awsSession, err := CreateSession(&sessionConfig) + awsSession, err := awsutil.CreateSession(&sessionConfig) assert.Nil(t, err) assert.NotNil(t, awsSession) assert.NotNil(t, awsSession.Config.Credentials) diff --git a/utils/filemanagerutil/filemanagerutil.go b/utils/filemanagerutil/filemanagerutil.go new file mode 100644 index 00000000000..81bce4e8871 --- /dev/null +++ b/utils/filemanagerutil/filemanagerutil.go @@ -0,0 +1,29 @@ +package filemanagerutil + +import ( + "context" + + "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" + backendconfig "github.com/rudderlabs/rudder-server/backend-config" +) + +func GetProviderConfigForBackupsFromEnv(ctx context.Context, config *config.Config) map[string]interface{} { + return filemanager.GetProviderConfigFromEnv(ProviderConfigOpts(ctx, + config.GetString("JOBS_BACKUP_STORAGE_PROVIDER", "S3"), + config, + )) +} + +func ProviderConfigOpts(ctx context.Context, provider string, config *config.Config) filemanager.ProviderConfigOpts { + return filemanager.ProviderConfigOpts{ + Provider: provider, + Bucket: config.GetString("JOBS_BACKUP_BUCKET", "rudder-saas"), + Prefix: config.GetString("JOBS_BACKUP_PREFIX", ""), + Config: config, + ExternalIDSupplier: func() string { + backendconfig.DefaultBackendConfig.WaitForConfig(ctx) + return backendconfig.DefaultBackendConfig.Identity().ID() + }, + } +} diff --git a/utils/googleutils/googleutils.go b/utils/googleutils/googleutils.go deleted file mode 100644 index 721053ad035..00000000000 --- a/utils/googleutils/googleutils.go +++ /dev/null @@ -1,46 +0,0 @@ -package googleutils - -import ( - "fmt" - - "golang.org/x/oauth2/google" - - "github.com/rudderlabs/rudder-go-kit/config" -) - -const ( - EMPTY_CREDS = "{}" - WI_CONFIG_KEY = "workloadIdentity" -) - -func CompatibleGoogleCredentialsJSON(jsonKey []byte) error { - // google.ConfigFromJSON checks if jsonKey is a valid console client_credentials.json - // which we won't support so "err == nil" means it is bad for us. 
- if _, err := google.ConfigFromJSON(jsonKey); err == nil { - return fmt.Errorf("google developers console client_credentials.json file is not supported") - } - return nil -} - -func ShouldSkipCredentialsInit(credentials string) bool { - return isGKEEnabledWorkload() && isCredentialsStringEmpty(credentials) -} - -/* -IsCredentialsStringEmpty checks for empty credentials. -The credentials are deemed to be empty when either the field credentials is -sent as empty string or when the field is set with "{}" - -Note: This is true only for workload identity enabled rudderstack data-plane deployments -*/ -func isCredentialsStringEmpty(credentials string) bool { - return (credentials == "" || credentials == EMPTY_CREDS) -} - -/* -IsGKEEnabledWorkload checks against rudder-server configuration to find if workload identity for google destinations is enabled -*/ -func isGKEEnabledWorkload() bool { - workloadType := config.GetString(fmt.Sprintf("%s.type", WI_CONFIG_KEY), "") - return workloadType == "GKE" -} diff --git a/utils/misc/misc.go b/utils/misc/misc.go index 1eed86d009c..227cf6bba3f 100644 --- a/utils/misc/misc.go +++ b/utils/misc/misc.go @@ -29,8 +29,6 @@ import ( "time" "unicode" - jsoniter "github.com/json-iterator/go" - "github.com/araddon/dateparse" "github.com/bugsnag/bugsnag-go/v2" "github.com/cenkalti/backoff" @@ -50,7 +48,6 @@ var ( AppStartTime int64 errorStorePath string reservedFolderPaths []*RFP - jsonfast = jsoniter.ConfigCompatibleWithStandardLibrary notifyOnce sync.Once regexGwHa = regexp.MustCompile(`^.*-gw-ha-\d+-\w+-\w+$`) @@ -224,18 +221,6 @@ func GetRudderEventVal(key string, rudderEvent types.SingularEventT) (interface{ return rudderVal, true } -// ParseRudderEventBatch looks for the batch structure inside event -func ParseRudderEventBatch(eventPayload json.RawMessage) ([]types.SingularEventT, bool) { - var gatewayBatchEvent types.GatewayBatchRequestT - err := jsonfast.Unmarshal(eventPayload, &gatewayBatchEvent) - if err != nil { - pkgLogger.Debug("json parsing of event payload failed ", string(eventPayload)) - return nil, false - } - - return gatewayBatchEvent.Batch, true -} - // GetRudderID return the UserID from the object func GetRudderID(event types.SingularEventT) (string, bool) { userID, ok := GetRudderEventVal("rudderId", event) diff --git a/utils/types/types.go b/utils/types/types.go index 6f5789ee2c7..22b78ba829a 100644 --- a/utils/types/types.go +++ b/utils/types/types.go @@ -17,9 +17,12 @@ type SingularEventWithReceivedAt struct { ReceivedAt time.Time } -// GatewayBatchRequestT batch request structure -type GatewayBatchRequestT struct { - Batch []SingularEventT `json:"batch"` +// GatewayBatchRequest batch request structure +type GatewayBatchRequest struct { + Batch []SingularEventT `json:"batch"` + WriteKey string `json:"writeKey"` + RequestIP string `json:"requestIP"` + ReceivedAt time.Time `json:"receivedAt"` } // UserSuppression is interface to access Suppress user feature diff --git a/warehouse/admin.go b/warehouse/admin.go index 3edd5c1072f..e86ef090608 100644 --- a/warehouse/admin.go +++ b/warehouse/admin.go @@ -79,6 +79,9 @@ func (*WarehouseAdmin) Query(s QueryInput, reply *warehouseutils.QueryResult) er if err != nil { return err } + whManager.SetConnectionTimeout(warehouseutils.GetConnectionTimeout( + warehouse.Type, warehouse.Destination.ID, + )) client, err := whManager.Connect(context.TODO(), warehouse) if err != nil { return err diff --git a/warehouse/api.go b/warehouse/api.go index fbdd56f9ef2..f9cd328966c 100644 --- a/warehouse/api.go +++ 
b/warehouse/api.go @@ -21,11 +21,12 @@ import ( "github.com/tidwall/gjson" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" backendconfig "github.com/rudderlabs/rudder-server/backend-config" "github.com/rudderlabs/rudder-server/controlplane" proto "github.com/rudderlabs/rudder-server/proto/warehouse" - "github.com/rudderlabs/rudder-server/services/filemanager" + "github.com/rudderlabs/rudder-server/utils/filemanagerutil" "github.com/rudderlabs/rudder-server/utils/misc" "github.com/rudderlabs/rudder-server/utils/timeutil" "github.com/rudderlabs/rudder-server/utils/types/deployment" @@ -139,7 +140,7 @@ func InitWarehouseAPI(dbHandle *sql.DB, log logger.Logger) error { UploadAPI = UploadAPIT{ enabled: true, dbHandle: dbHandle, - warehouseDBHandle: NewWarehouseDB(dbHandle), + warehouseDBHandle: NewWarehouseDB(wrappedDBHandle), log: log, isMultiWorkspace: isMultiWorkspace, connectionManager: &controlplane.ConnectionManager{ @@ -245,7 +246,7 @@ func (uploadsReq *UploadsReq) TriggerWhUploads(ctx context.Context) (response *p return } if pendingUploadCount == int64(0) { - pendingStagingFileCount, err = repo.NewStagingFiles(dbHandle).CountPendingForDestination(ctx, uploadsReq.DestinationID) + pendingStagingFileCount, err = repo.NewStagingFiles(wrappedDBHandle).CountPendingForDestination(ctx, uploadsReq.DestinationID) if err != nil { return } @@ -380,7 +381,7 @@ func (uploadReq *UploadReq) TriggerWHUpload(ctx context.Context) (response *prot return } - upload, err := repo.NewUploads(uploadReq.API.dbHandle).Get(ctx, uploadReq.UploadId) + upload, err := repo.NewUploads(uploadReq.API.warehouseDBHandle.handle).Get(ctx, uploadReq.UploadId) if err == model.ErrUploadNotFound { return &proto.TriggerWhUploadsResponse{ Message: NoSuchSync, @@ -400,7 +401,7 @@ func (uploadReq *UploadReq) TriggerWHUpload(ctx context.Context) (response *prot uploadJobT := UploadJob{ upload: upload, - dbHandle: uploadReq.API.dbHandle, + dbHandle: uploadReq.API.warehouseDBHandle.handle, now: timeutil.Now, ctx: ctx, } @@ -840,8 +841,7 @@ func validateObjectStorage(ctx context.Context, request *ObjectStorageValidation return fmt.Errorf("unable to create file manager settings: \n%s", err.Error()) } - factory := &filemanager.FileManagerFactoryT{} - fileManager, err := factory.New(settings) + fileManager, err := filemanager.New(settings) if err != nil { return fmt.Errorf("unable to create file manager: \n%s", err.Error()) } @@ -889,8 +889,8 @@ func validateObjectStorage(ctx context.Context, request *ObjectStorageValidation return nil } -func getFileManagerSettings(ctx context.Context, provider string, inputConfig map[string]interface{}) (*filemanager.SettingsT, error) { - settings := &filemanager.SettingsT{ +func getFileManagerSettings(ctx context.Context, provider string, inputConfig map[string]interface{}) (*filemanager.Settings, error) { + settings := &filemanager.Settings{ Provider: provider, Config: inputConfig, } @@ -903,8 +903,8 @@ func getFileManagerSettings(ctx context.Context, provider string, inputConfig ma // overrideWithEnv overrides the config keys in the fileManager settings // with fallback values pulled from env. Only supported for S3 for now. 
-func overrideWithEnv(ctx context.Context, settings *filemanager.SettingsT) error { - envConfig := filemanager.GetProviderConfigFromEnv(ctx, settings.Provider) +func overrideWithEnv(ctx context.Context, settings *filemanager.Settings) error { + envConfig := filemanager.GetProviderConfigFromEnv(filemanagerutil.ProviderConfigOpts(ctx, settings.Provider, config.Default)) if settings.Provider == "S3" { ifNotExistThenSet("prefix", envConfig["prefix"], settings.Config) diff --git a/warehouse/api_test.go b/warehouse/api_test.go index eb176f22929..b51f07ab154 100644 --- a/warehouse/api_test.go +++ b/warehouse/api_test.go @@ -6,7 +6,7 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - "github.com/rudderlabs/rudder-server/services/filemanager" + "github.com/rudderlabs/rudder-go-kit/filemanager" ) var _ = Describe("warehouse_api", func() { @@ -14,7 +14,7 @@ var _ = Describe("warehouse_api", func() { ctx := context.Background() It("Should fallback to backup credentials when fields missing(as of now backup only supported for s3)", func() { - fm := &filemanager.SettingsT{ + fm := &filemanager.Settings{ Provider: "AZURE_BLOB", Config: map[string]interface{}{"containerName": "containerName1", "prefix": "prefix1", "accountKey": "accountKey1"}, } diff --git a/warehouse/archive/archiver.go b/warehouse/archive/archiver.go index fdbad519126..a60c03b6aa3 100644 --- a/warehouse/archive/archiver.go +++ b/warehouse/archive/archiver.go @@ -16,9 +16,10 @@ import ( "github.com/tidwall/sjson" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-server/services/archiver/tablearchiver" - "github.com/rudderlabs/rudder-server/services/filemanager" + "github.com/rudderlabs/rudder-server/utils/filemanagerutil" "github.com/rudderlabs/rudder-server/utils/misc" "github.com/rudderlabs/rudder-server/utils/timeutil" "github.com/rudderlabs/rudder-server/warehouse/internal/model" @@ -66,7 +67,7 @@ type Archiver struct { DB *sql.DB Stats stats.Stats Logger logger.Logger - FileManager filemanager.FileManagerFactory + FileManager filemanager.Factory Multitenant *multitenant.Manager } @@ -90,9 +91,9 @@ func (a *Archiver) backupRecords(ctx context.Context, args backupRecordsArgs) (b ) defer misc.RemoveFilePaths(path) - fManager, err := a.FileManager.New(&filemanager.SettingsT{ + fManager, err := a.FileManager(&filemanager.Settings{ Provider: config.GetString("JOBS_BACKUP_STORAGE_PROVIDER", "S3"), - Config: filemanager.GetProviderConfigForBackupsFromEnv(ctx), + Config: filemanagerutil.GetProviderConfigForBackupsFromEnv(ctx, config.Default), }) if err != nil { err = fmt.Errorf("error in creating a file manager for:%s. 
Error: %w", config.GetString("JOBS_BACKUP_STORAGE_PROVIDER", "S3"), err) @@ -135,7 +136,7 @@ func (a *Archiver) backupRecords(ctx context.Context, args backupRecordsArgs) (b } func (a *Archiver) deleteFilesInStorage(ctx context.Context, locations []string) error { - fManager, err := a.FileManager.New(&filemanager.SettingsT{ + fManager, err := a.FileManager(&filemanager.Settings{ Provider: warehouseutils.S3, Config: misc.GetRudderObjectStorageConfig(""), }) @@ -144,7 +145,7 @@ func (a *Archiver) deleteFilesInStorage(ctx context.Context, locations []string) return err } - err = fManager.DeleteObjects(ctx, locations) + err = fManager.Delete(ctx, locations) if err != nil { a.Logger.Errorf("Error in deleting objects in Rudder S3: %v", err) } diff --git a/warehouse/archive/archiver_test.go b/warehouse/archive/archiver_test.go index e6704d53710..21d4efbfe88 100644 --- a/warehouse/archive/archiver_test.go +++ b/warehouse/archive/archiver_test.go @@ -19,10 +19,10 @@ import ( "github.com/ory/dockertest/v3" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-go-kit/stats/mock_stats" "github.com/rudderlabs/rudder-go-kit/testhelper/docker/resource" - "github.com/rudderlabs/rudder-server/services/filemanager" migrator "github.com/rudderlabs/rudder-server/services/sql-migrator" "github.com/rudderlabs/rudder-server/testhelper/destination" "github.com/rudderlabs/rudder-server/warehouse/archive" @@ -147,7 +147,7 @@ func TestArchiver(t *testing.T) { DB: pgResource.DB, Stats: mockStats, Logger: logger.NOP, - FileManager: filemanager.DefaultFileManagerFactory, + FileManager: filemanager.New, Multitenant: &multitenant.Manager{ DegradedWorkspaceIDs: tc.degradedWorkspaceIDs, }, diff --git a/warehouse/db.go b/warehouse/db.go index abbf30385e1..70714396498 100644 --- a/warehouse/db.go +++ b/warehouse/db.go @@ -7,6 +7,7 @@ import ( "fmt" "strings" + "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) @@ -17,10 +18,10 @@ const ( // DB encapsulate interactions of warehouse operations // with the database. 
type DB struct { - handle *sql.DB + handle *sqlquerywrapper.DB } -func NewWarehouseDB(handle *sql.DB) *DB { +func NewWarehouseDB(handle *sqlquerywrapper.DB) *DB { return &DB{handle} } diff --git a/warehouse/identities.go b/warehouse/identities.go index 8299d03eaa8..0d52f1dc2f2 100644 --- a/warehouse/identities.go +++ b/warehouse/identities.go @@ -309,14 +309,10 @@ func (wh *HandleT) initPrePopulateDestIdentitiesUpload(warehouse model.Warehouse ($1, $2, $3, $4, $5, $6 ,$7, $8, $9, $10, $11, $12, $13, $14, $15) RETURNING id `, warehouseutils.WarehouseUploadsTable) - stmt, err := wh.dbHandle.Prepare(sqlStatement) - if err != nil { - panic(fmt.Errorf("Query: %s\nfailed to prepare with Error : %w", sqlStatement, err)) - } - defer stmt.Close() now := timeutil.Now() - row := stmt.QueryRow( + row := wh.dbHandle.QueryRow( + sqlStatement, warehouse.Source.ID, warehouse.Namespace, warehouse.WorkspaceID, @@ -433,6 +429,9 @@ func (wh *HandleT) populateHistoricIdentities(ctx context.Context, warehouse mod } } + whManager.SetConnectionTimeout(warehouseutils.GetConnectionTimeout( + wh.destType, warehouse.Destination.ID, + )) err = whManager.Setup(ctx, job.warehouse, job) if err != nil { job.setUploadError(err, model.Aborted) diff --git a/warehouse/identity/identity.go b/warehouse/identity/identity.go index 336377363d7..676611892f1 100644 --- a/warehouse/identity/identity.go +++ b/warehouse/identity/identity.go @@ -20,9 +20,10 @@ import ( "github.com/lib/pq" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" - "github.com/rudderlabs/rudder-server/services/filemanager" "github.com/rudderlabs/rudder-server/utils/misc" + sqlmiddleware "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) @@ -37,18 +38,16 @@ type WarehouseManager interface { } type Identity struct { - ctx context.Context warehouse model.Warehouse - db *sql.DB + db *sqlmiddleware.DB uploader warehouseutils.Uploader uploadID int64 warehouseManager WarehouseManager downloader downloader.Downloader } -func New(ctx context.Context, warehouse model.Warehouse, db *sql.DB, uploader warehouseutils.Uploader, uploadID int64, warehouseManager WarehouseManager, loadFileDownloader downloader.Downloader) *Identity { +func New(warehouse model.Warehouse, db *sqlmiddleware.DB, uploader warehouseutils.Uploader, uploadID int64, warehouseManager WarehouseManager, loadFileDownloader downloader.Downloader) *Identity { return &Identity{ - ctx: ctx, warehouse: warehouse, db: db, uploader: uploader, @@ -74,11 +73,11 @@ func (idr *Identity) whMappingsTable() string { return warehouseutils.ToProviderCase(idr.warehouse.Destination.DestinationDefinition.Name, warehouseutils.IdentityMappingsTable) } -func (idr *Identity) applyRule(txn *sql.Tx, ruleID int64, gzWriter *misc.GZipWriter) (totalRowsModified int, err error) { +func (idr *Identity) applyRule(txn *sqlmiddleware.Tx, ruleID int64, gzWriter *misc.GZipWriter) (totalRowsModified int, err error) { sqlStatement := fmt.Sprintf(`SELECT merge_property_1_type, merge_property_1_value, merge_property_2_type, merge_property_2_value FROM %s WHERE id=%v`, idr.mergeRulesTable(), ruleID) var prop1Val, prop2Val, prop1Type, prop2Type sql.NullString - err = txn.QueryRowContext(idr.ctx, sqlStatement).Scan(&prop1Type, &prop1Val, &prop2Type, &prop2Val) + err = txn.QueryRow(sqlStatement).Scan(&prop1Type, &prop1Val, &prop2Type, &prop2Val) if err != 
nil { return } @@ -90,7 +89,7 @@ func (idr *Identity) applyRule(txn *sql.Tx, ruleID int64, gzWriter *misc.GZipWri } sqlStatement = fmt.Sprintf(`SELECT ARRAY_AGG(DISTINCT(rudder_id)) FROM %s WHERE (merge_property_type='%s' AND merge_property_value=%s) %s`, idr.mappingsTable(), prop1Type.String, misc.QuoteLiteral(prop1Val.String), additionalClause) pkgLogger.Debugf(`IDR: Fetching all rudder_id's corresponding to the merge_rule: %v`, sqlStatement) - err = txn.QueryRowContext(idr.ctx, sqlStatement).Scan(pq.Array(&rudderIDs)) + err = txn.QueryRow(sqlStatement).Scan(pq.Array(&rudderIDs)) if err != nil { pkgLogger.Errorf("IDR: Error fetching all rudder_id's corresponding to the merge_rule: %v\nwith Error: %v", sqlStatement, err) return @@ -123,7 +122,7 @@ func (idr *Identity) applyRule(txn *sql.Tx, ruleID int64, gzWriter *misc.GZipWri sqlStatement = fmt.Sprintf(`INSERT INTO %s (merge_property_type, merge_property_value, rudder_id, updated_at) VALUES (%s) %s ON CONFLICT ON CONSTRAINT %s DO NOTHING`, idr.mappingsTable(), row1Values, row2Values, warehouseutils.IdentityMappingsUniqueMappingConstraintName(idr.warehouse)) pkgLogger.Debugf(`IDR: Inserting properties from merge_rule into mappings table: %v`, sqlStatement) - _, err = txn.ExecContext(idr.ctx, sqlStatement) + _, err = txn.Exec(sqlStatement) if err != nil { pkgLogger.Errorf(`IDR: Error inserting properties from merge_rule into mappings table: %v`, err) return @@ -145,12 +144,12 @@ func (idr *Identity) applyRule(txn *sql.Tx, ruleID int64, gzWriter *misc.GZipWri quotedRudderIDs := misc.SingleQuoteLiteralJoin(rudderIDs) sqlStatement := fmt.Sprintf(`SELECT merge_property_type, merge_property_value FROM %s WHERE rudder_id IN (%v)`, idr.mappingsTable(), quotedRudderIDs) pkgLogger.Debugf(`IDR: Get all merge properties from mapping table with rudder_id's %v: %v`, quotedRudderIDs, sqlStatement) - var tableRows *sql.Rows - tableRows, err = txn.QueryContext(idr.ctx, sqlStatement) + var tableRows *sqlmiddleware.Rows + tableRows, err = txn.Query(sqlStatement) if err != nil { return } - defer tableRows.Close() + defer func() { _ = tableRows.Close() }() for tableRows.Next() { var mergePropType, mergePropVal string @@ -167,7 +166,7 @@ func (idr *Identity) applyRule(txn *sql.Tx, ruleID int64, gzWriter *misc.GZipWri sqlStatement = fmt.Sprintf(`UPDATE %s SET rudder_id='%s', updated_at='%s' WHERE rudder_id IN (%v)`, idr.mappingsTable(), newID, currentTimeString, misc.SingleQuoteLiteralJoin(rudderIDs[1:])) var res sql.Result - res, err = txn.ExecContext(idr.ctx, sqlStatement) + res, err = txn.Exec(sqlStatement) if err != nil { return } @@ -176,7 +175,7 @@ func (idr *Identity) applyRule(txn *sql.Tx, ruleID int64, gzWriter *misc.GZipWri sqlStatement = fmt.Sprintf(`INSERT INTO %s (merge_property_type, merge_property_value, rudder_id, updated_at) VALUES (%s) %s ON CONFLICT ON CONSTRAINT %s DO NOTHING`, idr.mappingsTable(), row1Values, row2Values, warehouseutils.IdentityMappingsUniqueMappingConstraintName(idr.warehouse)) pkgLogger.Debugf(`IDR: Insert new mappings into %s: %v`, idr.mappingsTable(), sqlStatement) - _, err = txn.ExecContext(idr.ctx, sqlStatement) + _, err = txn.Exec(sqlStatement) if err != nil { return } @@ -193,7 +192,7 @@ func (idr *Identity) applyRule(txn *sql.Tx, ruleID int64, gzWriter *misc.GZipWri return len(rows), err } -func (idr *Identity) addRules(txn *sql.Tx, loadFileNames []string, gzWriter *misc.GZipWriter) (ids []int64, err error) { +func (idr *Identity) addRules(txn *sqlmiddleware.Tx, loadFileNames []string, gzWriter *misc.GZipWriter) 
(ids []int64, err error) { // add rules from load files into temp table // use original table to delete redundant ones from temp table // insert from temp table into original table @@ -204,7 +203,7 @@ func (idr *Identity) addRules(txn *sql.Tx, loadFileNames []string, gzWriter *mis WITH NO DATA;`, mergeRulesStagingTable, idr.mergeRulesTable()) pkgLogger.Infof(`IDR: Creating temp table %s in postgres for loading %s: %v`, mergeRulesStagingTable, idr.mergeRulesTable(), sqlStatement) - _, err = txn.ExecContext(idr.ctx, sqlStatement) + _, err = txn.Exec(sqlStatement) if err != nil { pkgLogger.Errorf(`IDR: Error creating temp table %s in postgres: %v`, mergeRulesStagingTable, err) return @@ -257,7 +256,7 @@ func (idr *Identity) addRules(txn *sql.Tx, loadFileNames []string, gzWriter *mis // add rowID which allows us to insert in same order from staging to original merge _rules table rowID++ recordInterface[4] = rowID - _, err = stmt.ExecContext(idr.ctx, recordInterface[:]...) + _, err = stmt.Exec(recordInterface[:]...) if err != nil { pkgLogger.Errorf("IDR: Error while adding rowID to merge_rules table: %v", err) return @@ -265,7 +264,7 @@ func (idr *Identity) addRules(txn *sql.Tx, loadFileNames []string, gzWriter *mis } } - _, err = stmt.ExecContext(idr.ctx) + _, err = stmt.Exec() if err != nil { pkgLogger.Errorf(`IDR: Error bulk copy using CopyIn: %v for uploadID: %v`, err, idr.uploadID) return @@ -283,7 +282,7 @@ func (idr *Identity) addRules(txn *sql.Tx, loadFileNames []string, gzWriter *mis (original.merge_property_2_value = staging.merge_property_2_value)`, mergeRulesStagingTable, idr.mergeRulesTable()) pkgLogger.Infof(`IDR: Deleting from staging table %s using %s: %v`, mergeRulesStagingTable, idr.mergeRulesTable(), sqlStatement) - _, err = txn.ExecContext(idr.ctx, sqlStatement) + _, err = txn.Exec(sqlStatement) if err != nil { pkgLogger.Errorf(`IDR: Error deleting from staging table %s using %s: %v`, mergeRulesStagingTable, idr.mergeRulesTable(), err) return @@ -308,11 +307,12 @@ func (idr *Identity) addRules(txn *sql.Tx, loadFileNames []string, gzWriter *mis ) t ORDER BY id ASC RETURNING id`, idr.mergeRulesTable(), mergeRulesStagingTable) pkgLogger.Infof(`IDR: Inserting into %s from %s: %v`, idr.mergeRulesTable(), mergeRulesStagingTable, sqlStatement) - rows, err := txn.QueryContext(idr.ctx, sqlStatement) + rows, err := txn.Query(sqlStatement) if err != nil { pkgLogger.Errorf(`IDR: Error inserting into %s from %s: %v`, idr.mergeRulesTable(), mergeRulesStagingTable, err) return } + defer func() { _ = rows.Close() }() for rows.Next() { var id int64 err = rows.Scan(&id) @@ -330,11 +330,11 @@ func (idr *Identity) addRules(txn *sql.Tx, loadFileNames []string, gzWriter *mis return ids, nil } -func (idr *Identity) writeTableToFile(tableName string, txn *sql.Tx, gzWriter *misc.GZipWriter) (err error) { +func (idr *Identity) writeTableToFile(tableName string, txn *sqlmiddleware.Tx, gzWriter *misc.GZipWriter) (err error) { batchSize := int64(500) sqlStatement := fmt.Sprintf(`SELECT COUNT(*) FROM %s`, tableName) var totalRows int64 - err = txn.QueryRowContext(idr.ctx, sqlStatement).Scan(&totalRows) + err = txn.QueryRow(sqlStatement).Scan(&totalRows) if err != nil { return } @@ -343,11 +343,12 @@ func (idr *Identity) writeTableToFile(tableName string, txn *sql.Tx, gzWriter *m for { sqlStatement = fmt.Sprintf(`SELECT merge_property_1_type, merge_property_1_value, merge_property_2_type, merge_property_2_value FROM %s LIMIT %d OFFSET %d`, tableName, batchSize, offset) - var rows *sql.Rows - rows, err = 
txn.QueryContext(idr.ctx, sqlStatement) + var rows *sqlmiddleware.Rows + rows, err = txn.Query(sqlStatement) if err != nil { return } + defer func() { _ = rows.Close() }() columnNames := []string{"merge_property_1_type", "merge_property_1_value", "merge_property_2_type", "merge_property_2_value"} for rows.Next() { var rowData []string @@ -382,13 +383,13 @@ func (idr *Identity) writeTableToFile(tableName string, txn *sql.Tx, gzWriter *m return } -func (idr *Identity) uploadFile(filePath string, txn *sql.Tx, tableName string, totalRecords int) (err error) { +func (idr *Identity) uploadFile(ctx context.Context, filePath string, txn *sqlmiddleware.Tx, tableName string, totalRecords int) (err error) { outputFile, err := os.Open(filePath) if err != nil { panic(err) } storageProvider := warehouseutils.ObjectStorageType(idr.warehouse.Destination.DestinationDefinition.Name, idr.warehouse.Destination.Config, idr.uploader.UseRudderStorage()) - uploader, err := filemanager.DefaultFileManagerFactory.New(&filemanager.SettingsT{ + uploader, err := filemanager.New(&filemanager.Settings{ Provider: storageProvider, Config: misc.GetObjectStorageConfig(misc.ObjectStorageOptsT{ Provider: storageProvider, @@ -400,14 +401,14 @@ func (idr *Identity) uploadFile(filePath string, txn *sql.Tx, tableName string, pkgLogger.Errorf("IDR: Error in creating a file manager for :%s: , %v", idr.warehouse.Destination.DestinationDefinition.Name, err) return err } - output, err := uploader.Upload(idr.ctx, outputFile, config.GetString("WAREHOUSE_BUCKET_LOAD_OBJECTS_FOLDER_NAME", "rudder-warehouse-load-objects"), tableName, idr.warehouse.Source.ID, tableName) + output, err := uploader.Upload(ctx, outputFile, config.GetString("WAREHOUSE_BUCKET_LOAD_OBJECTS_FOLDER_NAME", "rudder-warehouse-load-objects"), tableName, idr.warehouse.Source.ID, tableName) if err != nil { return } sqlStatement := fmt.Sprintf(`UPDATE %s SET location='%s', total_events=%d WHERE wh_upload_id=%d AND table_name='%s'`, warehouseutils.WarehouseTableUploadsTable, output.Location, totalRecords, idr.uploadID, warehouseutils.ToProviderCase(idr.warehouse.Destination.DestinationDefinition.Name, tableName)) pkgLogger.Infof(`IDR: Updating load file location for table: %s: %s `, tableName, sqlStatement) - _, err = txn.ExecContext(idr.ctx, sqlStatement) + _, err = txn.Exec(sqlStatement) if err != nil { pkgLogger.Errorf(`IDR: Error updating load file location for table: %s: %v`, tableName, err) } @@ -432,8 +433,8 @@ func (idr *Identity) createTempGzFile(dirName string) (gzWriter misc.GZipWriter, return } -func (idr *Identity) processMergeRules(fileNames []string) (err error) { - txn, err := idr.db.BeginTx(idr.ctx, &sql.TxOptions{}) +func (idr *Identity) processMergeRules(ctx context.Context, fileNames []string) (err error) { + txn, err := idr.db.BeginTx(ctx, nil) if err != nil { panic(err) } @@ -464,21 +465,29 @@ func (idr *Identity) processMergeRules(fileNames []string) (err error) { } totalMappingRecords += count if idx%1000 == 0 { - pkgLogger.Infof(`IDR: Applied %d rules out of %d. Total Mapping records added: %d. Namespace: %s, Destination: %s:%s`, idx+1, len(ruleIDs), totalMappingRecords, idr.warehouse.Namespace, idr.warehouse.Type, idr.warehouse.Destination.ID) + pkgLogger.Infof( + `IDR: Applied %d rules out of %d. Total Mapping records added: %d. 
Namespace: %s, Destination: %s:%s`, + idx+1, + len(ruleIDs), + totalMappingRecords, + idr.warehouse.Namespace, + idr.warehouse.Type, + idr.warehouse.Destination.ID, + ) } } mappingsFileGzWriter.CloseGZ() // END: Add new/changed identity mappings to local pg table and also to file // upload new merge rules to object storage - err = idr.uploadFile(mergeRulesFilePath, txn, idr.whMergeRulesTable(), len(ruleIDs)) + err = idr.uploadFile(ctx, mergeRulesFilePath, txn, idr.whMergeRulesTable(), len(ruleIDs)) if err != nil { pkgLogger.Errorf(`IDR: Error uploading load file for %s at %s to object storage: %v`, idr.mergeRulesTable(), mergeRulesFilePath, err) return } // upload new/changed identity mappings to object storage - err = idr.uploadFile(mappingsFilePath, txn, idr.whMappingsTable(), totalMappingRecords) + err = idr.uploadFile(ctx, mappingsFilePath, txn, idr.whMappingsTable(), totalMappingRecords) if err != nil { pkgLogger.Errorf(`IDR: Error uploading load file for %s at %s to object storage: %v`, mappingsFilePath, mergeRulesFilePath, err) return @@ -497,23 +506,23 @@ func (idr *Identity) processMergeRules(fileNames []string) (err error) { // 2. Append to local identity merge rules table // 3. Apply each merge rule and update local identity mapping table // 4. Upload the diff of each table to load files for both tables -func (idr *Identity) Resolve() (err error) { +func (idr *Identity) Resolve(ctx context.Context) (err error) { var loadFileNames []string defer misc.RemoveFilePaths(loadFileNames...) - loadFileNames, err = idr.downloader.Download(idr.ctx, idr.whMergeRulesTable()) + loadFileNames, err = idr.downloader.Download(ctx, idr.whMergeRulesTable()) if err != nil { pkgLogger.Errorf(`IDR: Failed to download load files for %s with error: %v`, idr.mergeRulesTable(), err) return } - return idr.processMergeRules(loadFileNames) + return idr.processMergeRules(ctx, loadFileNames) } -func (idr *Identity) ResolveHistoricIdentities() (err error) { +func (idr *Identity) ResolveHistoricIdentities(ctx context.Context) (err error) { var loadFileNames []string defer misc.RemoveFilePaths(loadFileNames...) 
gzWriter, path := idr.createTempGzFile(fmt.Sprintf(`/%s/`, misc.RudderIdentityMergeRulesTmp)) - err = idr.warehouseManager.DownloadIdentityRules(idr.ctx, &gzWriter) + err = idr.warehouseManager.DownloadIdentityRules(ctx, &gzWriter) gzWriter.CloseGZ() if err != nil { pkgLogger.Errorf(`IDR: Failed to download identity information from warehouse with error: %v`, err) @@ -521,5 +530,5 @@ func (idr *Identity) ResolveHistoricIdentities() (err error) { } loadFileNames = append(loadFileNames, path) - return idr.processMergeRules(loadFileNames) + return idr.processMergeRules(ctx, loadFileNames) } diff --git a/warehouse/integrations/bigquery/bigquery.go b/warehouse/integrations/bigquery/bigquery.go index 8267a9e0fbd..c61c4e1452f 100644 --- a/warehouse/integrations/bigquery/bigquery.go +++ b/warehouse/integrations/bigquery/bigquery.go @@ -15,8 +15,8 @@ import ( "google.golang.org/api/option" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/googleutil" "github.com/rudderlabs/rudder-go-kit/logger" - "github.com/rudderlabs/rudder-server/utils/googleutils" "github.com/rudderlabs/rudder-server/utils/misc" "github.com/rudderlabs/rudder-server/warehouse/client" "github.com/rudderlabs/rudder-server/warehouse/integrations/bigquery/middleware" @@ -716,9 +716,9 @@ type BQCredentials struct { func Connect(context context.Context, cred *BQCredentials) (*bigquery.Client, error) { var opts []option.ClientOption - if !googleutils.ShouldSkipCredentialsInit(cred.Credentials) { + if !googleutil.ShouldSkipCredentialsInit(cred.Credentials) { credBytes := []byte(cred.Credentials) - if err := googleutils.CompatibleGoogleCredentialsJSON(credBytes); err != nil { + if err := googleutil.CompatibleGoogleCredentialsJSON(credBytes); err != nil { return nil, err } opts = append(opts, option.WithCredentialsJSON(credBytes)) diff --git a/warehouse/integrations/clickhouse/clickhouse_test.go b/warehouse/integrations/clickhouse/clickhouse_test.go index 0cb61533863..141443384b4 100644 --- a/warehouse/integrations/clickhouse/clickhouse_test.go +++ b/warehouse/integrations/clickhouse/clickhouse_test.go @@ -29,8 +29,8 @@ import ( "github.com/rudderlabs/rudder-server/warehouse/integrations/testhelper" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" - "github.com/rudderlabs/rudder-server/services/filemanager" "github.com/rudderlabs/rudder-server/utils/misc" "github.com/rudderlabs/rudder-server/warehouse/validations" @@ -521,8 +521,7 @@ func TestClickhouse_LoadTableRoundTrip(t *testing.T) { defer func() { _ = f.Close() }() - fmFactory := filemanager.FileManagerFactoryT{} - fm, err := fmFactory.New(&filemanager.SettingsT{ + fm, err := filemanager.New(&filemanager.Settings{ Provider: provider, Config: map[string]any{ "bucketName": bucketName, diff --git a/warehouse/integrations/datalake/datalake_test.go b/warehouse/integrations/datalake/datalake_test.go index f5c20c213e7..b277f0d9df7 100644 --- a/warehouse/integrations/datalake/datalake_test.go +++ b/warehouse/integrations/datalake/datalake_test.go @@ -15,7 +15,8 @@ import ( "github.com/rudderlabs/rudder-server/testhelper/workspaceConfig" - "github.com/minio/minio-go/v6" + "github.com/minio/minio-go/v7" + "github.com/minio/minio-go/v7/pkg/credentials" "github.com/rudderlabs/compose-test/testcompose" kithelper "github.com/rudderlabs/rudder-go-kit/testhelper" @@ -224,10 +225,13 @@ func TestIntegration(t *testing.T) { region = "us-east-1" ) - minioClient, err := 
minio.New(s3EndPoint, s3AccessKeyID, s3AccessKey, secure) + minioClient, err := minio.New(s3EndPoint, &minio.Options{ + Creds: credentials.NewStaticV4(s3AccessKeyID, s3AccessKey, ""), + Secure: secure, + }) require.NoError(t, err) - _ = minioClient.MakeBucket(s3BucketName, region) + _ = minioClient.MakeBucket(context.TODO(), s3BucketName, minio.MakeBucketOptions{Region: region}) }, stagingFilePrefix: "testdata/upload-job-s3-datalake", }, @@ -327,10 +331,13 @@ func TestIntegration(t *testing.T) { region = "us-east-1" ) - minioClient, err := minio.New(s3EndPoint, s3AccessKeyID, s3AccessKey, secure) + minioClient, err := minio.New(s3EndPoint, &minio.Options{ + Creds: credentials.NewStaticV4(s3AccessKeyID, s3AccessKey, ""), + Secure: secure, + }) require.NoError(t, err) - _ = minioClient.MakeBucket(s3BucketName, region) + _ = minioClient.MakeBucket(context.Background(), s3BucketName, minio.MakeBucketOptions{Region: region}) dest := backendconfig.DestinationT{ ID: s3DestinationID, diff --git a/warehouse/integrations/datalake/schema-repository/glue.go b/warehouse/integrations/datalake/schema-repository/glue.go index 17acdfaba77..6ba1a580728 100644 --- a/warehouse/integrations/datalake/schema-repository/glue.go +++ b/warehouse/integrations/datalake/schema-repository/glue.go @@ -6,6 +6,7 @@ import ( "net/url" "regexp" + "github.com/rudderlabs/rudder-go-kit/awsutil" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-server/warehouse/internal/model" @@ -209,7 +210,7 @@ func getGlueClient(wh model.Warehouse) (*glue.Glue, error) { if err != nil { return nil, err } - awsSession, err := awsutils.CreateSession(sessionConfig) + awsSession, err := awsutil.CreateSession(sessionConfig) if err != nil { return nil, err } diff --git a/warehouse/integrations/datalake/schema-repository/glue_test.go b/warehouse/integrations/datalake/schema-repository/glue_test.go index 1fdab5861e0..9c228deac3c 100644 --- a/warehouse/integrations/datalake/schema-repository/glue_test.go +++ b/warehouse/integrations/datalake/schema-repository/glue_test.go @@ -16,8 +16,8 @@ import ( "github.com/aws/aws-sdk-go/service/glue" "github.com/stretchr/testify/require" + "github.com/rudderlabs/rudder-go-kit/filemanager" backendconfig "github.com/rudderlabs/rudder-server/backend-config" - "github.com/rudderlabs/rudder-server/services/filemanager" "github.com/rudderlabs/rudder-server/utils/misc" warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) @@ -169,8 +169,7 @@ func TestGlueSchemaRepositoryRoundTrip(t *testing.T) { _ = f.Close() }) - fmFactory := filemanager.FileManagerFactoryT{} - fm, err := fmFactory.New(&filemanager.SettingsT{ + fm, err := filemanager.New(&filemanager.Settings{ Provider: warehouseutils.S3, Config: map[string]any{ "bucketName": credentials.Bucket, diff --git a/warehouse/integrations/datalake/schema-repository/schema_repository.go b/warehouse/integrations/datalake/schema-repository/schema_repository.go index 4072374965f..c6aa34a542b 100644 --- a/warehouse/integrations/datalake/schema-repository/schema_repository.go +++ b/warehouse/integrations/datalake/schema-repository/schema_repository.go @@ -53,20 +53,3 @@ func NewSchemaRepository(wh model.Warehouse, uploader warehouseutils.Uploader) ( } return NewLocalSchemaRepository(wh, uploader) } - -// LoadFileBatching batches load files for refresh partitions -func LoadFileBatching(files []warehouseutils.LoadFile, batchSize int) [][]warehouseutils.LoadFile { - fileBatches := make([][]warehouseutils.LoadFile, 0, len(files)/batchSize+1) - - 
for len(files) > 0 { - cut := batchSize - if len(files) < cut { - cut = len(files) - } - - fileBatches = append(fileBatches, files[0:cut]) - files = files[cut:] - } - - return fileBatches -} diff --git a/warehouse/integrations/datalake/schema-repository/schema_repository_test.go b/warehouse/integrations/datalake/schema-repository/schema_repository_test.go index d825f92be1d..cf1e5863c26 100644 --- a/warehouse/integrations/datalake/schema-repository/schema_repository_test.go +++ b/warehouse/integrations/datalake/schema-repository/schema_repository_test.go @@ -3,6 +3,8 @@ package schemarepository_test import ( "testing" + "github.com/samber/lo" + "github.com/rudderlabs/rudder-server/warehouse/internal/model" "github.com/stretchr/testify/require" @@ -70,7 +72,7 @@ func TestLoadFileBatching(t *testing.T) { }) } - batches := schemarepository.LoadFileBatching(loadFiles, batchSize) + batches := lo.Chunk(loadFiles, batchSize) require.Equal(t, 1+(len(loadFiles)/batchSize), len(batches)) var reconstruct []warehouseutils.LoadFile diff --git a/warehouse/integrations/deltalake-native/deltalake.go b/warehouse/integrations/deltalake-native/deltalake.go index f0d9a6a4c46..9bd0d6118e4 100644 --- a/warehouse/integrations/deltalake-native/deltalake.go +++ b/warehouse/integrations/deltalake-native/deltalake.go @@ -222,6 +222,7 @@ func (d *Deltalake) connect() (*sqlmiddleware.DB, error) { logfield.Schema, d.Namespace, ), sqlmiddleware.WithSlowQueryThreshold(d.SlowQueryThreshold), + sqlmiddleware.WithQueryTimeout(d.ConnectTimeout), sqlmiddleware.WithSecretsRegex(map[string]string{ "'awsKeyId' = '[^']*'": "'awsKeyId' = '***'", "'awsSecretKey' = '[^']*'": "'awsSecretKey' = '***'", @@ -561,7 +562,7 @@ func (d *Deltalake) loadTable(ctx context.Context, tableName string, tableSchema err error auth string - row *sql.Row + row *sqlmiddleware.Row ) d.Logger.Infow("started loading", @@ -971,7 +972,7 @@ func (d *Deltalake) LoadUserTables(ctx context.Context) map[string]error { var ( partitionQuery string - row *sql.Row + row *sqlmiddleware.Row ) userColNames, firstValProps := getColumnProperties(usersSchemaInWarehouse) diff --git a/warehouse/integrations/middleware/sqlquerywrapper/sql.go b/warehouse/integrations/middleware/sqlquerywrapper/sql.go index fd08dadcf64..c68edd53849 100644 --- a/warehouse/integrations/middleware/sqlquerywrapper/sql.go +++ b/warehouse/integrations/middleware/sqlquerywrapper/sql.go @@ -27,14 +27,57 @@ type DB struct { logger logger keysAndValues []any slowQueryThreshold time.Duration + queryTimeout time.Duration + transactionTimeout time.Duration rollbackThreshold time.Duration commitThreshold time.Duration secretsRegex map[string]string } +type Rows struct { + *sql.Rows + context.CancelFunc + logQ +} + +func (r *Rows) Close() error { + defer r.CancelFunc() + r.logQ() + return r.Rows.Close() +} + +func (r *Rows) Next() bool { + return r.Rows.Next() +} + +func (r *Rows) Scan(dest ...interface{}) error { + return r.Rows.Scan(dest...) +} + +func (r *Rows) Err() error { + return r.Rows.Err() +} + +type Row struct { + *sql.Row + context.CancelFunc + logQ +} + +func (r *Row) Scan(dest ...interface{}) error { + defer r.CancelFunc() + r.logQ() + return r.Row.Scan(dest...) 
+} + +func (r *Row) Err() error { + return r.Row.Err() +} + type Tx struct { *sql.Tx db *DB + context.CancelFunc } func WithLogger(logger logger) Opt { @@ -61,6 +104,20 @@ func WithSecretsRegex(secretsRegex map[string]string) Opt { } } +// imposes a timeout on each query +func WithQueryTimeout(timeout time.Duration) Opt { + return func(s *DB) { + s.queryTimeout = timeout + } +} + +// imposes a timeout on the transaction +func WithTransactionTimeout(timeout time.Duration) Opt { + return func(s *DB) { + s.transactionTimeout = timeout + } +} + func New(db *sql.DB, opts ...Opt) *DB { s := &DB{ DB: db, @@ -77,48 +134,61 @@ func New(db *sql.DB, opts ...Opt) *DB { } func (db *DB) Exec(query string, args ...interface{}) (sql.Result, error) { - startedAt := time.Now() - result, err := db.DB.Exec(query, args...) - db.logQuery(query, db.since(startedAt)) - return result, err + return db.ExecContext(context.Background(), query, args...) } func (db *DB) ExecContext(ctx context.Context, query string, args ...interface{}) (sql.Result, error) { startedAt := time.Now() + ctx, cancel := queryContextWithTimeout(ctx, db.queryTimeout) + defer cancel() result, err := db.DB.ExecContext(ctx, query, args...) - db.logQuery(query, db.since(startedAt)) + db.logQuery(query, startedAt)() return result, err } -func (db *DB) Query(query string, args ...interface{}) (*sql.Rows, error) { - startedAt := time.Now() - rows, err := db.DB.Query(query, args...) - db.logQuery(query, db.since(startedAt)) - return rows, err +func (db *DB) Query(query string, args ...interface{}) (*Rows, error) { + return db.QueryContext(context.Background(), query, args...) } -func (db *DB) QueryContext(ctx context.Context, query string, args ...interface{}) (*sql.Rows, error) { +func (db *DB) QueryContext(ctx context.Context, query string, args ...interface{}) (*Rows, error) { startedAt := time.Now() + ctx, cancel := queryContextWithTimeout(ctx, db.queryTimeout) rows, err := db.DB.QueryContext(ctx, query, args...) - db.logQuery(query, db.since(startedAt)) - return rows, err + if err != nil { + defer cancel() + defer db.logQuery(query, startedAt)() + return nil, err + } + if err := rows.Err(); err != nil { + cancel() + db.logQuery(query, startedAt)() + func() { _ = rows.Close() }() + return nil, err + } + return &Rows{ + Rows: rows, + CancelFunc: cancel, + logQ: db.logQuery(query, startedAt), + }, err } -func (db *DB) QueryRow(query string, args ...interface{}) *sql.Row { - startedAt := time.Now() - row := db.DB.QueryRow(query, args...) - db.logQuery(query, db.since(startedAt)) - return row +func (db *DB) QueryRow(query string, args ...interface{}) *Row { + return db.QueryRowContext(context.Background(), query, args...) } -func (db *DB) QueryRowContext(ctx context.Context, query string, args ...interface{}) *sql.Row { +func (db *DB) QueryRowContext(ctx context.Context, query string, args ...interface{}) *Row { startedAt := time.Now() - row := db.DB.QueryRowContext(ctx, query, args...) 
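For context: the `Row` and `Rows` wrappers above exist so the per-query timeout context is released only after the caller has consumed the result. A rough caller-side sketch (the function and table name are illustrative only, not part of the patch):

```go
package example

import (
	"context"

	sqlmiddleware "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper"
)

// countLoadFiles illustrates the contract of the wrapped types: Row.Scan
// (and Rows.Close) invoke the stored CancelFunc, releasing the timeout
// context that QueryRowContext attached to the query.
func countLoadFiles(ctx context.Context, db *sqlmiddleware.DB) (int64, error) {
	var count int64
	err := db.QueryRowContext(ctx, `SELECT COUNT(*) FROM wh_load_files`).Scan(&count)
	return count, err
}
```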
- db.logQuery(query, db.since(startedAt)) - return row + ctx, cancel := queryContextWithTimeout(ctx, db.queryTimeout) + return &Row{ + Row: db.DB.QueryRowContext(ctx, query, args...), + CancelFunc: cancel, + logQ: db.logQuery(query, startedAt), + } } func (db *DB) WithTx(ctx context.Context, fn func(*Tx) error) error { + ctx, cancel := queryContextWithTimeout(ctx, db.queryTimeout) + defer cancel() tx, err := db.BeginTx(ctx, &sql.TxOptions{}) if err != nil { return fmt.Errorf("begin transaction: %w", err) @@ -137,82 +207,106 @@ func (db *DB) WithTx(ctx context.Context, fn func(*Tx) error) error { return tx.Commit() } -func (db *DB) logQuery(query string, elapsed time.Duration) { - if elapsed < db.slowQueryThreshold { - return - } +func (db *DB) logQuery(query string, since time.Time) logQ { + return func() { + if db.slowQueryThreshold <= 0 { + return + } + if db.since(since) < db.slowQueryThreshold { + return + } + + sanitizedQuery, _ := misc.ReplaceMultiRegex(query, db.secretsRegex) - sanitizedQuery, _ := misc.ReplaceMultiRegex(query, db.secretsRegex) + keysAndValues := []any{ + logfield.Query, sanitizedQuery, + logfield.QueryExecutionTime, db.since(since), + } + keysAndValues = append(keysAndValues, db.keysAndValues...) - keysAndValues := []any{ - logfield.Query, sanitizedQuery, - logfield.QueryExecutionTime, elapsed, + db.logger.Infow("executing query", keysAndValues...) } - keysAndValues = append(keysAndValues, db.keysAndValues...) +} - db.logger.Infow("executing query", keysAndValues...) +type logQ func() + +func (tx *Tx) GetTx() *sql.Tx { + return tx.Tx } +// Begin starts a transaction. +// +// Use BeginTx to pass context and options to the underlying driver. func (db *DB) Begin() (*Tx, error) { - if tx, err := db.DB.Begin(); err != nil { - return nil, err - } else { - return &Tx{tx, db}, nil - } + return db.BeginTx(context.Background(), nil) } func (db *DB) BeginTx(ctx context.Context, opts *sql.TxOptions) (*Tx, error) { - if tx, err := db.DB.BeginTx(ctx, opts); err != nil { + ctx, cancel := queryContextWithTimeout(ctx, db.transactionTimeout) + tx, err := db.DB.BeginTx(ctx, opts) + if err != nil { + defer cancel() return nil, err - } else { - return &Tx{tx, db}, nil } + return &Tx{tx, db, cancel}, nil } func (tx *Tx) Exec(query string, args ...interface{}) (sql.Result, error) { - startedAt := time.Now() - result, err := tx.Tx.Exec(query, args...) - tx.db.logQuery(query, tx.db.since(startedAt)) - return result, err + return tx.ExecContext(context.Background(), query, args...) } func (tx *Tx) ExecContext(ctx context.Context, query string, args ...interface{}) (sql.Result, error) { startedAt := time.Now() + ctx, cancel := queryContextWithTimeout(ctx, tx.db.queryTimeout) + defer cancel() result, err := tx.Tx.ExecContext(ctx, query, args...) - tx.db.logQuery(query, tx.db.since(startedAt)) + tx.db.logQuery(query, startedAt)() return result, err } -func (tx *Tx) Query(query string, args ...interface{}) (*sql.Rows, error) { - startedAt := time.Now() - rows, err := tx.Tx.Query(query, args...) - tx.db.logQuery(query, tx.db.since(startedAt)) - return rows, err +func (tx *Tx) Query(query string, args ...interface{}) (*Rows, error) { + return tx.QueryContext(context.Background(), query, args...) 
} -func (tx *Tx) QueryContext(ctx context.Context, query string, args ...interface{}) (*sql.Rows, error) { +func (tx *Tx) QueryContext(ctx context.Context, query string, args ...interface{}) (*Rows, error) { startedAt := time.Now() + ctx, cancel := queryContextWithTimeout(ctx, tx.db.queryTimeout) rows, err := tx.Tx.QueryContext(ctx, query, args...) - tx.db.logQuery(query, tx.db.since(startedAt)) - return rows, err + if err != nil { + defer cancel() + defer tx.db.logQuery(query, startedAt)() + return nil, err + } + if err := rows.Err(); err != nil { + cancel() + tx.db.logQuery(query, startedAt)() + func() { _ = rows.Close() }() + return nil, err + } + return &Rows{ + Rows: rows, + CancelFunc: cancel, + logQ: tx.db.logQuery(query, startedAt), + }, err } -func (tx *Tx) QueryRow(query string, args ...interface{}) *sql.Row { - startedAt := time.Now() - row := tx.Tx.QueryRow(query, args...) - tx.db.logQuery(query, tx.db.since(startedAt)) - return row +func (tx *Tx) QueryRow(query string, args ...interface{}) *Row { + return tx.QueryRowContext(context.Background(), query, args...) } -func (tx *Tx) QueryRowContext(ctx context.Context, query string, args ...interface{}) *sql.Row { +func (tx *Tx) QueryRowContext(ctx context.Context, query string, args ...interface{}) *Row { startedAt := time.Now() - row := tx.Tx.QueryRowContext(ctx, query, args...) - tx.db.logQuery(query, tx.db.since(startedAt)) - return row + ctx, cancel := queryContextWithTimeout(ctx, tx.db.queryTimeout) + return &Row{ + Row: tx.Tx.QueryRowContext(ctx, query, args...), + CancelFunc: cancel, + logQ: tx.db.logQuery(query, startedAt), + } } func (tx *Tx) Rollback() error { startedAt := time.Now() + defer tx.CancelFunc() err := tx.Tx.Rollback() if elapsed := tx.db.since(startedAt); elapsed > tx.db.rollbackThreshold { tx.db.logger.Warnw("rollback threshold exceeded", tx.db.keysAndValues...) @@ -222,9 +316,17 @@ func (tx *Tx) Rollback() error { func (tx *Tx) Commit() error { startedAt := time.Now() + defer tx.CancelFunc() err := tx.Tx.Commit() if elapsed := tx.db.since(startedAt); elapsed > tx.db.commitThreshold { tx.db.logger.Warnw("commit threshold exceeded", tx.db.keysAndValues...) 
} return err } + +func queryContextWithTimeout(ctx context.Context, timeout time.Duration) (context.Context, context.CancelFunc) { + if timeout.Abs() <= 0 { + return ctx, func() {} + } + return context.WithTimeout(ctx, timeout) +} diff --git a/warehouse/integrations/middleware/sqlquerywrapper/sql_test.go b/warehouse/integrations/middleware/sqlquerywrapper/sql_test.go index 3cf48682805..eceb46efee6 100644 --- a/warehouse/integrations/middleware/sqlquerywrapper/sql_test.go +++ b/warehouse/integrations/middleware/sqlquerywrapper/sql_test.go @@ -32,19 +32,19 @@ func TestQueryWrapper(t *testing.T) { t.Log("db:", pgResource.DBDsn) testCases := []struct { - name string - executionTimeInSec time.Duration - wantLog bool + name string + executionTime time.Duration + wantLog bool }{ { - name: "slow query", - executionTimeInSec: 500 * time.Second, - wantLog: true, + name: "slow query", + executionTime: 500 * time.Second, + wantLog: true, }, { - name: "fast query", - executionTimeInSec: 1 * time.Second, - wantLog: false, + name: "fast query", + executionTime: 1 * time.Second, + wantLog: false, }, } @@ -72,14 +72,14 @@ func TestQueryWrapper(t *testing.T) { WithKeyAndValues(keysAndValues...), ) qw.since = func(time.Time) time.Duration { - return tc.executionTimeInSec + return tc.executionTime } query := "SELECT 1;" kvs := []any{ logfield.Query, query, - logfield.QueryExecutionTime, tc.executionTimeInSec, + logfield.QueryExecutionTime, tc.executionTime, } kvs = append(kvs, keysAndValues...) @@ -95,16 +95,20 @@ func TestQueryWrapper(t *testing.T) { _, err = qw.ExecContext(ctx, query) require.NoError(t, err) - _, err = qw.Query(query) //nolint:rowserrcheck + rows, err := qw.Query(query) + _ = rows.Close() require.NoError(t, err) - _, err = qw.QueryContext(ctx, query) //nolint:rowserrcheck + rows, err = qw.QueryContext(ctx, query) + _ = rows.Close() require.NoError(t, err) - _ = qw.QueryRow(query) //nolint:rowserrcheck + row := qw.QueryRow(query) + _ = row.Scan() require.NoError(t, err) - _ = qw.QueryRowContext(ctx, query) //nolint:rowserrcheck + row = qw.QueryRowContext(ctx, query) + _ = row.Scan() require.NoError(t, err) }) @@ -126,7 +130,7 @@ func TestQueryWrapper(t *testing.T) { }), ) qw.since = func(time.Time) time.Duration { - return tc.executionTimeInSec + return tc.executionTime } t.Run("DB", func(t *testing.T) { @@ -134,11 +138,11 @@ func TestQueryWrapper(t *testing.T) { createKvs := []any{ logfield.Query, fmt.Sprintf("CREATE USER %s;", user), - logfield.QueryExecutionTime, tc.executionTimeInSec, + logfield.QueryExecutionTime, tc.executionTime, } alterKvs := []any{ logfield.Query, fmt.Sprintf("ALTER USER %s WITH PASSWORD '***';", user), - logfield.QueryExecutionTime, tc.executionTimeInSec, + logfield.QueryExecutionTime, tc.executionTime, } createKvs = append(createKvs, keysAndValues...) @@ -187,11 +191,11 @@ func TestQueryWrapper(t *testing.T) { createKvs := []any{ logfield.Query, fmt.Sprintf("CREATE USER %s;", user), - logfield.QueryExecutionTime, tc.executionTimeInSec, + logfield.QueryExecutionTime, tc.executionTime, } alterKvs := []any{ logfield.Query, fmt.Sprintf("ALTER USER %s WITH PASSWORD '***';", user), - logfield.QueryExecutionTime, tc.executionTimeInSec, + logfield.QueryExecutionTime, tc.executionTime, } createKvs = append(createKvs, keysAndValues...) 
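For context: together with `WithSlowQueryThreshold`, the new options let integrations attach per-query and per-transaction deadlines when the middleware is built. A hedged sketch of the wiring (the timeout values are placeholders, not taken from this patch):

```go
package example

import (
	"database/sql"
	"time"

	sqlmiddleware "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper"
)

// newMiddleware wraps a raw *sql.DB with slow-query logging plus the
// query and transaction timeouts introduced above.
func newMiddleware(db *sql.DB) *sqlmiddleware.DB {
	return sqlmiddleware.New(
		db,
		sqlmiddleware.WithSlowQueryThreshold(5*time.Minute),  // log queries slower than this
		sqlmiddleware.WithQueryTimeout(90*time.Second),       // applies to each Exec/Query/QueryRow
		sqlmiddleware.WithTransactionTimeout(10*time.Minute), // applies to each BeginTx'd transaction
	)
}
```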
@@ -234,14 +238,14 @@ func TestQueryWrapper(t *testing.T) { WithKeyAndValues(keysAndValues...), ) qw.since = func(time.Time) time.Duration { - return tc.executionTimeInSec + return tc.executionTime } query := "SELECT 1;" kvs := []any{ logfield.Query, query, - logfield.QueryExecutionTime, tc.executionTimeInSec, + logfield.QueryExecutionTime, tc.executionTime, } kvs = append(kvs, keysAndValues...) @@ -281,7 +285,8 @@ func TestQueryWrapper(t *testing.T) { tx, err := qw.Begin() require.NoError(t, err) - _, err = tx.Query(query) // nolint:rowserrcheck + rows, err := tx.Query(query) + _ = rows.Close() require.NoError(t, err) err = tx.Commit() // nolint:rowserrcheck @@ -292,7 +297,8 @@ func TestQueryWrapper(t *testing.T) { tx, err := qw.Begin() require.NoError(t, err) - _, err = tx.QueryContext(ctx, query) // nolint:rowserrcheck + rows, err := tx.QueryContext(ctx, query) + _ = rows.Close() require.NoError(t, err) err = tx.Commit() @@ -303,7 +309,8 @@ func TestQueryWrapper(t *testing.T) { tx, err := qw.Begin() require.NoError(t, err) - _ = tx.QueryRow(query) + row := tx.QueryRow(query) + _ = row.Scan() require.NoError(t, err) err = tx.Commit() @@ -314,7 +321,8 @@ func TestQueryWrapper(t *testing.T) { tx, err := qw.Begin() require.NoError(t, err) - _ = tx.QueryRowContext(ctx, query) + row := tx.QueryRowContext(ctx, query) + _ = row.Scan() require.NoError(t, err) err = tx.Commit() @@ -337,7 +345,7 @@ func TestQueryWrapper(t *testing.T) { WithLogger(rslogger.NOP), ) qw.since = func(time.Time) time.Duration { - return tc.executionTimeInSec + return tc.executionTime } table := fmt.Sprintf("test_table_%d", uuid.New().ID()) @@ -396,4 +404,48 @@ func TestQueryWrapper(t *testing.T) { }) }) } + + t.Run("wrapper with transaction timeout", func(t *testing.T) { + qw := New( + pgResource.DB, + WithSlowQueryThreshold(queryThreshold), + WithKeyAndValues(keysAndValues...), + WithTransactionTimeout(1*time.Millisecond), + ) + + tx, err := qw.BeginTx(ctx, &sql.TxOptions{}) + require.NoError(t, err) + + time.Sleep(2 * time.Millisecond) + + row := tx.QueryRowContext(ctx, "SELECT 1;") + var i int + err = row.Scan(&i) + require.ErrorIs(t, err, sql.ErrTxDone) + + err = tx.Commit() + require.ErrorIs(t, err, sql.ErrTxDone) + }) + + t.Run("wrapper without transaction timeout", func(t *testing.T) { + qw := New( + pgResource.DB, + WithSlowQueryThreshold(queryThreshold), + WithKeyAndValues(keysAndValues...), + ) + + tx, err := qw.BeginTx(ctx, &sql.TxOptions{}) + require.NoError(t, err) + + time.Sleep(2 * time.Millisecond) + + row := tx.QueryRowContext(ctx, "SELECT 1;") + var i int + err = row.Scan(&i) + require.Equal(t, 1, i) + require.NoError(t, err) + + err = tx.Commit() + require.NoError(t, err) + }) } diff --git a/warehouse/integrations/postgres-legacy/postgres.go b/warehouse/integrations/postgres-legacy/postgres.go index c3614d44c58..ebd73399f6d 100644 --- a/warehouse/integrations/postgres-legacy/postgres.go +++ b/warehouse/integrations/postgres-legacy/postgres.go @@ -24,9 +24,9 @@ import ( "github.com/lib/pq" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-go-kit/stats" - "github.com/rudderlabs/rudder-server/services/filemanager" "github.com/rudderlabs/rudder-server/utils/misc" "github.com/rudderlabs/rudder-server/warehouse/client" "github.com/rudderlabs/rudder-server/warehouse/tunnelling" @@ -162,6 +162,7 @@ func (pg *Postgres) getNewMiddleWare(db *sql.DB) *sqlmiddleware.DB { logfield.Schema, 
pg.Namespace, ), sqlmiddleware.WithSlowQueryThreshold(pg.SlowQueryThreshold), + sqlmiddleware.WithQueryTimeout(pg.ConnectTimeout), ) return middleware } @@ -285,7 +286,7 @@ func (*Postgres) IsEmpty(context.Context, model.Warehouse) (empty bool, err erro func (pg *Postgres) DownloadLoadFiles(ctx context.Context, tableName string) ([]string, error) { objects := pg.Uploader.GetLoadFilesMetadata(ctx, warehouseutils.GetLoadFilesOptions{Table: tableName}) storageProvider := warehouseutils.ObjectStorageType(pg.Warehouse.Destination.DestinationDefinition.Name, pg.Warehouse.Destination.Config, pg.Uploader.UseRudderStorage()) - downloader, err := filemanager.DefaultFileManagerFactory.New(&filemanager.SettingsT{ + downloader, err := filemanager.New(&filemanager.Settings{ Provider: storageProvider, Config: misc.GetObjectStorageConfig(misc.ObjectStorageOptsT{ Provider: storageProvider, @@ -1052,7 +1053,7 @@ func (pg *Postgres) handleExecContext(ctx context.Context, e *QueryParams) (err if e.enableWithQueryPlan { sqlStatement := "EXPLAIN " + e.query - var rows *sql.Rows + var rows *sqlmiddleware.Rows if e.txn != nil { rows, err = e.txn.QueryContext(ctx, sqlStatement) } else if e.db != nil { diff --git a/warehouse/integrations/postgres/postgres.go b/warehouse/integrations/postgres/postgres.go index c16ebfc38b0..1bd20f252c4 100644 --- a/warehouse/integrations/postgres/postgres.go +++ b/warehouse/integrations/postgres/postgres.go @@ -183,6 +183,7 @@ func (pg *Postgres) getNewMiddleWare(db *sql.DB) *sqlmiddleware.DB { logfield.Schema, pg.Namespace, ), sqlmiddleware.WithSlowQueryThreshold(pg.SlowQueryThreshold), + sqlmiddleware.WithQueryTimeout(pg.ConnectTimeout), ) return middleware } diff --git a/warehouse/integrations/redshift/redshift.go b/warehouse/integrations/redshift/redshift.go index df3af1590a1..401da513949 100644 --- a/warehouse/integrations/redshift/redshift.go +++ b/warehouse/integrations/redshift/redshift.go @@ -20,9 +20,9 @@ import ( "github.com/tidwall/gjson" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-go-kit/stats" - "github.com/rudderlabs/rudder-server/services/filemanager" "github.com/rudderlabs/rudder-server/utils/misc" "github.com/rudderlabs/rudder-server/warehouse/client" sqlmiddleware "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" @@ -379,7 +379,7 @@ func (rs *Redshift) generateManifest(ctx context.Context, tableName string) (str panic(err) } defer func() { _ = file.Close() }() - uploader, err := filemanager.DefaultFileManagerFactory.New(&filemanager.SettingsT{ + uploader, err := filemanager.New(&filemanager.Settings{ Provider: warehouseutils.S3, Config: misc.GetObjectStorageConfig(misc.ObjectStorageOptsT{ Provider: warehouseutils.S3, diff --git a/warehouse/integrations/snowflake/snowflake.go b/warehouse/integrations/snowflake/snowflake.go index 0cabe8574ef..002b200d9da 100644 --- a/warehouse/integrations/snowflake/snowflake.go +++ b/warehouse/integrations/snowflake/snowflake.go @@ -995,6 +995,7 @@ func (sf *Snowflake) connect(ctx context.Context, opts optionalCreds) (*sqlmiddl logfield.Schema, sf.Namespace, ), sqlmiddleware.WithSlowQueryThreshold(sf.SlowQueryThreshold), + sqlmiddleware.WithQueryTimeout(sf.ConnectTimeout), sqlmiddleware.WithSecretsRegex(map[string]string{ "AWS_KEY_ID='[^']*'": "AWS_KEY_ID='***'", "AWS_SECRET_KEY='[^']*'": "AWS_SECRET_KEY='***'", @@ -1119,7 +1120,7 @@ func (sf *Snowflake) 
DownloadIdentityRules(ctx context.Context, gzWriter *misc.G // TODO: Handle case for missing anonymous_id, user_id columns sqlStatement = fmt.Sprintf(`SELECT DISTINCT %s FROM %s.%q LIMIT %d OFFSET %d`, toSelectFields, schemaIdentifier, tableName, batchSize, offset) sf.Logger.Infof("SF: Downloading distinct combinations of anonymous_id, user_id: %s, totalRows: %d", sqlStatement, totalRows) - var rows *sql.Rows + var rows *sqlmiddleware.Rows rows, err = sf.DB.QueryContext(ctx, sqlStatement) if err != nil { return diff --git a/warehouse/integrations/testhelper/staging.go b/warehouse/integrations/testhelper/staging.go index c6f56da3fa8..786f3216be6 100644 --- a/warehouse/integrations/testhelper/staging.go +++ b/warehouse/integrations/testhelper/staging.go @@ -16,7 +16,7 @@ import ( "github.com/google/uuid" "github.com/stretchr/testify/require" - "github.com/rudderlabs/rudder-server/services/filemanager" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-server/utils/misc" warehouseclient "github.com/rudderlabs/rudder-server/warehouse/client" warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils" @@ -76,12 +76,12 @@ func prepareStagingFile(t testing.TB, testConfig *TestConfig) string { return gzipFilePath } -func uploadStagingFile(t testing.TB, testConfig *TestConfig, stagingFile string) filemanager.UploadOutput { +func uploadStagingFile(t testing.TB, testConfig *TestConfig, stagingFile string) filemanager.UploadedFile { t.Helper() storageProvider := warehouseutils.ObjectStorageType(testConfig.DestinationType, testConfig.Config, false) - fm, err := filemanager.DefaultFileManagerFactory.New(&filemanager.SettingsT{ + fm, err := filemanager.New(&filemanager.Settings{ Provider: storageProvider, Config: misc.GetObjectStorageConfig(misc.ObjectStorageOptsT{ Provider: storageProvider, @@ -98,7 +98,7 @@ func uploadStagingFile(t testing.TB, testConfig *TestConfig, stagingFile string) require.NoError(t, err) defer func() { _ = f.Close() }() - var uploadOutput filemanager.UploadOutput + var uploadOutput filemanager.UploadedFile err = WithConstantRetries(func() error { if uploadOutput, err = fm.Upload(context.Background(), f, keyPrefixes...); err != nil { @@ -112,7 +112,7 @@ func uploadStagingFile(t testing.TB, testConfig *TestConfig, stagingFile string) return uploadOutput } -func prepareStagingPayload(t testing.TB, testConfig *TestConfig, stagingFile string, uploadOutput filemanager.UploadOutput) warehouseclient.StagingFile { +func prepareStagingPayload(t testing.TB, testConfig *TestConfig, stagingFile string, uploadOutput filemanager.UploadedFile) warehouseclient.StagingFile { t.Helper() type StagingEvent struct { diff --git a/warehouse/internal/loadfiles/loadfiles.go b/warehouse/internal/loadfiles/loadfiles.go index dc93c32e872..57655517852 100644 --- a/warehouse/internal/loadfiles/loadfiles.go +++ b/warehouse/internal/loadfiles/loadfiles.go @@ -119,17 +119,16 @@ func WithConfig(ld *LoadFileGenerator, config *config.Config) { // CreateLoadFiles for the staging files that have not been successfully processed. 
func (lf *LoadFileGenerator) CreateLoadFiles(ctx context.Context, job *model.UploadJob) (int64, int64, error) { - stagingFiles := job.StagingFiles - - var toProcessStagingFiles []*model.StagingFile - // skip processing staging files marked succeeded - for _, stagingFile := range stagingFiles { - if stagingFile.Status != warehouseutils.StagingFileSucceededState { - toProcessStagingFiles = append(toProcessStagingFiles, stagingFile) - } - } - - return lf.createFromStaging(ctx, job, toProcessStagingFiles) + return lf.createFromStaging( + ctx, + job, + lo.Filter( + job.StagingFiles, + func(stagingFile *model.StagingFile, _ int) bool { + return stagingFile.Status != warehouseutils.StagingFileSucceededState + }, + ), + ) } // ForceCreateLoadFiles creates load files for the staging files, regardless if they are already successfully processed. @@ -161,22 +160,29 @@ func (lf *LoadFileGenerator) createFromStaging(ctx context.Context, job *model.U return 0, 0, fmt.Errorf("populating destination revision ID: %w", err) } + // Delete previous load files for the staging files stagingFileIDs := repo.StagingFileIDs(toProcessStagingFiles) - err = lf.LoadRepo.DeleteByStagingFiles(ctx, stagingFileIDs) - if err != nil { + if err := lf.LoadRepo.DeleteByStagingFiles(ctx, stagingFileIDs); err != nil { return 0, 0, fmt.Errorf("deleting previous load files: %w", err) } - err = lf.StageRepo.SetStatuses(ctx, stagingFileIDs, warehouseutils.StagingFileExecutingState) - if err != nil { + // Set staging file status to executing + if err := lf.StageRepo.SetStatuses( + ctx, + stagingFileIDs, + warehouseutils.StagingFileExecutingState, + ); err != nil { return 0, 0, fmt.Errorf("set staging file status to executing: %w", err) } defer func() { // ensure that if there is an error, we set the staging file status to failed if err != nil { - errStatus := lf.StageRepo.SetStatuses(ctx, stagingFileIDs, warehouseutils.StagingFileFailedState) - if errStatus != nil { + if errStatus := lf.StageRepo.SetStatuses( + ctx, + stagingFileIDs, + warehouseutils.StagingFileFailedState, + ); errStatus != nil { err = fmt.Errorf("%w, and also: %v", err, errStatus) } } @@ -185,8 +191,7 @@ func (lf *LoadFileGenerator) createFromStaging(ctx context.Context, job *model.U var g errgroup.Group var sampleError error - chunks := lo.Chunk(toProcessStagingFiles, publishBatchSize) - for _, chunk := range chunks { + for _, chunk := range lo.Chunk(toProcessStagingFiles, publishBatchSize) { // td : add prefix to payload for s3 dest var messages []pgnotifier.JobPayload for _, stagingFile := range chunk { @@ -295,12 +300,10 @@ func (lf *LoadFileGenerator) createFromStaging(ctx context.Context, job *model.U return nil } - err = lf.LoadRepo.Insert(ctx, loadFiles) - if err != nil { + if err = lf.LoadRepo.Insert(ctx, loadFiles); err != nil { return fmt.Errorf("inserting load files: %w", err) } - err = lf.StageRepo.SetStatuses(ctx, successfulStagingFileIDs, warehouseutils.StagingFileSucceededState) - if err != nil { + if err = lf.StageRepo.SetStatuses(ctx, successfulStagingFileIDs, warehouseutils.StagingFileSucceededState); err != nil { return fmt.Errorf("setting staging file status to succeeded: %w", err) } return nil diff --git a/warehouse/internal/repo/load.go b/warehouse/internal/repo/load.go index 141143cbdc3..71e2fb0da54 100644 --- a/warehouse/internal/repo/load.go +++ b/warehouse/internal/repo/load.go @@ -9,6 +9,7 @@ import ( "github.com/lib/pq" "github.com/rudderlabs/rudder-server/utils/timeutil" + sqlmiddleware 
"github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" "github.com/rudderlabs/rudder-server/warehouse/internal/model" warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) @@ -30,7 +31,7 @@ const ( type LoadFiles repo -func NewLoadFiles(db *sql.DB, opts ...Opt) *LoadFiles { +func NewLoadFiles(db *sqlmiddleware.DB, opts ...Opt) *LoadFiles { r := &LoadFiles{ db: db, now: timeutil.Now, @@ -66,7 +67,21 @@ func (repo *LoadFiles) Insert(ctx context.Context, loadFiles []model.LoadFile) ( return } - stmt, err := txn.PrepareContext(ctx, pq.CopyIn("wh_load_files", "staging_file_id", "location", "source_id", "destination_id", "destination_type", "table_name", "total_events", "created_at", "metadata")) + stmt, err := txn.PrepareContext( + ctx, + pq.CopyIn( + "wh_load_files", + "staging_file_id", + "location", + "source_id", + "destination_id", + "destination_type", + "table_name", + "total_events", + "created_at", + "metadata", + ), + ) if err != nil { return fmt.Errorf(`inserting load files: CopyIn: %w`, err) } diff --git a/warehouse/internal/repo/repo.go b/warehouse/internal/repo/repo.go index bec3bac77ce..caf027d2d31 100644 --- a/warehouse/internal/repo/repo.go +++ b/warehouse/internal/repo/repo.go @@ -1,12 +1,13 @@ package repo import ( - "database/sql" "time" + + sqlmiddleware "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" ) type repo struct { - db *sql.DB + db *sqlmiddleware.DB now func() time.Time } diff --git a/warehouse/internal/repo/schema.go b/warehouse/internal/repo/schema.go index b5670d5829b..21717a3f221 100644 --- a/warehouse/internal/repo/schema.go +++ b/warehouse/internal/repo/schema.go @@ -7,6 +7,7 @@ import ( "strings" "github.com/rudderlabs/rudder-server/utils/timeutil" + sqlmiddleware "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" "github.com/rudderlabs/rudder-server/warehouse/internal/model" warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) @@ -27,7 +28,7 @@ const whSchemaTableColumns = ` type WHSchema repo -func NewWHSchemas(db *sql.DB, opts ...Opt) *WHSchema { +func NewWHSchemas(db *sqlmiddleware.DB, opts ...Opt) *WHSchema { r := &WHSchema{ db: db, now: timeutil.Now, @@ -112,7 +113,7 @@ func (repo *WHSchema) GetForNamespace(ctx context.Context, sourceID, destID, nam return *entries[0], err } -func (*WHSchema) parseRows(rows *sql.Rows) ([]*model.WHSchema, error) { +func (*WHSchema) parseRows(rows *sqlmiddleware.Rows) ([]*model.WHSchema, error) { var whSchemas []*model.WHSchema defer func() { _ = rows.Close() }() diff --git a/warehouse/internal/repo/staging.go b/warehouse/internal/repo/staging.go index da55ba1c05a..c6f420ce9f0 100644 --- a/warehouse/internal/repo/staging.go +++ b/warehouse/internal/repo/staging.go @@ -11,6 +11,7 @@ import ( "github.com/lib/pq" "github.com/rudderlabs/rudder-server/utils/timeutil" + sqlmiddleware "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" "github.com/rudderlabs/rudder-server/warehouse/internal/model" warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) @@ -50,9 +51,9 @@ type metadataSchema struct { } func StagingFileIDs(stagingFiles []*model.StagingFile) []int64 { - var stagingFileIDs []int64 - for _, stagingFile := range stagingFiles { - stagingFileIDs = append(stagingFileIDs, stagingFile.ID) + stagingFileIDs := make([]int64, len(stagingFiles)) + for i, stagingFile := range stagingFiles { + stagingFileIDs[i] = stagingFile.ID } return 
stagingFileIDs } @@ -71,7 +72,7 @@ func metadataFromStagingFile(stagingFile *model.StagingFile) metadataSchema { } } -func NewStagingFiles(db *sql.DB, opts ...Opt) *StagingFiles { +func NewStagingFiles(db *sqlmiddleware.DB, opts ...Opt) *StagingFiles { r := &StagingFiles{ db: db, now: timeutil.Now, @@ -168,7 +169,7 @@ func (repo *StagingFiles) Insert(ctx context.Context, stagingFile *model.Staging } // praseRow is a helper for mapping a row of tableColumns to a model.StagingFile. -func (*StagingFiles) parseRows(rows *sql.Rows) ([]*model.StagingFile, error) { +func (*StagingFiles) parseRows(rows *sqlmiddleware.Rows) ([]*model.StagingFile, error) { var stagingFiles []*model.StagingFile defer func() { _ = rows.Close() }() @@ -461,7 +462,7 @@ func (repo *StagingFiles) DestinationRevisionIDs(ctx context.Context, upload mod return revisionIDs, nil } -func (repo *StagingFiles) SetStatuses(ctx context.Context, ids []int64, status string) (err error) { +func (repo *StagingFiles) SetStatuses(ctx context.Context, ids []int64, status string) error { if len(ids) == 0 { return fmt.Errorf("no staging files to update") } @@ -488,7 +489,7 @@ func (repo *StagingFiles) SetStatuses(ctx context.Context, ids []int64, status s return fmt.Errorf("not all rows were updated: %d != %d", rowsAffected, len(ids)) } - return + return nil } func (repo *StagingFiles) SetErrorStatus(ctx context.Context, stagingFileID int64, stageFileErr error) error { @@ -513,7 +514,6 @@ func (repo *StagingFiles) SetErrorStatus(ctx context.Context, stagingFileID int6 if err != nil { return fmt.Errorf("update staging file with error: %w", err) } - rowsAffected, err := result.RowsAffected() if err != nil { return fmt.Errorf("rows affected: %w", err) @@ -521,6 +521,5 @@ func (repo *StagingFiles) SetErrorStatus(ctx context.Context, stagingFileID int6 if rowsAffected == 0 { return fmt.Errorf("no rows affected") } - return nil } diff --git a/warehouse/internal/repo/staging_test.go b/warehouse/internal/repo/staging_test.go index 6971ed69736..9d93ac12967 100644 --- a/warehouse/internal/repo/staging_test.go +++ b/warehouse/internal/repo/staging_test.go @@ -2,7 +2,6 @@ package repo_test import ( "context" - "database/sql" "fmt" "testing" "time" @@ -12,12 +11,13 @@ import ( "github.com/rudderlabs/rudder-go-kit/testhelper/docker/resource" migrator "github.com/rudderlabs/rudder-server/services/sql-migrator" + sqlmiddleware "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" "github.com/rudderlabs/rudder-server/warehouse/internal/model" "github.com/rudderlabs/rudder-server/warehouse/internal/repo" warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) -func setupDB(t testing.TB) *sql.DB { +func setupDB(t testing.TB) *sqlmiddleware.DB { pool, err := dockertest.NewPool("") require.NoError(t, err) @@ -32,7 +32,7 @@ func setupDB(t testing.TB) *sql.DB { t.Log("db:", pgResource.DBDsn) - return pgResource.DB + return sqlmiddleware.New(pgResource.DB) } func TestStagingFileRepo(t *testing.T) { diff --git a/warehouse/internal/repo/table_upload.go b/warehouse/internal/repo/table_upload.go index 2c3594c0c2c..b97ac6dee7e 100644 --- a/warehouse/internal/repo/table_upload.go +++ b/warehouse/internal/repo/table_upload.go @@ -11,6 +11,7 @@ import ( "github.com/lib/pq" "github.com/rudderlabs/rudder-server/utils/timeutil" + sqlmiddleware "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" "github.com/rudderlabs/rudder-server/warehouse/internal/model" warehouseutils 
"github.com/rudderlabs/rudder-server/warehouse/utils" ) @@ -44,7 +45,7 @@ type TableUploadSetOptions struct { TotalEvents *int64 } -func NewTableUploads(db *sql.DB, opts ...Opt) *TableUploads { +func NewTableUploads(db *sqlmiddleware.DB, opts ...Opt) *TableUploads { r := &TableUploads{ db: db, now: timeutil.Now, @@ -57,7 +58,7 @@ func NewTableUploads(db *sql.DB, opts ...Opt) *TableUploads { func (repo *TableUploads) Insert(ctx context.Context, uploadID int64, tableNames []string) error { var ( - txn *sql.Tx + txn *sqlmiddleware.Tx stmt *sql.Stmt err error ) @@ -137,7 +138,7 @@ func (repo *TableUploads) GetByUploadIDAndTableName(ctx context.Context, uploadI return entries[0], err } -func (*TableUploads) parseRows(rows *sql.Rows) ([]model.TableUpload, error) { +func (*TableUploads) parseRows(rows *sqlmiddleware.Rows) ([]model.TableUpload, error) { var tableUploads []model.TableUpload defer func() { _ = rows.Close() }() diff --git a/warehouse/internal/repo/upload.go b/warehouse/internal/repo/upload.go index 189ef49a749..9db8054db7b 100644 --- a/warehouse/internal/repo/upload.go +++ b/warehouse/internal/repo/upload.go @@ -7,12 +7,14 @@ import ( "fmt" "time" + "github.com/tidwall/gjson" + "github.com/rudderlabs/rudder-server/utils/timeutil" jsoniter "github.com/json-iterator/go" "github.com/lib/pq" - "github.com/tidwall/gjson" + sqlmiddleware "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" "github.com/rudderlabs/rudder-server/warehouse/internal/model" warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) @@ -67,7 +69,7 @@ type UploadMetadata struct { NextRetryTime time.Time `json:"nextRetryTime"` } -func NewUploads(db *sql.DB, opts ...Opt) *Uploads { +func NewUploads(db *sqlmiddleware.DB, opts ...Opt) *Uploads { u := &Uploads{ db: db, now: timeutil.Now, @@ -137,9 +139,8 @@ func (uploads *Uploads) CreateWithStagingFiles(ctx context.Context, upload model } }() - row := tx.QueryRowContext( - ctx, - + var uploadID int64 + err = tx.QueryRow( `INSERT INTO `+uploadsTableName+` ( source_id, namespace, workspace_id, destination_id, destination_type, start_staging_file_id, @@ -171,15 +172,12 @@ func (uploads *Uploads) CreateWithStagingFiles(ctx context.Context, upload model lastEventAt, uploads.now(), uploads.now(), - ) - - var uploadID int64 - err = row.Scan(&uploadID) + ).Scan(&uploadID) if err != nil { return 0, err } - result, err := tx.ExecContext(ctx, + result, err := tx.Exec( `UPDATE `+stagingTableName+` SET upload_id = $1 WHERE id = ANY($2)`, uploadID, pq.Array(stagingFileIDs), ) @@ -302,7 +300,7 @@ func (uploads *Uploads) GetToProcess(ctx context.Context, destType string, limit ) var ( - rows *sql.Rows + rows *sqlmiddleware.Rows err error ) if opts.SkipWorkspaces == nil { diff --git a/warehouse/internal/repo/upload_test.go b/warehouse/internal/repo/upload_test.go index d0ce8b1785b..2286ff8801e 100644 --- a/warehouse/internal/repo/upload_test.go +++ b/warehouse/internal/repo/upload_test.go @@ -2,7 +2,6 @@ package repo_test import ( "context" - "database/sql" "encoding/json" "fmt" "sync/atomic" @@ -11,6 +10,7 @@ import ( "github.com/stretchr/testify/require" + sqlmiddleware "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" "github.com/rudderlabs/rudder-server/warehouse/internal/model" "github.com/rudderlabs/rudder-server/warehouse/internal/repo" warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils" @@ -267,7 +267,7 @@ func TestUploads_GetToProcess(t *testing.T) { ctx = context.Background() 
) - prepareUpload := func(db *sql.DB, sourceID string, status model.UploadStatus, priority int, now, nextRetryTime time.Time) model.Upload { + prepareUpload := func(db *sqlmiddleware.DB, sourceID string, status model.UploadStatus, priority int, now, nextRetryTime time.Time) model.Upload { stagingFileID := int64(0) repoUpload := repo.NewUploads(db, repo.WithNow(func() time.Time { return now diff --git a/warehouse/internal/service/loadfiles/downloader/downloader.go b/warehouse/internal/service/loadfiles/downloader/downloader.go index 47ffbb41826..b8919fce0f0 100644 --- a/warehouse/internal/service/loadfiles/downloader/downloader.go +++ b/warehouse/internal/service/loadfiles/downloader/downloader.go @@ -12,7 +12,7 @@ import ( "golang.org/x/sync/errgroup" - "github.com/rudderlabs/rudder-server/services/filemanager" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-server/utils/misc" warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils" ) @@ -54,7 +54,7 @@ func (l *downloaderImpl) Download(ctx context.Context, tableName string) ([]stri l.uploader.UseRudderStorage(), ) - fileManager, err := filemanager.DefaultFileManagerFactory.New(&filemanager.SettingsT{ + fileManager, err := filemanager.New(&filemanager.Settings{ Provider: storageProvider, Config: misc.GetObjectStorageConfig(misc.ObjectStorageOptsT{ Provider: storageProvider, diff --git a/warehouse/internal/service/loadfiles/downloader/downloader_test.go b/warehouse/internal/service/loadfiles/downloader/downloader_test.go index 1ea041fd6cd..1baa20adff5 100644 --- a/warehouse/internal/service/loadfiles/downloader/downloader_test.go +++ b/warehouse/internal/service/loadfiles/downloader/downloader_test.go @@ -16,8 +16,8 @@ import ( "github.com/ory/dockertest/v3" "github.com/stretchr/testify/require" + "github.com/rudderlabs/rudder-go-kit/filemanager" backendconfig "github.com/rudderlabs/rudder-server/backend-config" - "github.com/rudderlabs/rudder-server/services/filemanager" "github.com/rudderlabs/rudder-server/testhelper/destination" "github.com/rudderlabs/rudder-server/utils/misc" warehouseutils "github.com/rudderlabs/rudder-server/warehouse/utils" @@ -143,8 +143,7 @@ func TestDownloader(t *testing.T) { conf[k] = v } - fmFactory := filemanager.FileManagerFactoryT{} - fm, err := fmFactory.New(&filemanager.SettingsT{ + fm, err := filemanager.New(&filemanager.Settings{ Provider: provider, Config: conf, }) diff --git a/warehouse/schema.go b/warehouse/schema.go index 3933efcbef3..384de8918f9 100644 --- a/warehouse/schema.go +++ b/warehouse/schema.go @@ -2,7 +2,6 @@ package warehouse import ( "context" - "database/sql" "errors" "fmt" "reflect" @@ -15,6 +14,7 @@ import ( "github.com/rudderlabs/rudder-go-kit/config" "github.com/rudderlabs/rudder-go-kit/logger" + "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" "github.com/rudderlabs/rudder-server/warehouse/internal/repo" "github.com/rudderlabs/rudder-server/warehouse/internal/model" @@ -60,7 +60,7 @@ type Schema struct { } func NewSchema( - db *sql.DB, + db *sqlquerywrapper.DB, warehouse model.Warehouse, conf *config.Config, ) *Schema { diff --git a/warehouse/slave.go b/warehouse/slave.go index 751bf18ae06..0cdd122b5e2 100644 --- a/warehouse/slave.go +++ b/warehouse/slave.go @@ -18,8 +18,8 @@ import ( "golang.org/x/sync/errgroup" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/stats" - 
"github.com/rudderlabs/rudder-server/services/filemanager" "github.com/rudderlabs/rudder-server/services/pgnotifier" "github.com/rudderlabs/rudder-server/utils/misc" "github.com/rudderlabs/rudder-server/utils/timeutil" @@ -112,7 +112,7 @@ func (job *Payload) sendDownloadStagingFileFailedStat() { // Get fileManager func (job *Payload) getFileManager(config interface{}, useRudderStorage bool) (filemanager.FileManager, error) { storageProvider := warehouseutils.ObjectStorageType(job.DestinationType, config, useRudderStorage) - fileManager, err := filemanager.DefaultFileManagerFactory.New(&filemanager.SettingsT{ + fileManager, err := filemanager.New(&filemanager.Settings{ Provider: storageProvider, Config: misc.GetObjectStorageConfig(misc.ObjectStorageOptsT{ Provider: storageProvider, @@ -293,16 +293,16 @@ func (jobRun *JobRun) uploadLoadFilesToObjectStorage(ctx context.Context) ([]loa } } -func (jobRun *JobRun) uploadLoadFileToObjectStorage(ctx context.Context, uploader filemanager.FileManager, uploadFile encoding.LoadFileWriter, tableName string) (filemanager.UploadOutput, error) { +func (jobRun *JobRun) uploadLoadFileToObjectStorage(ctx context.Context, uploader filemanager.FileManager, uploadFile encoding.LoadFileWriter, tableName string) (filemanager.UploadedFile, error) { job := jobRun.job file, err := os.Open(uploadFile.GetLoadFile().Name()) // opens file in read mode if err != nil { pkgLogger.Errorf("[WH]: Failed to Open File: %s", uploadFile.GetLoadFile().Name()) - return filemanager.UploadOutput{}, err + return filemanager.UploadedFile{}, err } defer file.Close() pkgLogger.Debugf("[WH]: %s: Uploading load_file to %s for table: %s with staging_file id: %v", job.DestinationType, warehouseutils.ObjectStorageType(job.DestinationType, job.DestinationConfig, job.UseRudderStorage), tableName, job.StagingFileID) - var uploadLocation filemanager.UploadOutput + var uploadLocation filemanager.UploadedFile if slices.Contains(warehouseutils.TimeWindowDestinations, job.DestinationType) { uploadLocation, err = uploader.Upload(ctx, file, warehouseutils.GetTablePathInObjectStorage(jobRun.job.DestinationNamespace, tableName), job.LoadFilePrefix) } else { @@ -653,6 +653,9 @@ func runAsyncJob(ctx context.Context, asyncjob jobs.AsyncJobPayload) (AsyncJobRu if err != nil { return AsyncJobRunResult{Id: asyncjob.Id, Result: false}, err } + whManager.SetConnectionTimeout(warehouseutils.GetConnectionTimeout( + destType, warehouse.Destination.ID, + )) err = whManager.Setup(ctx, warehouse, whasyncjob) if err != nil { return AsyncJobRunResult{Id: asyncjob.Id, Result: false}, err diff --git a/warehouse/stats.go b/warehouse/stats.go index 96ae33f7a78..6381725ea80 100644 --- a/warehouse/stats.go +++ b/warehouse/stats.go @@ -2,7 +2,6 @@ package warehouse import ( "fmt" - "strconv" "strings" "time" @@ -135,7 +134,7 @@ func (job *UploadJob) generateUploadSuccessMetrics() { return } - numStagedEvents, err = repo.NewStagingFiles(dbHandle).TotalEventsForUpload( + numStagedEvents, err = repo.NewStagingFiles(wrappedDBHandle).TotalEventsForUpload( job.ctx, job.upload, ) @@ -153,12 +152,7 @@ func (job *UploadJob) generateUploadSuccessMetrics() { job.counterStat("total_rows_synced").Count(int(numUploadedEvents)) job.counterStat("num_staged_events").Count(int(numStagedEvents)) - - attempts := job.getAttemptNumber() - job.counterStat("upload_success", Tag{ - Name: "attempt_number", - Value: strconv.Itoa(attempts), - }).Count(1) + job.counterStat("upload_success").Count(1) } func (job *UploadJob) generateUploadAbortedMetrics() { 
@@ -184,7 +178,7 @@ func (job *UploadJob) generateUploadAbortedMetrics() { return } - numStagedEvents, err = repo.NewStagingFiles(dbHandle).TotalEventsForUpload( + numStagedEvents, err = repo.NewStagingFiles(wrappedDBHandle).TotalEventsForUpload( job.ctx, job.upload, ) @@ -228,7 +222,7 @@ func (job *UploadJob) recordTableLoad(tableName string, numEvents int64) { Value: strings.ToLower(tableName), }).Count(int(numEvents)) // Delay for the oldest event in the batch - firstEventAt, err := repo.NewStagingFiles(dbHandle).FirstEventForUpload(job.ctx, job.upload) + firstEventAt, err := repo.NewStagingFiles(wrappedDBHandle).FirstEventForUpload(job.ctx, job.upload) if err != nil { pkgLogger.Errorf("[WH]: Failed to generate delay metrics: %s, Err: %v", job.warehouse.Identifier, err) return @@ -256,6 +250,7 @@ func (job *UploadJob) recordLoadFileGenerationTimeStat(startID, endID int64) (er var timeTakenInS time.Duration err = job.dbHandle.QueryRowContext(job.ctx, stmt).Scan(&timeTakenInS) if err != nil { + pkgLogger.Errorf("[WH]: Failed to generate load file generation time stat: %s, Err: %v", job.warehouse.Identifier, err) return } job.timerStat("load_file_generation_time").SendTiming(timeTakenInS * time.Second) diff --git a/warehouse/stats_test.go b/warehouse/stats_test.go index 8afa996180a..dcb299e46f4 100644 --- a/warehouse/stats_test.go +++ b/warehouse/stats_test.go @@ -4,6 +4,7 @@ import ( "context" "os" + sqlmiddleware "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" "github.com/rudderlabs/rudder-server/warehouse/internal/repo" "github.com/golang/mock/gomock" @@ -73,7 +74,7 @@ var _ = Describe("Stats", Ordered, func() { Type: "POSTGRES", }, stats: mockStats, - tableUploadsRepo: repo.NewTableUploads(pgResource.DB), + tableUploadsRepo: repo.NewTableUploads(sqlmiddleware.New(pgResource.DB)), ctx: context.Background(), } }) @@ -103,7 +104,7 @@ var _ = Describe("Stats", Ordered, func() { Type: "POSTGRES", }, stats: mockStats, - tableUploadsRepo: repo.NewTableUploads(pgResource.DB), + tableUploadsRepo: repo.NewTableUploads(sqlmiddleware.New(pgResource.DB)), ctx: context.Background(), } }) @@ -149,7 +150,7 @@ var _ = Describe("Stats", Ordered, func() { warehouse: model.Warehouse{ Type: "POSTGRES", }, - dbHandle: pgResource.DB, + dbHandle: sqlmiddleware.New(pgResource.DB), stats: mockStats, ctx: context.Background(), } diff --git a/warehouse/upload.go b/warehouse/upload.go index d828ade0a19..7dd974f185d 100644 --- a/warehouse/upload.go +++ b/warehouse/upload.go @@ -20,13 +20,13 @@ import ( "github.com/rudderlabs/rudder-server/services/alerta" schemarepository "github.com/rudderlabs/rudder-server/warehouse/integrations/datalake/schema-repository" + sqlmiddleware "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" "github.com/rudderlabs/rudder-server/warehouse/integrations/manager" "golang.org/x/exp/slices" "github.com/cenkalti/backoff/v4" - "github.com/tidwall/gjson" "github.com/rudderlabs/rudder-go-kit/config" "github.com/rudderlabs/rudder-go-kit/stats" @@ -79,7 +79,7 @@ type UploadJobFactory struct { type UploadJob struct { ctx context.Context - dbHandle *sql.DB + dbHandle *sqlmiddleware.DB destinationValidator validations.DestinationValidator loadfile *loadfiles.LoadFileGenerator tableUploadsRepo *repo.TableUploads @@ -174,24 +174,32 @@ func setMaxParallelLoads() { } func (f *UploadJobFactory) NewUploadJob(ctx context.Context, dto *model.UploadJob, whManager manager.Manager) *UploadJob { + wrappedDBHandle := 
sqlmiddleware.New( + f.dbHandle, + sqlmiddleware.WithQueryTimeout(dbHanndleTimeout), + ) return &UploadJob{ ctx: ctx, - dbHandle: f.dbHandle, + dbHandle: wrappedDBHandle, loadfile: f.loadFile, recovery: f.recovery, pgNotifier: f.pgNotifier, whManager: whManager, destinationValidator: f.destinationValidator, stats: f.stats, - tableUploadsRepo: repo.NewTableUploads(f.dbHandle), - schemaHandle: NewSchema(f.dbHandle, dto.Warehouse, config.Default), + tableUploadsRepo: repo.NewTableUploads(wrappedDBHandle), + schemaHandle: NewSchema( + wrappedDBHandle, + dto.Warehouse, + config.Default, + ), upload: dto.Upload, warehouse: dto.Warehouse, stagingFiles: dto.StagingFiles, stagingFileIDs: repo.StagingFileIDs(dto.StagingFiles), - pendingTableUploadsRepo: repo.NewUploads(f.dbHandle), + pendingTableUploadsRepo: repo.NewUploads(wrappedDBHandle), pendingTableUploads: []model.PendingTableUpload{}, refreshPartitionBatchSize: config.GetInt("Warehouse.refreshPartitionBatchSize", 100), @@ -247,11 +255,10 @@ func (job *UploadJob) trackLongRunningUpload() chan struct{} { } func (job *UploadJob) generateUploadSchema() error { - err := job.schemaHandle.prepareUploadSchema( + if err := job.schemaHandle.prepareUploadSchema( job.ctx, job.stagingFiles, - ) - if err != nil { + ); err != nil { return fmt.Errorf("consolidate staging files schema using warehouse schema: %w", err) } @@ -284,13 +291,10 @@ func (job *UploadJob) initTableUploads() error { } func (job *UploadJob) syncRemoteSchema() (bool, error) { - err := job.schemaHandle.fetchSchemaFromLocal(job.ctx) - if err != nil { + if err := job.schemaHandle.fetchSchemaFromLocal(job.ctx); err != nil { return false, fmt.Errorf("fetching schema from local: %w", err) } - - err = job.schemaHandle.fetchSchemaFromWarehouse(job.ctx, job.whManager) - if err != nil { + if err := job.schemaHandle.fetchSchemaFromWarehouse(job.ctx, job.whManager); err != nil { return false, fmt.Errorf("fetching schema from warehouse: %w", err) } @@ -305,8 +309,7 @@ func (job *UploadJob) syncRemoteSchema() (bool, error) { logfield.Namespace, job.warehouse.Namespace, ) - err = job.schemaHandle.updateLocalSchema(job.ctx, job.upload.ID, job.schemaHandle.schemaInWarehouse) - if err != nil { + if err := job.schemaHandle.updateLocalSchema(job.ctx, job.upload.ID, job.schemaHandle.schemaInWarehouse); err != nil { return false, fmt.Errorf("updating local schema: %w", err) } } @@ -314,7 +317,7 @@ func (job *UploadJob) syncRemoteSchema() (bool, error) { return schemaChanged, nil } -func (job *UploadJob) getTotalRowsInLoadFiles() int64 { +func (job *UploadJob) getTotalRowsInLoadFiles(ctx context.Context) int64 { var total sql.NullInt64 sqlStatement := fmt.Sprintf(` @@ -344,19 +347,18 @@ func (job *UploadJob) getTotalRowsInLoadFiles() int64 { misc.IntArrayToString(job.stagingFileIDs, ","), warehouseutils.ToProviderCase(job.warehouse.Type, warehouseutils.DiscardsTable), ) - err := dbHandle.QueryRowContext(job.ctx, sqlStatement).Scan(&total) - if err != nil { + if err := wrappedDBHandle.QueryRowContext(ctx, sqlStatement).Scan(&total); err != nil { pkgLogger.Errorf(`Error in getTotalRowsInLoadFiles: %v`, err) } return total.Int64 } func (job *UploadJob) matchRowsInStagingAndLoadFiles(ctx context.Context) error { - rowsInStagingFiles, err := repo.NewStagingFiles(dbHandle).TotalEventsForUpload(ctx, job.upload) + rowsInStagingFiles, err := repo.NewStagingFiles(wrappedDBHandle).TotalEventsForUpload(ctx, job.upload) if err != nil { return fmt.Errorf("total rows: %w", err) } - rowsInLoadFiles := 
job.getTotalRowsInLoadFiles() + rowsInLoadFiles := job.getTotalRowsInLoadFiles(ctx) if (rowsInStagingFiles != rowsInLoadFiles) || rowsInStagingFiles == 0 || rowsInLoadFiles == 0 { pkgLogger.Errorf(`Error: Rows count mismatch between staging and load files for upload:%d. rowsInStagingFiles: %d, rowsInLoadFiles: %d`, job.upload.ID, rowsInStagingFiles, rowsInLoadFiles) job.guageStat("warehouse_staging_load_file_events_count_mismatched").Gauge(rowsInStagingFiles - rowsInLoadFiles) @@ -386,6 +388,9 @@ func (job *UploadJob) run() (err error) { } whManager := job.whManager + whManager.SetConnectionTimeout(warehouseutils.GetConnectionTimeout( + job.warehouse.Type, job.warehouse.Destination.ID, + )) err = whManager.Setup(job.ctx, job.warehouse, job) if err != nil { _, _ = job.setUploadError(err, InternalProcessingFailed) @@ -393,8 +398,7 @@ func (job *UploadJob) run() (err error) { } defer whManager.Cleanup(job.ctx) - err = job.recovery.Recover(job.ctx, whManager, job.warehouse) - if err != nil { + if err = job.recovery.Recover(job.ctx, whManager, job.warehouse); err != nil { _, _ = job.setUploadError(err, InternalProcessingFailed) return err } @@ -466,13 +470,11 @@ func (job *UploadJob) run() (err error) { break } - err = job.setLoadFileIDs(startLoadFileID, endLoadFileID) - if err != nil { + if err = job.setLoadFileIDs(startLoadFileID, endLoadFileID); err != nil { break } - err = job.matchRowsInStagingAndLoadFiles(job.ctx) - if err != nil { + if err = job.matchRowsInStagingAndLoadFiles(job.ctx); err != nil { break } @@ -483,13 +485,12 @@ func (job *UploadJob) run() (err error) { case model.UpdatedTableUploadsCounts: newStatus = nextUploadState.failed for tableName := range job.upload.UploadSchema { - err = job.tableUploadsRepo.PopulateTotalEventsFromStagingFileIDs( + if err = job.tableUploadsRepo.PopulateTotalEventsFromStagingFileIDs( job.ctx, job.upload.ID, tableName, job.stagingFileIDs, - ) - if err != nil { + ); err != nil { err = fmt.Errorf("populate table uploads total events from staging file: %w", err) break } @@ -502,8 +503,7 @@ func (job *UploadJob) run() (err error) { case model.CreatedRemoteSchema: newStatus = nextUploadState.failed if len(job.schemaHandle.schemaInWarehouse) == 0 { - err = whManager.CreateSchema(job.ctx) - if err != nil { + if err = whManager.CreateSchema(job.ctx); err != nil { break } } @@ -620,7 +620,7 @@ func (job *UploadJob) run() (err error) { uploadStatusOpts := UploadStatusOpts{Status: newStatus} if newStatus == model.ExportedData { - rowCount, _ := repo.NewStagingFiles(dbHandle).TotalEventsForUpload(job.ctx, job.upload) + rowCount, _ := repo.NewStagingFiles(wrappedDBHandle).TotalEventsForUpload(job.ctx, job.upload) reportingMetric := types.PUReportedMetric{ ConnectionDetails: types.ConnectionDetails{ @@ -754,23 +754,21 @@ func (job *UploadJob) TablesToSkip() (map[string]model.PendingTableUpload, map[s func (job *UploadJob) resolveIdentities(populateHistoricIdentities bool) (err error) { idr := identity.New( - job.ctx, job.warehouse, - job.dbHandle, + wrappedDBHandle, job, job.upload.ID, job.whManager, downloader.NewDownloader(&job.warehouse, job, 8), ) if populateHistoricIdentities { - return idr.ResolveHistoricIdentities() + return idr.ResolveHistoricIdentities(job.ctx) } - return idr.Resolve() + return idr.Resolve(job.ctx) } func (job *UploadJob) UpdateTableSchema(tName string, tableSchemaDiff warehouseutils.TableSchemaDiff) (err error) { pkgLogger.Infof(`[WH]: Starting schema update for table %s in namespace %s of destination %s:%s`, tName, 
 		job.warehouse.Namespace, job.warehouse.Type, job.warehouse.Destination.ID)
-
 	if tableSchemaDiff.TableToBeCreated {
 		err = job.whManager.CreateTable(job.ctx, tName, tableSchemaDiff.ColumnMap)
 		if err != nil {
@@ -781,18 +779,18 @@ func (job *UploadJob) UpdateTableSchema(tName string, tableSchemaDiff warehouseu
 		return nil
 	}
 
-	if err = job.addColumnsToWarehouse(tName, tableSchemaDiff.ColumnMap); err != nil {
+	if err = job.addColumnsToWarehouse(job.ctx, tName, tableSchemaDiff.ColumnMap); err != nil {
 		return fmt.Errorf("adding columns to warehouse: %w", err)
 	}
 
-	if err = job.alterColumnsToWarehouse(tName, tableSchemaDiff.AlteredColumnMap); err != nil {
+	if err = job.alterColumnsToWarehouse(job.ctx, tName, tableSchemaDiff.AlteredColumnMap); err != nil {
 		return fmt.Errorf("altering columns to warehouse: %w", err)
 	}
 
 	return nil
 }
 
-func (job *UploadJob) alterColumnsToWarehouse(tName string, columnsMap model.TableSchema) error {
+func (job *UploadJob) alterColumnsToWarehouse(ctx context.Context, tName string, columnsMap model.TableSchema) error {
 	if job.DisableAlter {
 		pkgLogger.Debugw("skipping alter columns to warehouse",
 			logfield.SourceID, job.warehouse.Source.ID,
@@ -805,12 +803,11 @@ func (job *UploadJob) alterColumnsToWarehouse(tName string, columnsMap model.Tab
 		)
 		return nil
 	}
-
 	var responseToAlerta []model.AlterTableResponse
 	var errs []error
 
 	for columnName, columnType := range columnsMap {
-		res, err := job.whManager.AlterColumn(job.ctx, tName, columnName, columnType)
+		res, err := job.whManager.AlterColumn(ctx, tName, columnName, columnType)
 		if err != nil {
 			errs = append(errs, err)
 			continue
@@ -842,7 +839,7 @@ func (job *UploadJob) alterColumnsToWarehouse(tName string, columnsMap model.Tab
 		query := strings.Join(queries, "\n")
 		pkgLogger.Infof("altering dependent columns: %s", query)
 
-		err := job.alertSender.SendAlert(job.ctx, "warehouse-column-changes",
+		err := job.alertSender.SendAlert(ctx, "warehouse-column-changes",
 			alerta.SendAlertOpts{
 				Severity: alerta.SeverityCritical,
 				Priority: alerta.PriorityP1,
@@ -867,7 +864,7 @@ func (job *UploadJob) alterColumnsToWarehouse(tName string, columnsMap model.Tab
 	return nil
 }
 
-func (job *UploadJob) addColumnsToWarehouse(tName string, columnsMap model.TableSchema) (err error) {
+func (job *UploadJob) addColumnsToWarehouse(ctx context.Context, tName string, columnsMap model.TableSchema) (err error) {
 	pkgLogger.Infof(`[WH]: Adding columns for table %s in namespace %s of destination %s:%s`, tName, job.warehouse.Namespace, job.warehouse.Type, job.warehouse.Destination.ID)
 
 	destType := job.upload.DestinationType
@@ -887,7 +884,7 @@ func (job *UploadJob) addColumnsToWarehouse(tName string, columnsMap model.Table
 
 	chunks := lo.Chunk(columnsToAdd, columnsBatchSize)
 	for _, chunk := range chunks {
-		err = job.whManager.AddColumns(job.ctx, tName, chunk)
+		err = job.whManager.AddColumns(ctx, tName, chunk)
 		if err != nil {
 			err = fmt.Errorf("failed to add columns for table %s in namespace %s of destination %s:%s with error: %w", tName, job.warehouse.Namespace, job.warehouse.Type, job.warehouse.Destination.ID, err)
 			break
@@ -1266,9 +1263,8 @@ func (job *UploadJob) loadIdentityTables(populateHistoricIdentities bool) (loadE
 	errorMap := make(map[string]error)
 	// var generated bool
-	if generated, _ := job.areIdentityTablesLoadFilesGenerated(); !generated {
-		err := job.resolveIdentities(populateHistoricIdentities)
-		if err != nil {
+	if generated, _ := job.areIdentityTablesLoadFilesGenerated(job.ctx); !generated {
+		if err := job.resolveIdentities(populateHistoricIdentities); err != nil {
 			pkgLogger.Errorf(` ID Resolution operation failed: %v`, err)
 			errorMap[job.identityMergeRulesTableName()] = err
 			return job.processLoadTableResponse(errorMap)
@@ -1439,7 +1435,7 @@ func (job *UploadJob) setUploadStatus(statusOpts UploadStatusOpts) (err error) {
 	uploadColumnOpts := UploadColumnsOpts{Fields: additionalFields}
 	if statusOpts.ReportingMetric != (types.PUReportedMetric{}) {
-		txn, err := dbHandle.BeginTx(job.ctx, &sql.TxOptions{})
+		txn, err := wrappedDBHandle.BeginTx(job.ctx, &sql.TxOptions{})
 		if err != nil {
 			return err
 		}
@@ -1448,12 +1444,13 @@ func (job *UploadJob) setUploadStatus(statusOpts UploadStatusOpts) (err error) {
 		if err != nil {
 			return err
 		}
-
 		if config.GetBool("Reporting.enabled", types.DefaultReportingEnabled) {
-			application.Features().Reporting.GetReportingInstance().Report([]*types.PUReportedMetric{&statusOpts.ReportingMetric}, txn)
+			application.Features().Reporting.GetReportingInstance().Report(
+				[]*types.PUReportedMetric{&statusOpts.ReportingMetric},
+				txn.GetTx(),
+			)
 		}
-		err = txn.Commit()
-		return err
+		return txn.Commit()
 	}
 	return job.setUploadColumns(uploadColumnOpts)
 }
@@ -1487,11 +1484,11 @@ func (job *UploadJob) setLoadFileIDs(startLoadFileID, endLoadFileID int64) error
 
 type UploadColumnsOpts struct {
 	Fields []UploadColumn
-	Txn    *sql.Tx
+	Txn    *sqlmiddleware.Tx
 }
 
 // SetUploadColumns sets any column values passed as args in UploadColumn format for WarehouseUploadsTable
-func (job *UploadJob) setUploadColumns(opts UploadColumnsOpts) (err error) {
+func (job *UploadJob) setUploadColumns(opts UploadColumnsOpts) error {
 	var columns string
 	values := []interface{}{job.upload.ID}
 	// setting values using syntax $n since Exec can correctly format time.Time strings
@@ -1514,12 +1511,16 @@ func (job *UploadJob) setUploadColumns(opts UploadColumnsOpts) (err error) {
 		warehouseutils.WarehouseUploadsTable,
 		columns,
 	)
+
+	var querier interface {
+		ExecContext(ctx context.Context, query string, args ...interface{}) (sql.Result, error)
+	}
 	if opts.Txn != nil {
-		_, err = opts.Txn.ExecContext(job.ctx, sqlStatement, values...)
+		querier = opts.Txn
 	} else {
-		_, err = dbHandle.ExecContext(job.ctx, sqlStatement, values...)
+		querier = wrappedDBHandle
 	}
-
+	_, err := querier.ExecContext(job.ctx, sqlStatement, values...)
 	return err
 }
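The `setUploadColumns` hunk above collapses the duplicated `ExecContext` calls into a single statement that runs through whichever handle is available, by declaring the minimal interface that both the transaction and the wrapped DB handle satisfy. A short, self-contained sketch of the same idea using only the standard library (the table, column, and function names here are illustrative, not taken from the patch):

```go
package warehouse

import (
	"context"
	"database/sql"
)

// execer is the minimal behaviour the update needs; *sql.DB, *sql.Tx and the
// sqlquerywrapper types all expose an ExecContext with this shape.
type execer interface {
	ExecContext(ctx context.Context, query string, args ...interface{}) (sql.Result, error)
}

// setStatus issues one UPDATE regardless of whether the caller passed a
// transaction: if txn is nil it falls back to the plain DB handle.
func setStatus(ctx context.Context, db *sql.DB, txn *sql.Tx, id int64, status string) error {
	var querier execer = db
	if txn != nil {
		querier = txn
	}
	_, err := querier.ExecContext(ctx, `UPDATE wh_uploads SET status = $1 WHERE id = $2`, status, id)
	return err
}
```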
@@ -1677,22 +1678,19 @@ func (job *UploadJob) setUploadError(statusError error, state string) (string, e
 	if err != nil {
 		return "", fmt.Errorf("unable to start transaction: %w", err)
 	}
-
-	err = job.setUploadColumns(UploadColumnsOpts{Fields: uploadColumns, Txn: txn})
-	if err != nil {
+	if err = job.setUploadColumns(UploadColumnsOpts{Fields: uploadColumns, Txn: txn}); err != nil {
 		return "", fmt.Errorf("unable to change upload columns: %w", err)
 	}
-
-	inputCount, _ := repo.NewStagingFiles(dbHandle).TotalEventsForUpload(job.ctx, upload)
+	inputCount, _ := repo.NewStagingFiles(wrappedDBHandle).TotalEventsForUpload(job.ctx, upload)
 	outputCount, _ := job.tableUploadsRepo.TotalExportedEvents(job.ctx, job.upload.ID, []string{
 		warehouseutils.ToProviderCase(job.warehouse.Type, warehouseutils.DiscardsTable),
 	})
+
 	failCount := inputCount - outputCount
 	reportingStatus := jobsdb.Failed.State
 	if state == model.Aborted {
 		reportingStatus = jobsdb.Aborted.State
 	}
-
 	reportingMetrics := []*types.PUReportedMetric{{
 		ConnectionDetails: types.ConnectionDetails{
 			SourceID: job.upload.SourceID,
@@ -1738,23 +1736,21 @@ func (job *UploadJob) setUploadError(statusError error, state string) (string, e
 		})
 	}
 
 	if config.GetBool("Reporting.enabled", types.DefaultReportingEnabled) {
-		application.Features().Reporting.GetReportingInstance().Report(reportingMetrics, txn)
+		application.Features().Reporting.GetReportingInstance().Report(reportingMetrics, txn.GetTx())
 	}
 	err = txn.Commit()
 
 	job.upload.Status = state
 	job.upload.Error = serializedErr
 
-	attempts := job.getAttemptNumber()
-	job.counterStat("warehouse_failed_uploads", Tag{Name: "attempt_number", Value: strconv.Itoa(attempts)}).Count(1)
+	job.counterStat("warehouse_failed_uploads").Count(1)
 
 	// On aborted state, validate credentials to allow
 	// us to differentiate between user caused abort vs platform issue.
 	if state == model.Aborted {
 		// base tag to be sent as stat
-		tags := []Tag{{Name: "attempt_number", Value: strconv.Itoa(attempts)}}
-		tags = append(tags, errorTags)
+		tags := []Tag{errorTags}
 
 		valid, err := job.validateDestinationCredentials()
 		if err == nil {
@@ -1776,20 +1772,6 @@ func (job *UploadJob) validateDestinationCredentials() (bool, error) {
 	return response.Success, nil
 }
 
-func (job *UploadJob) getAttemptNumber() int {
-	uploadError := job.upload.Error
-	var attempts int32
-	if string(uploadError) == "" {
-		return 0
-	}
-
-	gjson.Parse(string(uploadError)).ForEach(func(key, value gjson.Result) bool {
-		attempts += int32(gjson.Get(value.String(), "attempt").Int())
-		return true
-	})
-	return int(attempts)
-}
-
 func (job *UploadJob) getLoadFilesTableMap() (loadFilesMap map[tableNameT]bool, err error) {
 	loadFilesMap = make(map[tableNameT]bool)
 
@@ -1843,7 +1825,7 @@ func (job *UploadJob) getLoadFilesTableMap() (loadFilesMap map[tableNameT]bool,
 	return
 }
 
-func (job *UploadJob) areIdentityTablesLoadFilesGenerated() (bool, error) {
+func (job *UploadJob) areIdentityTablesLoadFilesGenerated(ctx context.Context) (bool, error) {
 	var (
 		mergeRulesTable = warehouseutils.ToProviderCase(job.warehouse.Type, warehouseutils.IdentityMergeRulesTable)
 		mappingsTable   = warehouseutils.ToProviderCase(job.warehouse.Type, warehouseutils.IdentityMappingsTable)
@@ -1851,13 +1833,13 @@ func (job *UploadJob) areIdentityTablesLoadFilesGenerated() (bool, error) {
 		tu  model.TableUpload
 		err error
 	)
-	if tu, err = job.tableUploadsRepo.GetByUploadIDAndTableName(job.ctx, job.upload.ID, mergeRulesTable); err != nil {
+	if tu, err = job.tableUploadsRepo.GetByUploadIDAndTableName(ctx, job.upload.ID, mergeRulesTable); err != nil {
 		return false, fmt.Errorf("table upload not found for merge rules table: %w", err)
 	}
 	if tu.Location == "" {
 		return false, fmt.Errorf("merge rules location not found: %w", err)
 	}
-	if tu, err = job.tableUploadsRepo.GetByUploadIDAndTableName(job.ctx, job.upload.ID, mappingsTable); err != nil {
+	if tu, err = job.tableUploadsRepo.GetByUploadIDAndTableName(ctx, job.upload.ID, mappingsTable); err != nil {
 		return false, fmt.Errorf("table upload not found for mappings table: %w", err)
 	}
 	if tu.Location == "" {
@@ -1909,7 +1891,7 @@ func (job *UploadJob) GetLoadFilesMetadata(ctx context.Context, options warehous
 	)
 
 	pkgLogger.Debugf(`Fetching loadFileLocations: %v`, sqlStatement)
-	rows, err := dbHandle.QueryContext(ctx, sqlStatement)
+	rows, err := wrappedDBHandle.QueryContext(ctx, sqlStatement)
 	if err != nil {
 		panic(fmt.Errorf("query: %s\nfailed with Error : %w", sqlStatement, err))
 	}
@@ -2119,14 +2101,12 @@ func (job *UploadJob) RefreshPartitions(loadFileStartID, loadFileEndID int64) er
 			StartID: loadFileStartID,
 			EndID:   loadFileEndID,
 		})
-		batches := schemarepository.LoadFileBatching(loadFiles, job.refreshPartitionBatchSize)
-
+		batches := lo.Chunk(loadFiles, job.refreshPartitionBatchSize)
 		for _, batch := range batches {
 			if err = repository.RefreshPartitions(job.ctx, tableName, batch); err != nil {
 				return fmt.Errorf("refresh partitions: %w", err)
 			}
 		}
 	}
-
 	return nil
 }
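`RefreshPartitions` above drops the bespoke `schemarepository.LoadFileBatching` helper in favour of `lo.Chunk` from `github.com/samber/lo`. A minimal sketch of what that batching does on its own (the slice contents are made up for the example):

```go
package main

import (
	"fmt"

	"github.com/samber/lo"
)

func main() {
	loadFiles := []string{"f1", "f2", "f3", "f4", "f5"}

	// lo.Chunk splits a slice into consecutive batches of at most the given
	// size; the last batch may be shorter.
	for _, batch := range lo.Chunk(loadFiles, 2) {
		fmt.Println(batch) // [f1 f2] [f3 f4] [f5]
	}
}
```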
"github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" "github.com/rudderlabs/rudder-server/warehouse/integrations/redshift" @@ -252,18 +253,18 @@ var _ = Describe("Upload", Ordered, func() { Namespace: namespace, }, stagingFileIDs: []int64{1, 2, 3, 4, 5}, - dbHandle: pgResource.DB, + dbHandle: sqlmiddleware.New(pgResource.DB), ctx: context.Background(), } }) It("Total rows in load files", func() { - count := job.getTotalRowsInLoadFiles() + count := job.getTotalRowsInLoadFiles(context.Background()) Expect(count).To(BeEquivalentTo(5)) }) It("Total rows in staging files", func() { - count, err := repo.NewStagingFiles(pgResource.DB).TotalEventsForUpload(context.Background(), job.upload) + count, err := repo.NewStagingFiles(sqlquerywrapper.New(pgResource.DB)).TotalEventsForUpload(context.Background(), job.upload) Expect(err).To(BeNil()) Expect(count).To(BeEquivalentTo(5)) }) diff --git a/warehouse/utils/utils.go b/warehouse/utils/utils.go index f735948ecd0..3cefbefe04f 100644 --- a/warehouse/utils/utils.go +++ b/warehouse/utils/utils.go @@ -27,11 +27,12 @@ import ( "github.com/iancoleman/strcase" "github.com/tidwall/gjson" + "github.com/rudderlabs/rudder-go-kit/awsutil" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-go-kit/stats" backendconfig "github.com/rudderlabs/rudder-server/backend-config" - "github.com/rudderlabs/rudder-server/services/filemanager" "github.com/rudderlabs/rudder-server/utils/awsutils" "github.com/rudderlabs/rudder-server/utils/httputil" "github.com/rudderlabs/rudder-server/utils/misc" @@ -403,7 +404,7 @@ func GetObjectName(location string, providerConfig interface{}, objectProvider s if destConfig, ok = providerConfig.(map[string]interface{}); !ok { return "", errors.New("failed to cast destination config interface{} to map[string]interface{}") } - fm, err := filemanager.DefaultFileManagerFactory.New(&filemanager.SettingsT{ + fm, err := filemanager.New(&filemanager.Settings{ Provider: objectProvider, Config: destConfig, }) @@ -716,13 +717,13 @@ func JoinWithFormatting(keys []string, format func(idx int, str string) string, return strings.Join(output, separator) } -func CreateAWSSessionConfig(destination *backendconfig.DestinationT, serviceName string) (*awsutils.SessionConfig, error) { +func CreateAWSSessionConfig(destination *backendconfig.DestinationT, serviceName string) (*awsutil.SessionConfig, error) { if !misc.IsConfiguredToUseRudderObjectStorage(destination.Config) && (misc.HasAWSRoleARNInConfig(destination.Config) || misc.HasAWSKeysInConfig(destination.Config)) { return awsutils.NewSimpleSessionConfigForDestination(destination, serviceName) } accessKeyID, accessKey := misc.GetRudderObjectStorageAccessKeys() - return &awsutils.SessionConfig{ + return &awsutil.SessionConfig{ AccessKeyID: accessKeyID, AccessKey: accessKey, Service: serviceName, @@ -735,7 +736,7 @@ func GetTemporaryS3Cred(destination *backendconfig.DestinationT) (string, string return "", "", "", err } - awsSession, err := awsutils.CreateSession(sessionConfig) + awsSession, err := awsutil.CreateSession(sessionConfig) if err != nil { return "", "", "", err } @@ -1001,3 +1002,26 @@ func ReadAsBool(key string, config map[string]interface{}) bool { } return false } + +func WithTimeout(ctx context.Context, timeout time.Duration, function func(context.Context) error) error { + ctxWithTimeout, cancel := context.WithTimeout(ctx, timeout) + defer 
diff --git a/warehouse/utils/utils_test.go b/warehouse/utils/utils_test.go
index 5c259717f5a..06ef7f8e006 100644
--- a/warehouse/utils/utils_test.go
+++ b/warehouse/utils/utils_test.go
@@ -14,6 +14,7 @@ import (
 
 	"github.com/rudderlabs/rudder-server/warehouse/internal/model"
 
+	"github.com/rudderlabs/rudder-go-kit/awsutil"
 	"github.com/rudderlabs/rudder-go-kit/logger"
 
 	. "github.com/onsi/ginkgo/v2"
@@ -22,7 +23,6 @@ import (
 	"github.com/stretchr/testify/require"
 
 	"github.com/rudderlabs/rudder-go-kit/config"
-	"github.com/rudderlabs/rudder-server/utils/awsutils"
 	"github.com/rudderlabs/rudder-server/utils/misc"
 
 	backendconfig "github.com/rudderlabs/rudder-server/backend-config"
@@ -1164,7 +1164,7 @@ func TestCreateAWSSessionConfig(t *testing.T) {
 	inputs := []struct {
 		destination    *backendconfig.DestinationT
 		service        string
-		expectedConfig *awsutils.SessionConfig
+		expectedConfig *awsutil.SessionConfig
 	}{
 		{
 			destination: &backendconfig.DestinationT{
@@ -1173,7 +1173,7 @@ func TestCreateAWSSessionConfig(t *testing.T) {
 				},
 			},
 			service: "s3",
-			expectedConfig: &awsutils.SessionConfig{
+			expectedConfig: &awsutil.SessionConfig{
 				AccessKeyID: rudderAccessKeyID,
 				AccessKey:   rudderAccessKey,
 				Service:     "s3",
@@ -1187,7 +1187,7 @@ func TestCreateAWSSessionConfig(t *testing.T) {
 				},
 			},
 			service: "glue",
-			expectedConfig: &awsutils.SessionConfig{
+			expectedConfig: &awsutil.SessionConfig{
 				AccessKeyID: someAccessKeyID,
 				AccessKey:   someAccessKey,
 				Service:     "glue",
@@ -1201,7 +1201,7 @@ func TestCreateAWSSessionConfig(t *testing.T) {
 				WorkspaceID: someWorkspaceID,
 			},
 			service: "redshift",
-			expectedConfig: &awsutils.SessionConfig{
+			expectedConfig: &awsutil.SessionConfig{
 				RoleBasedAuth: true,
 				IAMRoleARN:    someIAMRoleARN,
 				ExternalID:    someWorkspaceID,
@@ -1214,7 +1214,7 @@ func TestCreateAWSSessionConfig(t *testing.T) {
 				WorkspaceID: someWorkspaceID,
 			},
 			service: "redshift",
-			expectedConfig: &awsutils.SessionConfig{
+			expectedConfig: &awsutil.SessionConfig{
 				AccessKeyID: rudderAccessKeyID,
 				AccessKey:   rudderAccessKey,
 				Service:     "redshift",
diff --git a/warehouse/validations/validate.go b/warehouse/validations/validate.go
index 09e7e882c42..77c98ba6b09 100644
--- a/warehouse/validations/validate.go
+++ b/warehouse/validations/validate.go
@@ -12,8 +12,8 @@ import (
 
 	"github.com/rudderlabs/rudder-server/warehouse/encoding"
 
+	"github.com/rudderlabs/rudder-go-kit/filemanager"
 	backendconfig "github.com/rudderlabs/rudder-server/backend-config"
-	"github.com/rudderlabs/rudder-server/services/filemanager"
 	"github.com/rudderlabs/rudder-server/utils/misc"
 	"github.com/rudderlabs/rudder-server/warehouse/integrations/manager"
 	"github.com/rudderlabs/rudder-server/warehouse/internal/model"
@@ -267,7 +267,7 @@ func (os *objectStorage) Validate(ctx context.Context) error {
 	var (
 		tempPath string
 		err      error
-		uploadObject filemanager.UploadOutput
+		uploadObject filemanager.UploadedFile
 	)
 
 	if tempPath, err = CreateTempLoadFile(os.destination); err != nil {
@@ -333,7 +333,7 @@ func (lt *loadTable) Validate(ctx context.Context) error {
 		loadFileType = warehouseutils.GetLoadFileType(destinationType)
 		tempPath     string
-		uploadOutput filemanager.UploadOutput
+		uploadOutput filemanager.UploadedFile
 		err          error
 	)
 
@@ -413,10 +413,10 @@ func CreateTempLoadFile(dest *backendconfig.DestinationT) (string, error) {
 	return filePath, nil
 }
 
-func uploadFile(ctx context.Context, dest *backendconfig.DestinationT, filePath string) (filemanager.UploadOutput, error) {
+func uploadFile(ctx context.Context, dest *backendconfig.DestinationT, filePath string) (filemanager.UploadedFile, error) {
 	var (
 		err    error
-		output filemanager.UploadOutput
+		output filemanager.UploadedFile
 		fm     filemanager.FileManager
 
 		uploadFile *os.File
@@ -425,11 +425,11 @@ func uploadFile(ctx context.Context, dest *backendconfig.DestinationT, filePath
 	)
 
 	if fm, err = createFileManager(dest); err != nil {
-		return filemanager.UploadOutput{}, err
+		return filemanager.UploadedFile{}, err
 	}
 
 	if uploadFile, err = os.Open(filePath); err != nil {
-		return filemanager.UploadOutput{}, fmt.Errorf("opening file: %w", err)
+		return filemanager.UploadedFile{}, fmt.Errorf("opening file: %w", err)
 	}
 
 	// cleanup
@@ -437,7 +437,7 @@ func uploadFile(ctx context.Context, dest *backendconfig.DestinationT, filePath
 	defer func() { _ = uploadFile.Close() }()
 
 	if output, err = fm.Upload(ctx, uploadFile, prefixes...); err != nil {
-		return filemanager.UploadOutput{}, fmt.Errorf("uploading file: %w", err)
+		return filemanager.UploadedFile{}, fmt.Errorf("uploading file: %w", err)
 	}
 
 	return output, nil
@@ -496,7 +496,7 @@ func createFileManager(dest *backendconfig.DestinationT) (filemanager.FileManage
 		provider = warehouseutils.ObjectStorageType(destType, conf, misc.IsConfiguredToUseRudderObjectStorage(conf))
 	)
 
-	fileManager, err := fileManagerFactory.New(&filemanager.SettingsT{
+	fileManager, err := fileManagerFactory(&filemanager.Settings{
 		Provider: provider,
 		Config: misc.GetObjectStorageConfig(misc.ObjectStorageOptsT{
 			Provider: provider,
diff --git a/warehouse/validations/validations.go b/warehouse/validations/validations.go
index ac295124f6b..5786962079a 100644
--- a/warehouse/validations/validations.go
+++ b/warehouse/validations/validations.go
@@ -11,8 +11,8 @@ import (
 	"github.com/rudderlabs/rudder-server/warehouse/internal/model"
 
 	"github.com/rudderlabs/rudder-go-kit/config"
+	"github.com/rudderlabs/rudder-go-kit/filemanager"
 	"github.com/rudderlabs/rudder-go-kit/logger"
-	"github.com/rudderlabs/rudder-server/services/filemanager"
 	"github.com/rudderlabs/rudder-server/utils/misc"
 )
@@ -24,7 +24,7 @@ const (
 
 var (
 	connectionTestingFolder        string
 	pkgLogger                      logger.Logger
-	fileManagerFactory             filemanager.FileManagerFactory
+	fileManagerFactory             filemanager.Factory
 	objectStorageValidationTimeout time.Duration
 )
@@ -49,7 +49,7 @@ type validationFunc struct {
 
 func Init() {
 	connectionTestingFolder = config.GetString("RUDDER_CONNECTION_TESTING_BUCKET_FOLDER_NAME", misc.RudderTestPayload)
 	pkgLogger = logger.NewLogger().Child("warehouse").Child("validations")
-	fileManagerFactory = filemanager.DefaultFileManagerFactory
+	fileManagerFactory = filemanager.New
 	objectStorageValidationTimeout = 15 * time.Second
 }
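The validation and utils hunks above migrate from `rudder-server/services/filemanager` (`DefaultFileManagerFactory`, `SettingsT`, `UploadOutput`) to the `rudder-go-kit` `filemanager` package (`filemanager.New`, `filemanager.Settings`, `filemanager.UploadedFile`). A hedged sketch of the new construction and upload path, mirroring the calls visible in these hunks; the provider, config map, file name, and prefix are placeholders:

```go
package main

import (
	"context"
	"fmt"
	"os"

	"github.com/rudderlabs/rudder-go-kit/filemanager"
)

func main() {
	// filemanager.New replaces DefaultFileManagerFactory.New, and Settings
	// replaces SettingsT. "S3" and the config map are illustrative values.
	fm, err := filemanager.New(&filemanager.Settings{
		Provider: "S3",
		Config: map[string]interface{}{
			"bucketName": "my-bucket",
		},
	})
	if err != nil {
		fmt.Println("creating file manager:", err)
		return
	}

	f, err := os.Open("load-file.csv.gz")
	if err != nil {
		fmt.Println("opening file:", err)
		return
	}
	defer func() { _ = f.Close() }()

	// Upload now returns a filemanager.UploadedFile (previously UploadOutput).
	uploaded, err := fm.Upload(context.Background(), f, "rudder-test-payload")
	fmt.Println(uploaded, err)
}
```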
"github.com/cenkalti/backoff/v4" "github.com/rudderlabs/rudder-go-kit/config" + "github.com/rudderlabs/rudder-go-kit/filemanager" kithttputil "github.com/rudderlabs/rudder-go-kit/httputil" "github.com/rudderlabs/rudder-go-kit/logger" "github.com/rudderlabs/rudder-go-kit/stats" @@ -39,7 +40,6 @@ import ( "github.com/rudderlabs/rudder-server/rruntime" "github.com/rudderlabs/rudder-server/services/controlplane" "github.com/rudderlabs/rudder-server/services/db" - "github.com/rudderlabs/rudder-server/services/filemanager" "github.com/rudderlabs/rudder-server/services/pgnotifier" migrator "github.com/rudderlabs/rudder-server/services/sql-migrator" "github.com/rudderlabs/rudder-server/services/validators" @@ -50,6 +50,7 @@ import ( cpclient "github.com/rudderlabs/rudder-server/warehouse/client/controlplane" "github.com/rudderlabs/rudder-server/warehouse/integrations/deltalake" "github.com/rudderlabs/rudder-server/warehouse/integrations/manager" + "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" "github.com/rudderlabs/rudder-server/warehouse/internal/api" "github.com/rudderlabs/rudder-server/warehouse/internal/loadfiles" "github.com/rudderlabs/rudder-server/warehouse/internal/model" @@ -65,6 +66,8 @@ var ( application app.App webPort int dbHandle *sql.DB + wrappedDBHandle *sqlquerywrapper.DB + dbHanndleTimeout time.Duration notifier pgnotifier.PGNotifier tenantManager *multitenant.Manager controlPlaneClient *controlplane.Client @@ -207,6 +210,7 @@ func loadConfig() { config.RegisterIntConfigVariable(8, &maxParallelJobCreation, true, 1, "Warehouse.maxParallelJobCreation") config.RegisterBoolConfigVariable(false, &enableJitterForSyncs, true, "Warehouse.enableJitterForSyncs") config.RegisterDurationConfigVariable(30, &tableCountQueryTimeout, true, time.Second, []string{"Warehouse.tableCountQueryTimeout", "Warehouse.tableCountQueryTimeoutInS"}...) + config.RegisterDurationConfigVariable(5, &dbHanndleTimeout, true, time.Minute, []string{"Warehouse.dbHanndleTimeout", "Warehouse.dbHanndleTimeoutInMin"}...) appName = misc.DefaultString("rudder-server").OnError(os.Hostname()) } @@ -870,10 +874,10 @@ func (wh *HandleT) Setup(ctx context.Context, whType string) error { wh.dbHandle = dbHandle // We now have access to the warehouseDBHandle through // which we will be running the db calls. 
- wh.warehouseDBHandle = NewWarehouseDB(dbHandle) - wh.stagingRepo = repo.NewStagingFiles(dbHandle) - wh.uploadRepo = repo.NewUploads(dbHandle) - wh.whSchemaRepo = repo.NewWHSchemas(dbHandle) + wh.warehouseDBHandle = NewWarehouseDB(wrappedDBHandle) + wh.stagingRepo = repo.NewStagingFiles(wrappedDBHandle) + wh.uploadRepo = repo.NewUploads(wrappedDBHandle) + wh.whSchemaRepo = repo.NewWHSchemas(wrappedDBHandle) wh.notifier = notifier wh.destType = whType @@ -897,11 +901,11 @@ func (wh *HandleT) Setup(ctx context.Context, whType string) error { loadFile: &loadfiles.LoadFileGenerator{ Logger: pkgLogger.Child("loadfile"), Notifier: ¬ifier, - StageRepo: repo.NewStagingFiles(dbHandle), - LoadRepo: repo.NewLoadFiles(dbHandle), + StageRepo: repo.NewStagingFiles(wrappedDBHandle), + LoadRepo: repo.NewLoadFiles(wrappedDBHandle), ControlPlaneClient: controlPlaneClient, }, - recovery: service.NewRecovery(whType, repo.NewUploads(dbHandle)), + recovery: service.NewRecovery(whType, repo.NewUploads(wrappedDBHandle)), } loadfiles.WithConfig(wh.uploadJobFactory.loadFile, config.Default) @@ -995,7 +999,7 @@ func minimalConfigSubscriber(ctx context.Context) { wh := &HandleT{ dbHandle: dbHandle, destType: destination.DestinationDefinition.Name, - whSchemaRepo: repo.NewWHSchemas(dbHandle), + whSchemaRepo: repo.NewWHSchemas(wrappedDBHandle), conf: config.Default, } namespace := wh.getNamespace(ctx, source, destination) @@ -1185,7 +1189,7 @@ func pendingEventsHandler(w http.ResponseWriter, r *http.Request) { // check whether there are any pending staging files or uploads for the given source id // get pending staging files - pendingStagingFileCount, err = repo.NewStagingFiles(dbHandle).CountPendingForSource(ctx, sourceID) + pendingStagingFileCount, err = repo.NewStagingFiles(wrappedDBHandle).CountPendingForSource(ctx, sourceID) if err != nil { err := fmt.Errorf("error getting pending staging file count : %v", err) pkgLogger.Errorf("[WH]: %v", err) @@ -1285,7 +1289,7 @@ func pendingEventsHandler(w http.ResponseWriter, r *http.Request) { func getFilteredCount(ctx context.Context, filters ...repo.FilterBy) (int64, error) { pkgLogger.Debugf("fetching filtered count") - return repo.NewUploads(dbHandle).Count(ctx, filters...) + return repo.NewUploads(wrappedDBHandle).Count(ctx, filters...) 
} func getPendingUploadCount(filters ...warehouseutils.FilterBy) (uploadCount int64, err error) { @@ -1310,7 +1314,7 @@ func getPendingUploadCount(filters ...warehouseutils.FilterBy) (uploadCount int6 args = append(args, filter.Value) } - err = dbHandle.QueryRow(query, args...).Scan(&uploadCount) + err = wrappedDBHandle.QueryRow(query, args...).Scan(&uploadCount) if err != nil && err != sql.ErrNoRows { err = fmt.Errorf("query: %s failed with Error : %w", query, err) return @@ -1436,7 +1440,7 @@ func fetchTablesHandler(w http.ResponseWriter, r *http.Request) { return } - schemaRepo := repo.NewWHSchemas(dbHandle) + schemaRepo := repo.NewWHSchemas(wrappedDBHandle) tables, err := schemaRepo.GetTablesForConnection(ctx, connectionsTableRequest.Connections) if err != nil { pkgLogger.Errorf("[WH]: Error fetching tables: %v", err) @@ -1556,7 +1560,7 @@ func startWebHandler(ctx context.Context) error { srvMux.Handle("/v1/process", (&api.WarehouseAPI{ Logger: pkgLogger, Stats: stats.Default, - Repo: repo.NewStagingFiles(dbHandle), + Repo: repo.NewStagingFiles(wrappedDBHandle), Multitenant: tenantManager, }).Handler()) @@ -1571,7 +1575,9 @@ func startWebHandler(ctx context.Context) error { srvMux.Get("/v1/warehouse/jobs/status", asyncWh.StatusWarehouseJobHandler) // FIXME: add degraded mode // fetch schema info + // TODO: Remove this endpoint once sources change is released srvMux.Get("/v1/warehouse/fetch-tables", fetchTablesHandler) + srvMux.Get("/internal/v1/warehouse/fetch-tables", fetchTablesHandler) pkgLogger.Infof("WH: Starting warehouse master service in %d", webPort) } else { @@ -1638,6 +1644,12 @@ func setupDB(ctx context.Context, connInfo string) error { return fmt.Errorf("could not ping WH db: %w", err) } + wrappedDBHandle = sqlquerywrapper.New( + dbHandle, + sqlquerywrapper.WithLogger(pkgLogger.Child("dbHandle")), + sqlquerywrapper.WithQueryTimeout(dbHanndleTimeout), + ) + return setupTables(dbHandle) } @@ -1776,7 +1788,7 @@ func Start(ctx context.Context, app app.App) error { DB: dbHandle, Stats: stats.Default, Logger: pkgLogger.Child("archiver"), - FileManager: filemanager.DefaultFileManagerFactory, + FileManager: filemanager.New, Multitenant: tenantManager, } g.Go(misc.WithBugsnagForWarehouse(func() error { diff --git a/warehouse/warehouse_test.go b/warehouse/warehouse_test.go index 4d3e3415cfa..934dc936584 100644 --- a/warehouse/warehouse_test.go +++ b/warehouse/warehouse_test.go @@ -8,6 +8,8 @@ import ( "time" backendconfig "github.com/rudderlabs/rudder-server/backend-config" + "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" + sqlmiddleware "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper" "github.com/rudderlabs/rudder-server/warehouse/internal/repo" "github.com/rudderlabs/rudder-server/warehouse/multitenant" @@ -150,11 +152,11 @@ func TestUploadJob_ProcessingStats(t *testing.T) { destType: tc.destType, stats: store, dbHandle: pgResource.DB, - whSchemaRepo: repo.NewWHSchemas(pgResource.DB), + whSchemaRepo: repo.NewWHSchemas(sqlquerywrapper.New(pgResource.DB)), } tenantManager = &multitenant.Manager{} - jobStats, err := repo.NewUploads(pgResource.DB, repo.WithNow(func() time.Time { + jobStats, err := repo.NewUploads(sqlmiddleware.New(pgResource.DB), repo.WithNow(func() time.Time { // nowSQL := "'2022-12-06 22:00:00'" return time.Date(2022, 12, 6, 22, 0, 0, 0, time.UTC) })).UploadJobsStats(ctx, tc.destType, repo.ProcessOptions{ @@ -343,7 +345,7 @@ func Test_GetNamespace(t *testing.T) { destType: tc.destType, 
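In `setupDB` above the raw `*sql.DB` is wrapped by the warehouse `sqlquerywrapper` middleware, so warehouse queries pick up a child logger and the `Warehouse.dbHanndleTimeout` query timeout (5 minutes by default). A hedged, stand-alone sketch of that wiring using the constructor and options as they appear in the hunk; the driver import, DSN, and query are assumptions for the example:

```go
package main

import (
	"database/sql"
	"log"
	"time"

	_ "github.com/lib/pq" // Postgres driver, assumed for the example

	"github.com/rudderlabs/rudder-go-kit/logger"
	sqlquerywrapper "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper"
)

func main() {
	// Placeholder DSN; the real value comes from the warehouse connection settings.
	db, err := sql.Open("postgres", "postgres://user:pass@localhost:5432/warehouse?sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}

	// Wrap the handle the way setupDB does: the middleware attaches a logger
	// and applies a default per-query timeout.
	wrappedDB := sqlquerywrapper.New(
		db,
		sqlquerywrapper.WithLogger(logger.NewLogger().Child("dbHandle")),
		sqlquerywrapper.WithQueryTimeout(5*time.Minute),
	)

	// QueryRow mirrors the getPendingUploadCount call in the hunk above.
	var count int64
	if err := wrappedDB.QueryRow(`SELECT COUNT(*) FROM wh_uploads`).Scan(&count); err != nil {
		log.Fatal(err)
	}
	log.Println("uploads:", count)
}
```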
diff --git a/warehouse/warehouse_test.go b/warehouse/warehouse_test.go
index 4d3e3415cfa..934dc936584 100644
--- a/warehouse/warehouse_test.go
+++ b/warehouse/warehouse_test.go
@@ -8,6 +8,8 @@ import (
 	"time"
 
 	backendconfig "github.com/rudderlabs/rudder-server/backend-config"
+	"github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper"
+	sqlmiddleware "github.com/rudderlabs/rudder-server/warehouse/integrations/middleware/sqlquerywrapper"
 	"github.com/rudderlabs/rudder-server/warehouse/internal/repo"
 	"github.com/rudderlabs/rudder-server/warehouse/multitenant"
@@ -150,11 +152,11 @@ func TestUploadJob_ProcessingStats(t *testing.T) {
 				destType:     tc.destType,
 				stats:        store,
 				dbHandle:     pgResource.DB,
-				whSchemaRepo: repo.NewWHSchemas(pgResource.DB),
+				whSchemaRepo: repo.NewWHSchemas(sqlquerywrapper.New(pgResource.DB)),
 			}
 
 			tenantManager = &multitenant.Manager{}
-			jobStats, err := repo.NewUploads(pgResource.DB, repo.WithNow(func() time.Time {
+			jobStats, err := repo.NewUploads(sqlmiddleware.New(pgResource.DB), repo.WithNow(func() time.Time {
 				// nowSQL := "'2022-12-06 22:00:00'"
 				return time.Date(2022, 12, 6, 22, 0, 0, 0, time.UTC)
 			})).UploadJobsStats(ctx, tc.destType, repo.ProcessOptions{
@@ -343,7 +345,7 @@ func Test_GetNamespace(t *testing.T) {
 				destType:     tc.destType,
 				stats:        store,
 				dbHandle:     pgResource.DB,
-				whSchemaRepo: repo.NewWHSchemas(pgResource.DB),
+				whSchemaRepo: repo.NewWHSchemas(sqlquerywrapper.New(pgResource.DB)),
 				conf:         conf,
 			}
 			if tc.setConfig {