Destination Redshift - additional check method check, fix s3 file deletion (airbytehq#34186)

Signed-off-by: Gireesh Sreepathi <gisripa@gmail.com>
Co-authored-by: Sitaram Shelke <sitaramshelke@gmail.com>
Co-authored-by: Marcos Marx <marcosmarxm@users.noreply.github.com>
Co-authored-by: Cynthia Yin <cynthia@airbyte.io>
Co-authored-by: Baz <oleksandr.bazarnov@globallogic.com>
Co-authored-by: bazarnov <bazarnov@users.noreply.github.com>
Co-authored-by: kekiss <karaekiss@yahoo.com>
Co-authored-by: Anatolii Yatsuk <35109939+tolik0@users.noreply.github.com>
Co-authored-by: Edward Gao <edward.gao@airbyte.io>
Co-authored-by: Augustin <augustin@airbyte.io>
Co-authored-by: Joe Reuter <joe@airbyte.io>
Co-authored-by: Alexandre Cuoci <Hesperide@users.noreply.github.com>
Co-authored-by: perangel <perangel@gmail.com>
Co-authored-by: Aaron ("AJ") Steers <aj@airbyte.io>
Co-authored-by: Ben Church <ben@airbyte.io>
Co-authored-by: Gireesh Sreepathi <gisripa@gmail.com>
16 people authored and jatinyadav-cc committed Feb 26, 2024
1 parent 88ed8a0 commit 72d34ae
Showing 18 changed files with 82 additions and 91 deletions.
1 change: 1 addition & 0 deletions airbyte-cdk/java/airbyte-cdk/README.md
@@ -166,6 +166,7 @@ MavenLocal debugging steps:

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:-----------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 0.12.1 | 2024-01-11 | [\#34186](https://github.com/airbytehq/airbyte/pull/34186) | Add hook for additional destination specific checks to JDBC destination check method |
| 0.12.0 | 2024-01-10 | [\#33875](https://github.com/airbytehq/airbyte/pull/33875) | Upgrade sshd-mina to 2.11.1 |
| 0.11.5 | 2024-01-10 | [\#34119](https://github.com/airbytehq/airbyte/pull/34119) | Remove wal2json support for postgres+debezium. |
| 0.11.4 | 2024-01-09 | [\#33305](https://github.com/airbytehq/airbyte/pull/33305) | Source stats in incremental syncs |
@@ -1 +1 @@
version=0.12.0
version=0.12.1
@@ -92,6 +92,7 @@ public AirbyteConnectionStatus check(final JsonNode config) {
final var v2RawSchema = namingResolver.getIdentifier(TypingAndDedupingFlag.getRawNamespaceOverride(RAW_SCHEMA_OVERRIDE)
.orElse(JavaBaseConstants.DEFAULT_AIRBYTE_INTERNAL_NAMESPACE));
attemptTableOperations(v2RawSchema, database, namingResolver, sqlOperations, false);
destinationSpecificTableOperations(database);
}
return new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED);
} catch (final ConnectionErrorException ex) {
@@ -114,6 +115,15 @@ public AirbyteConnectionStatus check(final JsonNode config) {
}
}

/**
* Specific databases may have additional checks unique to them which they need to perform;
* override this method to add those checks.
*
* @param database the database to run checks against
* @throws Exception if a destination-specific check fails
*/
protected void destinationSpecificTableOperations(final JdbcDatabase database) throws Exception {}

/**
* This method is deprecated. It verifies table creation, but not insert rights to a newly created
* table. Use attemptTableOperations with the attemptInsert argument instead.
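The destinationSpecificTableOperations hook added above gives JDBC destinations a place to plug vendor-specific checks into the generic check() flow. A minimal sketch of an override for a hypothetical destination that probes a vendor system view up front; the class, the view name, and the JdbcDatabase.execute(String) call are illustrative assumptions, not code from this commit:

```java
// Sketch only: the constructor and the other abstract members of
// AbstractJdbcDestination are omitted, and the system view is made up.
public class ExampleJdbcDestination extends AbstractJdbcDestination {

  @Override
  protected void destinationSpecificTableOperations(final JdbcDatabase database) throws Exception {
    // Any exception thrown here bubbles up through check() and is reported as a
    // failed AirbyteConnectionStatus, just like the generic table checks.
    database.execute("SELECT 1 FROM example_system_view LIMIT 1;");
  }

}
```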
@@ -13,6 +13,7 @@
import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.locks.Lock;
import lombok.extern.slf4j.Slf4j;

@@ -22,6 +23,16 @@
@Slf4j
public class GeneralStagingFunctions {

// using a random string here as a placeholder for the moment.
// This would avoid mixing data in the staging area between different syncs (especially if they
// manipulate streams with similar names)
// if we replaced the random connection id by the actual connection_id, we'd gain the opportunity to
// leverage data that was uploaded to stage
// in a previous attempt but failed to load to the warehouse for some reason (interrupted?) instead.
// This would also allow other programs/scripts
// to load (or reload backups?) in the connection's staging area to be loaded at the next sync.
public static final UUID RANDOM_CONNECTION_ID = UUID.randomUUID();

public static OnStartFunction onStartFunction(final JdbcDatabase database,
final StagingOperations stagingOperations,
final List<WriteConfig> writeConfigs,
@@ -34,7 +45,6 @@ public static OnStartFunction onStartFunction(final JdbcDatabase database,
final String schema = writeConfig.getOutputSchemaName();
final String stream = writeConfig.getStreamName();
final String dstTableName = writeConfig.getOutputTableName();
final String stageName = stagingOperations.getStageName(schema, dstTableName);
final String stagingPath =
stagingOperations.getStagingPath(SerialStagingConsumerFactory.RANDOM_CONNECTION_ID, schema, stream, writeConfig.getOutputTableName(),
writeConfig.getWriteDatetime());
@@ -44,7 +54,7 @@ public static OnStartFunction onStartFunction(final JdbcDatabase database,

stagingOperations.createSchemaIfNotExists(database, schema);
stagingOperations.createTableIfNotExists(database, schema, dstTableName);
stagingOperations.createStageIfNotExists(database, stageName);
stagingOperations.createStageIfNotExists();

/*
* When we're in OVERWRITE, clear out the table at the start of a sync, this is an expected side
@@ -68,7 +78,6 @@ public static OnStartFunction onStartFunction(final JdbcDatabase database,
* upload was unsuccessful
*/
public static void copyIntoTableFromStage(final JdbcDatabase database,
final String stageName,
final String stagingPath,
final List<String> stagedFiles,
final String tableName,
@@ -83,7 +92,7 @@ public static void copyIntoTableFromStage(final JdbcDatabase database,
final Lock rawTableInsertLock = typerDeduper.getRawTableInsertLock(streamNamespace, streamName);
rawTableInsertLock.lock();
try {
stagingOperations.copyIntoTableFromStage(database, stageName, stagingPath, stagedFiles,
stagingOperations.copyIntoTableFromStage(database, stagingPath, stagedFiles,
tableName, schemaName);
} finally {
rawTableInsertLock.unlock();
@@ -96,8 +105,6 @@ public static void copyIntoTableFromStage(final JdbcDatabase database,
typerDeduperValve.updateTimeAndIncreaseInterval(streamId);
}
} catch (final Exception e) {
stagingOperations.cleanUpStage(database, stageName, stagedFiles);
log.info("Cleaning stage path {}", stagingPath);
throw new RuntimeException("Failed to upload data from stage " + stagingPath, e);
}
}
@@ -124,10 +131,15 @@ public static OnCloseFunction onCloseFunction(final JdbcDatabase database,
for (final WriteConfig writeConfig : writeConfigs) {
final String schemaName = writeConfig.getOutputSchemaName();
if (purgeStagingData) {
final String stageName = stagingOperations.getStageName(schemaName, writeConfig.getOutputTableName());
final String stagePath = stagingOperations.getStagingPath(
RANDOM_CONNECTION_ID,
schemaName,
writeConfig.getStreamName(),
writeConfig.getOutputTableName(),
writeConfig.getWriteDatetime());
log.info("Cleaning stage in destination started for stream {}. schema {}, stage: {}", writeConfig.getStreamName(), schemaName,
stageName);
stagingOperations.dropStageIfExists(database, stageName);
stagePath);
stagingOperations.dropStageIfExists(database, stagePath);
}
}
typerDeduper.commitFinalTables();
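The onClose change above replaces the stage-wide dropStageIfExists(database, stageName) call with one scoped to the staging path built from GeneralStagingFunctions.RANDOM_CONNECTION_ID, the stream, and the write datetime, which lines up with the "s3 file deletion" fix in the commit title: purging now targets only the files this sync staged. A sketch of how such a path might be assembled, purely for illustration, since each destination defines its own layout in its StagingOperations implementation:

```java
import java.util.UUID;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

public class StagingPathExample {

  // Illustrative layout only. The point is that the random connection id and the
  // write datetime scope the prefix to a single sync, so dropping that prefix at
  // close time cannot touch files staged by other connections or other attempts.
  static String getStagingPath(final UUID connectionId,
                               final String namespace,
                               final String streamName,
                               final String outputTableName,
                               final DateTime writeDatetime) {
    return String.format("%s/%s/%s/%s/%s/",
        namespace, streamName, outputTableName,
        writeDatetime.toString("yyyy/MM/dd/HH"), connectionId);
  }

  public static void main(final String[] args) {
    final DateTime now = DateTime.now(DateTimeZone.UTC);
    System.out.println(getStagingPath(UUID.randomUUID(), "public", "users", "_airbyte_raw_users", now));
  }

}
```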
@@ -81,15 +81,14 @@ public static FlushBufferFunction function(

final WriteConfig writeConfig = pairToWriteConfig.get(pair);
final String schemaName = writeConfig.getOutputSchemaName();
final String stageName = stagingOperations.getStageName(schemaName, writeConfig.getOutputTableName());
final String stagingPath =
stagingOperations.getStagingPath(
SerialStagingConsumerFactory.RANDOM_CONNECTION_ID, schemaName, writeConfig.getStreamName(),
writeConfig.getOutputTableName(), writeConfig.getWriteDatetime());
try (writer) {
writer.flush();
final String stagedFile = stagingOperations.uploadRecordsToStage(database, writer, schemaName, stageName, stagingPath);
GeneralStagingFunctions.copyIntoTableFromStage(database, stageName, stagingPath, List.of(stagedFile), writeConfig.getOutputTableName(),
final String stagedFile = stagingOperations.uploadRecordsToStage(database, writer, schemaName, stagingPath);
GeneralStagingFunctions.copyIntoTableFromStage(database, stagingPath, List.of(stagedFile), writeConfig.getOutputTableName(),
schemaName,
stagingOperations,
writeConfig.getNamespace(),
@@ -18,15 +18,6 @@
*/
public interface StagingOperations extends SqlOperations {

/**
* Returns the staging environment's name
*
* @param namespace Name of schema
* @param streamName Name of the stream
* @return Fully qualified name of the staging environment
*/
String getStageName(String namespace, String streamName);

/**
* @param outputTableName The name of the table this staging file will be loaded into (typically a
* raw table). Not all destinations use the table name in the staging path (e.g. Snowflake
@@ -37,48 +28,36 @@ public interface StagingOperations extends SqlOperations {
/**
* Create a staging folder where to upload temporary files before loading into the final destination
*/
void createStageIfNotExists(JdbcDatabase database, String stageName) throws Exception;
void createStageIfNotExists() throws Exception;

/**
* Upload the data file into the stage area.
*
* @param database database used for syncing
* @param recordsData records stored in in-memory buffer
* @param schemaName name of schema
* @param stageName name of the staging area folder
* @param stagingPath path of staging folder to data files
* @return the name of the file that was uploaded.
*/
String uploadRecordsToStage(JdbcDatabase database, SerializableBuffer recordsData, String schemaName, String stageName, String stagingPath)
String uploadRecordsToStage(JdbcDatabase database, SerializableBuffer recordsData, String schemaName, String stagingPath)
throws Exception;

/**
* Load the data stored in the stage area into a temporary table in the destination
*
* @param database database interface
* @param stageName name of staging area folder
* @param stagingPath path to staging files
* @param stagedFiles collection of staged files
* @param tableName name of table to write staging files to
* @param schemaName name of schema
*/
void copyIntoTableFromStage(JdbcDatabase database,
String stageName,
String stagingPath,
List<String> stagedFiles,
String tableName,
String schemaName)
throws Exception;

/**
* Remove files that were just staged
*
* @param database database used for syncing
* @param stageName name of staging area folder
* @param stagedFiles collection of the staging files to remove
*/
void cleanUpStage(JdbcDatabase database, String stageName, List<String> stagedFiles) throws Exception;

/**
* Delete the stage area and all staged files that were in it
*
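With getStageName and cleanUpStage removed, the interface leaves a smaller surface: create the stage, upload a buffer to a staging path, copy the staged files into the destination table, and drop the path at close. A sketch of that flow against the new signatures; imports, the enclosing class, error handling, and the typing/deduping locks from GeneralStagingFunctions are omitted because they vary by CDK version:

```java
// Sketch of the staging flow after this change.
void stageAndLoad(final JdbcDatabase database,
                  final StagingOperations stagingOperations,
                  final SerializableBuffer buffer,
                  final String schemaName,
                  final String tableName,
                  final String stagingPath) throws Exception {
  stagingOperations.createSchemaIfNotExists(database, schemaName);
  stagingOperations.createTableIfNotExists(database, schemaName, tableName);
  stagingOperations.createStageIfNotExists();

  final String stagedFile =
      stagingOperations.uploadRecordsToStage(database, buffer, schemaName, stagingPath);
  stagingOperations.copyIntoTableFromStage(database, stagingPath, List.of(stagedFile),
      tableName, schemaName);

  // cleanUpStage is gone; staged files are now removed only at close time via
  // dropStageIfExists(database, stagingPath) when purging is enabled.
}
```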
@@ -31,7 +31,7 @@ protected BlobStorageOperations() {
*
* @return the name of the file that was uploaded.
*/
public abstract String uploadRecordsToBucket(SerializableBuffer recordsData, String namespace, String streamName, String objectPath)
public abstract String uploadRecordsToBucket(SerializableBuffer recordsData, String namespace, String objectPath)
throws Exception;

/**
@@ -128,7 +128,6 @@ private FlushBufferFunction flushBufferFunction(final BlobStorageOperations stor
writeConfig.addStoredFile(storageOperations.uploadRecordsToBucket(
writer,
writeConfig.getNamespace(),
writeConfig.getStreamName(),
writeConfig.getFullOutputPath()));
} catch (final Exception e) {
LOGGER.error("Failed to flush and upload buffer to storage:", e);
@@ -120,7 +120,6 @@ protected boolean doesBucketExist(final String bucket) {
@Override
public String uploadRecordsToBucket(final SerializableBuffer recordsData,
final String namespace,
final String streamName,
final String objectPath) {
final List<Exception> exceptionsThrown = new ArrayList<>();
while (exceptionsThrown.size() < UPLOAD_RETRY_LIMIT) {
@@ -103,19 +103,17 @@ public void flush(final StreamDescriptor decs, final Stream<PartialAirbyteMessag

final WriteConfig writeConfig = streamDescToWriteConfig.get(decs);
final String schemaName = writeConfig.getOutputSchemaName();
final String stageName = stagingOperations.getStageName(schemaName, writeConfig.getOutputTableName());
final String stagingPath =
stagingOperations.getStagingPath(
StagingConsumerFactory.RANDOM_CONNECTION_ID,
GeneralStagingFunctions.RANDOM_CONNECTION_ID,
schemaName,
writeConfig.getStreamName(),
writeConfig.getOutputTableName(),
writeConfig.getWriteDatetime());
try {
final String stagedFile = stagingOperations.uploadRecordsToStage(database, writer, schemaName, stageName, stagingPath);
final String stagedFile = stagingOperations.uploadRecordsToStage(database, writer, schemaName, stagingPath);
GeneralStagingFunctions.copyIntoTableFromStage(
database,
stageName,
stagingPath,
List.of(stagedFile),
writeConfig.getOutputTableName(),
@@ -33,7 +33,6 @@
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.function.Consumer;
import java.util.function.Function;
import org.joda.time.DateTime;
@@ -49,16 +48,7 @@ public class StagingConsumerFactory extends SerialStagingConsumerFactory {

private static final Logger LOGGER = LoggerFactory.getLogger(StagingConsumerFactory.class);

// using a random string here as a placeholder for the moment.
// This would avoid mixing data in the staging area between different syncs (especially if they
// manipulate streams with similar names)
// if we replaced the random connection id by the actual connection_id, we'd gain the opportunity to
// leverage data that was uploaded to stage
// in a previous attempt but failed to load to the warehouse for some reason (interrupted?) instead.
// This would also allow other programs/scripts
// to load (or reload backups?) in the connection's staging area to be loaded at the next sync.
private static final DateTime SYNC_DATETIME = DateTime.now(DateTimeZone.UTC);
public static final UUID RANDOM_CONNECTION_ID = UUID.randomUUID();

public SerializedAirbyteMessageConsumer createAsync(final Consumer<AirbyteMessage> outputRecordCollector,
final JdbcDatabase database,
@@ -4,12 +4,11 @@ plugins {
}

airbyteJavaConnector {
cdkVersionRequired = '0.12.0'
cdkVersionRequired = '0.12.1'
features = ['db-destinations', 's3-destinations', 'typing-deduping']
useLocalCdk = false
}

//remove once upgrading the CDK version to 0.4.x or later
java {
compileJava {
options.compilerArgs.remove("-Werror")
@@ -5,7 +5,7 @@ data:
connectorSubtype: database
connectorType: destination
definitionId: f7a7d195-377f-cf5b-70a5-be6b819019dc
dockerImageTag: 0.7.14
dockerImageTag: 0.7.15
dockerRepository: airbyte/destination-redshift
documentationUrl: https://docs.airbyte.com/integrations/destinations/redshift
githubIssueLabel: destination-redshift
@@ -22,6 +22,7 @@
import io.airbyte.integrations.destination.redshift.operations.RedshiftSqlOperations;
import io.airbyte.integrations.destination.redshift.typing_deduping.RedshiftDestinationHandler;
import io.airbyte.integrations.destination.redshift.typing_deduping.RedshiftSqlGenerator;
import io.airbyte.integrations.destination.redshift.util.RedshiftUtil;
import java.time.Duration;
import java.util.HashMap;
import java.util.Map;
@@ -60,6 +61,11 @@ public DataSource getDataSource(final JsonNode config) {
Duration.ofMinutes(2));
}

@Override
protected void destinationSpecificTableOperations(final JdbcDatabase database) throws Exception {
RedshiftUtil.checkSvvTableAccess(database);
}

@Override
public JdbcDatabase getDatabase(final DataSource dataSource) {
return new DefaultJdbcDatabase(dataSource);
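RedshiftUtil.checkSvvTableAccess is the destination-specific check wired into the new hook, but its body is not part of this excerpt. One plausible shape, assuming it simply probes the SVV_TABLE_INFO system view (which the Redshift typing/deduping path queries) so that missing grants fail the connection check instead of a running sync; the query and the JdbcDatabase.execute call are assumptions here:

```java
// Hypothetical sketch; the real RedshiftUtil.checkSvvTableAccess may differ.
public static void checkSvvTableAccess(final JdbcDatabase database) throws Exception {
  // A trivial read against SVV_TABLE_INFO surfaces permission problems with the
  // configured Redshift user before any data is moved.
  database.execute("SELECT 1 FROM SVV_TABLE_INFO LIMIT 1;");
}
```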
@@ -51,6 +51,7 @@
import io.airbyte.integrations.destination.redshift.operations.RedshiftSqlOperations;
import io.airbyte.integrations.destination.redshift.typing_deduping.RedshiftDestinationHandler;
import io.airbyte.integrations.destination.redshift.typing_deduping.RedshiftSqlGenerator;
import io.airbyte.integrations.destination.redshift.util.RedshiftUtil;
import io.airbyte.protocol.models.v0.AirbyteConnectionStatus;
import io.airbyte.protocol.models.v0.AirbyteConnectionStatus.Status;
import io.airbyte.protocol.models.v0.AirbyteMessage;
@@ -103,7 +104,8 @@ public AirbyteConnectionStatus check(final JsonNode config) {
try {
final JdbcDatabase database = new DefaultJdbcDatabase(dataSource);
final String outputSchema = super.getNamingResolver().getIdentifier(config.get(JdbcUtils.SCHEMA_KEY).asText());
attemptSQLCreateAndDropTableOperations(outputSchema, database, nameTransformer, redshiftS3StagingSqlOperations);
attemptTableOperations(outputSchema, database, nameTransformer, redshiftS3StagingSqlOperations, false);
RedshiftUtil.checkSvvTableAccess(database);
return new AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.SUCCEEDED);
} catch (final ConnectionErrorException e) {
final String message = getErrorMessage(e.getStateCode(), e.getErrorCode(), e.getExceptionMessage(), e);
