From ba7c8c59a6d52ed1207b10337b0eedf63735cbf0 Mon Sep 17 00:00:00 2001 From: Walter Ngo Date: Wed, 4 Sep 2019 15:59:16 +1000 Subject: [PATCH 1/3] [SP-337] Move method to batch data loader --- rdl/BatchDataLoader.py | 7 ++++++- rdl/data_sources/AWSLambdaDataSource.py | 4 +--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/rdl/BatchDataLoader.py b/rdl/BatchDataLoader.py index e727738..f3d4445 100644 --- a/rdl/BatchDataLoader.py +++ b/rdl/BatchDataLoader.py @@ -61,6 +61,10 @@ def load_batch(self, batch_key_tracker): batch_key_tracker.has_more_data = False return + # replacing unicode null characters because postgres doesn't support null characters in text fields + # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.replace.html + data_frame = data_frame.replace(regex=r"\x00", value="") + data_frame = self.attach_column_transformers(data_frame) self.write_data_frame_to_table(data_frame) batch_tracker.load_completed_successfully() @@ -122,7 +126,8 @@ def write_data_frame_to_table(self, data_frame): f"null '\\N', " f"FORCE_NULL ({column_list}))" ) - self.logger.debug(f"Writing to table using command '{sql}'") + + self.logger.info(f"Writing to table using command '{sql}'") curs.copy_expert(sql=sql, file=data) diff --git a/rdl/data_sources/AWSLambdaDataSource.py b/rdl/data_sources/AWSLambdaDataSource.py index da4f33b..4b25ebd 100644 --- a/rdl/data_sources/AWSLambdaDataSource.py +++ b/rdl/data_sources/AWSLambdaDataSource.py @@ -141,9 +141,7 @@ def __get_table_data( return result["ColumnNames"], data def __get_data_frame(self, data: [[]], column_names: []): - return pandas.DataFrame(data=data, columns=column_names).replace( - regex=r"\x00", value="" - ) + return pandas.DataFrame(data=data, columns=column_names) def __invoke_lambda(self, pay_load): self.logger.debug("\nRequest being sent to Lambda:") From 5f0c2b96109e2775a9c2234a2aee426669ec892d Mon Sep 17 00:00:00 2001 From: Walter Ngo Date: Wed, 4 Sep 2019 16:16:51 +1000 Subject: [PATCH 2/3] [SP-337] Update rdl/BatchDataLoader.py Co-Authored-By: Chintan Raval --- rdl/BatchDataLoader.py | 1 - 1 file changed, 1 deletion(-) diff --git a/rdl/BatchDataLoader.py b/rdl/BatchDataLoader.py index f3d4445..f2866d5 100644 --- a/rdl/BatchDataLoader.py +++ b/rdl/BatchDataLoader.py @@ -126,7 +126,6 @@ def write_data_frame_to_table(self, data_frame): f"null '\\N', " f"FORCE_NULL ({column_list}))" ) - self.logger.info(f"Writing to table using command '{sql}'") curs.copy_expert(sql=sql, file=data) From 3333c69a24fd6d3101b23eb802df8fbfe9cc41f0 Mon Sep 17 00:00:00 2001 From: Walter Ngo Date: Wed, 4 Sep 2019 16:16:57 +1000 Subject: [PATCH 3/3] [SP-337] Update rdl/BatchDataLoader.py Co-Authored-By: Chintan Raval --- rdl/BatchDataLoader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdl/BatchDataLoader.py b/rdl/BatchDataLoader.py index f2866d5..4c700c0 100644 --- a/rdl/BatchDataLoader.py +++ b/rdl/BatchDataLoader.py @@ -126,7 +126,7 @@ def write_data_frame_to_table(self, data_frame): f"null '\\N', " f"FORCE_NULL ({column_list}))" ) - self.logger.info(f"Writing to table using command '{sql}'") + self.logger.debug(f"Writing to table using command '{sql}'") curs.copy_expert(sql=sql, file=data)