Added read_yaml_from_s3 function and test
AntFMoJ committed Mar 23, 2022
1 parent 011f728 commit f4f46d1
Showing 2 changed files with 50 additions and 11 deletions.
42 changes: 32 additions & 10 deletions dataengineeringutils3/s3.py
@@ -1,14 +1,14 @@
import boto3
import botocore
import gzip
from io import StringIO
import json
import os
import yaml

from io import StringIO
from pathlib import Path
from typing import Union

import boto3
import botocore


def gzip_string_write_to_s3(file_as_string, s3_path):
"""
Writes StringIO to s3 path as gzipped output
@@ -84,19 +84,29 @@ def get_filepaths_from_s3_folder(
return paths


def read_json_from_s3(s3_path, encoding="utf-8", *args, **kwargs):
def get_object_body(s3_path: str, encoding: str="utf-8") -> str:
"""
Reads a json from the provided s3 path
Gets object body from file in S3
:param s3_path: "s3://...."
:param encoding: File type encoding (utf-8 default)
:param *args: Passed to json.loads call
:param **kwargs: Passed to json.loads call
:return: data from the json
:return: decoded string data from S3
"""
s3_resource = boto3.resource("s3")
bucket, key = s3_path_to_bucket_key(s3_path)
obj = s3_resource.Object(bucket, key)
text = obj.get()["Body"].read().decode(encoding)
return text

def read_json_from_s3(s3_path: str, encoding: str="utf-8", *args, **kwargs) -> dict:
"""
Reads a json from the provided s3 path
:param s3_path: "s3://...."
:param encoding: File type encoding (utf-8 default)
:param *args: Passed to json.loads call
:param **kwargs: Passed to json.loads call
:return: data from the json
"""
text = get_object_body(s3_path, encoding)
return json.loads(text, *args, **kwargs)


@@ -117,6 +127,18 @@ def write_json_to_s3(data, s3_path, *args, **kwargs):
log_upload_resp = log_obj.put(Body=log_file.getvalue())
return log_upload_resp

def read_yaml_from_s3(s3_path: str, encoding: str="utf-8", *args, **kwargs) -> dict:
"""
Reads a yaml file from the provided s3 path
:param s3_path: "s3://...."
:param encoding: File type encoding (utf-8 default)
:param *args: Passed to yaml.safe_load call
:param **kwargs: Passed to yaml.safe_load call
:return: data from the yaml
"""
text = get_object_body(s3_path, encoding)
return yaml.safe_load(text, *args, **kwargs)


def copy_s3_folder_contents_to_new_folder(
from_s3_folder_path, to_s3_folder_path, exclude_zero_byte_files=False
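For reference, a minimal usage sketch of the new helpers. The bucket, key and file contents below are hypothetical and do not appear in the commit; the sketch only shows how the refactor is meant to be used: get_object_body fetches and decodes the object, and read_yaml_from_s3 / read_json_from_s3 differ only in the parser applied to that string.

import yaml

from dataengineeringutils3.s3 import get_object_body, read_yaml_from_s3

# Hypothetical S3 path, purely for illustration
settings = read_yaml_from_s3("s3://my-bucket/config/settings.yaml")

# The shared helper returns the decoded text and leaves parsing to the caller
raw_text = get_object_body("s3://my-bucket/config/settings.yaml", encoding="utf-8")
assert settings == yaml.safe_load(raw_text)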
19 changes: 18 additions & 1 deletion tests/test_s3.py
@@ -1,16 +1,17 @@
import gzip
import io
import os
from pathlib import Path
import pytest
import json
import yaml

from dataengineeringutils3.s3 import (
s3_path_to_bucket_key,
gzip_string_write_to_s3,
get_filepaths_from_s3_folder,
read_json_from_s3,
write_json_to_s3,
read_yaml_from_s3,
copy_s3_folder_contents_to_new_folder,
delete_s3_object,
delete_s3_folder_contents,
@@ -21,6 +22,7 @@
write_s3_file_to_local,
write_s3_folder_to_local,
)
from pathlib import Path

bucket_name = "test"

@@ -100,6 +102,21 @@ def test_read_json_from_s3(s3, bucket):

assert read_json_from_s3("s3://test/f1/agfa/file_no_ext") == test_dict

def test_read_yaml_from_s3(s3):

test_dict = {"foo": "bar"}
body = yaml.dump(test_dict)
files = [
{"folder": "f1", "key": "my_file.json", "body": body},
{"folder": "f1/agfa", "key": "file_no_ext", "body": body},
]
for f in files:
s3.Object(bucket_name, f["folder"] + "/" + f["key"]).put(Body=f["body"])

assert read_yaml_from_s3("s3://test/f1/my_file.json") == test_dict

assert read_yaml_from_s3("s3://test/f1/agfa/file_no_ext") == test_dict


def test_write_json_s3(s3, bucket):
test_dict = {"foo": "bar", "something": 0}
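The new test mirrors test_read_json_from_s3: it dumps a dict to a YAML string, puts it into the mocked bucket under two keys, and asserts that read_yaml_from_s3 returns the original dict for both. The round trip it depends on can be sketched without S3 at all (the dict below is illustrative):

import yaml

test_dict = {"foo": "bar"}
body = yaml.dump(test_dict)               # serialise the dict to a YAML string
assert yaml.safe_load(body) == test_dict  # safe_load restores the same structure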
