# Setup Scratch

In [8]:
import os

# scratch directory is apart of the .gitignore to ensure it is not committed to git
%env SCRATCH=../scratch
! [ -e "${SCRATCH}" ] || mkdir -p "${SCRATCH}"

scratch_path = os.environ.get("SCRATCH", "scratch")

env: SCRATCH=../scratch


In [9]:
# Read the S3 connection settings from the environment; these values are expected to be
# provided by the Data Connection setup. The fallbacks look like a local MinIO development
# setup and should not be relied on anywhere else.

access_key = os.getenv("AWS_ACCESS_KEY_ID", "minioadmin")
secret_key = os.getenv("AWS_SECRET_ACCESS_KEY", "minioadmin")
bucket_name = os.getenv("AWS_S3_BUCKET", "models")

# The endpoint is later handed to the Minio client, which takes a bare "host[:port]".
# Drop any "scheme://" prefix (and a trailing slash) from the *value*. The previous code called
# .lstrip("http://") on the variable name, which left the value untouched, and lstrip() removes
# a set of characters rather than a prefix anyway.
s3_endpoint = os.getenv("AWS_S3_ENDPOINT", "localhost:9000").split("://", 1)[-1].rstrip("/")

In [10]:
# import minio and dependencies
from minio import Minio
import os
import glob
import urllib3

# Silence urllib3's InsecureRequestWarning so it does not clutter the cell output.
# NOTE(review): the client in this notebook is created with secure=False (plain HTTP), so this
# presumably only matters if TLS with certificate verification disabled is used - confirm it is
# still needed.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [11]:
# Build the object-storage client used by every later cell.
# secure=False makes the client talk plain HTTP to the endpoint.
s3_client = Minio(s3_endpoint, access_key=access_key, secret_key=secret_key, secure=False)

In [12]:
def upload_local_directory_to_s3(bucket_name, local_path):
    """Recursively upload the files under ``local_path`` to ``bucket_name``.

    Object keys are the file paths relative to the module-level ``scratch_path``,
    always written with forward slashes (for example
    ``Vegetable Images/test/Capsicum/1179.jpg``). Objects that already exist in the
    bucket are left untouched. Hidden entries (names starting with ``.``) are not
    matched by the glob pattern and are therefore skipped.

    Args:
        bucket_name: Name of the destination bucket; it must already exist.
        local_path: Directory to upload; expected to live inside ``scratch_path``.

    Raises:
        NotADirectoryError: If ``local_path`` is not an existing directory.
        ValueError: If a file resolves to a location outside ``scratch_path``.
        minio.error.S3Error: For any storage error other than "object not found".
    """
    # Imported here so the function carries its own dependency; `minio` is already
    # a requirement of this notebook.
    from minio.error import S3Error

    if not os.path.isdir(local_path):
        raise NotADirectoryError(f"Not a directory: {local_path}")

    # glob.escape keeps characters such as "[" in directory names from being read as
    # wildcards. Only direct children are listed; sub-directories are handled by recursion.
    for local_file in glob.glob(glob.escape(local_path) + "/*"):
        local_file = local_file.replace(os.sep, "/")

        print("local_file: " + local_file)

        if os.path.isdir(local_file):
            upload_local_directory_to_s3(bucket_name, local_file)
            continue
        if not os.path.isfile(local_file):
            # Broken symlink or special file: nothing to upload.
            continue

        # Derive the key with relpath instead of str.lstrip(): lstrip removes a *set of
        # characters*, so a folder such as "train" under "../scratch" lost its leading letters.
        remote_file = os.path.relpath(local_file, scratch_path).replace(os.sep, "/")
        if remote_file == ".." or remote_file.startswith("../"):
            raise ValueError(
                f"'{local_file}' is outside the scratch directory '{scratch_path}'"
            )

        print("remote_file: " + remote_file)

        # stat_object raises (rather than returning a falsy value) when the object is
        # missing, so existence has to be detected through the error code.
        try:
            s3_client.stat_object(bucket_name, remote_file)
        except S3Error as exc:
            if exc.code != "NoSuchKey":
                raise
            s3_client.fput_object(bucket_name, remote_file, local_file)


def download_all_from_s3(local_path):
    """Print the local target path of every object in every bucket.

    This is currently a dry run: each object's name is appended directly to
    ``local_path`` (no separator is inserted) and printed, but nothing is fetched.

    Args:
        local_path: Prefix prepended to each object name.
    """
    targets = (
        local_path + entry.object_name
        for bucket in s3_client.list_buckets()
        for entry in s3_client.list_objects(bucket.name, recursive=True)
    )
    for target in targets:
        print(target)
        # The actual transfer is still disabled:
        # s3_client.fget_object(bucket.name, item.object_name, local_path + item.object_name)

In [13]:
try:
    # Make sure the destination bucket is there before anything is uploaded.
    bucket_missing = not s3_client.bucket_exists(bucket_name)
    if bucket_missing:
        s3_client.make_bucket(bucket_name)
        print(f"Bucket '{bucket_name}' created successfully.")

    # Push the image dataset stored inside the scratch directory.
    dataset_dir = scratch_path + "/Vegetable Images"
    upload_local_directory_to_s3(bucket_name, dataset_dir)

except Exception as exc:
    # Any failure above (bucket check, bucket creation or upload) is reported as a
    # one-line message instead of a traceback.
    print(f"Failed to upload files to bucket '{bucket_name}': {exc}")

local_file: ../scratch/Vegetable Images/test
local_file: ../scratch/Vegetable Images/test/Capsicum
local_file: ../scratch/Vegetable Images/test/Capsicum/1179.jpg
remote_file: Vegetable Images/test/Capsicum/1179.jpg
local_file: ../scratch/Vegetable Images/test/Capsicum/1151.jpg
remote_file: Vegetable Images/test/Capsicum/1151.jpg
local_file: ../scratch/Vegetable Images/test/Capsicum/1145.jpg
remote_file: Vegetable Images/test/Capsicum/1145.jpg
local_file: ../scratch/Vegetable Images/test/Capsicum/1192.jpg
remote_file: Vegetable Images/test/Capsicum/1192.jpg
local_file: ../scratch/Vegetable Images/test/Capsicum/1186.jpg
remote_file: Vegetable Images/test/Capsicum/1186.jpg
local_file: ../scratch/Vegetable Images/test/Capsicum/1019.jpg
remote_file: Vegetable Images/test/Capsicum/1019.jpg
local_file: ../scratch/Vegetable Images/test/Capsicum/1025.jpg
remote_file: Vegetable Images/test/Capsicum/1025.jpg
local_file: ../scratch/Vegetable Images/test/Capsicum/1031.jpg
remote_file: Vegetable Ima

In [None]:
# NOTE(review): disabled boto3-based download draft, kept for reference only.
# It cannot run as written against the cells visible in this notebook: `boto3` is not
# imported, and `secret` and `local_dest_dir` are not defined (the credentials cell names
# its variable `secret_key`).
# s3 = boto3.resource(
#     "s3",
#     aws_access_key_id=access_key,
#     aws_secret_access_key=secret,
#     config=boto3.session.Config(signature_version="s3v4"),
#     verify=False,
#     endpoint_url=s3_endpoint,
# )
# s3_client = s3.meta.client

# bucket = s3.Bucket(bucket_name)

# objects = bucket.objects.filter(Prefix="/")
# for obj in objects:
#     path, filename = os.path.split(obj.key)
#     if len(path) != 0 and not os.path.exists(os.path.join(local_dest_dir, path)):
#         os.makedirs(os.path.join(local_dest_dir, path))
#     if not obj.key.endswith("/"):
#         download_to = local_dest_dir + path + "/" + filename if path else filename
#         s3_client.download_file(bucket_name, obj.key, download_to)