# Setup Scratch

```
# local port forward for minio
oc -n demo-active-learn port-forward svc/minio 9000:9000
```

In [None]:
import os

# The scratch directory is part of the .gitignore to ensure it is not committed to git.
# The %env magic below sets SCRATCH for this kernel, so the "scratch" fallback
# only applies when that magic line is removed.
%env SCRATCH=../scratch
scratch_path = os.environ.get("SCRATCH", "scratch")

In [None]:
# Fetch the S3 connection settings from environment variables - these values
# are provided by the Data Connection setup. The fallbacks match the local
# MinIO instance reached through the port-forward shown at the top.


def strip_url_scheme(endpoint):
    """Return *endpoint* without a leading ``scheme://`` prefix.

    ``str.lstrip("http://")`` must not be used for this: it removes any
    leading run of the characters ``h t p : /`` rather than the literal
    prefix, so ``https://host`` became ``s://host`` and a bare host such as
    ``tp-host:9000`` lost its first letters.

    Args:
        endpoint: Host, ``host:port`` or a full URL such as
            ``http://host:port``.

    Returns:
        The part after the first ``://``, or *endpoint* unchanged when it
        contains no scheme.
    """
    return endpoint.split("://", 1)[-1]


access_key = os.getenv("AWS_ACCESS_KEY_ID", "minioadmin")
secret_key = os.getenv("AWS_SECRET_ACCESS_KEY", "minioadmin")
# The Minio client is given the endpoint without a URL scheme.
s3_endpoint = strip_url_scheme(os.getenv("AWS_S3_ENDPOINT", "localhost:9000"))
bucket_name = os.getenv("AWS_S3_BUCKET", "data")

In [None]:
# import minio and dependencies
from minio import Minio
import os
import glob
import urllib3

# Suppress urllib3's InsecureRequestWarning messages.
# NOTE(review): the client in this notebook is created with secure=False, so
# this presumably only matters if it is switched to unverified HTTPS - confirm.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [None]:
# Build the Minio client shared by the upload/download helpers below.
# secure=False is passed through unchanged from the original setup
# (presumably plain HTTP towards the port-forwarded MinIO - confirm).
s3_client = Minio(
    s3_endpoint,
    access_key=access_key,
    secret_key=secret_key,
    secure=False,
)

In [None]:
def remote_object_name(local_file, root):
    """Return the S3 object name for *local_file*: its path relative to *root*.

    The result always uses forward slashes, whatever the local path
    separator is.

    Args:
        local_file: Path of a file, expected to live below *root*.
        root: Directory whose contents are mirrored into the bucket.

    Returns:
        The object name, e.g. ``models/a/b.onnx`` for
        ``../scratch/models/a/b.onnx`` with root ``../scratch``.
    """
    return os.path.relpath(local_file, root).replace(os.sep, "/")


def upload_local_directory_to_s3(bucket_name, local_path):
    """Recursively upload the files below *local_path* to *bucket_name*.

    Object names are the file paths relative to the module-level
    ``scratch_path``. A file whose object name can already be stat'ed in the
    bucket is skipped. Entries whose name starts with a dot are not matched
    by the glob pattern and are therefore not uploaded.

    Args:
        bucket_name: Name of the destination bucket.
        local_path: Local directory to upload; expected to be
            ``scratch_path`` or a directory below it.

    Raises:
        NotADirectoryError: If *local_path* is not an existing directory.
        Exception: Whatever ``s3_client.fput_object`` raises on a failed
            upload.
    """
    # An explicit check instead of ``assert``, which is stripped under ``-O``.
    if not os.path.isdir(local_path):
        raise NotADirectoryError(f"not a directory: {local_path!r}")

    # glob.escape keeps characters such as "[" in the directory name literal.
    for local_file in glob.glob(glob.escape(local_path) + "/*"):
        local_file = local_file.replace(os.sep, "/")

        print("local file: " + local_file)

        if not os.path.isfile(local_file):
            upload_local_directory_to_s3(bucket_name, local_file)
            continue

        # The previous ``local_path.lstrip(scratch_path)`` stripped a set of
        # characters rather than the prefix, corrupting names such as
        # "train" (-> "in"); a relative path gives the intended name.
        remote_file = remote_object_name(local_file, scratch_path)

        try:
            s3_client.stat_object(bucket_name, remote_file)
        except Exception:
            # stat failed, so treat the object as missing and upload it.
            # ``Exception`` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            s3_client.fput_object(bucket_name, remote_file, local_file)
        else:
            print("remote exists: " + remote_file)


def download_all_from_s3(local_path):
    """Download every object of every listed bucket below *local_path*.

    Each object is written to ``<local_path>/<object name>``; objects from
    all buckets share that one directory tree. An object whose target path
    already exists locally is reported and not downloaded again.

    Args:
        local_path: Local directory that receives the downloaded objects.
    """
    for bkt in s3_client.list_buckets():
        for obj in s3_client.list_objects(bkt.name, recursive=True):
            target = "/".join((local_path, obj.object_name))

            if not os.path.exists(target):
                s3_client.fget_object(bkt.name, obj.object_name, target)
                continue

            print("local exists: " + target)

### Upload to S3

In [None]:
# Create the target bucket only when it does not exist yet.
bucket_missing = not s3_client.bucket_exists(bucket_name)
if bucket_missing:
    s3_client.make_bucket(bucket_name)
    print(f"Bucket '{bucket_name}' created successfully.")

In [None]:
# Upload the image dataset from the scratch directory; a failure is reported
# in the cell output instead of aborting the notebook.
try:
    upload_local_directory_to_s3(
        bucket_name=bucket_name,
        local_path=f"{scratch_path}/Vegetable Images",
    )
except Exception as exc:
    print(f"Failed to upload files to bucket '{bucket_name}': {exc}")

In [None]:
# Upload the models directory from the scratch directory; a failure is
# reported in the cell output instead of aborting the notebook.
try:
    upload_local_directory_to_s3(
        bucket_name=bucket_name,
        local_path=f"{scratch_path}/models",
    )
except Exception as exc:
    print(f"Failed to upload files to bucket '{bucket_name}': {exc}")

### Download from S3

In [None]:
# Pull the contents of all buckets into the scratch directory; a failure is
# reported in the cell output instead of aborting the notebook.
try:
    download_all_from_s3(local_path=scratch_path)
except Exception as exc:
    print(f"Failed to download files from bucket(s) '{bucket_name}': {exc}")