In [9]:
import boto3
import os
from botocore.exceptions import NoCredentialsError, PartialCredentialsError, ClientError

# --- Helper Functions (Download and Upload) ---

def download_file_from_s3(s3_key: str, local_file_path: str):
    """
    Fetch one object from the configured S3 bucket and store it on local disk.

    Relies on the notebook-level ``s3_client`` and ``s3bucket`` globals. The
    parent directory of the destination is created when it does not exist.
    Errors raised by the download call are printed instead of propagated.

    Args:
        s3_key (str): Key of the object inside the bucket (path and filename).
        local_file_path (str): Destination path for the downloaded file.
    """
    print(f"Attempting to download s3://{s3bucket}/{s3_key} to '{local_file_path}'")

    # A bare filename has no directory component; fall back to the current directory.
    target_dir = os.path.dirname(local_file_path)
    if not target_dir:
        target_dir = '.'
    os.makedirs(target_dir, exist_ok=True)

    try:
        s3_client.download_file(s3bucket, s3_key, local_file_path)
        print(f"Successfully downloaded '{s3_key}' to '{local_file_path}'")
    except ClientError as err:
        # A "404" error code is reported as a missing object; anything else is printed verbatim.
        object_missing = err.response['Error']['Code'] == "404"
        if object_missing:
            print(f"Error: The S3 object '{s3_key}' was not found in bucket '{s3bucket}'.")
        else:
            print(f"Error downloading file from S3: {err}")
    except (NoCredentialsError, PartialCredentialsError):
        print("Error: AWS credentials not found or incomplete. Please configure your AWS credentials.")
    except Exception as err:
        print(f"An unexpected error occurred during download: {err}")

def download_directory_from_s3(s3_prefix_for_dir: str, local_directory_path: str):
    """
    Downloads all objects under a given S3 prefix (acting like a directory)
    to a local directory, preserving the sub-folder layout below the prefix.

    Uses the notebook-level ``s3_client`` and ``s3bucket`` and delegates each
    transfer to ``download_file_from_s3``. Listing errors are printed, not raised.

    Args:
        s3_prefix_for_dir (str): The S3 prefix (folder path) in the bucket.
                                 A trailing '/' is appended when missing.
        local_directory_path (str): The local directory where files will be saved.
    """
    if not s3_prefix_for_dir.endswith('/'):
        s3_prefix_for_dir += '/' # Ensure it's treated as a prefix for a folder

    print(f"Starting download of S3 prefix '{s3_prefix_for_dir}' to local directory '{local_directory_path}'")
    os.makedirs(local_directory_path, exist_ok=True)

    # Absolute form of the destination (with trailing separator), used only to
    # verify that every computed path stays inside the destination directory.
    local_root = os.path.join(os.path.abspath(local_directory_path), '')

    try:
        # List objects in the specified S3 prefix
        paginator = s3_client.get_paginator('list_objects_v2')
        pages = paginator.paginate(Bucket=s3bucket, Prefix=s3_prefix_for_dir)

        downloaded_count = 0
        for page in pages:
            for obj in page.get("Contents", []):
                s3_key = obj["Key"]

                # Keys ending in '/' are "folder" placeholder objects (including the
                # prefix itself). Saving one as a file would block creation of the
                # directory that its children need, so skip them.
                if s3_key.endswith('/'):
                    continue

                # Listed keys start with the prefix, so slicing yields the relative
                # key without involving the local cwd or OS path rules.
                relative_key = s3_key[len(s3_prefix_for_dir):]
                path_parts = [part for part in relative_key.split('/') if part not in ('', '.')]
                if not path_parts:
                    continue
                local_file_path = os.path.join(local_directory_path, *path_parts)

                # Refuse keys (e.g. containing '..') that would resolve outside the
                # destination directory.
                if not os.path.abspath(local_file_path).startswith(local_root):
                    print(f"Skipping S3 object '{s3_key}': it resolves outside '{local_directory_path}'.")
                    continue

                # Ensure local subdirectories exist
                os.makedirs(os.path.dirname(local_file_path), exist_ok=True)

                download_file_from_s3(s3_key, local_file_path)
                downloaded_count += 1

        if downloaded_count == 0:
            print(f"No files found under S3 prefix '{s3_prefix_for_dir}' to download.")
        else:
            print(f"Finished downloading {downloaded_count} files from S3 prefix '{s3_prefix_for_dir}'.")

    except ClientError as e:
        print(f"Error listing objects in S3: {e}")
    except (NoCredentialsError, PartialCredentialsError):
        print("Error: AWS credentials not found or incomplete. "
              "Please configure your AWS credentials.")
    except Exception as e:
        print(f"An unexpected error occurred during directory download: {e}")

def upload_file_to_s3(local_file_path: str, s3_key: str):
    """
    Uploads a single file from a local path to an S3 bucket.

    Uses the notebook-level ``s3_client`` and ``s3bucket``. Errors are printed
    rather than raised.

    Args:
        local_file_path (str): The full path to the local file.
        s3_key (str): The S3 object key (path and filename within the bucket).
                      This typically includes the s3prefix.
    """
    # Local import keeps this block self-contained; boto3 is already imported by the notebook.
    from boto3.exceptions import S3UploadFailedError

    print(f"Attempting to upload '{local_file_path}' to s3://{s3bucket}/{s3_key}")
    try:
        s3_client.upload_file(local_file_path, s3bucket, s3_key)
        print(f"Successfully uploaded '{local_file_path}' to S3 at '{s3_key}'")
    except FileNotFoundError:
        print(f"Error: The file '{local_file_path}' was not found.")
    except (NoCredentialsError, PartialCredentialsError):
        print("Error: AWS credentials not found or incomplete. "
              "Please configure your AWS credentials.")
    except (ClientError, S3UploadFailedError) as e:
        # boto3's managed upload re-raises S3 ClientErrors as S3UploadFailedError,
        # so both must be caught for S3-side failures to be reported here.
        print(f"Error uploading file to S3: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

def upload_directory_to_s3(local_directory_path: str, s3_prefix_for_dir: str):
    """
    Recursively push every file below a local directory to the S3 bucket.

    Each file is handed to ``upload_file_to_s3`` with a key made of the given
    prefix followed by the file's path relative to the directory.

    Args:
        local_directory_path (str): Directory whose contents are uploaded.
        s3_prefix_for_dir (str): Key prefix (folder path) inside the bucket under
                                 which the files are stored. End it with '/' when
                                 it represents a folder.
    """
    # Guard clause: nothing to do when the source is not a directory.
    if not os.path.isdir(local_directory_path):
        print(f"Error: '{local_directory_path}' is not a valid directory.")
        return

    print(f"Starting upload of directory '{local_directory_path}' to s3://{s3bucket}/{s3_prefix_for_dir}")

    # Lazily enumerate every file path found while walking the tree.
    source_paths = (
        os.path.join(folder, name)
        for folder, _subfolders, names in os.walk(local_directory_path)
        for name in names
    )

    for source_path in source_paths:
        # Key = prefix + path relative to the uploaded directory, with '/' separators for S3.
        key_suffix = os.path.relpath(source_path, local_directory_path)
        destination_key = os.path.join(s3_prefix_for_dir, key_suffix).replace("\\", "/")
        upload_file_to_s3(source_path, destination_key)

In [2]:
# --- Configuration ---
# Adjust the values below for your team, account, and project; the bucket name
# and key prefix used by the helper functions are derived from them.
team = 'model-risk'
account = 'sagemakerprod'
project_name = 'ARM1_2025'

# Objects for this project are addressed under a prefix named after the project.
s3prefix = project_name
# Bucket naming pattern: upg-sagemaker-<team>-usw2-<account>
s3bucket = "-".join(["upg-sagemaker", team, "usw2", account])

# S3 client referenced by the helper functions in this notebook.
s3_client = boto3.client('s3')

In [12]:
# Download a single file from S3 into the SageMaker workspace.
local_download_file_path = "data/sample_df.dat"
# S3 keys use '/', so replace any backslash separator produced by os.path.join.
s3_download_file_key = os.path.join(s3prefix, 'sample_df.dat').replace("\\", "/")
download_file_from_s3(
    s3_key=s3_download_file_key,
    local_file_path=local_download_file_path,
)

Attempting to download s3://upg-sagemaker-model-risk-usw2-sagemakerprod/ARM1_2025/sample_df.dat to 'data/sample_df.dat'
Successfully downloaded 'ARM1_2025/sample_df.dat' to 'data/sample_df.dat'


In [7]:
# Build the S3 prefix of a folder to download from S3 into SageMaker.
sub_dir = ''  # sub-folder under the project prefix; '' yields the project prefix itself
s3_download_dir_prefix = os.path.join(
    s3prefix,
    os.path.basename(sub_dir),
    "",  # empty last component makes the joined result end with a separator
).replace("\\", "/")
print(s3_download_dir_prefix)

# Uncomment the two lines below to run the actual download:
#local_download_dir_path = "data/"
#download_directory_from_s3(s3_download_dir_prefix, local_download_dir_path)

ARM1_2025/


In [11]:
# Upload a single file from SageMaker to S3.
data_dir = 'data/'
dummy_file_name = f"{data_dir}test_final_mrm.dat"

# The object key is the local relative path placed underneath the project prefix.
s3_file_key = os.path.join(s3prefix, dummy_file_name).replace("\\", "/")
upload_file_to_s3(local_file_path=dummy_file_name, s3_key=s3_file_key)

Attempting to upload 'data/test_final_mrm.dat' to s3://upg-sagemaker-model-risk-usw2-sagemakerprod/ARM1_2025/data/test_final_mrm.dat
Successfully uploaded 'data/test_final_mrm.dat' to S3 at 'ARM1_2025/data/test_final_mrm.dat'


In [None]:
# Upload a folder to S3 from SageMaker.
data_dir = "data/"

# Define the S3 prefix for the directory upload.
# normpath strips the trailing '/', because os.path.basename("data/") is ''
# and the folder name would otherwise be dropped from the prefix.
s3_dir_prefix = os.path.join(s3prefix, os.path.basename(os.path.normpath(data_dir)), "").replace("\\", "/")
# Pass data_dir: the previously referenced `dummy_dir` is not defined anywhere in this notebook.
upload_directory_to_s3(data_dir, s3_dir_prefix)

## Clean up the directory if you want to (this deletes the local copy of data_dir!)
#import shutil
#shutil.rmtree(data_dir)
#print(f"Cleaned up '{data_dir}'")