In [None]:
# Upload CSV files to S3
import boto3
import sagemaker
import os

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Resolve the AWS context for this notebook: the region configured for the
# default boto3 session, the role returned by sagemaker.get_execution_role(),
# and a SageMaker SDK session object.
region = boto3.Session().region_name
role = sagemaker.get_execution_role()
sagemaker_session = sagemaker.Session()

In [None]:
# Target one fixed bucket rather than the SageMaker session's default bucket.
bucket = "ld-autopilot-demo599"

# SageMaker API client bound to the region resolved earlier in the notebook.
sm = boto3.Session().client("sagemaker", region_name=region)

In [None]:
# Echo the target bucket name so the upload destination is visible in the cell output
print(f"Using S3 bucket: {bucket}")

In [None]:
# Verify we can access the bucket: list its top-level contents via the AWS CLI.
# `{bucket}` is expanded by IPython from the Python variable before the shell runs.
!aws s3 ls s3://{bucket}/

In [None]:
# Create a dedicated prefix for our data files
s3_prefix = ""
!aws s3api put-object --bucket {bucket} --key {s3_prefix}

In [None]:
# Local directory that is searched for CSV files. The path is relative to the
# notebook's working directory.
local_data_path = "../data/"  # per the original note, this notebook lives in the data_management folder

In [None]:
# Recursively scan local_data_path and collect each CSV as a
# (path on disk, path relative to local_data_path) pair.
print("\nCSV files to be uploaded:")
csv_files = []

for folder, _subfolders, names in os.walk(local_data_path):
    for name in names:
        # Skip anything that is not a .csv file (match is case-sensitive).
        if not name.endswith(".csv"):
            continue
        found_path = os.path.join(folder, name)
        csv_files.append((found_path, os.path.relpath(found_path, local_data_path)))
        print(f"Found: {found_path}")

# Tell the reader explicitly when the scan came back empty.
if len(csv_files) == 0:
    print("No CSV files found in the data directory or its subfolders.")

In [None]:
# Upload all CSV files from the data directory and its subfolders to S3
print("\nUploading files to S3...")
for local_file_path, relative_path in csv_files:
    # Preserve the subfolder structure in S3
    s3_file_key = f"{s3_prefix}{relative_path}"
    
    # Upload without ACL since the bucket doesn't support it
    print(f"Uploading {local_file_path} to s3://{bucket}/{s3_file_key}")
    !aws s3 cp {local_file_path} s3://{bucket}/{s3_file_key}

In [None]:
# Check source path
!aws s3 ls s3://ld-autopilot-demo599/

In [None]:
# Check public bucket files
!aws s3 ls s3://ld-autopilot-demo599/ --recursive