In [None]:
# Import necessary libraries
import pandas as pd
import boto3
import sagemaker
import os
from sagemaker import get_execution_role

# Set up SageMaker session and IAM role.
# NOTE(review): neither `role` nor `sagemaker_session` is used again in this
# cell; they are kept on the assumption that later cells rely on them.
role = get_execution_role()
sagemaker_session = sagemaker.Session()
bucket = 'bedrock-log-zyl'
prefix = 'adult_census_data'

# S3 location (bucket + key prefix) the dataset is stored under
s3_path = f's3://{bucket}/{prefix}/'

# Download the UCI Adult Census dataset. header=None tells pandas the file has
# no header row, so the column names are supplied explicitly.
# NOTE(review): the raw file appears to separate fields with ", ", which would
# leave a leading space on every string value (e.g. ' Private'). If downstream
# code does not expect that, consider skipinitialspace=True -- confirm first,
# since it changes the contents of the uploaded CSV.
data_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data'
data = pd.read_csv(data_url, header=None,
                   names=['age', 'workclass', 'fnlwgt', 'education', 'education-num',
                          'marital-status', 'occupation', 'relationship', 'race', 'sex',
                          'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income'])

# Name of the temporary local CSV; it is also used as the final component of
# the S3 object key.
local_file = 'adult_data.csv'
s3_client = boto3.client('s3')

try:
    # Save the dataset locally (with a header row, without the index column)
    # and upload it to S3.
    data.to_csv(local_file, index=False)
    s3_client.upload_file(local_file, bucket, f'{prefix}/{local_file}')
finally:
    # Clean up the local file even when the write or the upload fails, so a
    # failed run does not leave a stray CSV in the working directory. The
    # existence check keeps a failed to_csv from being masked by a
    # FileNotFoundError raised here.
    if os.path.exists(local_file):
        os.remove(local_file)

# Report where the dataset was uploaded. This line is only reached when the
# upload call above did not raise; no separate check against S3 is made.
print(f"Dataset uploaded to {s3_path}{local_file}")

# Optional: Display the first few rows of the dataset
print("\nFirst few rows of the dataset:")
print(data.head())