# In [None]:
import boto3
import sagemaker
from sagemaker import Session
from sagemaker.model import Model
from sagemaker.transformer import Transformer

# Set up the SageMaker session, then derive the region, the execution role,
# and a low-level boto3 SageMaker client bound to that same region.
session = Session()
region = session.boto_region_name
role = sagemaker.get_execution_role()
sm_client = boto3.client("sagemaker", region_name=region)

# User configuration -- replace the placeholder values below with your own.

# Model Registry group that holds the model to run
model_package_group_name = "your-model-package-group-name"
# Version of the model within that group
model_version = "1"
# S3 location of the input data
s3_input = "s3://your-bucket-name/test.csv"
# S3 prefix that receives the batch transform output
s3_output = "s3://your-bucket-name/output/"
# Name for the batch transform job
batch_transform_job_name = "batch-transform-job"

# Get the model package ARN from the Model Registry.
# describe_model_package only accepts ModelPackageName (a name or an ARN); it
# has no ModelPackageGroupName / ModelPackageVersion parameters. A versioned
# package is therefore located by listing the packages in its group and
# matching on the version number.
target_version = int(model_version)  # the API reports versions as integers
package_pages = sm_client.get_paginator("list_model_packages").paginate(
    ModelPackageGroupName=model_package_group_name,
)
model_package_arn = next(
    (
        summary["ModelPackageArn"]
        for page in package_pages
        for summary in page["ModelPackageSummaryList"]
        if summary.get("ModelPackageVersion") == target_version
    ),
    None,
)
if model_package_arn is None:
    # Fail early with a clear message instead of passing None downstream.
    raise ValueError(
        f"Version {model_version} was not found in model package group "
        f"{model_package_group_name!r}"
    )

# Create a SageMaker model from the registered model package.
# A model package ARN is not an S3 artifact location, so it cannot be passed
# as Model(model_data=...), and framework_version is not a parameter of the
# generic Model class. ModelPackage takes the inference container and model
# artifacts from the registry entry itself.
model = sagemaker.ModelPackage(
    role=role,
    model_package_arn=model_package_arn,
    sagemaker_session=session,
)

# Build the batch transformer through the model rather than constructing
# Transformer(model_name=model.name) directly: model.transformer() first
# creates the SageMaker model resource, and the model has no name until then.
transformer = model.transformer(
    instance_count=1,  # Number of instances
    instance_type="ml.m5.large",  # Choose the appropriate instance type
    strategy="SingleRecord",  # Change to "MultiRecord" if appropriate
    output_path=s3_output,
)

# Start the batch transform job.
# job_name uses the configured batch_transform_job_name, which was previously
# defined but never passed (the job then got an auto-generated name).
# NOTE: transform job names must be unique within an account and region, so
# change batch_transform_job_name before re-running.
transformer.transform(
    data=s3_input,
    content_type="text/csv",  # Update to the appropriate content type if needed
    split_type="Line",  # Update if needed (e.g., None, Line, RecordIO)
    input_filter="$[1:]",  # JSONPath: keep every column except the first; update as required
    job_name=batch_transform_job_name,
)

# Block until the transform job has finished, then report where the output is
transformer.wait()

completion_message = f"Batch Transform Job completed. Results are saved at {s3_output}"
print(completion_message)