# In [3]:
import json
from datetime import datetime, timedelta, timezone

import boto3

def _string_field(key, value):
    """Return a Data Pipeline field that carries a literal string value."""
    return {"key": key, "stringValue": value}


def _ref_field(key, object_id):
    """Return a Data Pipeline field that references another pipeline object."""
    return {"key": key, "refValue": object_id}


def _build_pipeline_objects(start_time, log_uri):
    """Return the pipeline objects: defaults, schedule, S3 nodes, copy activity, EC2 resource."""
    return [
        {
            "id": "Default",
            "name": "Default",
            "fields": [
                _string_field("type", "Default"),
                _string_field("scheduleType", "cron"),
                # Every other object inherits from Default, so referencing the
                # schedule here attaches it to the whole pipeline.
                _ref_field("schedule", "Schedule"),
                _string_field("failureAndRerunMode", "CASCADE"),
                _string_field("pipelineLogUri", log_uri),
                # Both roles must already exist in the target account.
                _string_field("role", "DataPipelineDefaultRole"),
                _string_field("resourceRole", "DataPipelineDefaultResourceRole"),
            ],
        },
        {
            "id": "Schedule",
            "name": "Schedule",
            "fields": [
                _string_field("type", "Schedule"),
                _string_field("startDateTime", start_time),
                _string_field("period", "1 day"),
                _string_field("occurrences", "3"),  # Number of times to run
            ],
        },
        {
            "id": "S3Input",
            "name": "S3Input",
            "fields": [
                _string_field("type", "S3DataNode"),
                # Resolved from the myS3Input pipeline parameter.
                _string_field("directoryPath", "#{myS3Input}"),
            ],
        },
        {
            "id": "S3Output",
            "name": "S3Output",
            "fields": [
                _string_field("type", "S3DataNode"),
                # Resolved from the myS3Output pipeline parameter.
                _string_field("directoryPath", "#{myS3Output}"),
            ],
        },
        {
            "id": "CopyActivity",
            "name": "CopyActivity",
            "fields": [
                _string_field("type", "CopyActivity"),
                _ref_field("input", "S3Input"),
                _ref_field("output", "S3Output"),
                _ref_field("runsOn", "Ec2Resource"),
            ],
        },
        {
            "id": "Ec2Resource",
            "name": "Ec2Resource",
            "fields": [
                _string_field("type", "Ec2Resource"),
                _string_field("instanceType", "t2.small"),
                _string_field("terminateAfter", "1 hour"),
            ],
        },
    ]


def _build_parameter_objects():
    """Return the parameter declarations in the ``id`` + ``attributes`` shape boto3 requires."""
    return [
        {
            "id": "myS3Input",
            "attributes": [
                _string_field("description", "Input S3 path"),
                _string_field("type", "AWS::S3::ObjectKey"),
            ],
        },
        {
            "id": "myS3Output",
            "attributes": [
                _string_field("description", "Output S3 path"),
                _string_field("type", "AWS::S3::ObjectKey"),
            ],
        },
    ]


def create_data_pipeline(
    region_name="us-east-1",
    input_path="s3://your-source-bucket/input/",
    output_path="s3://your-destination-bucket/output/",
    log_uri="s3://your-log-bucket/logs/",
    client=None,
):
    """Create, define, and activate an AWS Data Pipeline that copies S3 data.

    The pipeline consists of a daily schedule (3 occurrences, starting five
    minutes from now in UTC), an input and an output ``S3DataNode``, a
    ``CopyActivity`` between them, and a ``t2.small`` ``Ec2Resource`` that the
    activity runs on.

    Args:
        region_name: AWS region used when this function builds its own client.
            Ignored when ``client`` is supplied.
        input_path: S3 directory to copy from (value of ``myS3Input``).
        output_path: S3 directory to copy to (value of ``myS3Output``).
        log_uri: S3 location for pipeline logs.
        client: Optional pre-built Data Pipeline client. When ``None``, one is
            created with ``boto3.client('datapipeline', ...)``.

    Returns:
        The ID of the created and activated pipeline.

    Raises:
        RuntimeError: If the service reports the pipeline definition as
            errored; the pipeline is then not activated.
        Exception: Any error raised by the client calls is printed and
            re-raised unchanged.
    """
    if client is None:
        client = boto3.client('datapipeline', region_name=region_name)

    # Timestamp suffix keeps the name (and uniqueId) distinct between runs.
    pipeline_name = f"DataCopyPipeline-{datetime.now().strftime('%Y%m%d%H%M%S')}"

    # Timezone-aware replacement for the deprecated datetime.utcnow(); the
    # formatted string is the same.
    start_time = (datetime.now(timezone.utc) + timedelta(minutes=5)).strftime(
        '%Y-%m-%dT%H:%M:%S'
    )

    try:
        response = client.create_pipeline(
            name=pipeline_name,
            uniqueId=pipeline_name,
            description="Pipeline to copy data between S3 buckets"
        )

        pipeline_id = response['pipelineId']
        print(f"Created pipeline with ID: {pipeline_id}")

        definition = client.put_pipeline_definition(
            pipelineId=pipeline_id,
            pipelineObjects=_build_pipeline_objects(start_time, log_uri),
            parameterObjects=_build_parameter_objects(),
            parameterValues=[
                {"id": "myS3Input", "stringValue": input_path},
                {"id": "myS3Output", "stringValue": output_path},
            ],
        )

        # The call does not raise on an invalid definition; it reports the
        # problem in the response, so check before activating.
        if definition.get("errored"):
            details = json.dumps(definition.get("validationErrors", []))
            raise RuntimeError(
                f"Pipeline definition for {pipeline_id} was rejected: {details}"
            )

        client.activate_pipeline(pipelineId=pipeline_id)
        print(f"Activated pipeline {pipeline_id}")

        return pipeline_id

    except Exception as e:
        print(f"Error creating pipeline: {str(e)}")
        raise

# Script entry point: create and activate the pipeline only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    create_data_pipeline()

# Output captured from running the cell above:
#
#   Error creating pipeline: Unable to locate credentials
#
#   NoCredentialsError: Unable to locate credentials