In [19]:
import boto3
import json

In [20]:
# Create the AWS session without embedding secrets in the notebook.
# With no arguments, boto3 resolves credentials through its default provider
# chain (AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY / AWS_SESSION_TOKEN
# environment variables, then the shared ~/.aws/credentials file, ...) and the
# region from AWS_DEFAULT_REGION or ~/.aws/config.
# Export those variables (or run `aws configure`) before starting the kernel.
session = boto3.Session()

In [21]:
# EMR client bound to the session above; the cluster request below goes through it.
client = session.client(service_name='emr')

In [28]:
# Launch the EMR cluster and queue the pipeline steps.
# The request payload is assembled from small helpers so each instance group
# and each step is declared once instead of being copy-pasted.

S3_BUCKET_URI = 's3://big-data-topicos'   # bucket holding the scripts and the EMR logs
STEP_WORK_DIR = '/home/hadoop'            # directory the scripts are copied into before running

EMR_APPLICATIONS = (
    'Spark', 'Hadoop', 'Hive', 'JupyterHub', 'Livy',
    'Zeppelin', 'TensorFlow', 'Hue', 'Tez', 'Zookeeper',
)


def _instance_group(name, count, instance_type='m5.xlarge'):
    """Return an on-demand instance group; the EMR role is the upper-cased name."""
    return {
        'Name': name,
        'Market': 'ON_DEMAND',
        'InstanceRole': name.upper(),
        'InstanceType': instance_type,
        'InstanceCount': count,
    }


def _command_step(name, args, action_on_failure='CONTINUE'):
    """Return an EMR step that runs ``args`` through command-runner.jar."""
    return {
        'Name': name,
        'ActionOnFailure': action_on_failure,
        'HadoopJarStep': {
            'Jar': 'command-runner.jar',
            'Args': list(args),
        },
    }


def _s3_script_step(name, s3_key, runner, action_on_failure='CONTINUE'):
    """Return a step that copies ``s3_key`` from the bucket and runs it with ``runner``.

    The script is copied to STEP_WORK_DIR under its own file name, then executed
    as ``<runner> <local path>`` (runner is e.g. ``python3`` or ``spark-submit``).
    """
    file_name = s3_key.rsplit('/', 1)[-1]
    local_path = f'{STEP_WORK_DIR}/{file_name}'
    command = f'aws s3 cp {S3_BUCKET_URI}/{s3_key} {local_path} && {runner} {local_path}'
    return _command_step(name, ['bash', '-c', command], action_on_failure)


# NOTE(review): every step uses ActionOnFailure='CONTINUE', so later steps are
# still attempted when an earlier one fails. The steps look sequentially
# dependent; confirm this is intended.
# NOTE(review): the packages are installed by a step rather than a bootstrap
# action; verify they are available wherever the Spark jobs need them.
pipeline_steps = [
    _command_step(
        'Install Dependencies',
        ['pip3', 'install', 'pandas', 'sodapy', 'boto3', 's3fs', 'fsspec', 'mysql-connector-python'],
    ),
    _s3_script_step('Run Python Script to Consume API', 'bigdata/ingest.py', 'python3'),
    _s3_script_step('Load DB', 'COVID/load_s3.py', 'python3'),
    _s3_script_step('ETL', 'COVID/ETL.py', 'spark-submit'),
    _s3_script_step('DF', 'COVID/dataframes.py', 'spark-submit'),
    _s3_script_step('SparkSQL', 'COVID/Sparksql.py', 'spark-submit'),
]

response = client.run_job_flow(
    Name="2024-II",
    ReleaseLabel='emr-7.3.0',
    LogUri=f'{S3_BUCKET_URI}/EMR/logs/',
    Instances={
        # The cluster stays up after the last step finishes, so it has to be
        # terminated explicitly once it is no longer needed.
        'KeepJobFlowAliveWhenNoSteps': True,
        'TerminationProtected': False,
        'InstanceGroups': [
            _instance_group('Master', 1),
            _instance_group('Core', 2),
            _instance_group('Task', 1),
        ],
        'Ec2KeyName': 'emr',
    },
    Applications=[{'Name': app_name} for app_name in EMR_APPLICATIONS],
    Steps=pipeline_steps,
    VisibleToAllUsers=True,
    ServiceRole='EMR_DefaultRole',
    JobFlowRole='EMR_EC2_DefaultRole',
    AutoScalingRole='EMR_AutoScaling_DefaultRole'
)

print("Cluster creado con éxito:", response['JobFlowId'])


Cluster creado con éxito: j-1DOVP13JGOGV9
