In [74]:
import configparser

import boto3

In [75]:
# Load the AWS access key pair from the credentials file kept outside the notebook.
# read_file() with an explicit open() is used (rather than read()) so that a
# missing file raises immediately; the `with` block closes the handle afterwards.
config_aws = configparser.ConfigParser()
with open('../aws_credentials.cfg') as credentials_file:
    config_aws.read_file(credentials_file)

KEY                    = config_aws.get('AWS','KEY')
SECRET                 = config_aws.get('AWS','SECRET')
# IAM_NAME               = config_aws.get('AWS', 'IAM_NAME')
# IAM_ARN                = config_aws.get('AWS', 'IAM_ARN')

In [76]:
# Load the infrastructure settings (VPC subnets, EMR cluster, S3 bucket).
# The `with` block closes the file handle once the parser has consumed it;
# a missing file still raises immediately because open() is called explicitly.
config_setup = configparser.ConfigParser()
with open('../aws_setup.cfg') as setup_file:
    config_setup.read_file(setup_file)

# [VPC] private / public subnet settings
PVR_SUBNET_NAME        = config_setup.get('VPC', 'PVR_SUBNET_NAME')
PVR_SUBNET_REGION      = config_setup.get('VPC', 'PVR_SUBNET_REGION')
PUB_SUBNET_NAME        = config_setup.get('VPC', 'PUB_SUBNET_NAME')
PUB_SUBNET_REGION      = config_setup.get('VPC', 'PUB_SUBNET_REGION')

# [EMR] cluster settings. The counts are kept as strings here; the cluster
# launch cell converts them with int().
EMR_NAME               = config_setup.get('EMR', 'NAME')
EMR_TYPE               = config_setup.get('EMR', 'TYPE')
EMR_REGION             = config_setup.get('EMR', 'REGION')
EMR_MASTER_NAME        = config_setup.get('EMR', 'MASTER_NAME')
EMR_WORKER_NAME        = config_setup.get('EMR', 'WORKER_NAME')
MASTER_COUNT           = config_setup.get('EMR', 'MASTER_COUNT')
WORKER_COUNT           = config_setup.get('EMR', 'WORKER_COUNT')

# [S3] bucket settings
S3_REGION              = config_setup.get('S3', 'REGION')
S3_BUCKET              = config_setup.get('S3', 'NAME')

In [77]:
# EC2 client used to look up the subnet the EMR cluster is launched into.
# The region is pinned to EMR_REGION (same as the EMR client) so the lookup
# does not depend on the ambient AWS profile/environment: the subnet ID passed
# to run_job_flow has to come from the region the cluster is created in.
ec2_client = boto3.client('ec2',
                     region_name=EMR_REGION,
                     aws_access_key_id=KEY,
                     aws_secret_access_key= SECRET)

In [78]:
# Find the public subnet by its Name tag. `pub_subnet` keeps the full
# describe_subnets response because later cells read ['Subnets'][0]['SubnetId'].
pub_subnet = ec2_client.describe_subnets(
    Filters=[{'Name': 'tag:Name', 'Values': [PUB_SUBNET_NAME]}]
)

# Later cells index the first result unconditionally; fail here with a clear
# message instead of a bare IndexError further down the notebook.
if not pub_subnet['Subnets']:
    raise LookupError(
        f"No subnet with tag Name={PUB_SUBNET_NAME!r} was found; "
        "check PUB_SUBNET_NAME in ../aws_setup.cfg and the client's region."
    )

In [73]:
# Display the ID of the first subnet returned by the tag lookup; the cluster
# launch cell passes this same value as 'Ec2SubnetId'.
pub_subnet['Subnets'][0]['SubnetId']

'subnet-07b1a633f0ba1b369'

In [7]:
# EMR client bound to EMR_REGION and authenticated with the KEY/SECRET pair.
_emr_client_kwargs = {
    'region_name': EMR_REGION,
    'aws_access_key_id': KEY,
    'aws_secret_access_key': SECRET,
}
emr_client = boto3.client('emr', **_emr_client_kwargs)

In [80]:
# The master and core groups share market and instance type; only the group
# name, role and (string-valued) count from the config differ.
_instance_groups = [
    {
        'Name': group_name,
        'Market': 'ON_DEMAND',
        'InstanceRole': group_role,
        'InstanceType': EMR_TYPE,
        'InstanceCount': int(group_count),
    }
    for group_name, group_role, group_count in (
        (EMR_MASTER_NAME, 'MASTER', MASTER_COUNT),
        (EMR_WORKER_NAME, 'CORE', WORKER_COUNT),
    )
]

# Launch into the first subnet found by the tag lookup and keep the cluster
# alive after it runs out of steps.
_instances_config = {
    'InstanceGroups': _instance_groups,
    'Ec2SubnetId': pub_subnet['Subnets'][0]['SubnetId'],
    'KeepJobFlowAliveWhenNoSteps': True,
}

_applications = [{'Name': app_name} for app_name in ('Spark', 'Hadoop', 'livy')]

# NOTE: despite the name, `cluster_id` holds the whole run_job_flow response
# dict; the actual ID is under the 'JobFlowId' key.
cluster_id = emr_client.run_job_flow(
    Name=EMR_NAME,
    LogUri=f"s3://{S3_BUCKET}/logs/",
    Instances=_instances_config,
    Applications=_applications,
    JobFlowRole='EMR_EC2_DefaultRole',
    ServiceRole='EMR_DefaultRole',
    ReleaseLabel='emr-5.28.0',
)

# Block until the 'cluster_running' waiter reports success for the new cluster.
waiter = emr_client.get_waiter("cluster_running")
waiter.wait(ClusterId=cluster_id['JobFlowId'])

In [81]:
# Show the run_job_flow response. Despite its name, `cluster_id` is the whole
# response dict here (JobFlowId, ClusterArn, ResponseMetadata); the terminate
# cell later rebinds it to the plain cluster ID string.
# NOTE(review): the rendered ClusterArn contains the AWS account ID — consider
# clearing this output before sharing the notebook.
cluster_id

{'JobFlowId': 'j-3D1N5LMLK6KR8',
 'ClusterArn': 'arn:aws:elasticmapreduce:us-east-2:736387989270:cluster/j-3D1N5LMLK6KR8',
 'ResponseMetadata': {'RequestId': '0e1c6572-3104-4a80-86e8-ce7a27d383a9',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '0e1c6572-3104-4a80-86e8-ce7a27d383a9',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '118',
   'date': 'Sun, 13 Sep 2020 02:11:14 GMT'},
  'RetryAttempts': 0}}

In [88]:
## terminate job flow
# Find the live cluster by name instead of trusting whatever `cluster_id`
# currently holds: after the launch cell it is the whole run_job_flow response
# dict, and on a fresh kernel it may not be defined at all.
clusters_response = emr_client.list_clusters(
    ClusterStates=['RUNNING', 'WAITING']
)

# ID of the first RUNNING/WAITING cluster named EMR_NAME, or None if there is none.
matching_cluster_id = next(
    (
        cluster['Id']
        for cluster in clusters_response['Clusters']
        if cluster['Name'] == EMR_NAME
    ),
    None,
)

if matching_cluster_id is None:
    # Nothing to tear down. Skip the API calls rather than sending a stale or
    # non-string `cluster_id` to terminate_job_flows.
    print(f"No RUNNING/WAITING EMR cluster named {EMR_NAME!r}; nothing to terminate.")
else:
    cluster_id = matching_cluster_id
    emr_client.terminate_job_flows(JobFlowIds=[cluster_id])
    # Block until the 'cluster_terminated' waiter reports success.
    waiter = emr_client.get_waiter("cluster_terminated")
    waiter.wait(
        ClusterId=cluster_id,
    )