In [87]:
import configparser

import boto3

In [88]:
config_aws = configparser.ConfigParser()
config_aws.read_file(open('../aws_credentials.cfg'))

KEY                    = config_aws.get('AWS','KEY')
SECRET                 = config_aws.get('AWS','SECRET')
# IAM_NAME               = config_aws.get('AWS', 'IAM_NAME')
# IAM_ARN                = config_aws.get('AWS', 'IAM_ARN')

In [114]:
config_setup = configparser.ConfigParser()
config_setup.read_file(open('../aws_setup.cfg'))

KEY_PAIR_NAME          = config_setup.get('AWS', 'KEY_PAIR_NAME')

EC2_NAME               = config_setup.get('EC2', 'NAME')
VPC_NAME               = config_setup.get('VPC', 'NAME')
PVR_SUBNET_NAME        = config_setup.get('VPC', 'PVR_SUBNET_NAME')
PVR_SUBNET_REGION      = config_setup.get('VPC', 'PVR_SUBNET_REGION')
PUB_SUBNET_NAME        = config_setup.get('VPC', 'PUB_SUBNET_NAME')
PUB_SUBNET_REGION      = config_setup.get('VPC', 'PUB_SUBNET_REGION')

EMR_NAME               = config_setup.get('EMR', 'NAME')
EMR_TYPE               = config_setup.get('EMR', 'TYPE')
EMR_REGION             = config_setup.get('EMR', 'REGION')
EMR_MASTER_NAME        = config_setup.get('EMR', 'MASTER_NAME')
EMR_WORKER_NAME        = config_setup.get('EMR', 'WORKER_NAME')
MASTER_COUNT           = config_setup.get('EMR', 'MASTER_COUNT')
WORKER_COUNT           = config_setup.get('EMR', 'WORKER_COUNT')

S3_REGION              = config_setup.get('S3', 'REGION')
S3_BUCKET              = config_setup.get('S3', 'NAME')

In [90]:
ec2_client = boto3.client('ec2',
                     aws_access_key_id=KEY,
                     aws_secret_access_key= SECRET)

In [43]:
pub_subnet = ec2_client.describe_subnets(Filters=[{'Name': 'tag:Name',
                                      'Values':[PUB_SUBNET_NAME]}])

In [44]:
pub_subnet['Subnets'][0]['SubnetId']

'subnet-07b1a633f0ba1b369'

In [49]:
emr_client = boto3.client('emr',
                         region_name=EMR_REGION,
                         aws_access_key_id=KEY,
                         aws_secret_access_key= SECRET)

In [50]:
cluster_id = emr_client.run_job_flow(
    Name=EMR_NAME,
    LogUri=f"s3://{S3_BUCKET}/logs/",
    Instances={
#         'MasterInstanceType': EMR_TYPE,
#         'SlaveInstanceType': EMR_TYPE,
        'InstanceGroups':[{
            'Name': EMR_MASTER_NAME,
            'Market':'ON_DEMAND',
            'InstanceRole':'MASTER',
            'InstanceType':EMR_TYPE,
            'InstanceCount':int(MASTER_COUNT),
        },
        {   'Name': EMR_WORKER_NAME,
            'Market':'ON_DEMAND',
            'InstanceRole':'CORE',
            'InstanceType':EMR_TYPE,
            'InstanceCount':int(WORKER_COUNT),
        }],
        'Ec2KeyName':KEY_PAIR_NAME,
        'Ec2SubnetId':pub_subnet['Subnets'][0]['SubnetId'],
        'KeepJobFlowAliveWhenNoSteps':True
    },
    Applications=[
        {   'Name':'Spark'},
        {   'Name':'Hadoop'},
        {   'Name': 'livy' },
    ],
    JobFlowRole='EMR_EC2_DefaultRole',
    ServiceRole='EMR_DefaultRole',
    ReleaseLabel='emr-5.28.0',
)
waiter = emr_client.get_waiter("cluster_running")
waiter.wait(
    ClusterId=cluster_id['JobFlowId'],
)

In [48]:
cluster_id

{'JobFlowId': 'j-21POIRC3P0R3F',
 'ClusterArn': 'arn:aws:elasticmapreduce:us-east-2:736387989270:cluster/j-21POIRC3P0R3F',
 'ResponseMetadata': {'RequestId': 'f859d389-107d-49a5-b9a5-b8c2d0963991',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'f859d389-107d-49a5-b9a5-b8c2d0963991',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '118',
   'date': 'Thu, 17 Sep 2020 00:03:54 GMT'},
  'RetryAttempts': 0}}

In [91]:
vpc = ec2_client.describe_vpcs(Filters=[{'Name': 'tag:Name',
                                      'Values':[VPC_NAME]}])

In [115]:
ec2_client.describe_public_ipv4_pools(Filters=[{'Name': 'tag:Name',
                                      'Values':[EC2_NAME]}])

{'PublicIpv4Pools': [],
 'ResponseMetadata': {'RequestId': '89e17d35-05a4-4df9-9afc-73b24b97c9fe',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '89e17d35-05a4-4df9-9afc-73b24b97c9fe',
   'content-type': 'text/xml;charset=UTF-8',
   'transfer-encoding': 'chunked',
   'vary': 'accept-encoding',
   'date': 'Fri, 18 Sep 2020 01:41:50 GMT',
   'server': 'AmazonEC2'},
  'RetryAttempts': 0}}

In [92]:
vpc

{'Vpcs': [{'CidrBlock': '20.0.0.0/16',
   'DhcpOptionsId': 'dopt-6244e709',
   'State': 'available',
   'VpcId': 'vpc-070a25ab1177048a2',
   'OwnerId': '736387989270',
   'InstanceTenancy': 'default',
   'CidrBlockAssociationSet': [{'AssociationId': 'vpc-cidr-assoc-0bd1940c813a3b3e0',
     'CidrBlock': '20.0.0.0/16',
     'CidrBlockState': {'State': 'associated'}}],
   'IsDefault': False,
   'Tags': [{'Key': 'Name', 'Value': 'IMMIGRATE_DEMOGRAPHICS_VPC'}]}],
 'ResponseMetadata': {'RequestId': 'bf4355f8-a0b1-4a9a-9071-22daa11fe5d4',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'bf4355f8-a0b1-4a9a-9071-22daa11fe5d4',
   'content-type': 'text/xml;charset=UTF-8',
   'content-length': '1162',
   'date': 'Fri, 18 Sep 2020 00:35:51 GMT',
   'server': 'AmazonEC2'},
  'RetryAttempts': 0}}

In [139]:
## get public ip address of the EC2 instance
# r = ec2_client.describe_instances().get('Reservations')
r = ec2_client.describe_instances(Filters=[{'Name': 'tag:Name',
                                      'Values':[EC2_NAME]}])

In [150]:
r['Reservations'][0]['Instances'][0]['NetworkInterfaces'][0]['Association']['PublicIp']

'3.135.164.60'

In [127]:
sg = ec2_client.describe_security_groups(Filters=[{'Name': 'vpc-id',
                                                    'Values': [vpc['Vpcs'][0]['VpcId']]}
                                                 ])

In [128]:
sg['SecurityGroups'][0]['IpPermissions']

[{'FromPort': 0,
  'IpProtocol': 'tcp',
  'IpRanges': [],
  'Ipv6Ranges': [],
  'PrefixListIds': [],
  'ToPort': 65535,
  'UserIdGroupPairs': [{'GroupId': 'sg-02df4eb3ef2f88991',
    'UserId': '736387989270'},
   {'GroupId': 'sg-0af093b2f9fbe98e3', 'UserId': '736387989270'}]},
 {'FromPort': 8443,
  'IpProtocol': 'tcp',
  'IpRanges': [{'CidrIp': '52.95.24.0/23'}],
  'Ipv6Ranges': [],
  'PrefixListIds': [],
  'ToPort': 8443,
  'UserIdGroupPairs': []},
 {'FromPort': 8998,
  'IpProtocol': 'tcp',
  'IpRanges': [{'CidrIp': '71.104.42.57/32'}],
  'Ipv6Ranges': [],
  'PrefixListIds': [],
  'ToPort': 8998,
  'UserIdGroupPairs': []},
 {'FromPort': 0,
  'IpProtocol': 'udp',
  'IpRanges': [],
  'Ipv6Ranges': [],
  'PrefixListIds': [],
  'ToPort': 65535,
  'UserIdGroupPairs': [{'GroupId': 'sg-02df4eb3ef2f88991',
    'UserId': '736387989270'},
   {'GroupId': 'sg-0af093b2f9fbe98e3', 'UserId': '736387989270'}]},
 {'FromPort': -1,
  'IpProtocol': 'icmp',
  'IpRanges': [],
  'Ipv6Ranges': [],
  'PrefixL

In [129]:
for sg_ in sg['SecurityGroups']:
    if sg_['GroupName'] == 'default':
        default_sg = sg_
    if sg_['GroupName'] == 'ElasticMapReduce-master':
        emr_sg = sg_

In [130]:
default_sg['GroupId'], emr_sg['GroupId']

('sg-059da5c466fb08b7e', 'sg-02df4eb3ef2f88991')

In [131]:
emr_sg['IpPermissions']

[{'FromPort': 0,
  'IpProtocol': 'tcp',
  'IpRanges': [],
  'Ipv6Ranges': [],
  'PrefixListIds': [],
  'ToPort': 65535,
  'UserIdGroupPairs': [{'GroupId': 'sg-02df4eb3ef2f88991',
    'UserId': '736387989270'},
   {'GroupId': 'sg-0af093b2f9fbe98e3', 'UserId': '736387989270'}]},
 {'FromPort': 8443,
  'IpProtocol': 'tcp',
  'IpRanges': [{'CidrIp': '52.95.24.0/23'}],
  'Ipv6Ranges': [],
  'PrefixListIds': [],
  'ToPort': 8443,
  'UserIdGroupPairs': []},
 {'FromPort': 8998,
  'IpProtocol': 'tcp',
  'IpRanges': [{'CidrIp': '71.104.42.57/32'}],
  'Ipv6Ranges': [],
  'PrefixListIds': [],
  'ToPort': 8998,
  'UserIdGroupPairs': []},
 {'FromPort': 0,
  'IpProtocol': 'udp',
  'IpRanges': [],
  'Ipv6Ranges': [],
  'PrefixListIds': [],
  'ToPort': 65535,
  'UserIdGroupPairs': [{'GroupId': 'sg-02df4eb3ef2f88991',
    'UserId': '736387989270'},
   {'GroupId': 'sg-0af093b2f9fbe98e3', 'UserId': '736387989270'}]},
 {'FromPort': -1,
  'IpProtocol': 'icmp',
  'IpRanges': [],
  'Ipv6Ranges': [],
  'PrefixL

In [132]:
for permission in emr_sg['IpPermissions']:
    print(permission['FromPort'], permission['ToPort'], [x['GroupId'] for x in permission['UserIdGroupPairs']])

0 65535 ['sg-02df4eb3ef2f88991', 'sg-0af093b2f9fbe98e3']
8443 8443 []
8998 8998 []
0 65535 ['sg-02df4eb3ef2f88991', 'sg-0af093b2f9fbe98e3']
-1 -1 ['sg-02df4eb3ef2f88991', 'sg-0af093b2f9fbe98e3']


In [119]:
already_have_access = False
for permission_sg in emr_sg['IpPermissions']:
    if permission_sg['FromPort'] == -1:
#     if permission_sg['FromPort'] == 8998 and permission_sg['ToPort'] == 8998:
        user_group_pairs = permission_sg['UserIdGroupPairs']
        user_group_pairs = set([x['GroupId'] for x in user_group_pairs])
        if emr_sg_id in user_group_pairs:
            already_have_access = True
            break

True

In [103]:
set(['sg-02df4eb3ef2f88991', 'sg-0af093b2f9fbe98e3'])

{'sg-02df4eb3ef2f88991', 'sg-0af093b2f9fbe98e3'}

In [104]:
'sg-02df4eb3ef2f88991' in set(['sg-02df4eb3ef2f88991', 'sg-0af093b2f9fbe98e3'])

True

In [96]:
for emr_permission in emr_sg['IpPermissions']:
    print(emr_permission['UserIdGroupPairs'])

[{'GroupId': 'sg-02df4eb3ef2f88991', 'UserId': '736387989270'}, {'GroupId': 'sg-0af093b2f9fbe98e3', 'UserId': '736387989270'}]
[]
[]
[{'GroupId': 'sg-02df4eb3ef2f88991', 'UserId': '736387989270'}, {'GroupId': 'sg-0af093b2f9fbe98e3', 'UserId': '736387989270'}]
[{'GroupId': 'sg-02df4eb3ef2f88991', 'UserId': '736387989270'}, {'GroupId': 'sg-0af093b2f9fbe98e3', 'UserId': '736387989270'}]


In [77]:
default_sg_id = default_sg['GroupId']

In [78]:
emr_sg_id = emr_sg['GroupId']

In [122]:
ec2_client.authorize_security_group_ingress(GroupId = emr_sg_id,
                                        IpPermissions=[
                                        {
                                            'FromPort': -1,
                                            'IpProtocol': 'icmp',
                                            'UserIdGroupPairs': [
                                                {
                                                    'GroupId': default_sg_id,
                                                },
                                            ],
                                            'ToPort': -1,
                                        }
                                    ],)

{'ResponseMetadata': {'RequestId': '7b38c68d-11a2-45ef-b7a4-f6f0a9d6b776',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '7b38c68d-11a2-45ef-b7a4-f6f0a9d6b776',
   'content-type': 'text/xml;charset=UTF-8',
   'content-length': '259',
   'date': 'Fri, 18 Sep 2020 02:05:57 GMT',
   'server': 'AmazonEC2'},
  'RetryAttempts': 0}}

In [53]:
## open port 8998 to EC2 instance
emr_client.get_block_public_access_configuration()

{'BlockPublicAccessConfiguration': {'BlockPublicSecurityGroupRules': True,
  'PermittedPublicSecurityGroupRuleRanges': [{'MinRange': 22,
    'MaxRange': 22}]},
 'BlockPublicAccessConfigurationMetadata': {'CreationDateTime': datetime.datetime(1969, 12, 31, 19, 0, 0, 1000, tzinfo=tzlocal()),
  'CreatedByArn': ''},
 'ResponseMetadata': {'RequestId': '305858ab-498c-4efe-b7a6-618bd788d43d',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '305858ab-498c-4efe-b7a6-618bd788d43d',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '232',
   'date': 'Thu, 17 Sep 2020 02:59:48 GMT'},
  'RetryAttempts': 0}}

In [None]:
## terminate job flow
clusters_reponse = emr_client.list_clusters(
    ClusterStates=['RUNNING', 'WAITING']
)

for cluster in clusters_reponse['Clusters']:
    if cluster['Name'] == EMR_NAME:
        cluster_id = cluster['Id']
        break

emr_client.terminate_job_flows(JobFlowIds=[cluster_id])
waiter = emr_client.get_waiter("cluster_terminated")
waiter.wait(
    ClusterId=cluster_id,
)

In [13]:
cluster_id

'j-20JF4Q6YEJGUZ'