In [1]:
data_file_name = 'EMS_DCMA_DD_23.08.graphml'
import os
import networkx as nx
import boto3
import paramiko
from zipfile import ZipFile
import pandas as pd
# Run arguments
host_os = 'linux_mac' # or 'windows'
working_dir = os.getcwd()
data_dir = os.path.join(working_dir, 'data')
data_path = os.path.join(data_dir, data_file_name)
experiment = data_file_name.replace('.graphml', '').replace('.', '_')
experiment_dir = '{e}_results'.format(e=experiment)
if experiment_dir not in os.listdir(working_dir):
    os.mkdir(experiment_dir)
tasks = 'tdas'
results = 'prt'

## AWS
# IPs
resultsIP = '172.31.10.240'
serviceIP = '172.31.15.123'
# Compute instance
key_file_name = 'ds_eu_west2_2.pem'
if host_os=='windows':
    key_file_name = 'C:\\Users\\username\\.aws\\ds_eu_west2_2.pem'
else:
    key_file_name = '~/.aws/ds_eu_west2_2.pem'
INSTANCE_ID = 'i-0586e11281d4b02a2'
AWS_REGION = "eu-west-2"
EC2_RESOURCE = boto3.resource('ec2', region_name=AWS_REGION)
EC2_CLIENT = boto3.client('ec2', region_name=AWS_REGION)

# Storage bucket
profile_name = 'ds_sandbox'
data_bucket = 'programsdatabucket'
results_bucket = 'chainsresults'
session = boto3.session.Session(profile_name=profile_name)
S3_CLIENT = session.client('s3')
S3_RESOURCE = boto3.resource('s3')

In [2]:
# Upload data to an S3 bucket
S3_CLIENT.upload_file(data_path, data_bucket, data_file_name)

In [3]:
# Start compute instance
instance = EC2_RESOURCE.Instance(INSTANCE_ID)
instance.start()
print(f'Starting EC2 instance: {instance.id}')
instance.wait_until_running()
print(f'EC2 instance "{instance.id}" has been started')

Starting EC2 instance: i-0586e11281d4b02a2
EC2 instance "i-0586e11281d4b02a2" has been started


In [4]:
# Connect to instance to run process 
ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
privkey = paramiko.RSAKey.from_private_key_file('ds_eu_west2_2.pem') # Works only with local pem file
ssh.connect(hostname=serviceIP, username='ubuntu', pkey=privkey)

In [5]:
process_statement = "python3 remote_run_test.py"
process_statement = 'cd services/milestones_chains && python3 service.py {f} {e} {t} {r}'\
.format(f=data_file_name, e=experiment, t=tasks, r=results)
stdin, stdout, stderr = ssh.exec_command(process_statement)
#if stderr:
#    print('Run attempt encountered error:\n', stderr.readlines())
print('calculation finished')
# Stop compute instance
response = EC2_CLIENT.stop_instances(InstanceIds=[INSTANCE_ID], DryRun=False)

Run attempt encountered error:
 []
calculation finished


In [6]:
# Prepare results
S3_RESOURCE.Bucket(results_bucket).download_file(experiment, 'experiment_zipped')
with ZipFile('experiment_zipped', 'r') as zipObj:
   zipObj.extractall(path=experiment_dir)
results_files = os.listdir(experiment_dir)
if len(results_files) == 1:
    file = results_files[0]
    df = pd.read_parquet(file)
    df.to_excel(os.path.join(experiment_dir, 'results.xlsx'), index=False)
print('The results have been downloaded to {e}'.format(experiment_dir))