In [1]:
import boto3
import cloudknot as ck
import numpy as np
import pandas as pd # for pd.to_datetime and the timedelta type
import pickle # to save results

from datetime import datetime
from time import sleep

In [2]:
def solve_heateq_2d(t_top=100.0, t_bottom=0.0,
                    t_left=0.0, t_right=0.0, side_len=10,
                    max_iter=10000, rtol=1e-4, atol=1e-7):
    """Solve steady-state 2D heat equation by Gauss-Seidel Method

    This is a pedagogical or benchmarking tool only. There are
    better ways to solve the 2D heat equation if that's all you
    want. There are even better ways to implement Gauss-Seidel.

    Parameters
    ----------
    t_top : float, default=100.0
        Dirichlet boundary condition for the top of the plate

    t_bottom : float, default=0.0
        Dirichlet boundary condition for the bottom of the plate

    t_left : float, default=0.0
        Dirichlet boundary condition for the left of the plate

    t_right : float, default=0.0
        Dirichlet boundary condition for the right of the plate

    side_len : int, default=10
        Number of points on one side of the 2D grid

    max_iter : int, default=10000
        Maximum iteration number for Gauss-Seidel Method

    rtol : float, default=1e-4
        Relative convergence tolerance for early exit of Gauss-Seidel loop

    atol : float, default=1e-7
        Absolute convergence tolerance for early exit of Gauss-Seidel loop

    Returns
    -------
    dict
        Dictionary with keys
        'temperature' - numpy.ndarray of shape (side_len, side_len)
            holding the final temperature grid
        'iteration' - zero-based index of the sweep at which
            convergence was detected (or max_iter if convergence
            was not reached)
    """
    # Imported inside the body so the function stays self-contained when
    # it is shipped to remote workers (cloudknot expects this of the
    # functions it wraps).
    import numpy as np

    # Initial guess of interior grid
    t_init = np.mean([t_top, t_bottom, t_left, t_right])

    # Create grid of temps
    t = np.full((side_len, side_len), t_init, dtype=np.float64)

    # Set boundary conditions. The left/right columns are written last,
    # so they determine the corner values.
    t[-1, :] = t_top
    t[0, :] = t_bottom
    t[:, -1] = t_right
    t[:, 0] = t_left

    # Gauss-Seidel loop: each interior point is replaced in place by the
    # average of its four neighbours, so later points in a sweep already
    # see the updated values of earlier ones.
    t_old = np.copy(t)
    for iteration in range(max_iter):
        for i in range(1, side_len - 1):
            for j in range(1, side_len - 1):
                t[i, j] = 0.25 * (t[i + 1, j] + t[i - 1, j]
                                  + t[i, j + 1] + t[i, j - 1])

        # Honour the caller's tolerances instead of np.allclose's defaults
        if np.allclose(t, t_old, rtol=rtol, atol=atol):
            break

        np.copyto(t_old, t)
    else:
        # Loop ran out (or max_iter == 0) without meeting the tolerance
        iteration = max_iter

    return {'temperature': t, 'iteration': iteration}

In [4]:
# Wall-clock timing of the first Knot construction
t_start = datetime.now()
knot = ck.Knot(
    func=solve_heateq_2d,
    name='test-heat-eq',
    min_vcpus=512, desired_vcpus=2048, max_vcpus=4096,
)
t_stop = datetime.now()
print('First time Knot initialization time: ', t_stop - t_start)



First time Knot initialization time:  0:00:33.452684


In [5]:
# Construct the same Knot (same name and settings) again and time it
t_start = datetime.now()
knot = ck.Knot(
    func=solve_heateq_2d,
    name='test-heat-eq',
    min_vcpus=512, desired_vcpus=2048, max_vcpus=4096,
)
t_stop = datetime.now()
print('Subsequent Knot initialization time: ', t_stop - t_start)



Subsequent Knot initialization time:  0:00:02.530435


In [6]:
# Sweep the number of argument tuples per submission over 2, 4, ..., 4096
futures = {}
for npoints in 2 ** np.arange(1, 13):
    print('npoints = {0:d}'.format(int(npoints)))
    # Each tuple is
    # (t_top, t_bottom, t_left, t_right, side_len, max_iter, rtol, atol);
    # only t_top varies, evenly spaced over [0, 100].
    args = [
        (t_top, 0.0, 0.0, 0.0, 10, 10000, 1e-4, 1e-7)
        for t_top in np.linspace(0, 100, int(npoints))
    ]
    future = knot.map(args, starmap=True)
    futures[npoints] = future
    # Retrieve this submission's result before starting the next one
    future.result()

npoints = 2
npoints = 4
npoints = 8
npoints = 16
npoints = 32
npoints = 64
npoints = 128
npoints = 256
npoints = 512
npoints = 1024
npoints = 2048
npoints = 4096


In [7]:
# AWS Batch client used by get_job_time below
batch = boto3.client(
    'batch',
    region_name='us-east-2',
)
def get_job_time(parent_job_id, client=None):
    """Return the longest creation-to-stop time among an array job's children

    The elapsed time of each child job is measured from the parent job's
    ``createdAt`` timestamp to that child's ``stoppedAt`` timestamp.

    Parameters
    ----------
    parent_job_id : str
        Job ID of the parent array job

    client : object, optional
        Object exposing ``describe_jobs(jobs=[...])`` in the style of the
        boto3 Batch client. Defaults to the module-level ``batch`` client.

    Returns
    -------
    pandas.Timedelta
        Maximum of (child ``stoppedAt`` - parent ``createdAt``) over all
        child jobs

    Raises
    ------
    KeyError
        If the parent description lacks ``arrayProperties``/``size`` or a
        child description lacks ``stoppedAt``
    IndexError
        If ``describe_jobs`` does not return a description for every
        child job
    """
    if client is None:
        client = batch

    parent_job = client.describe_jobs(jobs=[parent_job_id])['jobs'][0]
    job_size = parent_job['arrayProperties']['size']
    start_time = pd.to_datetime(parent_job['createdAt'], unit='ms')

    child_job_ids = [
        parent_job_id + ':{i:d}'.format(i=i) for i in range(job_size)
    ]

    # describe_jobs accepts up to 100 job IDs per request, so query the
    # children in chunks instead of issuing one request per child.
    max_ids_per_call = 100
    stop_times = []
    for lo in range(0, job_size, max_ids_per_call):
        response = client.describe_jobs(
            jobs=child_job_ids[lo:lo + max_ids_per_call]
        )
        stop_times.extend(job['stoppedAt'] for job in response['jobs'])

    if len(stop_times) != job_size:
        raise IndexError(
            'describe_jobs returned {got:d} of {want:d} child jobs '
            'for {jid}'.format(got=len(stop_times), want=job_size,
                               jid=parent_job_id)
        )

    # Only the maximum is needed, so the order of the responses is irrelevant
    stop_times = pd.to_datetime(stop_times, unit='ms')
    max_delta = (stop_times - start_time).max()
    return max_delta

In [8]:
# Display the Knot's list of jobs
knot.jobs

[<cloudknot.aws.batch.BatchJob at 0x10e08db00>,
 <cloudknot.aws.batch.BatchJob at 0x109fb0208>,
 <cloudknot.aws.batch.BatchJob at 0x10ec8b780>,
 <cloudknot.aws.batch.BatchJob at 0x10a1426a0>,
 <cloudknot.aws.batch.BatchJob at 0x10a2f0898>,
 <cloudknot.aws.batch.BatchJob at 0x10a4242b0>,
 <cloudknot.aws.batch.BatchJob at 0x10e291748>,
 <cloudknot.aws.batch.BatchJob at 0x10f2535f8>,
 <cloudknot.aws.batch.BatchJob at 0x10e2a6438>,
 <cloudknot.aws.batch.BatchJob at 0x107c97b00>,
 <cloudknot.aws.batch.BatchJob at 0x10e451dd8>,
 <cloudknot.aws.batch.BatchJob at 0x10df50ba8>]

In [9]:
# Collect the job_id of every job currently held by the Knot
default_knot_job_ids = [
    batch_job.job_id
    for batch_job in knot.jobs
]

In [10]:
# One get_job_time result per collected job id, in the same order
exec_times = list(map(get_job_time, default_knot_job_ids))

In [11]:
# Pair each npoints value of the sweep with the corresponding entry of
# exec_times, one record per submission.
cloudknot_nargs_scaling = [
    {'npoints': npoints, 'max_job_time': exec_time}
    for npoints, exec_time in zip(2 ** np.arange(1, 13), exec_times)
]
cloudknot_nargs_scaling

[{'max_job_time': Timedelta('0 days 00:01:02.008000'), 'npoints': 2},
 {'max_job_time': Timedelta('0 days 00:00:45.058000'), 'npoints': 4},
 {'max_job_time': Timedelta('0 days 00:00:43.976000'), 'npoints': 8},
 {'max_job_time': Timedelta('0 days 00:00:26.312000'), 'npoints': 16},
 {'max_job_time': Timedelta('0 days 00:00:23.610000'), 'npoints': 32},
 {'max_job_time': Timedelta('0 days 00:00:30.847000'), 'npoints': 64},
 {'max_job_time': Timedelta('0 days 00:00:43.013000'), 'npoints': 128},
 {'max_job_time': Timedelta('0 days 00:01:16.391000'), 'npoints': 256},
 {'max_job_time': Timedelta('0 days 00:02:09.463000'), 'npoints': 512},
 {'max_job_time': Timedelta('0 days 00:03:42.536000'), 'npoints': 1024},
 {'max_job_time': Timedelta('0 days 00:06:42.347000'), 'npoints': 2048},
 {'max_job_time': Timedelta('0 days 00:11:02.090000'), 'npoints': 4096}]

In [12]:
# Save the nargs-scaling records to a pickle file in the working directory
with open('cloudknot_nargs_scaling.pkl', mode='wb') as pkl_file:
    pickle.dump(cloudknot_nargs_scaling, pkl_file)

In [13]:
# Grid side lengths used for the system-size sweep
side_lens = np.asarray(
    [10, 25, 50, 100, 125, 150, 175]
)

In [14]:
# Sweep the grid size: five argument tuples per submission, one
# submission per entry in side_lens.
futures = {}
for side_len in side_lens:
    print('side_len = {0:d}'.format(int(side_len)))
    # Each tuple is
    # (t_top, t_bottom, t_left, t_right, side_len, max_iter, rtol, atol);
    # only t_top varies, evenly spaced over [0, 100].
    args = [
        (t_top, 0.0, 0.0, 0.0, side_len, 10000, 1e-4, 1e-7)
        for t_top in np.linspace(0, 100, 5)
    ]
    future = knot.map(args, starmap=True)
    futures[side_len] = future
    # Retrieve this submission's result before starting the next one
    future.result()

side_len = 10
side_len = 25
side_len = 50
side_len = 100
side_len = 125
side_len = 150
side_len = 175


In [15]:
# The side_len sweep contributes one job per entry in side_lens, and those
# are the most recent entries of knot.jobs. Slice them off the tail rather
# than hard-coding how many jobs earlier cells submitted, so the selection
# still picks the latest sweep if earlier cells are changed or re-run.
default_knot_job_ids = [job.job_id for job in knot.jobs[-len(side_lens):]]
exec_times = [get_job_time(jid) for jid in default_knot_job_ids]

# One record per side length, paired with the matching entry of exec_times
cloudknot_syssize_scaling = []
for side_len, exec_time in zip(side_lens, exec_times):
    cloudknot_syssize_scaling.append({
        'side_len': side_len,
        'max_job_time': exec_time
    })
cloudknot_syssize_scaling

[{'max_job_time': Timedelta('0 days 00:00:54.345000'), 'side_len': 10},
 {'max_job_time': Timedelta('0 days 00:00:30.709000'), 'side_len': 25},
 {'max_job_time': Timedelta('0 days 00:00:37.784000'), 'side_len': 50},
 {'max_job_time': Timedelta('0 days 00:01:12.454000'), 'side_len': 100},
 {'max_job_time': Timedelta('0 days 00:02:09.751000'), 'side_len': 125},
 {'max_job_time': Timedelta('0 days 00:03:43.170000'), 'side_len': 150},
 {'max_job_time': Timedelta('0 days 00:06:40.223000'), 'side_len': 175}]

In [16]:
# Save the system-size-scaling records to a pickle file in the working directory
with open('cloudknot_syssize_scaling.pkl', mode='wb') as pkl_file:
    pickle.dump(cloudknot_syssize_scaling, pkl_file)

In [17]:
# Clobber the Knot, with the pars, repo and image clobber flags all enabled
knot.clobber(
    clobber_pars=True,
    clobber_repo=True,
    clobber_image=True,
)

