In [None]:
import requests
import time
from fair_research_login import NativeClient

# Log in through Globus Auth with a native-app client. The requested scopes
# are the Transfer API, a service scope (presumably funcX -- its token is read
# from tokens['funcx_service'] below), plus the email and openid claims.
client = NativeClient(client_id='7414f0b4-7d05-4bb6-bb00-076fa3f17cf5')
tokens = client.login(
    requested_scopes=['urn:globus:auth:scope:transfer.api.globus.org:all',
                      "https://auth.globus.org/scopes/facd7ccc-c5f4-42aa-916b-a0e270e2c2a9/all",
                      'email', 'openid'],
    no_local_server=True,
    no_browser=True)

# Access tokens for the two services used by the rest of the notebook.
transfer_token = tokens['transfer.api.globus.org']['access_token']
funcx_token = tokens['funcx_service']['access_token']
headers = {'Authorization': f"Bearer {funcx_token}", 'Transfer': transfer_token, 'FuncX': funcx_token}

# Do not echo the tokens themselves: cell output is stored in the .ipynb file
# and would leak live credentials to anyone the notebook is shared with.
redacted_headers = {header_name: '<redacted>' for header_name in headers}
print(f"Headers: {redacted_headers}")

In [None]:
from xtracthub.xcs import XtractConnection
# Connection object built from the funcX access token obtained in the login
# cell; every register_container / build / get_status / pull / repo2docker
# call in the cells below goes through it.
xconn = XtractConnection(funcx_token)

In [None]:
import matplotlib.pyplot as plt
import csv

### Plotting Threads vs. Time for Singularity and Docker
For this experiment I will measure the time that it takes for XCS to build a fixed number of containers for various numbers of threads. I will then scale up the number of containers. The upload speed will be capped in order to prevent too much variance for Docker.

In [None]:
import os

# Results file for the thread-vs-time experiment; the experiment cell opens
# this same name in append mode and adds one row per configuration.
csv_name = 'thread_time_results_v2.csv'

# Start from a fresh file unless one already exists and the user declines.
overwrite = True
if os.path.exists("./" + csv_name):
    print(f"{csv_name} already exists, do you want to overwrite?")
    # Declining used to set csv_name to None, which made the open() below and
    # the append-mode open() in the experiment cell raise TypeError. Keep the
    # name instead and leave the existing file (header included) untouched, so
    # new rows are simply appended to it.
    if input().strip().lower() in ("no", "n"):
        overwrite = False

if overwrite:
    # newline='' lets the csv module control line endings itself.
    with open(csv_name, mode='w', newline='') as f:
        csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow(["type", "threads", "containers", "time", "fails"])

In [None]:
import datetime
import time
import uuid
from IPython.display import clear_output

# Benchmark sweep: for every (containers, threads) combination, set the
# service's thread count, register and build `containers` Singularity images,
# poll until each build reports success or failure, then append one CSV row
# with the elapsed time and the number of failed builds.
with open(csv_name, mode='a', newline='') as f:
    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for containers in range(10, 60, 10):
        for threads in range(5, 55, 5):
            print(f"Containers {containers}")
            print(f"Threads {threads}")
            # The timeout keeps a hung service from stalling the notebook, and
            # a rejected request aborts the run: otherwise the row would be
            # labelled with a thread count the service never applied.
            thread_response = requests.post("http://149.165.168.132/change_thread",
                                            json={"threads": threads}, timeout=30)
            thread_response.raise_for_status()
            print("Starting Singularity")

            definition_ids = []
            for i in range(containers):
                file_name = "my_test.def"
                file_path = "./examples/my_example.txt"
                definition_id = xconn.register_container(file_name, file_path)
                definition_ids.append(definition_id)

            build_ids = []
            # The clock starts before the first build request is submitted.
            start_time = datetime.datetime.now()
            for idx, definition_id in enumerate(definition_ids):
                build_id = xconn.build(definition_id, "singularity", "my_test_{}.sif".format(idx))
                build_ids.append(build_id)
                print(build_id)

            # Poll every build once per pass until none is still in progress.
            while True:
                clear_output(True)
                statuses = []
                finish_times = []
                pending = 0
                for build_id in build_ids:
                    status = xconn.get_status(build_id)
                    print(status)
                    if status["build_status"] == "success":
                        finish_times.append(status["build_time"])
                        statuses.append("success")
                    elif status["build_status"] == "failed":
                        statuses.append("failed")
                    else:
                        pending += 1
                if pending == 0:
                    # Leave immediately; sleeping once more would only add a
                    # second of idle time to every configuration.
                    break
                time.sleep(1)

            if finish_times:
                # build_time is parsed as "MM/DD/YYYY, HH:MM:SS". The one-hour
                # shift is carried over unchanged -- presumably it aligns the
                # service's clock with this machine's (TODO confirm).
                finish_times = [
                    datetime.datetime.strptime(stamp, "%m/%d/%Y, %H:%M:%S") - datetime.timedelta(hours=1)
                    for stamp in finish_times
                ]
                total_time = max((finished - start_time).total_seconds() for finished in finish_times)
            else:
                # No build succeeded, so there is no reported finish time and
                # max() over an empty list would raise ValueError. Fall back to
                # the local wall-clock time until the last failure was seen;
                # the "fails" column marks such rows.
                total_time = (datetime.datetime.now() - start_time).total_seconds()

            csv_writer.writerow(["singularity", threads, containers, total_time, statuses.count("failed")])
            # Flush per row so finished configurations survive an interrupted sweep.
            f.flush()

In [None]:
import time
import uuid
from IPython.display import clear_output
results = []

# Single-configuration comparison: build `containers` Docker images and then
# `containers` Singularity images with the same thread count, recording one
# row per container type. Note that this cell appends to
# thread_time_results.csv, not to the _v2 file used by the sweep cell.
with open('thread_time_results.csv', mode='a', newline='') as f:
    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    containers = 10
    threads = 5

    # Set the thread count once for both runs. The timeout keeps a hung service
    # from stalling the notebook, and a rejected request aborts the cell rather
    # than recording rows under a thread count that was never applied.
    thread_response = requests.post("http://149.165.168.132/change_thread",
                                    json={"threads": threads}, timeout=30)
    thread_response.raise_for_status()

    # (banner, container type, definition file name, definition path, image name template)
    runs = [
        ("Starting Docker", "docker", "Dockerfile", "./examples/matio_dockerfile", "my_test_{}"),
        ("Starting Singularity", "singularity", "my_test.def", "./examples/my_example.txt", "my_test_{}.sif"),
    ]

    for banner, container_type, file_name, file_path, image_name in runs:
        print(banner)

        definition_ids = []
        for i in range(containers):
            definition_id = xconn.register_container(file_name, file_path)
            definition_ids.append(definition_id)

        build_ids = []
        for idx, definition_id in enumerate(definition_ids):
            build_id = xconn.build(definition_id, container_type, image_name.format(idx))
            build_ids.append(build_id)
            print(build_id)

        # Here the clock starts after all build requests have been submitted.
        start_time = time.time()
        while True:
            clear_output(True)
            statuses = []
            pending = 0
            for build_id in build_ids:
                status = xconn.get_status(build_id)
                print(status)
                if status["build_status"] in ("success", "failed"):
                    statuses.append(status["build_status"])
                else:
                    pending += 1
            if pending == 0:
                # Stop before sleeping: the old loop slept one more second
                # after the last build finished, and that second was counted
                # in the recorded time.
                break
            time.sleep(1)
        elapsed = time.time() - start_time

        csv_writer.writerow([container_type, threads, containers, elapsed, statuses.count("failed")])
        # Flush per row so the Docker result survives a failure in the second run.
        f.flush()

In [None]:
# Load the recorded results. Slicing off the header (instead of pop(0)) also
# copes with an empty file, and blank rows are skipped.
with open('thread_time_results_v2.csv', newline='') as csv_file:
    data = list(csv.reader(csv_file, delimiter=','))[1:]
data = [row for row in data if row and row[0] == "singularity"]

# One scatter plot per container count. The counts are read back as strings,
# so sort them numerically to get a stable, ascending figure order (iterating
# a bare set gave an arbitrary one).
for i in sorted({row[2] for row in data}, key=int):
    cont_data = [row for row in data if row[2] == i]
    # Thread counts must be numeric: plotting the raw CSV strings makes
    # matplotlib treat the x axis as categorical labels.
    x = [int(row[1]) for row in cont_data]
    y = [row[3] for row in cont_data]
    print(y)
    # Times are truncated to whole seconds, as before.
    y = list(map(int, map(float, y)))
    print(y)
    plt.scatter(x, y)
    plt.title(f"{i} containers, Singularity")
    plt.xlabel("Threads")
    plt.ylabel("Time")
    plt.show()

In [None]:
definition_ids = []

# Register the same Dockerfile ten times, keeping each returned definition id.
for i in range(10):
    file_name = "Dockerfile"
    file_path = "./examples/matio_dockerfile"
    # Close the handle as soon as the registration call returns; previously
    # every iteration left an open file behind.
    # NOTE(review): the benchmark cells pass the path string here rather than
    # an open file -- confirm which form register_container expects.
    with open(file_path, "rb") as definition_file:
        definition_id = xconn.register_container(file_name, definition_file)
    definition_ids.append(definition_id)
    print(definition_id)

In [None]:
# Submit a Docker build for every registered definition and collect the
# returned build ids in submission order.
build_ids = []

for idx, definition_id in enumerate(definition_ids):
    image_name = f"my_test_{idx}"
    build_id = xconn.build(definition_id, "docker", image_name)
    print(build_id)
    build_ids += [build_id]


print(build_ids)

In [None]:
import time
from IPython.display import clear_output

# Re-query every build every five seconds, printing each status and a
# timestamp, until all builds report either "success" or "failed".
keep_printing = True
while keep_printing:
    clear_output(True)
    is_done = []
    for build_id in build_ids:
        status = xconn.get_status(build_id)
        print(status)
        is_done.append(status["build_status"] in ("success", "failed"))
        print(time.time())
    # Another pass is needed while at least one build is unfinished.
    keep_printing = not all(is_done)
    time.sleep(5)

In [None]:
import os
t0 = time.time()
# Example for pulling a container
for build_id in build_ids:
    # One archive per build. With a single shared "my_test.tar", every pull
    # overwrote the previous one, and a file left by an earlier pull made the
    # existence check below report success even when the current pull failed.
    container_path = os.path.join(os.path.abspath("."), "my_test_{}.tar".format(build_id))
    response = xconn.pull(build_id, container_path)

    if os.path.exists(container_path):
        print("Successfully pulled container to {}".format(container_path))
    else:
        # Nothing was written to disk; show what the pull call returned.
        print(response)
    # Elapsed time is cumulative from the start of the cell, not per pull.
    print("Pulled in {}".format(time.time() - t0))

In [None]:
t0 = time.time()
# Example for building a Docker container with a git repo: ten repo2docker
# requests against the same repository, named xfs0 through xfs9.
git_repo = "https://github.com/rewong03/xtract_file_service"
build_ids = []
for i in range(10):
    container_name = "xfs" + str(i)
    build_id = xconn.repo2docker(container_name, git_repo=git_repo)
    build_ids += [build_id]
    print(build_id)
    # Cumulative time since the first request was issued.
    elapsed = time.time() - t0
    print(f"Response received in {elapsed}")

In [None]:
# Example for getting the status of a container.
# `build_id` is not defined in this cell; it is whatever value the most
# recently executed build loop left behind.
t0 = time.time()
status = xconn.get_status(build_id)
print(status)
elapsed = time.time() - t0
print(f"Got status in {elapsed}")

In [None]:
# Replace the build-id list with a single hard-coded id.
# NOTE(review): presumably an id from an earlier session, kept so the
# status/pull cells can be re-run without rebuilding -- confirm it is still valid.
build_ids = ['26bda2ab-1bd1-4bb1-be04-20f4e243f47b']