In [2]:
import subprocess
import tempfile
import os
import pickle

def run_vsflow(filename_and_content):
    input_file_name, file_content = filename_and_content

    print("got here")
    # Write the file content to a temporary input file
    with tempfile.NamedTemporaryFile(delete=False, mode="w+", suffix=".sdf") as tmp_input_file:
        tmp_input_file.write(file_content)
        tmp_input_file.flush()
        input_file_path = tmp_input_file.name

    # Create a temporary output file
    with tempfile.NamedTemporaryFile(delete=False, mode="wb+", suffix=".vsdb") as tmp_output_file:
        temp_file_path = os.path.abspath(tmp_output_file.name)

    # The vsflow command uses the temporary input file
    command = ["vsflow", "preparedb", "-i", input_file_path, "-o", temp_file_path, "-s", "-can", "-np", "4"]
    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    pickle_data = None

    if result.returncode == 0:
        print("vsflow executed successfully.")
        if os.path.getsize(temp_file_path) > 0:
            with open(temp_file_path, 'rb') as f:
                pickle_data = pickle.dumps(f.read())
        else:
            print(f"The output file {temp_file_path} is empty.")
    else:
        print("vsflow failed to execute:")
        print(result.stdout)
        print(result.stderr)
    try:
        os.remove(input_file_path)
    except OSError as e:
        print(f"Error: {input_file_path} : {e.strerror}")

    return input_file_name, pickle_data

In [3]:
destination_directory = "outputs"

input_files = ["fda.sdf",
               "fda copy.sdf", 
               "fda copy 2.sdf", 
               "fda copy 3.sdf", 
               "fda copy 4.sdf", 
               "fda copy 5.sdf",
               "fda copy 6.sdf",
               "fda copy 7.sdf",
               "fda copy 8.sdf",
               "fda copy 9.sdf",
               "fda copy 10.sdf",
               "fda copy 11.sdf",
               "fda copy 12.sdf",
               "fda copy 13.sdf",
               "fda copy 14.sdf",
               "fda copy 15.sdf",
               "fda copy 16.sdf",
               "fda copy 17.sdf",
               "fda copy 18.sdf",
               "fda copy 19.sdf",
               ]



file_contents_with_names = []
for input_file in input_files:
    with open(input_file, 'r') as f:
        file_content = f.read()
        file_contents_with_names.append((input_file, file_content))

In [3]:
for pickle_object in  map(run_vsflow, file_contents_with_names):
    input_file_name, object = pickle_object
    with open(os.path.join(destination_directory, os.path.splitext(input_file_name)[0] + ".vsdb"), 'wb') as out_file:
        out_file.write(object)
    break

got here
vsflow executed successfully.


In [1]:
import coiled

coiled.create_software_environment(
    name="vsflow_env",
    conda={
        "channels": ["conda-forge"],
        "dependencies": ["python=3.9", 
                         "dask[complete]", 
                         "coiled",
                         "rdkit",
                         "fpdf",
                         "pdfrw",
                         "xlsxwriter",
                         "xlrd",
                         "pymol-open-source",
                         "molvs",
                         "matplotlib",
        ]
    },
    pip=[
        "git+https://github.com/czodrowskilab/VSFlow.git",
    ],
)


--- Logs from remote build follow ---
[2023-11-10 15:24:45,663][INFO    ][cloud-env.build] Downloading environment definition
[2023-11-10 15:24:45,770][INFO    ][cloud-env.build] Installing environment.yaml: 
channels:
  - conda-forge
dependencies:
  - python=3.9
  - dask[complete]
  - coiled
  - rdkit
  - fpdf
  - pdfrw
  - xlsxwriter
  - xlrd
  - pymol-open-source
  - molvs
  - matplotlib
[2023-11-10 15:24:45,770][INFO    ][cloud-env.subproc] /bin/micromamba create -f /tmp/tmpmv614r5q.yml -r /opt/coiled/rt -p /opt/coiled/env -y --no-pyc --json
[2023-11-10 15:25:24,670][INFO    ][cloud-env.subproc] /bin/micromamba -r /opt/coiled/rt clean -a
                                           __
          __  ______ ___  ____ _____ ___  / /_  ____ _
         / / / / __ `__ \/ __ `/ __ `__ \/ __ \/ __ `/
        / /_/ / / / / / / /_/ / / / / / / /_/ / /_/ /
       / .___/_/ /_/ /_/\__,_/_/ /_/ /_/_.___/\__,_/
      /_/
Collect information..
Cleaning index cache..
Cleaning lock files..
  Package 

{'id': 48097,
 'created': '2023-11-10T15:24:42.763159+00:00',
 'updated': '2023-11-10T15:24:42.767724+00:00',
 'name': 'vsflow_env',
 'spec_count': 1,
 'latest_spec': {'id': 50055,
  'created': '2023-11-10T15:24:42.766814+00:00',
  'updated': '2023-11-10T15:24:42.775419+00:00',
  'md5': '461dd4b5477824e7dcdbc9243d3753ae',
  'gpu_enabled': False,
  'latest_build': {'id': 39319,
   'created': '2023-11-10T15:24:42.771852+00:00',
   'updated': '2023-11-10T15:24:42.773326+00:00',
   'state': 'queued',
   'reason': 'Pending'},
  'container_uri': None,
  'architecture': 'x86_64'}}

In [4]:

cluster = coiled.Cluster(n_workers=10,
                         name = "testing",
                         software = "vsflow_env")
client = cluster.get_client()


# from dask.distributed import LocalCluster

# cluster = LocalCluster(n_workers=3)

# client = cluster.get_client()

Output()

In [5]:
client

0,1
Connection method: Cluster object,Cluster type: coiled.Cluster
Dashboard: https://cluster-sngum.dask.host/FNuDrQUfp9kVBCNQ/status,

0,1
Dashboard: https://cluster-sngum.dask.host/FNuDrQUfp9kVBCNQ/status,Workers: 10
Total threads: 40,Total memory: 148.43 GiB

0,1
Comm: tls://10.0.43.49:8786,Workers: 10
Dashboard: http://10.0.43.49:8787/status,Total threads: 40
Started: 2 minutes ago,Total memory: 148.43 GiB

0,1
Comm: tls://10.0.37.10:34615,Total threads: 4
Dashboard: http://10.0.37.10:8787/status,Memory: 14.85 GiB
Nanny: tls://10.0.37.10:39539,
Local directory: /scratch/dask-scratch-space/worker-b95k9p3q,Local directory: /scratch/dask-scratch-space/worker-b95k9p3q

0,1
Comm: tls://10.0.39.190:36257,Total threads: 4
Dashboard: http://10.0.39.190:8787/status,Memory: 14.85 GiB
Nanny: tls://10.0.39.190:37891,
Local directory: /scratch/dask-scratch-space/worker-qo0aqxfg,Local directory: /scratch/dask-scratch-space/worker-qo0aqxfg

0,1
Comm: tls://10.0.36.184:41147,Total threads: 4
Dashboard: http://10.0.36.184:8787/status,Memory: 14.83 GiB
Nanny: tls://10.0.36.184:45821,
Local directory: /scratch/dask-scratch-space/worker-o6dc7wka,Local directory: /scratch/dask-scratch-space/worker-o6dc7wka

0,1
Comm: tls://10.0.44.54:34775,Total threads: 4
Dashboard: http://10.0.44.54:8787/status,Memory: 14.85 GiB
Nanny: tls://10.0.44.54:36499,
Local directory: /scratch/dask-scratch-space/worker-kun83zqz,Local directory: /scratch/dask-scratch-space/worker-kun83zqz

0,1
Comm: tls://10.0.45.224:35195,Total threads: 4
Dashboard: http://10.0.45.224:8787/status,Memory: 14.84 GiB
Nanny: tls://10.0.45.224:46575,
Local directory: /scratch/dask-scratch-space/worker-es35d04q,Local directory: /scratch/dask-scratch-space/worker-es35d04q

0,1
Comm: tls://10.0.42.109:35263,Total threads: 4
Dashboard: http://10.0.42.109:8787/status,Memory: 14.84 GiB
Nanny: tls://10.0.42.109:43885,
Local directory: /scratch/dask-scratch-space/worker-kqjj7hj1,Local directory: /scratch/dask-scratch-space/worker-kqjj7hj1

0,1
Comm: tls://10.0.47.133:42295,Total threads: 4
Dashboard: http://10.0.47.133:8787/status,Memory: 14.84 GiB
Nanny: tls://10.0.47.133:38567,
Local directory: /scratch/dask-scratch-space/worker-7fcelh5t,Local directory: /scratch/dask-scratch-space/worker-7fcelh5t

0,1
Comm: tls://10.0.42.165:42203,Total threads: 4
Dashboard: http://10.0.42.165:8787/status,Memory: 14.85 GiB
Nanny: tls://10.0.42.165:39803,
Local directory: /scratch/dask-scratch-space/worker-478zymcp,Local directory: /scratch/dask-scratch-space/worker-478zymcp

0,1
Comm: tls://10.0.42.159:46859,Total threads: 4
Dashboard: http://10.0.42.159:8787/status,Memory: 14.84 GiB
Nanny: tls://10.0.42.159:36535,
Local directory: /scratch/dask-scratch-space/worker-dt2m60v6,Local directory: /scratch/dask-scratch-space/worker-dt2m60v6

0,1
Comm: tls://10.0.33.210:33727,Total threads: 4
Dashboard: http://10.0.33.210:8787/status,Memory: 14.84 GiB
Nanny: tls://10.0.33.210:43867,
Local directory: /scratch/dask-scratch-space/worker-w3s1qpj7,Local directory: /scratch/dask-scratch-space/worker-w3s1qpj7


In [6]:
file_contents_with_names = client.scatter(file_contents_with_names)

pickle_object_futures = client.map(run_vsflow, file_contents_with_names)

from dask.distributed import as_completed

for future in as_completed(pickle_object_futures):
    input_file_name, object = future.result()
    with open(os.path.join(destination_directory, os.path.splitext(input_file_name)[0] + ".vsdb"), 'wb') as out_file:
        out_file.write(object)

TypeError: a bytes-like object is required, not 'NoneType'

In [7]:
# client.restart()