In [58]:
import os
import sys
import importlib

# Temporary shim: put the parent directory on sys.path so the in-repo `plex`
# package can be imported. This can disappear once plex is a pip package.
module_path = os.path.abspath('..')
if module_path not in sys.path:
    sys.path.append(module_path)

# Import the SDK, then reload it so edits made to the local checkout are
# picked up when this cell is re-run in an already-running kernel.
import plex.sdk
importlib.reload(plex.sdk)

# Runtime configuration, exported through the process environment.
# NOTE(review): the access token is hard-coded in the notebook — confirm it is
# a public/shared value before committing or sharing this file.
# NOTE(review): these are set after plex.sdk is imported; presumably the SDK
# reads them at call time rather than import time — confirm.
os.environ.update({
    "PLEX_ACCESS_TOKEN": "mellon",
    "PLEX_ENV": "stage",
    "DOCKER_DEFAULT_PLATFORM": "linux/x86_64/v8",
})

In [63]:
import csv

def _file_slot(filepath=""):
    """Return a new ``{"class": "File", "filepath": ...}`` IO slot.

    Output slots are created with an empty filepath, as in the original graph.
    """
    return {"class": "File", "filepath": filepath}


def _complex_subgraph(protein_path, ligand_path, equibind_index):
    """Return the three IO entries (equibind, oddt, rmsd) for one complex.

    ``equibind_index`` is the position the equibind entry will occupy in the
    final IO graph; the two downstream entries refer to its outputs through
    ``${<index>[<output name>]}`` placeholders.
    """
    # Placeholders pointing at the equibind entry's outputs (presumably
    # resolved by plex once that entry has completed — confirm against plex).
    docked_protein = f"${{{equibind_index}[protein]}}"
    docked_ligand = f"${{{equibind_index}[best_docked_small_molecule]}}"

    return [
        {
            "tool": "tools/equibind.json",
            "inputs": {
                "protein": _file_slot(protein_path),
                "small_molecule": _file_slot(ligand_path),
            },
            "outputs": {
                "best_docked_small_molecule": _file_slot(),
                "protein": _file_slot(),
            },
            "state": "created",
            "errMsg": "",
        },
        {
            "tool": "tools/oddt.json",
            "inputs": {
                "protein": _file_slot(docked_protein),
                "small_molecule": _file_slot(docked_ligand),
            },
            # NOTE(review): these output names duplicate equibind's; they look
            # swapped with the rmsd entry's outputs below — confirm against the
            # tool definitions before changing.
            "outputs": {
                "best_docked_small_molecule": _file_slot(),
                "protein": _file_slot(),
            },
            "state": "created",
            "errMsg": "",
        },
        {
            # Key order (outputs, tool, inputs, ...) is kept as in the original
            # so the serialized form of the entry is unchanged.
            "outputs": {
                "scored_small_molecule": _file_slot(),
                "scores": _file_slot(),
            },
            "tool": "tools/openbabel/rmsd-openbabel.json",
            "inputs": {
                # NOTE(review): the reference structure is the protein, not a
                # reference ligand pose — verify this is intended.
                "reference_structure": _file_slot(docked_protein),
                "comparison_structure": _file_slot(docked_ligand),
            },
            "state": "created",
            "errMsg": "",
        },
    ]


def create_pdbind_io_dict(csv_path, pdbbind_data_root, size=float('inf')):
    """Build a flat plex IO graph from a CSV listing protein/ligand pairs.

    Each CSV row must provide the columns ``protein_path`` and
    ``ligand_description`` (paths relative to ``pdbbind_data_root``) and
    ``complex_name`` (used only in the skip message). Rows whose protein or
    ligand file does not exist on disk are skipped with a printed message.
    Every accepted row contributes three consecutive entries to the result:
    an equibind entry, then an oddt entry and an rmsd-openbabel entry whose
    inputs reference the equibind entry's outputs by its index in the list.

    Args:
        csv_path: Path of the CSV file to read.
        pdbbind_data_root: Directory the CSV's relative paths are joined onto.
        size: Maximum number of accepted rows (complexes) to include. ``None``
            or infinity means no limit; zero or a negative value yields an
            empty graph without reading the CSV.

    Returns:
        A list of IO-entry dicts, three per accepted row.

    Raises:
        OSError: If ``csv_path`` cannot be opened.
        KeyError: If a required column is missing from a row.
    """
    io_graph = []

    if size is None:
        size = float('inf')
    # The limit used to be checked only after a complex had been appended, so
    # size=0 still produced one complex. Handle the empty case up front.
    if size <= 0:
        return io_graph

    i = 0
    # newline='' is what the csv module documents for files handed to a reader,
    # so newlines embedded in quoted fields are parsed correctly.
    with open(csv_path, 'r', newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            protein_path = os.path.join(pdbbind_data_root, row['protein_path'])
            ligand_path = os.path.join(pdbbind_data_root, row['ligand_description'])

            if not (os.path.exists(protein_path) and os.path.exists(ligand_path)):
                print(f"Skipping row {row['complex_name']} due to missing file(s).")
                continue

            # The equibind entry lands at the current end of the list; using
            # len() keeps the reference correct without relying on the fixed
            # subgraph length of three.
            io_graph += _complex_subgraph(protein_path, ligand_path, len(io_graph))

            # Progress output: index of the complex just added.
            print(i)
            i += 1
            if i >= size:
                break
    return io_graph

# Dataset location. The defaults reproduce the original local-mac layout; on
# another machine, override them through the environment instead of editing
# this cell, e.g. on the linux box:
#   PDBBIND_DATA_ROOT=/home/ubuntu
#   PDBBIND_CSV_PATH=/home/ubuntu/datasets/diffdock_testdata.csv
pdbbind_data_root = os.environ.get("PDBBIND_DATA_ROOT", "/Users/mcmenemy/Documents")
pdbbind_csv_path = os.environ.get(
    "PDBBIND_CSV_PATH",
    os.path.join(pdbbind_data_root, "diffdock_testdata.csv"),
)

# Limit the graph to the first 2 complexes whose files exist (3 entries each).
io_graph = create_pdbind_io_dict(pdbbind_csv_path, pdbbind_data_root, 2)
print(f"Created IO Graph of length {len(io_graph)}")


0
1
Created IO Graph of length 6


In [64]:
from plex.sdk import run_plex

# Hand the assembled IO graph to plex. The keyword flags are passed exactly as
# before; presumably `local` selects local execution and `concurrency` bounds
# how many entries are processed at once — confirm against plex.sdk.
run_plex(
    io_graph,
    local=True,
    verbose=True,
    concurrency=2,
)

Plex version (v0.6.1) up to date.
BACALHAU_API_HOST not set, using default host
toolPath 
Running IPWL io path
Created working directory:  /Users/mcmenemy/code/plex/736a551f-f9ed-4d31-bf35-d3916173f37a
Reading IO Entries from:  /var/folders/yh/vwqzmpsd55xchsjvhxvbnqvh0000gn/T/tmput34gslo/io_data.json
Initialized IO file at:  /Users/mcmenemy/code/plex/736a551f-f9ed-4d31-bf35-d3916173f37a/io.json
Processing IO Entries
/Users/mcmenemy/code/plex/736a551f-f9ed-4d31-bf35-d3916173f37a
/Users/mcmenemy/code/plex/736a551f-f9ed-4d31-bf35-d3916173f37a/io.json
Starting to process IO entry 5 
Starting to process IO entry 2 
IO Subgraph at 5 is still waiting on inputs to complete 
Success processing IO entry 5 
Starting to process IO entry 3 
IO Subgraph at 2 is still waiting on inputs to complete 
Success processing IO entry 2 
Starting to process IO entry 4 
IO Subgraph at 4 is still waiting on inputs to complete 
Success processing IO entry 4 
Starting to process IO entry 0 
Generated docker cmd: 