In [33]:
import os
import sys
import importlib

# Put the parent directory on sys.path so `import plex.sdk` below can resolve.
# This can disappear once plex is a pip package.
module_path = os.path.abspath('..')
if module_path not in sys.path:
    sys.path.append(module_path)

import plex.sdk
# Reload so edits to the module are picked up without restarting the kernel.
importlib.reload(plex.sdk)

# Environment variables for this session.
# NOTE(review): the access token is hard-coded in the notebook — consider
# reading it from the real environment or a secrets store instead.
os.environ.update({
    "PLEX_ACCESS_TOKEN": "mellon",
    "PLEX_ENV": "stage",
})

In [34]:
import csv
import os
import json

def create_pdbind_io_dict(csv_path, base_dir="/home/ubuntu/"):
    """Build a list of equibind IO entries from a CSV of protein/ligand pairs.

    Every CSV row is expected to carry the columns ``protein_path``,
    ``ligand_description`` and ``complex_name``. The two path columns are
    joined onto ``base_dir``; a row is skipped (with a message on stdout)
    when either resulting file does not exist.

    Args:
        csv_path: Path of the CSV file to read.
        base_dir: Directory the per-row paths are resolved against. Defaults
            to ``"/home/ubuntu/"``, the value that used to be hard-coded.

    Returns:
        A list of dicts, one per usable row, each with the keys ``tool``,
        ``inputs``, ``outputs``, ``state`` (``"created"``) and ``errMsg``
        (empty). Output file paths are left as empty strings.

    Raises:
        OSError: If ``csv_path`` cannot be opened.
        KeyError: If a row lacks one of the expected columns.
    """
    io_data = []

    # newline='' is what the csv module asks for, so that quoted fields
    # containing line breaks are parsed correctly.
    with open(csv_path, 'r', newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            protein_path = os.path.join(base_dir, row['protein_path'])
            ligand_path = os.path.join(base_dir, row['ligand_description'])

            # Both inputs must be present on disk, otherwise the row is dropped.
            if not (os.path.exists(protein_path) and os.path.exists(ligand_path)):
                print(f"Skipping row {row['complex_name']} due to missing file(s).")
                continue

            io_data.append({
                "tool": "tools/equibind.json",
                "inputs": {
                    "protein": {
                        "class": "File",
                        "filepath": protein_path
                    },
                    "small_molecule": {
                        "class": "File",
                        "filepath": ligand_path
                    }
                },
                "outputs": {
                    "best_docked_small_molecule": {
                        "class": "File",
                        "filepath": ""
                    },
                    "protein": {
                        "class": "File",
                        "filepath": ""
                    }
                },
                "state": "created",
                "errMsg": ""
            })

    return io_data

# Example usage: build IO entries from the test-data CSV. Rows whose protein or
# ligand file is missing are skipped by create_pdbind_io_dict. The resulting
# `io_sig` list is the value later cells pass to run_plex.
csv_path = '/home/ubuntu/datasets/diffdock_testdata.csv'
io_sig = create_pdbind_io_dict(csv_path)


In [35]:
from plex.sdk import run_plex

# Submit the IO entries held in `io_sig`.
# NOTE(review): concurrency=2 presumably caps how many entries are processed
# at once — confirm against plex.sdk.run_plex.
run_plex(io_sig, concurrency=2)

Plex version (v0.6.1) up to date.
BACALHAU_API_HOST not set, using default host
toolPath 
Running IPWL io path
Created working directory:  /home/ubuntu/plex/4ce7171d-87c8-49ae-b728-cb480fded82a
Reading IO Entries from:  /tmp/tmp5ws3tt5g/io_data.json
Initialized IO file at:  /home/ubuntu/plex/4ce7171d-87c8-49ae-b728-cb480fded82a/io.json
Processing IO Entries
/home/ubuntu/plex/4ce7171d-87c8-49ae-b728-cb480fded82a
/home/ubuntu/plex/4ce7171d-87c8-49ae-b728-cb480fded82a/io.json
Starting to process IO entry 7 
Starting to process IO entry 3 
Job running...
Bacalhau job id: 3ca90601-2bfb-40f0-82ae-6c0ab2eea9dc 
////🌱____////
Job running...
Bacalhau job id: 6eaec00a-d692-40f0-b73a-00dc62c66b52 
////🌱____////
////_🌱___////
////_🌱___////
////__🌱__////
////__🌱__////
////___🌱_////
////___🌱_////
////____🌱////
Computing default go-libp2p Resource Manager limits based on:
    - 'Swarm.ResourceMgr.MaxMemory': "32 GB"
    - 'Swarm.ResourceMgr.MaxFileDescriptors': 524288

Appl

In [1]:
# generating statistics on the success rate of the runs
import json
import pandas as pd

def get_state_counts(json_filepath):
    """Summarise the ``state`` / ``errMsg`` values stored in an io.json file.

    Args:
        json_filepath: Path to a JSON file holding a list of objects, each
            with a ``state`` and an ``errMsg`` key.

    Returns:
        A tuple ``(counts_df, df)``:
          * ``counts_df`` has one row per distinct ``(state, errMsg)`` pair
            with its number of occurrences in a ``count`` column.
          * ``df`` has one row per JSON entry with columns ``state`` and
            ``errMsg``.
        An empty JSON list yields two empty frames with those columns.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If an entry lacks ``state`` or ``errMsg``.
    """
    # Load the JSON data from the file
    with open(json_filepath, 'r') as f:
        data = json.load(f)

    # Keep only the two fields of interest. Naming the columns explicitly
    # means an empty input still produces a frame that can be grouped;
    # without it, groupby raised KeyError on a column-less frame.
    df = pd.DataFrame(
        [{'state': item['state'], 'errMsg': item['errMsg']} for item in data],
        columns=['state', 'errMsg'],
    )

    # Count the occurrences of each unique "state" and "errMsg" combination
    counts_df = df.groupby(['state', 'errMsg']).size().reset_index(name='count')

    return counts_df, df

# Example usage: read the io.json under one plex working directory and print
# how many entries share each (state, errMsg) combination. `complete_df` keeps
# the per-entry rows and is inspected by later cells.
json_filepath = '/home/ubuntu/plex/0e1b24c5-870e-4a58-9b61-a302cecbbcd0/io.json'
state_counts_df, complete_df = get_state_counts(json_filepath)
print(state_counts_df)

        state                                             errMsg  count
0   completed                                                       289
1      failed  failed to write to '/home/ubuntu/plex/0e1b24c5...      1
2      failed  failed to write to '/home/ubuntu/plex/0e1b24c5...      1
3      failed  failed to write to '/home/ubuntu/plex/0e1b24c5...      1
4      failed  failed to write to '/home/ubuntu/plex/0e1b24c5...      1
5      failed  failed to write to '/home/ubuntu/plex/0e1b24c5...      1
6      failed  failed to write to '/home/ubuntu/plex/0e1b24c5...      1
7      failed  failed to write to '/home/ubuntu/plex/0e1b24c5...      1
8      failed  failed to write to '/home/ubuntu/plex/0e1b24c5...      1
9      failed  failed to write to '/home/ubuntu/plex/0e1b24c5...      1
10     failed  failed to write to '/home/ubuntu/plex/0e1b24c5...      1
11     failed  failed to write to '/home/ubuntu/plex/0e1b24c5...      1
12     failed  failed to write to '/home/ubuntu/plex/0e1b24c5...

In [6]:
# Show only the entries whose state is 'failed'.
complete_df.loc[complete_df['state'].eq('failed')]

Unnamed: 0,state,errMsg
0,failed,no output data found for: [best_docked_small_m...
8,failed,no output data found for: [best_docked_small_m...
11,failed,no output data found for: [best_docked_small_m...
19,failed,no output data found for: [best_docked_small_m...
39,failed,no output data found for: [best_docked_small_m...
...,...,...
355,failed,no output data found for: [best_docked_small_m...
356,failed,failed to write to '/home/ubuntu/plex/0e1b24c5...
358,failed,no output data found for: [best_docked_small_m...
361,failed,no output data found for: [best_docked_small_m...


In [10]:
def resubmit_failed_states(json_filepath):
    """Collect the failed entries of an io.json file as fresh IO entries.

    Args:
        json_filepath: Path to a JSON file holding a list of IO entries, each
            with ``tool``, ``inputs``, ``outputs`` and ``state`` keys.

    Returns:
        A list with one dict per entry whose ``state`` is ``'failed'``, in
        file order. Each dict reuses the entry's ``tool``, ``inputs`` and
        ``outputs`` and has ``state`` reset to ``'created'`` and ``errMsg``
        reset to the empty string. Any other keys are dropped.
    """
    with open(json_filepath, 'r') as handle:
        all_entries = json.load(handle)

    # First pass: keep only the entries that ended in the failed state.
    to_retry = list(filter(lambda rec: rec['state'] == 'failed', all_entries))

    # Second pass: rebuild each one with a clean state and error message.
    return [
        {
            'tool': rec['tool'],
            'inputs': rec['inputs'],
            'outputs': rec['outputs'],
            'state': 'created',
            'errMsg': '',
        }
        for rec in to_retry
    ]

# Example usage: rebuild `io_sig` from the failed entries of the same io.json
# inspected above. This overwrites the `io_sig` created from the CSV earlier.
json_filepath = '/home/ubuntu/plex/0e1b24c5-870e-4a58-9b61-a302cecbbcd0/io.json'
io_sig = resubmit_failed_states(json_filepath)


In [14]:
from plex.sdk import run_plex

# Submit whatever `io_sig` currently holds, this time with concurrency=6.
# NOTE(review): presumably this is the failed-entry list built by
# resubmit_failed_states — confirm the cell execution order before running.
run_plex(io_sig, concurrency=6)

Plex version (v0.6.1) up to date.
BACALHAU_API_HOST not set, using default host
toolPath 
Running IPWL io path
Created job directory:  /home/ubuntu/plex/da5d8e24-e72f-4be6-ab14-f65b0cc6ec70
Reading IO Entries from:  /tmp/tmp1a2k3gk6/io_data.json
Initialized IO file at:  /home/ubuntu/plex/da5d8e24-e72f-4be6-ab14-f65b0cc6ec70/io.json
Processing IO Entries
Starting to process IO entry 7 
Starting to process IO entry 3 
Starting to process IO entry 0 
Starting to process IO entry 1 
Starting to process IO entry 19 
Starting to process IO entry 13 
Job running...
////🌱____////
Job running...
////🌱____////
Job running...
////🌱____////
Job running...
////🌱____////
Job running...
Job running...
////🌱____////
////🌱____////
////_🌱___////
////_🌱___////
////_🌱___////
////_🌱___////
////_🌱___////
////_🌱___////
////__🌱__////
////__🌱__////
////__🌱__////
////__🌱__////
////__🌱__////
////__🌱__////
////___🌱_////
////___🌱_////
////___🌱_////
///

In [9]:
# Print the per-entry state/errMsg frame returned by get_state_counts.
print(complete_df)

         state                                             errMsg
0       failed  no output data found for: [best_docked_small_m...
1    completed                                                   
2    completed                                                   
3    completed                                                   
4    completed                                                   
..         ...                                                ...
358     failed  no output data found for: [best_docked_small_m...
359  completed                                                   
360  completed                                                   
361     failed  no output data found for: [best_docked_small_m...
362     failed  failed to write to '/home/ubuntu/plex/0e1b24c5...

[363 rows x 2 columns]


In [None]:
# Submit the current `io_sig` with concurrency=6. This relies on `run_plex`
# and `io_sig` already being defined by earlier cells in the kernel.
run_plex(io_sig, concurrency=6)