Merge pull request #27 from nlesc-nano/dev
make ceiba-cli generic
felipeZ committed Feb 22, 2021
2 parents 6130a4d + 34707e5 commit e3d245a
Showing 43 changed files with 347 additions and 427 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.rst
@@ -2,6 +2,18 @@
Change Log
##########


0.3.0 [Unreleased]
******************

Changed
-------
* Make the library more generic by removing SMILES-specific references (#26)

New
---
* Accept jobs as a list of JSON objects (#26)

0.2.0 [Unreleased]
******************

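The jobs file consumed by the new add action (see the add.py changes below) is a plain JSON list; each object in it is stored verbatim as the job's settings and metadata. A minimal sketch of such a file (the field names inside the objects are illustrative assumptions, since the CLI treats every job as an opaque JSON object):

    [
        {"type": "simulation", "parameters": {"functional": "pbe", "basis": "dzp"}},
        {"type": "simulation", "parameters": {"functional": "b3lyp", "basis": "tzp"}}
    ]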
85 changes: 21 additions & 64 deletions ceibacli/actions/add.py
@@ -10,96 +10,53 @@
import json
import logging
from collections import defaultdict
from typing import Any, DefaultDict
from typing import Any, DefaultDict, Dict, List

import numpy as np
import pandas as pd

from ..authentication import fetch_cookie
from ..client import query_server
from ..client.mutations import create_job_mutation
from ..client.queries import create_properties_query
from ..utils import (Options, generate_smile_identifier,
json_properties_to_dataframe)
from ..utils import Options, format_json, generate_identifier

logger = logging.getLogger(__name__)


def fetch_candidates(opts: Options) -> pd.DataFrame:
def retrieve_jobs(opts: Options) -> List[Dict[str, Any]]:
"""Retrieve candidates to compute from the server."""
query = create_properties_query(opts.target_collection)
reply = query_server(opts.web, query)
return json_properties_to_dataframe(reply["properties"])
with open(opts.jobs, 'r') as handler:
jobs = json.load(handler)

if not isinstance(jobs, list):
raise RuntimeError("Jobs must be a list of JSON objects")

return jobs


def add_jobs(opts: Options) -> None:
"""Add new jobs to the server."""
opts.cookie = fetch_cookie()
# Get the data to create the jobs
df_candidates = fetch_candidates(opts)
# Create the mutation to add the jobs in the server
mutations = (create_mutations(opts, smile) for smile in df_candidates["smile"])
logger.info("New Jobs:")
for query in mutations:
reply = query_server(opts.web, query)
logger.info(reply['createJob']['text'])
for job in retrieve_jobs(opts):
mutation = create_mutations(opts, job)
reply = query_server(opts.web, mutation)
logger.info(reply['createJob']['text'])


def create_mutations(opts: Options, smile: str) -> str:
"""Create a list of mutations with the new jobs."""
def create_mutations(opts: Options, job: Dict[str, Any]) -> str:
"""Create a mutations with the new job."""
job_info = defaultdict(lambda: "null") # type: DefaultDict[str, Any]
prop_info = defaultdict(lambda: "null") # type: DefaultDict[str, Any]
metadata = format_json(job)
job_info.update({
"job_id": np.random.randint(0, 2147483647),
"status": "AVAILABLE",
"settings": format_settings(opts.settings)})
"settings": metadata})

prop_info.update({
"smile_id": generate_smile_identifier(smile),
"smile": smile,
"collection_name": generate_collection_name(opts.settings),
"id": generate_identifier(metadata),
"metadata": metadata,
"collection_name": opts.collection_name,
})

return create_job_mutation(opts.cookie, job_info, prop_info)


def format_settings(settings: Options) -> str:
"""Format the settings as string."""
string = json.dumps(settings.to_dict())
# Escape quotes
return string.replace('\"', '\\"')


def generate_collection_name(settings: Options) -> str:
"""Create a name for the new collection based on the input provided by the user."""
optimize = settings.optional.ligand.get("optimize", None)

if optimize is None:
return "rdkit/uff"

job_type = optimize.job2
if "ADF" in job_type.upper():
return generate_adf_collection_name(optimize)
else:
msg = f"{job_type} collection name generation has not been implemented!"
raise NotImplementedError(msg)


def generate_adf_collection_name(optimize: Options) -> str:
"""Create collection name using the ADF optimization job."""
job_settings = optimize.s2
xc = job_settings.input.xc.copy()
functional = '_'.join(xc.popitem())
basisset = job_settings.input.basis.type
core = job_settings.input.basis.core
relativity = job_settings.input.get("relativity")
if relativity is not None:
if relativity.get("formalism") is None:
relativity_name = "zora"
else:
relativity_name = relativity.formalism
else:
relativity_name = "none"

name = f"{optimize.job2}/{functional}/{basisset}/core_{core}/relativity_{relativity_name}".lower()
return name.replace(' ', '_')
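With the SMILES-specific helpers removed, adding jobs reduces to pointing the CLI at a jobs file. A hedged usage sketch, where the ceibacli entry-point name, server URL, and collection name are all assumptions:

    ceibacli add -w http://localhost:8080/graphql -c test_collection -j jobs.json

Each job then receives a random 32-bit job_id, the status AVAILABLE, and an identifier derived from its formatted JSON via generate_identifier.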
5 changes: 2 additions & 3 deletions ceibacli/actions/compute.py
@@ -85,11 +85,10 @@ def create_local_command(opts: Options, jobs: List[Dict[str, Any]], jobs_metadat
"""Create a terminal command to run the jobs locally."""
cmd = ""
for meta, job in zip(jobs_metadata, jobs):
smile = job["property"]["smile"]
input_file = meta.input.absolute().as_posix()
workdir = meta.workdir.absolute().as_posix()
# Run locally
cmd += f'{opts.command} -s "{smile}" -i {input_file} -w {workdir} & '
cmd += f'cd {workdir} && {opts.command} {input_file} & '

return cmd

@@ -129,7 +128,7 @@ def write_metadata(job: Dict[str, Any], job_workdir: Path):
prop = job["property"]
metadata = {"job_id": job["_id"],
"property": {
"smile_id": prop["_id"], "smile": prop["smile"],
"id": prop["_id"], "metadata": prop["metadata"],
"collection_name": prop["collection_name"]}}

with open(input_file, 'w') as handler:
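Because the command no longer receives a SMILES string, each locally generated shell line now just enters the job's work directory and hands the input file to the user-supplied command. With illustrative paths and command name, one job expands to:

    cd /tmp/jobs/job_0 && run_simulation /tmp/jobs/job_0/input.yml &

The trailing ampersand backgrounds each job, so the accumulated command string runs all jobs concurrently.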
2 changes: 1 addition & 1 deletion ceibacli/actions/login.py
@@ -29,5 +29,5 @@ def login_insilico(opts: Options) -> None:

def create_cookie(cookie: str) -> None:
"""Create a temporal cookie with the server token."""
with open(Path.home() / ".insilicoserver", 'w') as handler:
with open(Path.home() / ".ceiba_web_service", 'w') as handler:
handler.write(cookie)
4 changes: 3 additions & 1 deletion ceibacli/actions/query.py
@@ -39,6 +39,8 @@ def query_collection_properties(opts: Options) -> pd.DataFrame:
# Call the server
reply = query_server(opts.web, query)
# Transform the JSON reply into a DataFrame
df = json_properties_to_dataframe(reply["properties"])
properties = reply["properties"]
df = json_properties_to_dataframe(properties)
df.to_csv(opts.output_file)
print(f"Requested properties has been save to: {opts.output_file}")
return df
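Combined with the new --output flag added in cli.py, a query round trip now saves the requested collection to CSV. A hedged usage sketch, with the entry-point and collection names assumed:

    ceibacli query -c test_collection -o output_properties.csv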
62 changes: 31 additions & 31 deletions ceibacli/actions/report.py
@@ -23,7 +23,7 @@
from ..client.mutations import (create_job_update_mutation,
create_property_mutation)
from ..swift_interface import SwiftAction
from ..utils import Options, generate_smile_identifier
from ..utils import Options, generate_identifier

__all__ = ["report_properties"]

@@ -48,9 +48,9 @@ def report_standalone_properties(opts: Options) -> None:
def report_standalone_properties(opts: Options) -> None:
"""Send standalone data to a given collection."""
for output in Path(opts.path_results).glob(opts.output):
smile, data = read_properties_from_csv(output)
data = read_properties_from_csv(output)
data = data.replace('\"', '\\"')
query = create_standalone_mutation(opts, smile, data)
query = create_standalone_mutation(opts, data)
query_server(opts.web, query)

logger.info(f"Standalone data has been sent to collection: {opts.collection_name}")
@@ -107,38 +107,32 @@ def retrieve_data(path: Path, opts: Options) -> Tuple[Dict[str, Any], DefaultDic

prop_data = defaultdict(lambda: "null") # type: DefaultDict[str, Any]
prop_data.update({
"smile_id": prop_metadata["smile_id"],
"smile": prop_metadata["smile"],
"id": prop_metadata["id"],
"collection_name": prop_metadata["collection_name"],
"data": data,
"metadata": json.dumps(prop_metadata["metadata"]),
"large_objects": large_objects,
"input": read_input_files(path, opts.input),
"geometry": read_optimized_geometry(path, opts.geometry)}
"input": read_input_files(path, opts.input)}
)

job_metadata = {"job_id": metadata["job_id"], "status": status}
return job_metadata, prop_data


def read_optimized_geometry(path: Path, pattern: str) -> str:
"""Retrieve the optimized geometry."""
file_geometry = next(path.glob(pattern), None)
if file_geometry is None:
return "null"

with open(file_geometry, 'r') as handler:
geometry = handler.read()

return json.dumps(geometry)


def read_input_files(path: Path, pattern: str) -> str:
"""Read the input files used for the simulations."""
result_file = next(path.glob(pattern), None)
if result_file is None:
return "null"

data = read_properties_from_json(result_file)
suffix = result_file.suffix
if suffix == ".json":
data = read_properties_from_json(result_file)
elif suffix == ".yml":
data = read_properties_from_yml(result_file)
else:
raise NotImplementedError(f"Not implemented reader for {suffix}")

return data.replace('\"', '\\"')


@@ -170,7 +164,7 @@ def read_result_from_folder(folder: Path, pattern: str) -> pd.DataFrame:
# Read the results from the file
suffix = result_file.suffix
if suffix == ".csv":
_smile, data = read_properties_from_csv(result_file)
data = read_properties_from_csv(result_file)
elif suffix == ".json":
data = read_properties_from_json(result_file)
else:
@@ -180,22 +174,28 @@ def read_result_from_folder(folder: Path, pattern: str) -> pd.DataFrame:
return data.replace('\"', '\\"')


def read_properties_from_json(path_results: Path) -> str:
def read_properties_from_json(path: Path) -> str:
"""Read JSON file."""
with open(path_results, 'r') as handler:
with open(path, 'r') as handler:
data = json.load(handler)
return json.dumps(data)


def read_properties_from_csv(path_results: Path) -> Tuple[str, str]:
def read_properties_from_yml(path: Path) -> str:
"""Read YML file as str."""
with open(path, 'r') as handler:
data = yaml.load(handler, Loader=yaml.FullLoader)
return json.dumps(data)


def read_properties_from_csv(path_results: Path) -> str:
"""From a csv file to a pandas DataFrame."""
df = pd.read_csv(path_results).reset_index(drop=True)
smile, = df["smiles"]

# clean the data
columns_to_exclude = [x for x in df.columns if x in {"smiles"}]
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
df.drop(columns=columns_to_exclude, inplace=True)
return smile, df.to_json()

return df.to_json()


def read_metadata(path_job: Path) -> Dict[str, Any]:
Expand All @@ -208,14 +208,14 @@ def read_metadata(path_job: Path) -> Dict[str, Any]:
return yaml.load(handler.read(), Loader=yaml.FullLoader)


def create_standalone_mutation(opts: Options, smile: str, data: str) -> str:
def create_standalone_mutation(opts: Options, data: str) -> str:
"""Create query to mutate standalone data."""
info = defaultdict(lambda: "null")
info['data'] = data

# Read metadata from workdir
info["smile_id"] = generate_smile_identifier(smile)
info["smile"] = smile
info["id"] = generate_identifier(data)
info["metadata"] = "None"
info['collection_name'] = opts.collection_name

return create_property_mutation(opts.cookie, info)
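Every payload that report sends, whether it comes from CSV, JSON, or the newly supported YAML, ends up as a JSON string whose double quotes are escaped before being spliced into a GraphQL mutation. A minimal Python sketch of that round trip, with illustrative data:

    import json

    data = json.dumps({"energy": -1.23})  # '{"energy": -1.23}'
    escaped = data.replace('\"', '\\"')   # now contains {\"energy\": -1.23}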
2 changes: 1 addition & 1 deletion ceibacli/authentication.py
@@ -21,7 +21,7 @@ def fetch_cookie():
Runtime error if there is no cookie
"""
path_cookie = Path.home() / ".insilicoserver"
path_cookie = Path.home() / ".ceiba_web_service"
if not path_cookie.exists():
print("You need to login to modify properties in the server!")
sys.exit()
36 changes: 18 additions & 18 deletions ceibacli/cli.py
@@ -42,40 +42,40 @@ def parse_user_arguments() -> Tuple[str, Options]:
subparsers = parser.add_subparsers(
help="Interact with the properties web service", dest="command")

# Common arguments
parent_parser = argparse.ArgumentParser(add_help=False)

# Common collection argument
parent_parser.add_argument("-i", "--input", type=exists, help="Yaml input file")
parent_parser.add_argument("-w", "--web", default=DEFAULT_WEB, help="Web Service URL")
# input file parser
input_parser = argparse.ArgumentParser(add_help=False)
input_parser.add_argument("-i", "--input", type=exists, help="Yaml input file")

# Command line arguments shared across subcommands
collection_parser = argparse.ArgumentParser(add_help=False)
collection_parser.add_argument("-c", "--collection_name", help="Collection name")
common_parser = argparse.ArgumentParser(add_help=False)
common_parser.add_argument("-w", "--web", default=DEFAULT_WEB, help="Web Service URL")
common_parser.add_argument("-c", "--collection_name", help="Collection name")

# Login into the web service
login_parser = subparsers.add_parser("login", help="Log in to the Insilico web service")
login_parser.add_argument("-w", "--web", default=DEFAULT_WEB, help="Web Service URL")
login_parser.add_argument("-t", "--token", required=True, help="GitHub access Token")

# Add new Job to the database
add_parser = subparsers.add_parser(
"add", help="Add new jobs to the database", parents=[common_parser])
add_parser.add_argument("-j", "--jobs", required=True, help="JSON file with the jobs to add")

# Request new jobs to run from the database
subparsers.add_parser("compute", help="Compute available jobs", parents=[parent_parser, collection_parser])
subparsers.add_parser("compute", help="Compute available jobs", parents=[input_parser])

# Report properties to the database
subparsers.add_parser("report", help="Report the results back to the server", parents=[parent_parser, collection_parser])
subparsers.add_parser("report", help="Report the results back to the server", parents=[input_parser, common_parser])

# Request data from the database
subparsers.add_parser(
query_parser = subparsers.add_parser(
"query", help="Query some properties from the database",
parents=[parent_parser, collection_parser])

# Add new Job to the database
subparsers.add_parser(
"add", help="Add new jobs to the database", parents=[parent_parser])
parents=[common_parser])
query_parser.add_argument("-o", "--output", help="File to store the properties", default="output_properties.csv")

# Manage the Jobs status
subparsers.add_parser(
"manage", help="Change jobs status", parents=[parent_parser, collection_parser])
"manage", help="Change jobs status", parents=[input_parser])

# Read the arguments
args = parser.parse_args()
Expand All @@ -91,7 +91,7 @@ def handle_input(args: argparse.Namespace) -> Options:
"""Check user input."""
input_file = getattr(args, "input", None)
if input_file is None:
user_input = {key: value for key, value in vars(args).items() if key not in {"command"}}
user_input = {key: value for key, value in vars(args).items() if key not in {"command", "input", "output"}}
input_file = Path(tempfile.gettempdir()) / "user_input.yml"
with open(input_file, 'w') as handler:
yaml.dump(user_input, handler)
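After this restructuring, each subcommand inherits only the parent parsers it needs. Assuming the console script is installed as ceibacli, the resulting command surface is roughly:

    ceibacli login -t GITHUB_TOKEN [-w URL]
    ceibacli add -j jobs.json [-w URL] [-c COLLECTION]
    ceibacli compute -i input.yml
    ceibacli report [-i input.yml] [-w URL] [-c COLLECTION]
    ceibacli query [-w URL] [-c COLLECTION] [-o OUTPUT.csv]
    ceibacli manage -i input.yml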
