Merge pull request #27 from nlesc-nano/dev
make ceiba-cli generic
felipeZ committed Feb 22, 2021
2 parents 6130a4d + 34707e5 commit e3d245a
Showing 43 changed files with 347 additions and 427 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.rst
@@ -2,6 +2,18 @@
Change Log
##########


0.3.0 [Unreleased]
******************

Changed
-------
* Make the library more generic by removing SMILES-specific references (#26)

New
---
* Accept jobs as a list of JSON objects (#26)

0.2.0 [Unreleased]
******************

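The jobs file consumed by the new add action (see the add.py changes below) is a plain JSON list; each object in it is stored verbatim as the job's settings and metadata. A minimal sketch of such a file (the field names inside the objects are illustrative assumptions, since the CLI treats every job as an opaque JSON object):

    [
        {"type": "simulation", "parameters": {"functional": "pbe", "basis": "dzp"}},
        {"type": "simulation", "parameters": {"functional": "b3lyp", "basis": "tzp"}}
    ]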
85 changes: 21 additions & 64 deletions ceibacli/actions/add.py
@@ -10,96 +10,53 @@
import json
import logging
from collections import defaultdict
from typing import Any, DefaultDict
from typing import Any, DefaultDict, Dict, List

import numpy as np
import pandas as pd

from ..authentication import fetch_cookie
from ..client import query_server
from ..client.mutations import create_job_mutation
from ..client.queries import create_properties_query
from ..utils import (Options, generate_smile_identifier,
json_properties_to_dataframe)
from ..utils import Options, format_json, generate_identifier

logger = logging.getLogger(__name__)


def fetch_candidates(opts: Options) -> pd.DataFrame:
def retrieve_jobs(opts: Options) -> List[Dict[str, Any]]:
"""Retrieve candidates to compute from the server."""
query = create_properties_query(opts.target_collection)
reply = query_server(opts.web, query)
return json_properties_to_dataframe(reply["properties"])
with open(opts.jobs, 'r') as handler:
jobs = json.load(handler)

if not isinstance(jobs, list):
raise RuntimeError("Jobs must be a list of JSON objects")

return jobs


def add_jobs(opts: Options) -> None:
"""Add new jobs to the server."""
opts.cookie = fetch_cookie()
# Get the data to create the jobs
df_candidates = fetch_candidates(opts)
# Create the mutation to add the jobs in the server
mutations = (create_mutations(opts, smile) for smile in df_candidates["smile"])
logger.info("New Jobs:")
for query in mutations:
reply = query_server(opts.web, query)
logger.info(reply['createJob']['text'])
for job in retrieve_jobs(opts):
mutation = create_mutations(opts, job)
reply = query_server(opts.web, mutation)
logger.info(reply['createJob']['text'])


def create_mutations(opts: Options, smile: str) -> str:
"""Create a list of mutations with the new jobs."""
def create_mutations(opts: Options, job: Dict[str, Any]) -> str:
"""Create a mutations with the new job."""
job_info = defaultdict(lambda: "null") # type: DefaultDict[str, Any]
prop_info = defaultdict(lambda: "null") # type: DefaultDict[str, Any]
metadata = format_json(job)
job_info.update({
"job_id": np.random.randint(0, 2147483647),
"status": "AVAILABLE",
"settings": format_settings(opts.settings)})
"settings": metadata})

prop_info.update({
"smile_id": generate_smile_identifier(smile),
"smile": smile,
"collection_name": generate_collection_name(opts.settings),
"id": generate_identifier(metadata),
"metadata": metadata,
"collection_name": opts.collection_name,
})

return create_job_mutation(opts.cookie, job_info, prop_info)


def format_settings(settings: Options) -> str:
"""Format the settings as string."""
string = json.dumps(settings.to_dict())
# Escape quotes
return string.replace('\"', '\\"')


def generate_collection_name(settings: Options) -> str:
"""Create a name for the new collection based on the input provided by the user."""
optimize = settings.optional.ligand.get("optimize", None)

if optimize is None:
return "rdkit/uff"

job_type = optimize.job2
if "ADF" in job_type.upper():
return generate_adf_collection_name(optimize)
else:
msg = f"{job_type} collection name generation has not been implemented!"
raise NotImplementedError(msg)


def generate_adf_collection_name(optimize: Options) -> str:
"""Create collection name using the ADF optimization job."""
job_settings = optimize.s2
xc = job_settings.input.xc.copy()
functional = '_'.join(xc.popitem())
basisset = job_settings.input.basis.type
core = job_settings.input.basis.core
relativity = job_settings.input.get("relativity")
if relativity is not None:
if relativity.get("formalism") is None:
relativity_name = "zora"
else:
relativity_name = relativity.formalism
else:
relativity_name = "none"

name = f"{optimize.job2}/{functional}/{basisset}/core_{core}/relativity_{relativity_name}".lower()
return name.replace(' ', '_')
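With the SMILES-specific helpers removed, adding jobs reduces to pointing the CLI at a jobs file. A hedged usage sketch, where the ceibacli entry-point name, server URL, and collection name are all assumptions:

    ceibacli add -w http://localhost:8080/graphql -c test_collection -j jobs.json

Each job then receives a random 32-bit job_id, the status AVAILABLE, and an identifier derived from its formatted JSON via generate_identifier.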
5 changes: 2 additions & 3 deletions ceibacli/actions/compute.py
@@ -85,11 +85,10 @@ def create_local_command(opts: Options, jobs: List[Dict[str, Any]], jobs_metadat
"""Create a terminal command to run the jobs locally."""
cmd = ""
for meta, job in zip(jobs_metadata, jobs):
smile = job["property"]["smile"]
input_file = meta.input.absolute().as_posix()
workdir = meta.workdir.absolute().as_posix()
# Run locally
cmd += f'{opts.command} -s "{smile}" -i {input_file} -w {workdir} & '
cmd += f'cd {workdir} && {opts.command} {input_file} & '

return cmd

@@ -129,7 +128,7 @@ def write_metadata(job: Dict[str, Any], job_workdir: Path):
prop = job["property"]
metadata = {"job_id": job["_id"],
"property": {
"smile_id": prop["_id"], "smile": prop["smile"],
"id": prop["_id"], "metadata": prop["metadata"],
"collection_name": prop["collection_name"]}}

with open(input_file, 'w') as handler:
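Because the command no longer receives a SMILES string, each locally generated shell line now just enters the job's work directory and hands the input file to the user-supplied command. With illustrative paths and command name, one job expands to:

    cd /tmp/jobs/job_0 && run_simulation /tmp/jobs/job_0/input.yml &

The trailing ampersand backgrounds each job, so the accumulated command string runs all jobs concurrently.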
2 changes: 1 addition & 1 deletion ceibacli/actions/login.py
@@ -29,5 +29,5 @@ def login_insilico(opts: Options) -> None:

def create_cookie(cookie: str) -> None:
"""Create a temporal cookie with the server token."""
with open(Path.home() / ".insilicoserver", 'w') as handler:
with open(Path.home() / ".ceiba_web_service", 'w') as handler:
handler.write(cookie)
4 changes: 3 additions & 1 deletion ceibacli/actions/query.py
@@ -39,6 +39,8 @@ def query_collection_properties(opts: Options) -> pd.DataFrame:
# Call the server
reply = query_server(opts.web, query)
# Transform the JSON reply into a DataFrame
df = json_properties_to_dataframe(reply["properties"])
properties = reply["properties"]
df = json_properties_to_dataframe(properties)
df.to_csv(opts.output_file)
print(f"Requested properties has been save to: {opts.output_file}")
return df
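Combined with the new --output flag added in cli.py, a query round trip now saves the requested collection to CSV. A hedged usage sketch, with the entry-point and collection names assumed:

    ceibacli query -c test_collection -o output_properties.csv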
62 changes: 31 additions & 31 deletions ceibacli/actions/report.py
@@ -23,7 +23,7 @@
from ..client.mutations import (create_job_update_mutation,
create_property_mutation)
from ..swift_interface import SwiftAction
from ..utils import Options, generate_smile_identifier
from ..utils import Options, generate_identifier

__all__ = ["report_properties"]

@@ -48,9 +48,9 @@ def report_standalone_properties(opts: Options) -> None:
def report_standalone_properties(opts: Options) -> None:
"""Send standalone data to a given collection."""
for output in Path(opts.path_results).glob(opts.output):
smile, data = read_properties_from_csv(output)
data = read_properties_from_csv(output)
data = data.replace('\"', '\\"')
query = create_standalone_mutation(opts, smile, data)
query = create_standalone_mutation(opts, data)
query_server(opts.web, query)

logger.info(f"Standalone data has been sent to collection: {opts.collection_name}")
@@ -107,38 +107,32 @@ def retrieve_data(path: Path, opts: Options) -> Tuple[Dict[str, Any], DefaultDic

prop_data = defaultdict(lambda: "null") # type: DefaultDict[str, Any]
prop_data.update({
"smile_id": prop_metadata["smile_id"],
"smile": prop_metadata["smile"],
"id": prop_metadata["id"],
"collection_name": prop_metadata["collection_name"],
"data": data,
"metadata": json.dumps(prop_metadata["metadata"]),
"large_objects": large_objects,
"input": read_input_files(path, opts.input),
"geometry": read_optimized_geometry(path, opts.geometry)}
"input": read_input_files(path, opts.input)}
)

job_metadata = {"job_id": metadata["job_id"], "status": status}
return job_metadata, prop_data


def read_optimized_geometry(path: Path, pattern: str) -> str:
"""Retrieve the optimized geometry."""
file_geometry = next(path.glob(pattern), None)
if file_geometry is None:
return "null"

with open(file_geometry, 'r') as handler:
geometry = handler.read()

return json.dumps(geometry)


def read_input_files(path: Path, pattern: str) -> str:
"""Read the input files used for the simulations."""
result_file = next(path.glob(pattern), None)
if result_file is None:
return "null"

data = read_properties_from_json(result_file)
suffix = result_file.suffix
if suffix == ".json":
data = read_properties_from_json(result_file)
elif suffix == ".yml":
data = read_properties_from_yml(result_file)
else:
raise NotImplementedError(f"Not implemented reader for {suffix}")

return data.replace('\"', '\\"')


@@ -170,7 +164,7 @@ def read_result_from_folder(folder: Path, pattern: str) -> pd.DataFrame:
# Read the results from the file
suffix = result_file.suffix
if suffix == ".csv":
_smile, data = read_properties_from_csv(result_file)
data = read_properties_from_csv(result_file)
elif suffix == ".json":
data = read_properties_from_json(result_file)
else:
@@ -180,22 +174,28 @@ def read_result_from_folder(folder: Path, pattern: str) -> pd.DataFrame:
return data.replace('\"', '\\"')


def read_properties_from_json(path_results: Path) -> str:
def read_properties_from_json(path: Path) -> str:
"""Read JSON file."""
with open(path_results, 'r') as handler:
with open(path, 'r') as handler:
data = json.load(handler)
return json.dumps(data)


def read_properties_from_csv(path_results: Path) -> Tuple[str, str]:
def read_properties_from_yml(path: Path) -> str:
"""Read YML file as str."""
with open(path, 'r') as handler:
data = yaml.load(handler, Loader=yaml.FullLoader)
return json.dumps(data)


def read_properties_from_csv(path_results: Path) -> str:
"""From a csv file to a pandas DataFrame."""
df = pd.read_csv(path_results).reset_index(drop=True)
smile, = df["smiles"]

# clean the data
columns_to_exclude = [x for x in df.columns if x in {"smiles"}]
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
df.drop(columns=columns_to_exclude, inplace=True)
return smile, df.to_json()

return df.to_json()


def read_metadata(path_job: Path) -> Dict[str, Any]:
Expand All @@ -208,14 +208,14 @@ def read_metadata(path_job: Path) -> Dict[str, Any]:
return yaml.load(handler.read(), Loader=yaml.FullLoader)


def create_standalone_mutation(opts: Options, smile: str, data: str) -> str:
def create_standalone_mutation(opts: Options, data: str) -> str:
"""Create query to mutate standalone data."""
info = defaultdict(lambda: "null")
info['data'] = data

# Read metadata from workdir
info["smile_id"] = generate_smile_identifier(smile)
info["smile"] = smile
info["id"] = generate_identifier(data)
info["metadata"] = "None"
info['collection_name'] = opts.collection_name

return create_property_mutation(opts.cookie, info)
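Every payload that report sends, whether it comes from CSV, JSON, or the newly supported YAML, ends up as a JSON string whose double quotes are escaped before being spliced into a GraphQL mutation. A minimal Python sketch of that round trip, with illustrative data:

    import json

    data = json.dumps({"energy": -1.23})  # '{"energy": -1.23}'
    escaped = data.replace('\"', '\\"')   # now contains {\"energy\": -1.23}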
2 changes: 1 addition & 1 deletion ceibacli/authentication.py
@@ -21,7 +21,7 @@ def fetch_cookie():
Runtime error if there is no cookie
"""
path_cookie = Path.home() / ".insilicoserver"
path_cookie = Path.home() / ".ceiba_web_service"
if not path_cookie.exists():
print("You need to login to modify properties in the server!")
sys.exit()
36 changes: 18 additions & 18 deletions ceibacli/cli.py
@@ -42,40 +42,40 @@ def parse_user_arguments() -> Tuple[str, Options]:
subparsers = parser.add_subparsers(
help="Interact with the properties web service", dest="command")

# Common arguments
parent_parser = argparse.ArgumentParser(add_help=False)

# Common collection argument
parent_parser.add_argument("-i", "--input", type=exists, help="Yaml input file")
parent_parser.add_argument("-w", "--web", default=DEFAULT_WEB, help="Web Service URL")
# input file parser
input_parser = argparse.ArgumentParser(add_help=False)
input_parser.add_argument("-i", "--input", type=exists, help="Yaml input file")

# Command line arguments shared across subcommands
collection_parser = argparse.ArgumentParser(add_help=False)
collection_parser.add_argument("-c", "--collection_name", help="Collection name")
common_parser = argparse.ArgumentParser(add_help=False)
common_parser.add_argument("-w", "--web", default=DEFAULT_WEB, help="Web Service URL")
common_parser.add_argument("-c", "--collection_name", help="Collection name")

# Login into the web service
login_parser = subparsers.add_parser("login", help="Log in to the Insilico web service")
login_parser.add_argument("-w", "--web", default=DEFAULT_WEB, help="Web Service URL")
login_parser.add_argument("-t", "--token", required=True, help="GitHub access Token")

# Add new Job to the database
add_parser = subparsers.add_parser(
"add", help="Add new jobs to the database", parents=[common_parser])
add_parser.add_argument("-j", "--jobs", required=True, help="JSON file with the jobs to add")

# Request new jobs to run from the database
subparsers.add_parser("compute", help="Compute available jobs", parents=[parent_parser, collection_parser])
subparsers.add_parser("compute", help="Compute available jobs", parents=[input_parser])

# Report properties to the database
subparsers.add_parser("report", help="Report the results back to the server", parents=[parent_parser, collection_parser])
subparsers.add_parser("report", help="Report the results back to the server", parents=[input_parser, common_parser])

# Request data from the database
subparsers.add_parser(
query_parser = subparsers.add_parser(
"query", help="Query some properties from the database",
parents=[parent_parser, collection_parser])

# Add new Job to the database
subparsers.add_parser(
"add", help="Add new jobs to the database", parents=[parent_parser])
parents=[common_parser])
query_parser.add_argument("-o", "--output", help="File to store the properties", default="output_properties.csv")

# Manage the Jobs status
subparsers.add_parser(
"manage", help="Change jobs status", parents=[parent_parser, collection_parser])
"manage", help="Change jobs status", parents=[input_parser])

# Read the arguments
args = parser.parse_args()
Expand All @@ -91,7 +91,7 @@ def handle_input(args: argparse.Namespace) -> Options:
"""Check user input."""
input_file = getattr(args, "input", None)
if input_file is None:
user_input = {key: value for key, value in vars(args).items() if key not in {"command"}}
user_input = {key: value for key, value in vars(args).items() if key not in {"command", "input", "output"}}
input_file = Path(tempfile.gettempdir()) / "user_input.yml"
with open(input_file, 'w') as handler:
yaml.dump(user_input, handler)
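After this restructuring, each subcommand inherits only the parent parsers it needs. Assuming the console script is installed as ceibacli, the resulting command surface is roughly:

    ceibacli login -t GITHUB_TOKEN [-w URL]
    ceibacli add -j jobs.json [-w URL] [-c COLLECTION]
    ceibacli compute -i input.yml
    ceibacli report [-i input.yml] [-w URL] [-c COLLECTION]
    ceibacli query [-w URL] [-c COLLECTION] [-o OUTPUT.csv]
    ceibacli manage -i input.yml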
