In [1]:
import json
import platform
from pathlib import Path
from datetime import datetime

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
pd.set_option("display.precision", 5)
from tqdm import tqdm

from teselagen.api.client import TeselaGenClient

In [2]:
host_url="https://pr-app-7489.teselagen.net/"
client = TeselaGenClient(host_url=host_url, module_name='test')
client.login(username="test@teselagen.com", expiration_time="8h")

Password for test@teselagen.com: ········
Connection Accepted


In [3]:
## Fetch My Laboratories
display(client.get_laboratories())

## Select a Laboratory
client.select_laboratory(lab_name="Example Lab")
#client.unselect_laboratory()

[{'id': '1', 'name': 'The Test Lab'}, {'id': '3', 'name': 'Example Lab'}]

Selected Lab: Example Lab


In [38]:
filepath = "./cck_dataset.csv"

df=pd.read_csv(filepath)
# Rename mapping columns names for auto mapping.
mapping_column_names = df.columns[12:21].values.tolist()
new_mapping_column_names = [column_name.replace("_", "->") for column_name in mapping_column_names]
new_mapping_column_names
name_mapping = {column_name:new_column_name for column_name, new_column_name in zip(mapping_column_names,new_mapping_column_names)}
name_mapping
df.rename(columns=name_mapping, inplace=True)
print("Length",df.shape[0])

Length 156


In [39]:
# We set an ID called Fermentation ID equal to "F179"
if df.index.name != "NNCNo":
    df.set_index('NNCNo', inplace=True)
# df.head()

## Submitting a Multi Objective Optimization Task

In [40]:
dataInput = df.to_dict(orient='records')
# dataInput[0]

In [41]:
dataSchema = []
for index, column in enumerate(dataInput[0].keys()):
    schemaElement = {}
    if index == 0:
        schemaElement = {
            "name": column,
            "value_type": "aa-sequence",
            "type": "descriptor"
        }
    elif index > 0 and index < 12:
        schemaElement = {
            "name": column,
            "value_type": "categoric",
            "type": "descriptor"
        }
    elif index >= 12 and index < 20:
        schemaElement = {
            "name": column,
            "value_type": "smiles",
            "type": "descriptor"
        }
    elif index >=20 and index < 22:
        schemaElement = {
            "name": column,
            "value_type": "numeric",
            "type": "target"
        }
    else:
        continue
    dataSchema.append(schemaElement)

dataSchema = dataSchema[1:]
# dataSchema

### Train Predictive Model for CCK1 and CCK2

In [42]:
data_projection_config = {
    "data_projection_params": {
        "general": {
            "algorithm_params": {
                "n_components": 0.99
            }
        }       
    }
}

In [30]:
dataSchema_cck1 = dataSchema[0:-1]
# dataSchema_cck1
model_cck1 = client.discover.submit_model(
    name="My CCK1 Model",
    data_input=dataInput, 
    data_schema=dataSchema_cck1, 
    model_type='predictive', 
    configs=data_projection_config)
model_cck1_id = model_cck1['modelId']
model_cck1_id

'4'

In [31]:
dataSchema_cck2 = dataSchema[0:-2]
dataSchema_cck2.append(dataSchema[-1])
dataSchema_cck2
model_cck2 = client.discover.submit_model(
    name="My CCK2 Model",
    data_input=dataInput, 
    data_schema=dataSchema_cck2, 
    model_type='predictive', 
    configs=data_projection_config)
model_cck2_id = model_cck2['modelId']
model_cck2_id

'5'

### Run Multi-Objective Optimization with pretrained models

In [63]:
mo_configs = {
    "modeling_tool": {
        "general": {
            "hardware": "gpu"
        }
    },
    "multi_objective_tool": {
    "general": {
      "number_of_initial_random_samples": 1000,
      "number_of_optimization_iterations": 100,
      "optimization_batch_size": 1,
      "number_of_pareto_search_points": 20,
      "bound_relaxation_factor": -0.25
    }
  },
}

In [59]:
model_cck1_id = "4"
model_cck2_id = "5"
task = client.discover.submit_multi_objective_optimization(
    data_input=dataInput, 
    data_schema=dataSchema,
    pretrainedModelIds=[model_cck1_id, model_cck2_id],
    configs=mo_configs
)
task['id']

'10'

In [61]:
# task={"id": "6"}
client.discover.cancel_task(task_id=task["id"])
# client.discover.cancel_task(task_id="9")

{'id': '9'}

In [64]:
result = client.discover.get_multi_objective_optimization(taskId=task['id'])
result

{'task': {'id': '10',
  'status': 'in-progress',
  'service': 'ds-tools-gpu',
  'input': {'job': 'modeling-tool',
   'kwargs': {'with_pretrained': True, 'multiobjective': True}},
  'startedOn': '2021-04-20T15:33:24.289Z',
  'createdAt': '2021-04-20T15:29:34.272Z',
  'lastCheckIn': '2021-04-20T15:33:36.011Z',
  'result': None,
  '__typename': 'microserviceQueue'}}

In [36]:
df["CCK1[pIC50]"].min() - (-0.5* abs(df["CCK1[pIC50]"].min()))

9.547377846

In [35]:
df["CCK2[pIC50]"].min() - (-0.5* abs(df["CCK2[pIC50]"].min()))

-5.01576631