In [None]:
# Copyright 2022 Owkin, inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Model Manipulation Software

Start by importing the main classes from the `melloddy_predictor` package:

In [None]:
from melloddy_predictor import Model
from melloddy_predictor import PreparedData

import melloddy_tuner.utils.helper
import pandas as pd
import pathlib

Then load your data, either from a file or by manually building a `DataFrame`:

In [None]:
# Option 1: assemble the input table by hand.
# Columns: an unnamed first column, `input_compound_id`, and `smiles`.
raw_data = [
    [80540, 836392, "Cc1ccc2ncccc2c1"],
    [319232, 1835024, "CCNC(=O)c1cc(C(C)Nc2cc(F)cc(F)c2)c3OC(=CC(=O)c3c1)N4CCOCC4"],
]
data = pd.DataFrame(data=raw_data, columns=["", "input_compound_id", "smiles"])

# Option 2: read a T2 file instead.
# Running the whole cell as-is rebinds `data` to the file's contents,
# replacing the hand-built frame above.
data = melloddy_tuner.utils.helper.read_input_file(
    "../inputs/data/T2_100samples.csv"
)

# Preview the first rows.
data.head()

Build a `PreparedData` object that we will use to perform predictions:

In [None]:
# Example configuration files: the encryption key and the preparation parameters.
encryption_key_path = pathlib.Path("../inputs/config/example_key.json")
parameters_path = pathlib.Path("../inputs/config/example_parameters.json")

# Value forwarded to `PreparedData` as its `num_cpu` argument.
num_cpu = 1

# Bundle the input table with the configuration above.
prepared_data = PreparedData(
    smiles=data,
    encryption_key=encryption_key_path,
    preparation_parameters=parameters_path,
    num_cpu=num_cpu,
)

Build a `Model` object that will load the model in order to perform predictions. Here we will use a CLS (classification) model:

In [None]:
# Path to the example CLS model (the trailing slash suggests a directory).
model_path = pathlib.Path("../inputs/models/example_cls_model/")
# Wrap the model location in a `Model`; `cls_model` is the object later used for predictions.
cls_model = Model(model_path)

Predict on all the tasks using the `Model` we built in the previous step:

In [None]:
# Predict on all tasks. `predict` returns a pair, unpacked here into the
# classification (`cls_`) and regression (`reg_`) predictions.
cls_predictions, reg_predictions = cls_model.predict(prepared_data)

# Preview the first rows of the classification predictions.
cls_predictions.head()

We can also predict on only a subset of tasks. Here we predict on tasks `1` and `4`; the ids are based on the `continuous_task_ids` from the T8 file:

In [None]:
# Restrict the prediction to classification tasks 1 and 4 via `classification_tasks`.
# NOTE: this rebinds `cls_predictions` and `reg_predictions` from the previous cell.
cls_predictions, reg_predictions = cls_model.predict(prepared_data, classification_tasks=[1, 4])

# Preview the first rows of the classification predictions for the selected tasks.
cls_predictions.head()

By default, the model is loaded in memory when `model.predict()` is called, and unloaded before it returns the predictions.
If you want to keep the model in memory between predictions, you can initialize it with `load_on_demand=False`.

In [None]:
# Build a second `Model` from the same `model_path`, this time with
# on-demand loading disabled (`load_on_demand=False`).
model = Model(model_path, load_on_demand=False)

# you can do multiple predictions and then unload it
# (e.g. call `model.predict(...)` as many times as needed at this point)

# Explicitly unload the model once the predictions are done.
model.unload()