# Model Manipulation Software

Start by importing the main classes from the `melloddy_predictor` package:

In [1]:
from melloddy_predictor import Model
from melloddy_predictor import PreparedData

import melloddy_tuner.utils.helper
import pandas as pd
import pathlib

  from .autonotebook import tqdm as notebook_tqdm


Then load your data, either from a file or by manually building a `DataFrame`:

In [2]:
# Option 1: manually build a DataFrame.
# It is kept under its own name so that it is not silently overwritten by the
# file-based load below; assign `data = manual_data` to use it instead.
raw_data = [
    [80540,836392,"Cc1ccc2ncccc2c1"],
    [319232,1835024,"CCNC(=O)c1cc(C(C)Nc2cc(F)cc(F)c2)c3OC(=CC(=O)c3c1)N4CCOCC4"]
]
manual_data = pd.DataFrame(raw_data, columns=["", "input_compound_id", "smiles"])

# Option 2: load from a T2 file.
# `data` is the frame consumed by the following cells.
data = melloddy_tuner.utils.helper.read_input_file("../inputs/data/T2_100samples.csv")

# Preview the first rows of the loaded data.
data.head()

Unnamed: 0.1,Unnamed: 0,input_compound_id,smiles
0,80540,836392,Cc1ccc2ncccc2c1
1,319232,1835024,CCNC(=O)c1cc(C(C)Nc2cc(F)cc(F)c2)c3OC(=CC(=O)c...
2,89421,829337,CCOc1ccc(cc1)C(=O)c2cc(ccc2N3CCOCC3)[N+](=O)[O-]
3,353802,1020194,Oc1nc(CSc2nnc(Cc3ccccc3)n2CC=C)nc4ccccc14
4,466566,1520639,Cc1cc(nn1c2cccc(Oc3ccc(cc3C#N)S(=O)(=O)Nc4nccs...


Build a `PreparedData` object that we will use to perform predictions:

In [3]:
# Locations of the example encryption key and preparation parameters (JSON files).
encryption_key_path = pathlib.Path("../inputs/config/example_key.json")
parameters_path = pathlib.Path("../inputs/config/example_parameters.json")

# Bundle the key, the parameters and the input frame loaded earlier (`data`)
# into the PreparedData object that is passed to `predict` further down.
# Note: `smiles` receives the whole DataFrame, not only its `smiles` column.
prepared_data = PreparedData(
    encryption_key=encryption_key_path,
    preparation_parameters=parameters_path,
    smiles=data,
)

Read new config file.
Read new key file.


Build a `Model` object that will load the model in order to perform predictions. Here we will use a classification (CLS) model:

In [4]:
# Path to the example CLS model (presumably a directory, given the trailing slash).
model_path = pathlib.Path("../inputs/models/example_cls_model/")
# Constructed with default options; per the notebook text, the model is then
# loaded into memory only while predict() runs.
cls_model = Model(model_path)

Predict on all the tasks using the `Model` we built in the previous step:

In [5]:
# predict() returns two results, unpacked here as classification and regression
# predictions (meaning presumed from the variable names — confirm against the API).
# No task filter is passed, so every task is predicted.
cls_predictions, reg_predictions = cls_model.predict(prepared_data)

# Preview the first rows of the classification predictions.
cls_predictions.head()

Unnamed: 0_level_0,517_7.0,924_6.5,924_7.0,924_7.5,1160_6.5,1160_7.0,1512_7.5,1512_8.0,1512_8.5,1520_8.0,...,3000017_6102.656082253477,3000017_81429.9781608993,3000018_101749.59357845964,3000020_4.41116971682769,3000033_23527.350743403505,3000046_50409.54638979978,3000046_210919.8843456653,3000063_5.371641127011256,4000000_1548816.6189124796,4000000_29512092.26666384
input_compound_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3049,0.528646,0.608267,0.530488,0.416239,0.507918,0.376885,0.473978,0.321524,0.226421,0.489502,...,0.415071,0.253687,0.431608,0.462453,0.478059,0.470047,0.368063,0.474745,0.505189,0.195785
9191,0.528505,0.623495,0.526651,0.458898,0.510478,0.341025,0.45849,0.369716,0.226262,0.532848,...,0.469821,0.278764,0.401199,0.486211,0.505424,0.457428,0.387899,0.474177,0.445612,0.221196
66045,0.528169,0.659449,0.554964,0.382874,0.528529,0.31844,0.450875,0.293063,0.18256,0.528828,...,0.431999,0.213122,0.373485,0.437474,0.413225,0.439442,0.333108,0.472506,0.493642,0.132657
114698,0.532293,0.623444,0.513488,0.426137,0.526345,0.344365,0.501272,0.321093,0.200475,0.51879,...,0.440346,0.27016,0.38131,0.496885,0.467534,0.430072,0.370867,0.473842,0.520891,0.154309
232954,0.554548,0.635863,0.545993,0.434922,0.520942,0.369603,0.48965,0.358363,0.24233,0.524525,...,0.464349,0.289661,0.430641,0.4925,0.460724,0.46093,0.346191,0.430832,0.465743,0.184755


If we want to, we can predict on only a subset of tasks. Here we will predict on tasks `1` and `4`; the ids are based on the `continuous_task_ids` from the T8 file:

In [6]:
# Restrict the prediction to classification tasks 1 and 4.
# NOTE: this rebinds `cls_predictions` / `reg_predictions` from the previous cell.
cls_predictions, reg_predictions = cls_model.predict(prepared_data, classification_tasks=[1, 4])

# In the recorded output the other task columns are still listed but hold 0.0;
# only the columns at positions 1 and 4 (0-based) carry predicted values.
cls_predictions.head()

Unnamed: 0_level_0,517_7.0,924_6.5,924_7.0,924_7.5,1160_6.5,1160_7.0,1512_7.5,1512_8.0,1512_8.5,1520_8.0,...,3000017_6102.656082253477,3000017_81429.9781608993,3000018_101749.59357845964,3000020_4.41116971682769,3000033_23527.350743403505,3000046_50409.54638979978,3000046_210919.8843456653,3000063_5.371641127011256,4000000_1548816.6189124796,4000000_29512092.26666384
input_compound_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3049,0.0,0.608267,0.0,0.0,0.507918,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9191,0.0,0.623495,0.0,0.0,0.510478,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
66045,0.0,0.659449,0.0,0.0,0.528529,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
114698,0.0,0.623444,0.0,0.0,0.526345,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
232954,0.0,0.635863,0.0,0.0,0.520942,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


By default, the model is loaded into memory when `model.predict()` is called, and unloaded before it returns the predictions.
If you want to keep the model in memory, you can initialize it with `load_on_demand=False`:

In [7]:
# load_on_demand=False: per the notebook text, keeps the model in memory instead
# of loading and unloading it around each predict() call.
model = Model(model_path, load_on_demand=False)

# you can do multiple predictions and then unload it
# (e.g. call model.predict(prepared_data) as many times as needed at this point)

# Explicitly unload the model once all predictions are done.
model.unload()