In [1]:
%pip install theia-pypi xgboost matplotlib faerun-notebook --upgrade
import pickle
from pathlib import Path
import numpy as np
from matplotlib import pyplot as plt
from rdkit.Chem.AllChem import MolFromSmiles, MolFromSmarts
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Draw
import shap
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_absolute_error

from theia.web.helpers import explain, explain_regression
from theia.ml import InferenceReactionDataset

import ipywidgets as widgets
from faerun_notebook import SmilesDrawer
from theia.api import predict

# Default size used by RDKit's IPython console integration when rendering
# molecules inline (presumably width x height in pixels -- TODO confirm).
IPythonConsole.molSize = (300, 300)

Note: you may need to restart the kernel to use updated packages.


# Train the model

In [2]:
# Get the data for a single split (the first entry of the pickled splits).
# The context manager closes the file handle as soon as the pickle is read.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load data files that come from a trusted source.
az_file_path = Path("../data/az/az-2048-3-true.pkl")
with open(az_file_path, "rb") as az_file:
    split = pickle.load(az_file)[0]

# Vanilla hyperparams. n_estimators is deliberately huge: training is meant
# to be ended by early stopping on the validation set, not by this limit.
model = XGBRegressor(
    n_estimators=999999,
    learning_rate=0.01,
    max_depth=12,
    min_child_weight=6,
    colsample_bytree=0.6,
    subsample=0.8,
    random_state=42,
    early_stopping_rounds=10,
)

# Train the model; the validation set is only used for early stopping.
model.fit(
    split["train"]["X"],
    split["train"]["y"],
    eval_set=[(split["valid"]["X"], split["valid"]["y"])],
    verbose=False,
)

# Inference, restricted to the trees up to and including the best iteration
# found by early stopping. `ntree_limit` / `best_ntree_limit` were deprecated
# in XGBoost 1.4 and removed in 2.0; `iteration_range` is the replacement
# (the end of the range is exclusive, hence the +1).
y_pred = model.predict(
    split["test"]["X"],
    iteration_range=(0, model.best_iteration + 1),
)
# Clamp negative predictions to zero (presumably because the target cannot
# be negative -- TODO confirm against the dataset definition).
y_pred[y_pred < 0.0] = 0.0

# Get the metrics on the held-out test set
r_squared = r2_score(split["test"]["y"], y_pred)
mae = mean_absolute_error(split["test"]["y"], y_pred)
print(f"r2: {r_squared}")
print(f"mae: {mae}")

r2: 0.2259266212446044
mae: 20.295553705533344


# The mapping

In [3]:
# Build a SHAP tree explainer for the fitted model, then compute SHAP values
# for at most the first 500 entries of the test features.
explainer = shap.TreeExplainer(model)
test_features = split["test"]["X"]
shap_values = explainer.shap_values(test_features[:500])

In [None]:
# shap.summary_plot(shap_values, split["test"]["X"][:500], max_display=6)
# shap.force_plot(explainer.expected_value, shap_values[6,:], matplotlib=True)

In [2]:
!pip uninstall ipywidgets -y
!pip install ipywidgets==7.7.1
import ipywidgets

ipywidgets.version_info

Found existing installation: ipywidgets 7.7.1
Uninstalling ipywidgets-7.7.1:
  Successfully uninstalled ipywidgets-7.7.1
Collecting ipywidgets==7.7.1
  Using cached ipywidgets-7.7.1-py2.py3-none-any.whl (123 kB)
Installing collected packages: ipywidgets
Successfully installed ipywidgets-7.7.1


AttributeError: module 'ipywidgets' has no attribute 'version_info'

In [4]:
# Smoke test: display a SmilesDrawer widget for one small, labelled SMILES.
SmilesDrawer(
    value=[("Example", "CNCNCN")],
    output="img",
    theme="solarized",
)

SmilesDrawer(output='img', theme='solarized', value=[('Example', 'CNCNCN')])

In [8]:
# Take the mapping stored with the test split and a single test reaction
# (index 6), wrapped in a one-item inference dataset.
mapping = split["test"]["mapping"]
rxn = split["test"]["smiles"][6]
dataset = InferenceReactionDataset([rxn])

# Explain the regression output for that reaction using the SHAP explainer.
expl = explain_regression(dataset, explainer, mapping)

# Weights handed to the drawer, keyed by reaction side.
w = dict(reactants=expl.reactant_weights, products=expl.product_weights)

# Draw the reaction together with the weights from the explanation.
SmilesDrawer(
    value=[("Example", rxn)],
    weights=[w],
    output="img",
    theme="solarized",
)

SmilesDrawer(output='img', theme='solarized', value=[('Example', 'C1CNCCN1.CC1=C(C=C(C=C1)Br)[N+](=O)[O-].C1=C…