In [1]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from unimol_tools import MolPredict, UniMolRepr

2025-01-14 00:18:52 | unimol_tools/weights/weighthub.py | 17 | INFO | Uni-Mol Tools | Weights will be downloaded to default directory: /scratch/midway3/jshe/.conda/envs/unimol/lib/python3.10/site-packages/unimol_tools/weights


## Data

In [2]:
# Root directory of the dataset used for the embedding step: the cells below read
# `<split>/smiles.csv` from it and write `<split>/unimol_embeddings.npy` into it.
# NOTE: `data_path` is reassigned in the Regression section to a single CSV file,
# so re-run this cell before re-running the loading/embedding cells.
data_path = '../../data/log_normalized/'

In [3]:
# Read the SMILES file of every split and flatten it into a plain Python list.
# `squeeze()` drops the length-1 axis (assumes each file has one column - TODO confirm).
train_X, validation_X, test_X = (
    pd.read_csv(f'{data_path}{split}/smiles.csv').to_numpy().squeeze().tolist()
    for split in ('train', 'validation', 'test')
)

## Representations

In [4]:
# Uni-Mol representation model for molecules; hydrogens are kept (remove_hs=False).
embedder = UniMolRepr(data_type='molecule', remove_hs=False)

# Embed the splits in order (train, validation, test), keeping only the 'cls_repr'
# entry of each result (presumably the molecule-level vector - TODO confirm).
train_embeddings, validation_embeddings, test_embeddings = (
    embedder.get_repr(smiles, return_atomic_reprs=True)['cls_repr']
    for smiles in (train_X, validation_X, test_X)
)

# Once every split is embedded, store each one as an .npy array inside its split folder.
for split, split_embeddings in (
    ('train', train_embeddings),
    ('validation', validation_embeddings),
    ('test', test_embeddings),
):
    np.save(f'{data_path}{split}/unimol_embeddings.npy', np.array(split_embeddings))

2025-01-14 00:18:55 | unimol_tools/models/unimol.py | 120 | INFO | Uni-Mol Tools | Loading pretrained weights from /scratch/midway3/jshe/.conda/envs/unimol/lib/python3.10/site-packages/unimol_tools/weights/mol_pre_all_h_220816.pt
2025-01-14 00:18:58 | unimol_tools/data/conformer.py | 89 | INFO | Uni-Mol Tools | Start generating conformers...
2848it [00:47, 60.05it/s] 
2025-01-14 00:19:45 | unimol_tools/data/conformer.py | 93 | INFO | Uni-Mol Tools | Succeed to generate conformers for 100.00% of molecules.
2025-01-14 00:19:45 | unimol_tools/data/conformer.py | 95 | INFO | Uni-Mol Tools | Succeed to generate 3d conformers for 99.93% of molecules.
100%|███████████████████████████████████████████| 89/89 [00:09<00:00,  9.22it/s]
2025-01-14 00:19:56 | unimol_tools/data/conformer.py | 89 | INFO | Uni-Mol Tools | Start generating conformers...
356it [00:03, 95.36it/s] 
2025-01-14 00:20:00 | unimol_tools/data/conformer.py | 93 | INFO | Uni-Mol Tools | Succeed to generate conformers for 100.00% 

## Regression

In [5]:
# Name of the data sub-directory under ../../data/ to evaluate on
# (presumably the target transformation that was applied - TODO confirm).
transform = 'log_standardize_prefix'

# From here on `data_path` is the validation CSV file, not the dataset directory
# that the embedding cells above used under the same name.
data_path = f'../../data/{transform}/validation.csv'
# Location handed to `MolPredict(load_model=...)`. Plain string: nothing is
# interpolated, so no f-prefix is needed.
model_path = './model'

In [6]:
# Load the validation table from the CSV at `data_path`. Later cells treat every
# column after the first as a target property (presumably the first column holds
# the SMILES strings - TODO confirm).
data = pd.read_csv(data_path)

In [8]:
# Load the saved model from `model_path` and run it on the CSV at `data_path`.
unimol = MolPredict(load_model=model_path)

predictions = pd.DataFrame(unimol.predict(data=data_path))
# Label the prediction columns with the property names of the input table
# (every column after the first).
predictions.columns = data.columns[1:]

# `DataFrame.to_csv` does not create missing parent directories, so make sure the
# output folder exists before writing (no-op when it is already there).
os.makedirs('./predictions', exist_ok=True)
predictions.to_csv('./predictions/validation.csv', index=False)

2025-01-06 16:05:57 | unimol_tools/data/conformer.py | 89 | INFO | Uni-Mol Tools | Start generating conformers...
356it [00:06, 56.83it/s]
2025-01-06 16:06:03 | unimol_tools/data/conformer.py | 93 | INFO | Uni-Mol Tools | Succeed to generate conformers for 100.00% of molecules.
2025-01-06 16:06:03 | unimol_tools/data/conformer.py | 95 | INFO | Uni-Mol Tools | Succeed to generate 3d conformers for 100.00% of molecules.
2025-01-06 16:06:03 | unimol_tools/models/unimol.py | 120 | INFO | Uni-Mol Tools | Loading pretrained weights from /scratch/midway3/jshe/.conda/envs/unimol/lib/python3.10/site-packages/unimol_tools/weights/mol_pre_all_h_220816.pt
2025-01-06 16:06:04 | unimol_tools/models/nnmodel.py | 206 | INFO | Uni-Mol Tools | start predict NNModel:unimolv1
2025-01-06 16:06:04 | unimol_tools/tasks/trainer.py | 300 | INFO | Uni-Mol Tools | load model success!
2025-01-06 16:06:15 | unimol_tools/tasks/trainer.py | 300 | INFO | Uni-Mol Tools | load model success!                            

In [None]:
# Coefficient of determination (R^2) of the predictions for every property column.
# The predictions are read from the `predictions` DataFrame built in the prediction
# cell; the `preds` name used here before is not defined by any cell, so the loop
# failed on a fresh kernel. Iterating over the property columns also replaces the
# hard-coded count of 7.
for i, property_label in enumerate(data.columns[1:]):
    y_true = data.iloc[:, i + 1]
    # Positional NumPy values, so the subtraction below is element-wise by row
    # position rather than aligned on index labels.
    y_pred = predictions.iloc[:, i].to_numpy()

    res = ((y_true - y_pred) ** 2).sum()           # residual sum of squares
    tot = ((y_true - y_true.mean()) ** 2).sum()    # total sum of squares
    r2 = 1 - (res / tot)

    print(property_label, f'\tR^2: {r2}')

In [None]:
# Overlay the true and predicted distributions of the last property column.
fig, ax0 = plt.subplots(figsize=(4, 4))

# Predictions come from the `predictions` DataFrame built in the prediction cell;
# the `preds` name used here before is not defined by any cell in the notebook.
ax0.hist(data.iloc[:, -1], alpha=0.5, color='b', label='True')
ax0.hist(predictions.iloc[:, -1], alpha=0.5, color='r', label='Predicted')
ax0.set_xlabel(data.columns[-1])
ax0.set_ylabel('Count')
# The legend tells the two overlaid series apart; the trailing `;` hides the repr.
ax0.legend();

In [None]:
# True-vs-predicted scatter plot for every property, one panel each on a 3x3 grid.
fig, axs = plt.subplots(3, 3, figsize=(9, 9))
axs = axs.flatten()

property_labels = data.columns[1:]

for i, property_label in enumerate(property_labels):

    ax = axs[i]

    # Predictions come from the `predictions` DataFrame built in the prediction cell;
    # the `preds` name used here before is not defined by any cell in the notebook.
    ax.scatter(data.iloc[:, i+1], predictions.iloc[:, i], s=2)
    ax.set_xlabel('True')
    ax.set_ylabel('Predicted')
    ax.set_title(property_label)

# Hide the panels of the grid that did not receive a property.
for unused_ax in axs[len(property_labels):]:
    unused_ax.set_visible(False)

plt.tight_layout()

In [None]:
# Optional: uncomment to export the most recently created figure (`fig`) as a PDF.
# fig.savefig('predictions.pdf')