Here we use DeepChem/ChemBERTa-77M-MLM as the baseline model and Rank-N-Contrast (https://github.com/kaiwenzha/Rank-N-Contrast) as the loss.

In [2]:
import os
import wandb
# Optional experiment tracking: read the W&B API key from Kaggle secrets,
# log in, and start a run. Tracking is best-effort, so any failure (not on
# Kaggle, secret missing, login rejected) leaves the notebook running.
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    wandb_key = user_secrets.get_secret("wandb_key")
    wandb.login(key=wandb_key)
    wandb.init(entity='lacemaker', project='openadmet2026')
except Exception as exc:
    # `Exception` (not a bare `except:`) lets KeyboardInterrupt/SystemExit
    # propagate, and the message makes it visible that tracking is off and why.
    print(f"W&B tracking disabled: {type(exc).__name__}: {exc}")



In [3]:
from pathlib import Path
# Writable scratch directory for this notebook; created if it is not there yet.
OUTPUTDIR = Path("..") / "working"
OUTPUTDIR.mkdir(exist_ok=True)

# POSIX-style string paths, handed to the %%writefile / pip cells further down.
WHEELDIR, REQUIREMENTS = (
    (OUTPUTDIR / leaf).as_posix() for leaf in ("wheels", "requirements.txt")
)


In [4]:
%%writefile $REQUIREMENTS
# Index options only take effect on a line of their own; appended to the
# torch requirement the URL was ignored (the captured pip log shows the plain
# PyPI torch wheel being used). An *extra* index keeps PyPI available for the
# remaining packages.
--extra-index-url https://download.pytorch.org/whl/cu124
#scikit-learn
#sklearn-compat
#category-encoders
#cesium
einops
sentence-transformers == 5.1.0
torch == 2.6.0
tabpfn
#transformers
# rdkit

Writing ../working/requirements.txt


In [7]:
!pip download --destination-directory $WHEELDIR -r $REQUIREMENTS
!pip wheel --wheel-dir $WHEELDIR -r $REQUIREMENTS
!pip install --upgrade --no-index --find-links=$WHEELDIR -r $REQUIREMENTS

Collecting einops (from -r ../working/requirements.txt (line 5))
  File was already downloaded /kaggle/working/wheels/einops-0.8.1-py3-none-any.whl
Collecting sentence-transformers==5.1.0 (from -r ../working/requirements.txt (line 6))
  File was already downloaded /kaggle/working/wheels/sentence_transformers-5.1.0-py3-none-any.whl
Collecting torch==2.6.0 (from -r ../working/requirements.txt (line 7))
  File was already downloaded /kaggle/working/wheels/torch-2.6.0-cp312-cp312-manylinux1_x86_64.whl
Collecting tabpfn (from -r ../working/requirements.txt (line 8))
  File was already downloaded /kaggle/working/wheels/tabpfn-6.3.1-py3-none-any.whl
Collecting transformers<5.0.0,>=4.41.0 (from sentence-transformers==5.1.0->-r ../working/requirements.txt (line 6))
  File was already downloaded /kaggle/working/wheels/transformers-4.57.6-py3-none-any.whl
Collecting tqdm (from sentence-transformers==5.1.0->-r ../working/requirements.txt (line 6))
  File was already downloaded /kaggle/working/whee

In [8]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import shutil
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Preview the input tree: at most five file paths from every directory under ../input.
input_preview = [
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('../input')
    for name in names[:5]
]
if input_preview:
    print("\n".join(input_preview))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

../input/openadmet2026-data-split/train_folds.csv
../input/openadmet2026-data-split/test_with_augmentations.csv
../input/openadmet2026-data-split/__results__.html
../input/openadmet2026-data-split/__notebook__.ipynb
../input/openadmet2026-data-split/__output__.json


In [9]:
# Smoke test: import sentence-transformers and echo the version that was loaded.
# NOTE(review): the recorded output of this cell is a ModuleNotFoundError
# ("Could not import module 'PreTrainedModel'") — presumably the kernel needs a
# restart after the in-notebook pip install, or the installed torch /
# transformers builds clash; TODO confirm before relying on the cells below.
import sentence_transformers as st
st.__version__

ModuleNotFoundError: Could not import module 'PreTrainedModel'. Are this object's requirements defined correctly?

In [None]:
# Echo the torch version the kernel actually imported (the requirements file pins 2.6.0).
import torch
torch.__version__

In [None]:
import os
import random
import numpy as np
import torch
from torch import Tensor


def set_seed(seed: int = 42) -> None:
    """Seed the NumPy, `random` and torch (CPU and CUDA) generators.

    Also switches cuDNN to deterministic mode with autotuning off, exports
    ``PYTHONHASHSEED`` into ``os.environ`` and prints a confirmation line.

    Args:
        seed: Value passed to every seeding call and written (as a string)
            to ``PYTHONHASHSEED``.

    NOTE(review): the interpreter reads ``PYTHONHASHSEED`` at start-up, so the
    export presumably only affects child processes — confirm if hash order
    matters here.
    """
    # Same seed for every generator the notebook touches.
    for seed_fn in (np.random.seed, random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

    # cuDNN: request deterministic kernels and disable the benchmark autotuner.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    os.environ["PYTHONHASHSEED"] = str(seed)
    print(f"Random seed set as {seed}")

# Notebook-wide seed, applied once here through set_seed().
RANDOM_SEED = 3407  # 42 is the alternative value that was noted here
set_seed(RANDOM_SEED)

## Data preparation

In [6]:
# Fetch the output of the 'latticetower/openadmet2026-data-split' notebook via kagglehub.
# The returned value is used by the next cell as the directory that holds
# train_folds.csv and test_with_augmentations.csv.
import kagglehub
openadmet_data_split_path = kagglehub.notebook_output_download('latticetower/openadmet2026-data-split')

In [11]:
# Both CSVs are read from the downloaded data-split directory.
split_dir = Path(openadmet_data_split_path)
train_df, blind_test_df = (
    pd.read_csv(split_dir / csv_name)
    for csv_name in ("train_folds.csv", "test_with_augmentations.csv")
)

# Names of the training columns that start with 'AUG_SMILES'.
additional_smiles_columns = list(
    filter(lambda name: name.startswith('AUG_SMILES'), train_df.columns)
)

# Nothing extra is merged in here: merged_df is the same object as train_df
# (an alias, not a copy).
merged_df = train_df

In [12]:
# Display the blind-test frame (bare last expression => rich table output).
blind_test_df

Unnamed: 0,Molecule Name,SMILES,LogD,KSOL,HLM CLint,MLM CLint,Caco-2 Permeability Papp A>B,Caco-2 Permeability Efflux,MPPB,MBPB,...,AUG_SMILES_2,AUG_SMILES_3,AUG_SMILES_4,AUG_SMILES_5,AUG_SMILES_6,AUG_SMILES_7,AUG_SMILES_8,AUG_SMILES_9,AUG_SMILES_10,num_candidates
0,E-0001321,CN1CCC[C@H]1COc1ccc(-c2nc3cc(-c4ccc5[nH]c(-c6c...,,,56.4,182.3,,,,,...,C1[C@@H](COc2ccc(cc2)-c2[nH]c3ccc(-c4cc5nc(-c6...,CN1[C@H](COc2ccc(-c3nc4c(ccc(-c5ccc6[nH]c(nc6c...,Oc1ccc(-c2[nH]c3c(cc(cc3)-c3ccc4c(c3)nc([nH]4)...,Oc1ccc(-c2[nH]c3c(n2)cc(cc3)-c2cc3c([nH]c(n3)-...,[nH]1c2ccc(cc2nc1-c1ccc(cc1)OC[C@H]1N(CCC1)C)-...,c1(-c2ccc(O)cc2)[nH]c2c(n1)cc(cc2)-c1ccc2c(nc(...,c1(-c2ccc(cc2)OC[C@H]2N(CCC2)C)nc2c([nH]1)ccc(...,c1(-c2ccc3c(nc(-c4ccc(OC[C@H]5N(C)CCC5)cc4)[nH...,c1(cc2nc(-c3ccc(cc3)O)[nH]c2cc1)-c1cc2c(cc1)[n...,10.0
1,E-0001780,COc1ccc2c(c1)c1cc3cnccc3c(C)c1n2C,,,160.4,1351.1,,,,,...,COc1ccc2n(C)c3c(C)c4c(cc3c2c1)cncc4,COc1ccc2n(c3c(c2c1)cc1cnccc1c3C)C,Cc1c2c(cc3c4cc(OC)ccc4n(C)c31)cncc2,Cn1c2c(C)c3c(cc2c2cc(OC)ccc12)cncc3,O(C)c1ccc2n(c3c(c4ccncc4cc3c2c1)C)C,O(c1cc2c3cc4c(ccnc4)c(C)c3n(C)c2cc1)C,c12c(C)c3ccncc3cc2c2c(n1C)ccc(c2)OC,c12c(c(C)c3n(C)c4ccc(OC)cc4c3c1)ccnc2,c12c(ccc(c2)OC)n(c2c(C)c3c(cncc3)cc12)C,10.0
2,E-0001827,Cc1c2ccncc2cc2c3cc(OCCCN4CCN(C)CC4)ccc3n(C)c12,,,,193.5,,,,,...,C1CN(C)CCN1CCCOc1cc2c(cc1)n(C)c1c2cc2c(c1C)ccnc2,C1CN(CCN1C)CCCOc1ccc2n(c3c(c4ccncc4cc3c2c1)C)C,CN1CCN(CC1)CCCOc1ccc2n(c3c(c4ccncc4cc3c2c1)C)C,N1(CCCOc2ccc3c(c4c(n3C)c(c3ccncc3c4)C)c2)CCN(C...,N1(CCN(CCCOc2cc3c(cc2)n(c2c3cc3cnccc3c2C)C)CC1)C,O(c1cc2c(n(C)c3c2cc2c(ccnc2)c3C)cc1)CCCN1CCN(C...,O(c1cc2c(n(c3c(C)c4c(cncc4)cc23)C)cc1)CCCN1CCN...,O(c1cc2c(n(c3c2cc2cnccc2c3C)C)cc1)CCCN1CCN(C)CC1,c12c(c(C)c3c(c1)cncc3)n(C)c1ccc(cc21)OCCCN1CCN...,10.0
3,E-0002019,CN(C)CCCOc1ccc(-c2nc3cc(NC(=O)c4ccc5[nH]c(-c6c...,,,9.5,,,,,,...,C(CCOc1ccc(-c2nc3cc(ccc3[nH]2)NC(c2cc3nc([nH]c...,C(c1cc2nc([nH]c2cc1)-c1ccc(cc1)OCCCN(C)C)(Nc1c...,CN(C)CCCOc1ccc(cc1)-c1nc2c(ccc(c2)C(Nc2cc3nc(-...,O(c1ccc(cc1)-c1nc2c([nH]1)ccc(c2)C(Nc1ccc2[nH]...,[nH]1c(nc2cc(C(Nc3cc4nc([nH]c4cc3)-c3ccc(cc3)O...,c1(-c2[nH]c3c(n2)cc(C(Nc2cc4nc(-c5ccc(OCCCN(C)...,c1(-c2nc3cc(NC(c4ccc5c(c4)nc(-c4ccc(cc4)OCCCN(...,c1(ccc(-c2nc3c([nH]2)ccc(C(=O)Nc2cc4c(cc2)[nH]...,c1(ccc(cc1)-c1nc2cc(ccc2[nH]1)NC(c1cc2nc(-c3cc...,10.0
4,E-0002036,CN(C)CCCOc1ccc2nc(-c3ccc(-c4nc5ccc(OCCCN(C)C)c...,,,,162.0,,,,,...,C(COc1cc2c(cc1)nc(-c1ccc(-c3[nH]c4c(n3)ccc(c4)...,C(COc1ccc2c([nH]c(n2)-c2cc(c(cc2)-c2nc3ccc(cc3...,C(N(C)C)CCOc1cc2[nH]c(-c3cc(F)c(cc3)-c3nc4c(cc...,C(Oc1cc2[nH]c(-c3ccc(-c4nc5c(cc(cc5)OCCCN(C)C)...,C(Oc1cc2c(cc1)nc(-c1c(F)cc(cc1)-c1[nH]c3c(n1)c...,CN(CCCOc1cc2c(nc([nH]2)-c2cc(F)c(-c3[nH]c4c(cc...,N(CCCOc1cc2c(cc1)nc(-c1ccc(c(c1)F)-c1nc3ccc(OC...,c1(-c2[nH]c3cc(OCCCN(C)C)ccc3n2)ccc(-c2[nH]c3c...,c1(OCCCN(C)C)cc2[nH]c(-c3cc(F)c(-c4nc5c(cc(OCC...,10.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5320,E-0020096,CCn1c(C(=O)N2CCN(c3ccncc3)CC2)cc2cc(-c3nnc(C4C...,2.5,257.0,,,,,,,...,C1(CC1)c1nnc(-c2cc3cc(C(N4CCN(CC4)c4ccncc4)=O)...,C1C(C1)c1sc(-c2ccc3n(CC)c(C(=O)N4CCN(CC4)c4ccn...,C1CC1c1sc(-c2ccc3c(c2)cc(n3CC)C(=O)N2CCN(CC2)c...,C1N(C(c2cc3cc(ccc3n2CC)-c2nnc(s2)C2CC2)=O)CCN(...,C1N(CCN(c2ccncc2)C1)C(=O)c1cc2cc(-c3nnc(s3)C3C...,CCn1c(C(N2CCN(c3ccncc3)CC2)=O)cc2cc(ccc21)-c1s...,N1(C(=O)c2n(c3c(cc(-c4sc(C5CC5)nn4)cc3)c2)CC)C...,N1(CCN(C(c2cc3c(ccc(c3)-c3nnc(C4CC4)s3)n2CC)=O...,c1(-c2nnc(C3CC3)s2)cc2c(cc1)n(c(C(N1CCN(c3ccnc...,10.0
5321,E-0020097,Cc1nnc(-c2cnc3c(c2)cc(C(=O)N2CCCN(c4ccncc4C)CC...,1.5,277.0,,,,,,,...,C1N(c2c(cncc2)C)CCCN(C(c2n(C)c3c(c2)cc(cn3)-c2...,Cc1cnccc1N1CCCN(CC1)C(=O)c1n(C)c2c(cc(cn2)-c2s...,Cc1sc(-c2cc3cc(C(=O)N4CCCN(c5ccncc5C)CC4)n(c3n...,N1(CCCN(CC1)C(=O)c1cc2cc(-c3nnc(s3)C)cnc2n1C)c...,O=C(N1CCCN(c2ccncc2C)CC1)c1cc2cc(-c3sc(nn3)C)c...,c1(-c2cc3cc(C(=O)N4CCCN(CC4)c4c(C)cncc4)n(c3nc...,c1(C)c(ccnc1)N1CCN(CCC1)C(c1cc2c(n1C)ncc(c2)-c...,c1(C)cnccc1N1CCCN(CC1)C(=O)c1cc2c(n1C)ncc(-c1n...,c12c(cc(cn2)-c2sc(nn2)C)cc(C(N2CCCN(CC2)c2ccnc...,10.0
5322,E-0020098,Cc1nnc(-c2ccc3c(c2)cc(C(=O)N2CCCN(c4ccncc4)CC2...,1.0,273.0,,,,,,,...,C1CCN(CCN1C(=O)c1cc2cc(-c3sc(nn3)C)ccc2n1C)c1c...,C1CN(CCCN1C(=O)c1cc2cc(ccc2n1C)-c1nnc(C)s1)c1c...,C1N(CCCN(C1)C(c1n(c2ccc(-c3sc(nn3)C)cc2c1)C)=O...,C1N(c2ccncc2)CCN(CC1)C(c1cc2cc(ccc2n1C)-c1nnc(...,Cc1nnc(-c2cc3cc(n(C)c3cc2)C(=O)N2CCN(CCC2)c2cc...,Cc1nnc(s1)-c1cc2c(cc1)n(C)c(c2)C(=O)N1CCN(c2cc...,N1(c2ccncc2)CCCN(CC1)C(=O)c1n(c2ccc(cc2c1)-c1n...,O=C(N1CCCN(c2ccncc2)CC1)c1cc2cc(-c3nnc(C)s3)cc...,c1(C(=O)N2CCN(CCC2)c2ccncc2)n(C)c2c(cc(-c3nnc(...,10.0
5323,E-0020099,Cc1nnc(-c2ccc3c(c2)cc(C(=O)N2CCCN(c4ccncc4C)CC...,2.2,273.0,,,,,,,...,C1CCN(CCN1c1c(cncc1)C)C(=O)c1cc2c(n1C)ccc(c2)-...,C1CN(C(=O)c2n(c3c(cc(cc3)-c3sc(nn3)C)c2)C)CCN(...,C1CN(CCCN1c1c(C)cncc1)C(=O)c1n(c2ccc(-c3sc(C)n...,C1CN(c2c(C)cncc2)CCCN1C(c1n(c2c(c1)cc(-c1nnc(s...,C1CN(c2c(cncc2)C)CCCN1C(c1n(c2ccc(cc2c1)-c1sc(...,c1(C(=O)N2CCN(CCC2)c2c(cncc2)C)cc2cc(-c3nnc(C)...,c1(C(=O)N2CCN(c3c(C)cncc3)CCC2)cc2cc(-c3sc(C)n...,c1(C(N2CCN(CCC2)c2c(cncc2)C)=O)n(c2ccc(-c3sc(n...,c1(C)sc(nn1)-c1cc2c(n(c(c2)C(N2CCN(c3ccncc3C)C...,10.0
