In [None]:
# Install the notebook's third-party dependencies into the running kernel's environment.
# NOTE(review): no versions are pinned, so a fresh install can resolve to different releases over time — consider pinning.
%pip install -q rdkit deepchem dgl dgllife lightning pandas numpy scikit-learn matplotlib seaborn tqdm

# Imports & Settings

In [None]:
import os
import platform
import random
import warnings

import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import AdamW
from tqdm.auto import tqdm
from torch.utils.data import DataLoader
import deepchem as dc
from deepchem.models.torch_models import MPNNModel
import lightning as L
from lightning.pytorch.callbacks import ModelCheckpoint

from rdkit import Chem
from rdkit.Chem import AllChem
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Colab only: mount Google Drive at /content/drive.
# The import is guarded because the google.colab package exists only inside the
# Colab runtime; an unconditional import raises ImportError on a local kernel
# and stops "Run All" at this cell.
try:
    from google.colab import drive
except ImportError:
    # Not running on Colab: nothing to mount.
    IN_COLAB = False
else:
    IN_COLAB = True
    drive.mount('/content/drive')

In [None]:
# Notebook-wide configuration values.
CFG = dict(
    NBITS=2048,  # presumably the fingerprint bit length; not used in the visible cells — TODO confirm
    SEED=42,     # seed handed to seed_everything()
)

In [None]:
# Random seed setup
def seed_everything(seed):
    """Seed the random number generators used by this notebook.

    Sets PYTHONHASHSEED to ``str(seed)`` and then calls ``random.seed``,
    ``np.random.seed`` and Lightning's ``seed_everything`` with ``seed``.

    Args:
        seed: integer seed applied to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seed_fn in (random.seed, np.random.seed, L.seed_everything):
        seed_fn(seed)


seed_everything(CFG['SEED'])  # fix the seed from the notebook config

In [None]:
# Dataset root directories (Google Drive mount vs. local checkout).
# NOTE(review): neither constant is referenced by the read_csv / to_csv calls
# visible in this notebook, which use './...' paths directly.
# NOTE(review): 'New-Medinine-Dev' looks like a misspelling of "Medicine" —
# confirm against the real Drive folder name before renaming.
ROOT_DIR_GOOGLEDRIVE = '/content/drive/MyDrive/Contest/New-Medinine-Dev/'
ROOT_DIR_LOCAL = '..'

In [None]:
# Visualization settings
warnings.filterwarnings('ignore')           # hide warning messages in cell output
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign from rendering as a broken glyph

# Choose a Korean-capable font by operating system instead of hard-coding the
# macOS one and toggling a commented-out line by hand for Windows.
# On any other system (e.g. Linux / Colab) matplotlib's default family is kept;
# install a Korean font there and set it here if Korean labels are needed.
_KOREAN_FONT_BY_OS = {
    'Darwin': 'AppleGothic',     # macOS
    'Windows': 'Malgun Gothic',  # Windows
}
_korean_font = _KOREAN_FONT_BY_OS.get(platform.system())
if _korean_font is not None:
    plt.rcParams['font.family'] = _korean_font

# Load Data

In [None]:
# Read the train and test CSV files from ./data, relative to the kernel's working directory.
trainset = pd.read_csv('./data/train.csv')
testset = pd.read_csv('./data/test.csv')

# Data Preprocessing

# Model

In [None]:
class MPNNSmilesRegression(L.LightningModule):
    """Lightning module wrapping DeepChem's ``MPNNModel`` in regression mode.

    Work in progress: only model construction is implemented; the forward pass
    and the remaining setup are still placeholders.

    Args:
        batch_size: forwarded to ``MPNNModel`` as ``batch_size``.
        node_out_feats: forwarded to ``MPNNModel`` as ``node_out_feats``.
        n_tasks: forwarded to ``MPNNModel`` as ``n_tasks``.
    """

    def __init__(self, batch_size, node_out_feats, n_tasks):
        super().__init__()
        # Keep the model on the instance. As a bare local variable it was
        # discarded as soon as __init__ returned, leaving the module with no
        # model to use in forward().
        # NOTE(review): confirm whether Lightning should own MPNNModel's
        # underlying torch module instead, so its parameters are registered.
        self.model = MPNNModel(
            mode='regression',
            n_tasks=n_tasks,
            node_out_feats=node_out_feats,
            batch_size=batch_size,
        )
        # TODO: remaining setup is not implemented yet.
        ...

    def forward(self):
        """Placeholder forward pass; not implemented yet (returns None)."""
        # TODO: accept a batch and run it through self.model.
        ...
    
    

# Training

# Evaluation

In [None]:
def pIC50_to_IC50(pic50_values):
    """Convert pIC50 values to IC50 (nM).

    Computes ``10 ** (9 - pIC50)`` element-wise.

    Args:
        pic50_values: a scalar, list/tuple, NumPy array or pandas Series of
            pIC50 values. Integer inputs are accepted.

    Returns:
        IC50 values in nM as floats. Arrays and Series keep their container
        type; lists and tuples are returned as a NumPy array.
    """
    # Plain Python sequences do not support ``9 - values``; make them an array.
    if isinstance(pic50_values, (list, tuple)):
        pic50_values = np.asarray(pic50_values, dtype=float)
    # A float base is required: NumPy refuses to raise an integer to a negative
    # integer power, which happens for integer pIC50 values above 9.
    return 10.0 ** (9 - pic50_values)

# Evaluate the trained model on the validation data.
# NOTE(review): `model`, `val_x` and `val_y` are not defined in any cell visible
# in this notebook — this cell fails on a fresh kernel unless they are created first.
val_y_pred = model.predict(val_x)
# Both targets and predictions are converted from pIC50 to IC50 (nM) before
# scoring, so the reported RMSE is on the IC50 scale.
mse = mean_squared_error(pIC50_to_IC50(val_y), pIC50_to_IC50(val_y_pred))
rmse = np.sqrt(mse)

print(f'RMSE: {rmse}')

# Inference

In [None]:
# Load the test set and compute a fingerprint for every SMILES string.
# NOTE(review): this reads './test.csv', while the earlier load cell reads
# './data/test.csv' — confirm which path is intended.
# NOTE(review): `smiles_to_fingerprint` and `model` are not defined in any cell
# visible in this notebook.
test = pd.read_csv('./test.csv')
test['Fingerprint'] = test['Smiles'].apply(smiles_to_fingerprint)

# Stack the per-row fingerprints into one array
# (assumes every fingerprint has the same shape — TODO confirm).
test_x = np.stack(test['Fingerprint'].values)

# Predict on the stacked test features; the result is passed to pIC50_to_IC50 later.
test_y_pred = model.predict(test_x)

# Submission

In [None]:
# Fill the sample submission's IC50_nM column with the test predictions,
# converted from pIC50 to IC50 (nM), then preview the first rows.
submit = pd.read_csv('./sample_submission.csv')
submit['IC50_nM'] = pIC50_to_IC50(test_y_pred)
submit.head()

In [None]:
# Write the submission file without the DataFrame index column.
submit.to_csv('./baseline_submit.csv', index=False)