<img src="https://raw.githubusercontent.com/molecule-generator-collection/
DFT_param_opt/main/.github/logo.png" height="200" align="right" style="height:240px">

## ColabKTLC V1.0: estimation of the optimal range-separated parameter


This notebook predicts the optimal value of the range-separated parameter μ for a given molecule (in SMILES format), based on the KTLC framework. For more details see the following paper. 

[K. Terayama, Y. Osaki, T. Fujita, R. Tamura, M. Naito, K. Tsuda, T. Matsui,  M. Sumita,
 Koopmans’ Theorem-Compliant Long-range Corrected
(KTLC) Density Functional Mediated by Black-box
Optimization and Machine Learning for Organic Molecules,
*arXiv*, 2022](https://) 

This notebook is version 1.0, created on 6 June 2023.

In [None]:
#@title Please enter a molecule in SMILES format, then hit Runtime -> Run all

# SMILES string of the molecule to predict for; it is passed to smiles2mu()
# in the "Setup and prediction" cell.
molecule = "C1=CC2=CC3=CC=C(N3)C=C4C=CC(=N4)C=C5C=CC(=N5)C=C1N2"#@param {type:"string"}



In [None]:
#@title Setup and prediction

%%capture

# Install the third-party packages imported further down in this cell
# (LightGBM, RDKit and mordred).
!pip install lightgbm
!pip install rdkit
!pip install mordred

import numpy as np
import pickle
import shutil

import matplotlib.pyplot as plt
import seaborn as sns

import pandas as pd
from rdkit import rdBase, Chem, DataStructs
print(rdBase.rdkitVersion) 
from rdkit.Avalon import pyAvalonTools
from rdkit.Chem import AllChem, Draw, rdMolDescriptors
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem import Descriptors
from rdkit.Chem.Fingerprints import FingerprintMols
from rdkit.Chem.AtomPairs import Pairs, Torsions
from rdkit.ML.Descriptors import MoleculeDescriptors


from mordred import Calculator, descriptors
import lightgbm as lgbm


def smi2descrptor(smi_list, mordred_index_list):
  """Build the combined descriptor matrix for a list of SMILES strings.

  For every molecule the following feature groups are computed and then
  concatenated column-wise, in this order:

    1. Morgan fingerprint (radius 2, 2048 bits)
    2. RDKit topological fingerprint
    3. MACCS keys fingerprint
    4. every RDKit descriptor listed in ``Descriptors._descList``
    5. mordred descriptors (``ignore_3D=True``), restricted to the columns
       selected by ``mordred_index_list``

  Args:
    smi_list: Iterable of SMILES strings.
    mordred_index_list: Column selector applied to the mordred descriptor
      array as ``values[:, mordred_index_list]``.

  Returns:
    A 2D numpy array with one row per input molecule.

  Raises:
    ValueError: If one or more SMILES strings cannot be parsed by RDKit.
  """
  smi_list = list(smi_list)
  mol_list = [Chem.MolFromSmiles(smi) for smi in smi_list]

  # MolFromSmiles returns None for unparsable input; fail early with a clear
  # message instead of an opaque error inside the fingerprint routines.
  invalid_smiles = [smi for smi, mol in zip(smi_list, mol_list) if mol is None]
  if invalid_smiles:
    raise ValueError(f'Could not parse SMILES: {invalid_smiles}')

  Morgan_fp_list = np.array([AllChem.GetMorganFingerprintAsBitVect(mol, 2, 2048) for mol in mol_list])
  MACCS_fp_list = np.array([AllChem.GetMACCSKeysFingerprint(mol) for mol in mol_list])

  # Preparation so that the topological fingerprint has the same number of
  # bits for every molecule.
  params = FingerprintMols.FingerprinterDetails()
  params.tgtDensity = 0
  Topol_fp_list = np.array([FingerprintMols.FingerprintMol(mol, **params.__dict__) for mol in mol_list])

  # RDKit descriptors (208)
  descriptor_names = [descriptor_name[0] for descriptor_name in Descriptors._descList]
  descriptor_calculation = MoleculeDescriptors.MolecularDescriptorCalculator(descriptor_names)
  RDkit_desc_list = np.array([descriptor_calculation.CalcDescriptors(mol_temp) for mol_temp in mol_list])

  # mordred descriptor calculation
  df = pd.DataFrame({'MOL': mol_list})
  calc = Calculator(descriptors, ignore_3D=True)
  df_descriptors_mordred = calc.pandas(df['MOL'])

  # Any entry whose string form contains an ASCII letter is replaced by NaN
  # before the float conversion (presumably to drop mordred's non-numeric
  # error/missing markers).
  # NOTE(review): this also blanks values printed in scientific notation
  # (e.g. '1e-05'). It is kept as-is so that the features match what the
  # trained models expect.
  df_descriptors = df_descriptors_mordred.astype(str)
  masks = df_descriptors.apply(lambda d: d.str.contains('[a-zA-Z]', na=False))
  df_descriptors = df_descriptors[~masks]
  df_descriptors = df_descriptors.astype(float)

  # Keep only the selected mordred columns.
  df_descriptors = df_descriptors.values[:, mordred_index_list]

  All_Desc_list = np.concatenate([Morgan_fp_list, Topol_fp_list, MACCS_fp_list, RDkit_desc_list, df_descriptors], axis = 1)

  return All_Desc_list



def smiles2mu(smi_list):
  """Predict mu for each SMILES string by averaging five fold models.

  Reads the module-level names ``selected_index_mask``, ``model_path``,
  ``p_model`` and ``feature`` to compute the descriptors and to locate the
  pickled models ``model_{p_model}_{feature}_{fold}.pkl`` for fold 0..4.

  Args:
    smi_list: List of SMILES strings.

  Returns:
    A tuple ``(mean, std)``: the mean and standard deviation of the five
    models' predictions, taken across models (axis 0).
  """
  desc = smi2descrptor(smi_list, selected_index_mask)

  predicted_mu_list = []
  for fold in range(5):
    # NOTE: unpickling can execute arbitrary code; only load model files that
    # come from a trusted source.
    # The context manager closes each model file right after loading.
    with open(f'{model_path}/model_{p_model}_{feature}_{fold}.pkl', 'rb') as model_file:
      loaded_model = pickle.load(model_file)
    predicted_mu_list.append(loaded_model.predict(desc))

  return np.mean(predicted_mu_list, axis = 0), np.std(predicted_mu_list, axis = 0)


#####  Setup  #####
#Load model
directory = '/content/DFT_param_opt'
try:
    shutil.rmtree(directory)
except FileNotFoundError:
    pass

!git clone https://github.com/molecule-generator-collection/DFT_param_opt.git

model_path = 'DFT_param_opt/model'
selected_index_mask = pickle.load(open(f'{model_path}/mordred_index_mask.pkl', 'rb'))


#Prediction of the input molecule
p_model = 'lightGBM' 
feature = 'All_Desc' 

predicted_mu_list, predicted_mu_std_list = smiles2mu([molecule])

In [None]:
#@title Predicted optimal $\mu$ value with its standard deviation:

# Report the prediction for the single input molecule: first the mean, then
# the standard deviation, each rounded to five decimal places.
mu_symbol = '\N{greek small letter mu}'
for label, values in (
    ('Predicted ' + mu_symbol + ':', predicted_mu_list),
    ('Predicted ' + mu_symbol + '(std):', predicted_mu_std_list),
):
    print(label, round(values[0], 5))