# A Deep Learning based Efficacy Prediction System for Drug Discovery

In [None]:
# /mnt/d/Research/PHD/DLEPS/code/DLEPS/DLEPS_tutorial.py

# Import required modules
import os
import pandas as pd
import matplotlib.pyplot as plt
import sys
# Add the local DLEPS source directory to the module search path so that the
# `dleps_predictor` import below can resolve (assumes the module lives there).
sys.path.append('/mnt/d/Research/PHD/DLEPS/code/DLEPS')
from dleps_predictor import DLEPS

Load the SMILES for natural products and FDA approved drugs

In [None]:
# Read the two compound tables used throughout this tutorial: natural
# products (`smi`) and FDA-approved drugs (`fda`). Later cells read the
# 'SMILES' column of each table.
smi = pd.read_csv(
    filepath_or_buffer='/mnt/d/Research/PHD/DLEPS/data/Brief_Targetmol_natural_product_2719',
)
fda = pd.read_csv(
    filepath_or_buffer='/mnt/d/Research/PHD/DLEPS/data/Brief_FDA-Approved-Drug_961',
)

In [None]:
# Preview the natural-product table.
print("天然产物数据预览：")
print(smi.head())
# A bare expression is only rendered by the notebook when it is the last
# statement of the cell, so it is placed at the end (mid-cell it was a no-op).
smi



Load the DLEPS model. DLEPS requires an up gene set and a down gene set to calculate cs = a - b, or only one gene set to calculate a.

In [None]:
# Build the DLEPS predictor.
# The weights are read from the model file saved after training; the up/down
# gene sets come from the BROWNING_up / BROWNING_down files.
predictor = DLEPS(
    model_weights_path='/mnt/d/Research/PHD/DLEPS/my_trained_model.h5',  # weights file saved after training
    up_name='/mnt/d/Research/PHD/DLEPS/data/BROWNING_up',
    down_name='/mnt/d/Research/PHD/DLEPS/data/BROWNING_down',
    reverse=False,
    save_exp=None,
)




The model structure is shown below.

In [None]:
# Show the model structure: call summary() on the first entry of
# predictor.model (presumably a Keras model, given the .h5 weights file -- confirm).
predictor.model[0].summary()



Run model inference. The input is an array of SMILES strings; the output is an array of cs scores, with -2 marking molecules that could not be processed.
The predict function has the following options:
predict(self, setmean = False, save_onehot=None, load_onehot=None, reverse = True, base = -2, up_name='', down_name='', save_exp = None)

setmean : False, z score (Change of Transcriptional Profiles, CTP) = expression level - mean; setmean refers to the mean of the expression files for the user-supplied chemicals \
save_onehot: None, save the one-hot array of the current chemical set for future use \
load_onehot: None, load the one-hot array if one-hot files have already been saved \
reverse: True, whether to reverse the input up / down gene sets; only works for paired input \
base: -2, default value assigned to small molecules that fail to parse

In [None]:
# Model inference on the FDA-approved drugs.
#   input : array of SMILES strings
#   output: array of cs scores; -2 marks a molecule that failed processing
fda_smiles = fda['SMILES'].values
scores = predictor.predict(fda_smiles)
# Alternative kept from the original tutorial (`celastrol` is not defined in
# this file): scores = predictor.predict(celastrol)

In [None]:
# Preview the FDA-approved drug table.
print("FDA 批准药物数据预览：")
print(fda.head())
# A bare expression is only rendered by the notebook when it is the last
# statement of the cell, so it is placed at the end (mid-cell it was a no-op).
fda



Visualize the cs scores

In [None]:
# Histogram of the cs scores predicted for the FDA-approved drugs.
# NOTE: molecules that failed processing (score -2) are included in the plot.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(scores, bins=50, color='skyblue', edgecolor='black')
ax.set_title("FDA Approved Drugs CS Scores")
ax.set_xlabel("CS Scores")
ax.set_ylabel("Frequency")
plt.show()

In [None]:
# Attach the predicted scores to the FDA table as a new 'score' column.
fda['score'] = scores

In [None]:
# Inspect the updated FDA table: print the first rows, then show the frame.
print("更新后的 FDA 批准药物数据预览：")
print(fda.head())
# Trailing bare expression: a notebook renders the last expression of a cell.
fda

In [None]:
# To save the results to a file, uncomment the following lines:
# fda = fda.set_index('Unnamed: 0')
# fda.to_csv('../../results/fda_HUA_merge.csv')
# print("FDA 预测结果已保存到 '../../results/fda_HUA_merge.csv'")


In [None]:
# Score the natural-product compounds with the same predictor.
natural_product_smiles = smi['SMILES'].values
smi_scores = predictor.predict(natural_product_smiles)

In [None]:
# Histogram of the cs scores predicted for the natural products.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(smi_scores, bins=50, color='lightgreen', edgecolor='black')
ax.set_title("Natural Products CS Scores")
ax.set_xlabel("CS Scores")
ax.set_ylabel("Frequency")
plt.show()

In [None]:
# Attach the predicted scores to the natural-product table as a new 'score' column.
smi['score'] = smi_scores

In [None]:
# Use the 'Unnamed: 0' column as the index of the natural-product table.
# That column is assumed to be the one that should become the index; adjust
# the name to match the actual data.
# The membership check keeps the cell safe to re-run: after the first run the
# column has moved into the index, and an unconditional set_index would raise
# KeyError. It also covers data files that lack the column.
if 'Unnamed: 0' in smi.columns:
    smi = smi.set_index('Unnamed: 0')

In [None]:
# Show the first rows of the updated natural-product table
# (heading and preview are emitted on separate lines).
print("更新后的天然产物数据预览：", smi.head(), sep="\n")

In [None]:
# To save the results to a file, uncomment the following lines:
# smi.to_csv('../../results/natural_product_scores.csv')
# print("天然产物预测结果已保存到 '../../results/natural_product_scores.csv'")