In [7]:
import torch
from torch import nn
import pandas as pd

# Pick the compute device: CUDA when it is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"使用设备: {device}")

# Model class (it must match the class definition that was used when the
# checkpoints were saved, otherwise the state dict will not load).
class CNNLSTM(nn.Module):
    """Embedding -> 1-D convolution -> LSTM -> linear head with sigmoid output.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each embedding vector (conv input channels).
        cnn_filters: Number of convolution output channels (LSTM input size).
        lstm_hidden: Hidden size of the LSTM.
        num_classes: Number of output units of the final linear layer.
    """

    def __init__(self, vocab_size, embedding_dim, cnn_filters, lstm_hidden, num_classes):
        super().__init__()
        # Attribute names are part of the checkpoint format (state-dict keys).
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # Kernel of 20 with default stride/padding: the sequence axis shrinks
        # by 19 and inputs need at least 20 positions.
        self.conv = nn.Conv1d(embedding_dim, cnn_filters, kernel_size=20)
        self.lstm = nn.LSTM(cnn_filters, lstm_hidden, batch_first=True)
        self.fc = nn.Linear(lstm_hidden, num_classes)

    def forward(self, x):
        """Return sigmoid scores of shape (batch, num_classes).

        Args:
            x: Integer token tensor of shape (batch, seq_len).
        """
        embedded = self.embedding(x)                  # (batch, seq_len, emb)
        # Conv1d expects channels before the sequence axis.
        features = self.conv(embedded.transpose(1, 2))
        # Back to (batch, steps, channels) for the batch-first LSTM.
        seq_out, _ = self.lstm(features.transpose(1, 2))
        # Only the output at the final time step feeds the classifier.
        logits = self.fc(seq_out[:, -1])
        return torch.sigmoid(logits)

# Revised model-loading function.
def load_models(model_paths):
    """Build one CNNLSTM per checkpoint and return them keyed by name.

    Args:
        model_paths: Mapping of model name -> path of a saved state dict.

    Returns:
        dict mapping each model name to a CNNLSTM with the checkpoint's
        weights loaded, moved to the module-level ``device``.

    The architecture hyperparameters (``vocab_size``, ``embedding_dim``,
    ``cnn_filters``, ``lstm_hidden``, ``num_classes``) are read from module
    globals at call time.
    """
    def _restore(checkpoint_path):
        net = CNNLSTM(vocab_size, embedding_dim, cnn_filters, lstm_hidden, num_classes)
        # map_location lets checkpoints saved on another device load here.
        state = torch.load(checkpoint_path, map_location=device)
        net.load_state_dict(state)
        net.to(device)
        return net

    return {name: _restore(path) for name, path in model_paths.items()}

# Amino-acid letter -> integer code. Codes start at 1; 'X' (the last code, 22)
# is assumed to stand for any unknown amino acid.
aa_to_int = {
    residue: code
    for code, residue in enumerate("ARNDCEQGHILKMFPSTWYVUX", start=1)
}

# Encoding function.
def encode_sequence(seq, max_length):
    """Encode an amino-acid sequence as a fixed-length batch of one.

    Each residue is mapped through ``aa_to_int``; letters missing from the
    table get code 22 (the 'X' / unknown code). The result is right-padded
    with 0 up to ``max_length``. Sequences longer than ``max_length`` are
    truncated so the returned tensor always has the same width (previously an
    over-long sequence produced a tensor wider than ``max_length`` because a
    negative pad count silently yields no padding).

    Args:
        seq: Iterable of single-letter residue codes (typically a ``str``).
        max_length: Exact number of positions in the encoded output.

    Returns:
        ``torch.long`` tensor of shape (1, max_length) on the module-level
        ``device``.
    """
    # Slice after encoding so any iterable (not just sliceable ones) works.
    codes = [aa_to_int.get(aa, 22) for aa in seq][:max_length]
    codes.extend([0] * (max_length - len(codes)))
    # Create the tensor directly on the target device.
    return torch.tensor([codes], dtype=torch.long, device=device)

# Prediction function.
def predict(model, sequence, max_length):
    """Score one sequence with ``model`` and return the result as a percentage.

    Args:
        model: Module called on the encoded sequence; its output must hold
            exactly one element, since it is read with ``.item()``.
        sequence: Residue sequence passed to ``encode_sequence``.
        max_length: Encoding length passed to ``encode_sequence``.

    Returns:
        The model output multiplied by 100, as a Python float.
    """
    model.eval()  # switch the model to evaluation mode before scoring
    inputs = encode_sequence(sequence, max_length)
    with torch.no_grad():  # no gradient tracking needed for inference
        score = model(inputs).item()
    return score * 100

# Hyperparameters. NOTE(review): presumably these have to equal the values the
# checkpoints were trained with -- confirm against the training code.
max_length = 1500

vocab_size = len(aa_to_int) + 1  # +1 leaves room for index 0, used as padding
embedding_dim = 8
cnn_filters = 64
lstm_hidden = 128
num_classes = 1

# Checkpoint name -> file path. Add or remove entries as needed; each name
# becomes a column in the output spreadsheet.
model_paths = {
    'NGR_inflammatory_1500_10': r"C:\Users\Administrator\Desktop\AI\AOP_MDL\Model\NGR_inflammatory_1500_10.pth",
    'NGR_inflammatory_1500_20': r"C:\Users\Administrator\Desktop\AI\AOP_MDL\Model\NGR_inflammatory_1500_20.pth",
    'NGR_inflammatory_1500_30': r"C:\Users\Administrator\Desktop\AI\AOP_MDL\Model\NGR_inflammatory_1500_30.pth",
    'NGR_inflammatory_1500_40': r"C:\Users\Administrator\Desktop\AI\AOP_MDL\Model\NGR_inflammatory_1500_40.pth",
    'NGR_inflammatory_1500_50': r"C:\Users\Administrator\Desktop\AI\AOP_MDL\Model\NGR_inflammatory_1500_50.pth",
    'NGR_inflammatory_1500_60': r"C:\Users\Administrator\Desktop\AI\AOP_MDL\Model\NGR_inflammatory_1500_60.pth",
}

# Load every model.
models = load_models(model_paths)

# Load the Excel file holding the sequences to score.
excel_path = r"C:\Users\Administrator\Desktop\AI\AIP_MDL\Dateset\NGR_inflammatory response\NGR_inflammatory response_1500_prediction.xlsx"
df = pd.read_excel(excel_path)

# Score every row with every model; each model's percentages go into a new
# column named after the model. 'Sequence' must be the name of the DataFrame
# column that contains the sequences.
for model_name, model in models.items():
    predictions = [
        predict(model, row['Sequence'], max_length)
        for _, row in df.iterrows()
    ]
    df[model_name] = predictions

# Write the updated DataFrame back to the same Excel file (overwrites it).
df.to_excel(excel_path, index=False)

print("预测完成，结果已更新至Excel文件。")


使用设备: cuda


KeyboardInterrupt: 