# In [1]:
import tkinter as tk
from tkinter import filedialog, messagebox, simpledialog
import torch
from torch import nn

class CNNLSTM(nn.Module):
    """Embedding -> 1-D convolution -> LSTM -> linear head with sigmoid output.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each embedding vector (conv input channels).
        cnn_filters: Number of convolution output channels (LSTM input size).
        lstm_hidden: Hidden size of the LSTM.
        num_classes: Number of output units of the final linear layer.
    """

    def __init__(self, vocab_size, embedding_dim, cnn_filters, lstm_hidden, num_classes):
        super().__init__()
        # Attribute names and creation order are kept stable: they define the
        # state_dict keys that saved checkpoints are matched against.
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embedding_dim,
        )
        self.conv = nn.Conv1d(
            in_channels=embedding_dim,
            out_channels=cnn_filters,
            kernel_size=20,
        )
        self.lstm = nn.LSTM(
            input_size=cnn_filters,
            hidden_size=lstm_hidden,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=lstm_hidden, out_features=num_classes)

    def forward(self, x):
        """Return sigmoid scores of shape (batch, num_classes).

        Args:
            x: Integer token ids, indexed as (batch, sequence). The sequence
                must be at least as long as the convolution kernel (20).
        """
        embedded = self.embedding(x)
        # Conv1d wants channels before length, so swap the last two axes.
        channels_first = embedded.permute(0, 2, 1)
        conv_out = self.conv(channels_first)
        # Swap back to (batch, length, channels) for the batch_first LSTM.
        lstm_out, _ = self.lstm(conv_out.permute(0, 2, 1))
        # Only the output at the final time step feeds the classifier.
        final_step = lstm_out[:, -1, :]
        logits = self.fc(final_step)
        return torch.sigmoid(logits)

# Load a model
def load_model(model_path, model):
    """Load the state dict stored at ``model_path`` into ``model`` and return it.

    The checkpoint is always mapped onto the CPU, so files saved from a GPU
    can be loaded on a machine without one.

    Args:
        model_path: Path of a file written with ``torch.save(state_dict, ...)``.
        model: Module whose parameter names match the saved state dict.

    Returns:
        The same ``model`` object, with its parameters overwritten.
    """
    # NOTE(review): depending on the PyTorch version, torch.load may unpickle
    # arbitrary objects -- only open checkpoints from trusted sources.
    cpu = torch.device('cpu')
    state_dict = torch.load(model_path, map_location=cpu)
    model.load_state_dict(state_dict)
    return model

# Mapping from one-letter amino-acid codes to integer token ids.
# Id 0 is not assigned here; encode_sequence uses it as the padding value.
aa_to_int = {'A':1, 'R':2, 'N':3, 'D':4, 'C':5, 'E':6, 'Q':7, 'G':8, 'H':9, 'I':10, 
             'L':11, 'K':12, 'M':13, 'F':14, 'P':15, 'S':16, 'T':17, 'W':18, 'Y':19, 
             'V':20, 'U':21, 'X':22}  # X is assumed to stand for every unknown amino acid

# Encoding function
def encode_sequence(seq, max_length):
    """Encode a residue string as a ``(1, max_length)`` tensor of token ids.

    Each character is looked up in ``aa_to_int``; characters that are not in
    the table get the id of ``'X'``. Sequences shorter than ``max_length`` are
    right-padded with 0, and longer ones are cut to the first ``max_length``
    residues so the result always has exactly ``max_length`` columns.

    Args:
        seq: Amino-acid sequence as a string of one-letter codes.
        max_length: Fixed length of the encoded sequence; must be >= 0.

    Returns:
        A ``torch.long`` tensor with a leading batch dimension of size 1.

    Raises:
        ValueError: If ``max_length`` is negative.
    """
    if max_length < 0:
        raise ValueError(f"max_length must be non-negative, got {max_length}")

    unknown_id = aa_to_int['X']
    # Truncate first: without this an over-long sequence produced a negative
    # padding length (i.e. no padding) and a tensor longer than max_length.
    encoded_seq = [aa_to_int.get(aa, unknown_id) for aa in seq[:max_length]]
    padding = [0] * (max_length - len(encoded_seq))  # 0 is the padding id
    # Wrap in an outer list so the result is already a batch of one.
    return torch.tensor([encoded_seq + padding], dtype=torch.long)

# Prediction function
def predict(model, sequence, max_length):
    """Score one sequence with ``model`` and return the result as a percentage.

    The model is switched to evaluation mode and the forward pass runs with
    gradient tracking disabled.

    Args:
        model: Module that maps an encoded batch to a single-element tensor.
        sequence: Amino-acid sequence string.
        max_length: Length passed on to ``encode_sequence``.

    Returns:
        The model output multiplied by 100, as a Python float.
    """
    model.eval()
    with torch.no_grad():
        batch = encode_sequence(sequence, max_length)
        score = model(batch).item()  # .item() requires exactly one element
    return score * 100  # express as a percentage
        
# GUI construction
class App:
    """Tkinter window for the antioxidant-protein prediction tool.

    The user supplies one or more checkpoint paths (comma-separated, typed or
    picked through a file dialog) and an amino-acid sequence. Each checkpoint
    is loaded into a fresh ``CNNLSTM`` and its prediction is appended to the
    result box.

    NOTE: paths are stored as a single comma-separated string, so a path that
    itself contains a comma cannot be represented.
    """

    # Hyperparameters used to rebuild the network before a checkpoint's state
    # dict is loaded into it.
    EMBEDDING_DIM = 8
    CNN_FILTERS = 64
    LSTM_HIDDEN = 128
    NUM_CLASSES = 1  # the model outputs a single probability

    def __init__(self, root):
        """Build all widgets inside ``root`` (a ``tk.Tk`` or compatible window)."""
        self.root = root
        self.max_length = 1000  # maximum sequence length fed to the model

        # Window title and size
        self.root.title("抗氧化蛋白预测工具")
        self.root.geometry("1200x800")  # tall enough to show many results

        # Entry holding the model path(s)
        tk.Label(self.root, text="模型路径:").pack()
        self.model_path_entry = tk.Entry(self.root, width=150)
        self.model_path_entry.pack()

        # Button that opens a multi-file chooser
        self.browse_button = tk.Button(self.root, text="浏览多个模型", command=self.browse_models)
        self.browse_button.pack()

        # Text box for the amino-acid sequence
        tk.Label(self.root, text=f"请输入氨基酸序列 (最大长度：{self.max_length}):").pack()
        self.sequence_text = tk.Text(self.root, height=5, width=150)
        self.sequence_text.pack()

        # Button that starts the predictions
        self.predict_button = tk.Button(self.root, text="运行预测", command=self.run_predictions)
        self.predict_button.pack()

        # Scrollbar for the result box
        self.scrollbar = tk.Scrollbar(self.root)
        self.scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

        # Result box, wired to the scrollbar in both directions
        self.result_text = tk.Text(self.root, height=40, width=150, yscrollcommand=self.scrollbar.set)
        self.result_text.pack()
        self.scrollbar.config(command=self.result_text.yview)

    @staticmethod
    def _parse_model_paths(raw):
        """Split comma-separated entry text into a list of non-empty paths."""
        # "".split(",") is [''], so blanks must be filtered out explicitly or
        # an empty entry would look like one (empty) model path.
        return [part.strip() for part in raw.split(",") if part.strip()]

    @staticmethod
    def _normalize_sequence(raw):
        """Remove every whitespace character and upper-case the residues."""
        # Pasted sequences are often wrapped over several lines or written in
        # lower case; the lookup table only has upper-case keys, so without
        # this those characters would all be encoded as "unknown".
        return "".join(raw.split()).upper()

    def browse_models(self):
        """Let the user pick checkpoint files and put them in the path entry."""
        file_paths = filedialog.askopenfilenames()
        if not file_paths:
            # Dialog was cancelled: keep whatever is already in the entry.
            return
        self.model_path_entry.delete(0, tk.END)  # clear the current text
        self.model_path_entry.insert(0, ",".join(file_paths))  # comma-separated list

    def run_predictions(self):
        """Run every listed checkpoint on the entered sequence and show results.

        Shows an error dialog and returns if no model path or no sequence was
        given. A failure while loading or running one checkpoint is reported
        in the result box and does not stop the remaining checkpoints.
        """
        model_paths = self._parse_model_paths(self.model_path_entry.get())
        unknown_sequence = self._normalize_sequence(self.sequence_text.get("1.0", tk.END))

        # Validate the input
        if not model_paths or not unknown_sequence:
            messagebox.showerror("错误", "请确保填写所有必要信息。")
            return

        # Clear the previous results
        self.result_text.delete('1.0', tk.END)

        # +1 because the padding id 0 also needs a row in the embedding table
        vocab_size = len(aa_to_int) + 1

        for model_path in model_paths:
            # Broad catch on purpose: this is the per-model UI boundary, and
            # any failure should be shown to the user rather than abort the run.
            try:
                model = CNNLSTM(
                    vocab_size,
                    self.EMBEDDING_DIM,
                    self.CNN_FILTERS,
                    self.LSTM_HIDDEN,
                    self.NUM_CLASSES,
                )
                model = load_model(model_path, model)

                percentage = predict(model, unknown_sequence, self.max_length)

                # Show the prediction
                self.result_text.insert(tk.END, f"模型 {model_path} 的预测结果是：{percentage:.2f}%\n")
            except Exception as e:
                self.result_text.insert(tk.END, f"模型 {model_path} 预测失败：{str(e)}\n")

# Entry point
def main():
    """Create the Tk root window, build the App UI in it, and run the event loop."""
    window = tk.Tk()
    gui = App(window)  # builds all widgets inside the root window
    window.mainloop()  # blocks until the window is closed

# Start the GUI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
