In [1]:
# %%
import sys
import os
sys.path.append('../')
import torch
import pandas as pd
from webapp.utils import UniProtParserMysql, EasIFAInferenceAPI, retrain_ec_site_model_state_path, cmd, get_structure_html_and_active_data




In [2]:
# %%
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Inference wrapper built from the checkpoint path exported by webapp.utils.
ECSitePred = EasIFAInferenceAPI(
    model_checkpoint_path=retrain_ec_site_model_state_path,
    device=device,
)

# A MySQL warning emitted here can be ignored; it does not affect the prediction.
unprot_mysql_parser = UniProtParserMysql(mysql_config_path="./mysql_config.json")


def inference_from_uniprot_ids(uniprot_id_list:list, ECSitePred:EasIFAInferenceAPI, unprot_mysql_parser:UniProtParserMysql):
    """Predict active-site labels for every reaction record of each UniProt ID.

    For each ID the parser is queried for a per-reaction DataFrame. Each row's
    reaction and structure file are passed to ``ECSitePred.inference`` and the
    resulting labels are stored in a new ``predicted_results`` column.

    Args:
        uniprot_id_list: UniProt accessions to process, in order.
        ECSitePred: Inference API; this function uses its ``inference`` method,
            its ``max_enzyme_aa_length`` attribute and deletes its
            ``caculated_sequence`` attribute after every call.
        unprot_mysql_parser: Parser providing ``query_from_uniprot`` and an
            ``unprot_parser`` with the AlphaFold DB folder / download template.

    Returns:
        A DataFrame that stacks the per-ID query frames (original indices kept)
        with the added ``predicted_results`` column. IDs whose query returns no
        frame (or an empty one) are reported via ``print`` and skipped. Enzymes
        longer than ``ECSitePred.max_enzyme_aa_length`` are kept in the output
        with ``predicted_results`` set to ``None``. An empty DataFrame is
        returned when nothing could be collected.
    """
    # Collect per-ID frames and concatenate once at the end instead of growing
    # a DataFrame inside the loop.
    result_frames = []

    for uniprot_id in uniprot_id_list:
        query_data, _, _ = unprot_mysql_parser.query_from_uniprot(uniprot_id)
        # The parser hands back the ID it resolved; it replaces the loop value.
        uniprot_id, query_results_df, msg, _ = query_data
        if query_results_df is None:
            # f-string so a non-str id/msg cannot raise while reporting.
            print(f"{uniprot_id}: {msg}")
            continue
        if query_results_df.empty:
            # Nothing to predict on; avoids an IndexError on the length lookup.
            print(f"{uniprot_id}: query returned no reaction records")
            continue

        enzyme_aa_length = query_results_df["aa_length"].iloc[0]
        if ECSitePred.max_enzyme_aa_length < enzyme_aa_length:
            # Too long for the model: keep the rows with empty predictions so
            # the enzyme does not silently vanish from the returned table.
            print(
                f"{uniprot_id}: enzyme length {enzyme_aa_length} exceeds the model limit "
                f"{ECSitePred.max_enzyme_aa_length}; predicted_results set to None"
            )
            result_frames.append(
                query_results_df.assign(
                    predicted_results=[None] * len(query_results_df)
                )
            )
            continue

        predicted_results = []

        for row in query_results_df.itertuples():
            # Positional access: row[0] is the index. Per the column order shown
            # in this notebook's output, row[2] is rxn_smiles and row[3] is
            # pdb_fpath.
            rxn = row[2]
            enzyme_structure_path = row[3]
            if not os.path.exists(enzyme_structure_path):
                # Structure file missing locally: point at the AlphaFold DB
                # folder and run the download command built from the template.
                enzyme_structure_path = os.path.join(
                    unprot_mysql_parser.unprot_parser.alphafolddb_folder,
                    f"AF-{uniprot_id}-F1-model_v4.pdb",
                )
                cmd(
                    unprot_mysql_parser.unprot_parser.download_alphafolddb_url_template.format(
                        enzyme_structure_path, uniprot_id
                    )
                )

            pred_active_site_labels = ECSitePred.inference(
                rxn=rxn, enzyme_structure_path=enzyme_structure_path
            )
            # NOTE(review): presumably resets state cached by inference() so the
            # next call starts clean - confirm against EasIFAInferenceAPI.
            del ECSitePred.caculated_sequence
            predicted_results.append(pred_active_site_labels)

            # NOTE(review): the returned HTML / active-site table is not used
            # here; the call is kept only to preserve the original behaviour.
            # Drop it if the helper is confirmed to be side-effect free.
            get_structure_html_and_active_data(
                enzyme_structure_path=enzyme_structure_path,
                site_labels=pred_active_site_labels,
                view_size=(600, 600),
            )

        # assign() returns a new frame, leaving the parser's frame untouched.
        result_frames.append(
            query_results_df.assign(predicted_results=predicted_results)
        )

    if not result_frames:
        # pd.concat rejects an empty list; keep the "empty DataFrame" contract.
        return pd.DataFrame()
    return pd.concat(result_frames, axis=0)






Loaded checkpoint from /home/xiaoruiwang/data/ubuntu_work_beta/single_step_work/EasIFA_v2/checkpoints/enzyme_site_type_predition_model/train_in_uniprot_ecreact_cluster_split_merge_dataset_limit_100_at_2024-05-26-02-48-38/global_step_86000
[Errno 2] No such file or directory: './mysql_config.json'


In [3]:
# %%
# UniProt IDs to predict; replace this list with your own.
uniprot_id_list = [
    'O30144',
    'O14657',
    'O15269',
]



In [4]:
# %%
# Run the prediction for every requested UniProt ID and collect one table.
all_results_df = inference_from_uniprot_ids(
    uniprot_id_list=uniprot_id_list,
    ECSitePred=ECSitePred,
    unprot_mysql_parser=unprot_mysql_parser,
)

# Bare last expression so the notebook renders the table.
all_results_df


[Errno 2] No such file or directory: './mysql_config.json'
[Errno 2] No such file or directory: './mysql_config.json'
[Errno 2] No such file or directory: './mysql_config.json'


Unnamed: 0,ec,rxn_smiles,pdb_fpath,aa_length,predicted_results
0,7.3.2.6,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O...,/home/xiaoruiwang/data/ubuntu_work_beta/single...,240,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
0,UNK,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O...,/home/xiaoruiwang/data/ubuntu_work_beta/single...,336,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
0,2.3.1.50,CCCCCCCCCCCCCCCC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C...,/home/xiaoruiwang/data/ubuntu_work_beta/single...,473,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,UNK,CCCCCCCCCCCCCCCCCC(=O)SCCNC(=O)CCNC(=O)[C@H](O...,/home/xiaoruiwang/data/ubuntu_work_beta/single...,473,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,UNK,CCCCCCCCCCCCCC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C...,/home/xiaoruiwang/data/ubuntu_work_beta/single...,473,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,UNK,CCCCCCCCCCCC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(...,/home/xiaoruiwang/data/ubuntu_work_beta/single...,473,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
