In [2]:
%pip install pyhmmer

Collecting pyhmmer
  Downloading pyhmmer-0.11.4-cp312-cp312-macosx_11_0_arm64.whl.metadata (16 kB)
Downloading pyhmmer-0.11.4-cp312-cp312-macosx_11_0_arm64.whl (4.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.6/4.6 MB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
[?25hInstalling collected packages: pyhmmer
Successfully installed pyhmmer-0.11.4
Note: you may need to restart the kernel to use updated packages.


In [1]:
import re
import warnings
from typing import Dict, List

import pyhmmer

def hmm_consensus_to_fasta(hmm_file_path: str, outpath: str) -> Dict[str, str]:
    """
    Extract all HMM names and consensus seqs from a multi-HMM flat file.

    Every HMM that carries a consensus is written to `outpath` as one FASTA
    record: the header is ``>{full name} {parsed annotation}`` and the
    sequence line is the upper-cased consensus. HMMs without a consensus are
    skipped with a warning.

    Parameters:
    -----------
    hmm_file_path : str
        Path to the HMM file (opened with ``pyhmmer.plan7.HMMFile``)
    outpath: str
        path to out .fasta file (overwritten if it exists)
    Returns:
    --------
    dict
        HMM ID: consensus, with the consensus exactly as stored in the HMM
        (not upper-cased). If a name occurs more than once, the first
        consensus is kept; every occurrence is still written to the file.
    """

    hmm_dict: Dict[str, str] = {}

    # Open the HMM file first: if it is missing or unreadable, the error is
    # raised before `outpath` gets created or truncated.
    with pyhmmer.plan7.HMMFile(hmm_file_path) as hmm_file, open(outpath, "w") as outf:
        for hmm in hmm_file:

            # full ID
            hmm_name = hmm.name.decode()

            # `consensus` is None when the HMM has no consensus line; calling
            # .upper() on it would abort the run halfway through the file.
            consensus_seq = hmm.consensus
            if consensus_seq is None:
                warnings.warn(
                    f"HMM {hmm_name!r} in {hmm_file_path} has no consensus; skipped",
                    stacklevel=2,
                )
                continue

            # parse name: drop everything up to and including the first "-",
            # strip trailing "-" characters, lower-case the rest
            annot_name = re.sub("(.*?)-", "", hmm_name, count=1).rstrip("-").lower()

            # print to out file in .fasta format
            print(f">{hmm_name} {annot_name}", file=outf)
            print(consensus_seq.upper(), file=outf)

            # first occurrence of a name wins
            hmm_dict.setdefault(hmm_name, consensus_seq)
    return hmm_dict

In [3]:
# Consensus FASTA for the Cenote-Taker3 v3.1.1 Virion_HMMs set.
virion_cons = hmm_consensus_to_fasta(
    hmm_file_path="/Users/michaeltisza/mike_tisza/github_repos/Cenote-Taker3/hmmscan_DBs/v3.1.1/Virion_HMMs.h3m",
    outpath="../../data/annotation/hmms/ct3_v311_virion_consensus.faa",
)

In [4]:
# Consensus FASTA for the Cenote-Taker3 v3.1.1 DNA_rep_HMMs set.
dnrep_cons = hmm_consensus_to_fasta(
    hmm_file_path="/Users/michaeltisza/mike_tisza/github_repos/Cenote-Taker3/hmmscan_DBs/v3.1.1/DNA_rep_HMMs.h3m",
    outpath="../../data/annotation/hmms/ct3_v311_dnarep_consensus.faa",
)

In [5]:
# Consensus FASTA for the Cenote-Taker3 v3.1.1 RDRP_HMMs set.
rdrp_cons = hmm_consensus_to_fasta(
    hmm_file_path="/Users/michaeltisza/mike_tisza/github_repos/Cenote-Taker3/hmmscan_DBs/v3.1.1/RDRP_HMMs.h3m",
    outpath="../../data/annotation/hmms/ct3_v311_rdrp_consensus.faa",
)

In [6]:
# Consensus FASTA for the Cenote-Taker3 v3.1.1 phrogs_for_ct set.
phrog_cons = hmm_consensus_to_fasta(
    hmm_file_path="/Users/michaeltisza/mike_tisza/github_repos/Cenote-Taker3/hmmscan_DBs/v3.1.1/phrogs_for_ct.h3m",
    outpath="../../data/annotation/hmms/ct3_v311_phrogs_consensus.faa",
)

In [7]:
# Consensus FASTA for the Cenote-Taker3 v3.1.1 Useful_Annotation_HMMs set.
useful_cons = hmm_consensus_to_fasta(
    hmm_file_path="/Users/michaeltisza/mike_tisza/github_repos/Cenote-Taker3/hmmscan_DBs/v3.1.1/Useful_Annotation_HMMs.h3m",
    outpath="../../data/annotation/hmms/ct3_v311_useful_models_consensus.faa",
)

In [8]:
# Consensus FASTA for the all_phrogs HMMs in the local pharokka_db directory.
pharokka_cons = hmm_consensus_to_fasta(
    hmm_file_path="/Users/michaeltisza/mike_tisza/sandbox/pharokka_db/all_phrogs.h3m",
    outpath="../../data/annotation/hmms/pharokka_phrogs_consensus.faa",
)

In [9]:
# Consensus FASTA for VOG.hmm.gz from the metacerberus environment's DB directory.
metc_vog_cons = hmm_consensus_to_fasta(
    hmm_file_path="/Users/michaeltisza/miniforge3/envs/metacerberus/lib/python3.12/site-packages/meta_cerberus/DB/VOG.hmm.gz",
    outpath="../../data/annotation/hmms/metc_vog_consensus.faa",
)

In [10]:
# Consensus FASTA for TIGRFAM.hmm.gz from the metacerberus environment's DB directory.
metc_tigr_cons = hmm_consensus_to_fasta(
    hmm_file_path="/Users/michaeltisza/miniforge3/envs/metacerberus/lib/python3.12/site-packages/meta_cerberus/DB/TIGRFAM.hmm.gz",
    outpath="../../data/annotation/hmms/metc_tigr_consensus.faa",
)

In [11]:
# Consensus FASTA for COG.hmm.gz from the metacerberus environment's DB directory.
metc_cog_cons = hmm_consensus_to_fasta(
    hmm_file_path="/Users/michaeltisza/miniforge3/envs/metacerberus/lib/python3.12/site-packages/meta_cerberus/DB/COG.hmm.gz",
    outpath="../../data/annotation/hmms/metc_cog_consensus.faa",
)

In [16]:
# Consensus FASTA for KOFam_all.hmm.gz from the metacerberus environment's DB directory.
metc_KOFAM_cons = hmm_consensus_to_fasta(
    hmm_file_path="/Users/michaeltisza/miniforge3/envs/metacerberus/lib/python3.12/site-packages/meta_cerberus/DB/KOFam_all.hmm.gz",
    outpath="../../data/annotation/hmms/metc_kofam_consensus.faa",
)

In [14]:
# Consensus FASTA for GVDB.hmm.gz from the metacerberus environment's DB directory.
metc_GVDB_cons = hmm_consensus_to_fasta(
    hmm_file_path="/Users/michaeltisza/miniforge3/envs/metacerberus/lib/python3.12/site-packages/meta_cerberus/DB/GVDB.hmm.gz",
    outpath="../../data/annotation/hmms/metc_GVDB_consensus.faa",
)

In [15]:
# Consensus FASTA for PVOG.hmm.gz from the metacerberus environment's DB directory.
metc_pvog_cons = hmm_consensus_to_fasta(
    hmm_file_path="/Users/michaeltisza/miniforge3/envs/metacerberus/lib/python3.12/site-packages/meta_cerberus/DB/PVOG.hmm.gz",
    outpath="../../data/annotation/hmms/metc_PVOG_consensus.faa",
)

In [17]:
# Consensus FASTA for PFAM.hmm.gz from the metacerberus environment's DB directory.
metc_pfam_cons = hmm_consensus_to_fasta(
    hmm_file_path="/Users/michaeltisza/miniforge3/envs/metacerberus/lib/python3.12/site-packages/meta_cerberus/DB/PFAM.hmm.gz",
    outpath="../../data/annotation/hmms/metc_pfam_consensus.faa",
)

In [18]:
# Consensus FASTA for PGAP.hmm.gz from the metacerberus environment's DB directory.
metc_pgap_cons = hmm_consensus_to_fasta(
    hmm_file_path="/Users/michaeltisza/miniforge3/envs/metacerberus/lib/python3.12/site-packages/meta_cerberus/DB/PGAP.hmm.gz",
    outpath="../../data/annotation/hmms/metc_pgap_consensus.faa",
)

In [19]:
# Consensus FASTA for PHROG.hmm.gz from the metacerberus environment's DB directory.
metc_phrog_cons = hmm_consensus_to_fasta(
    hmm_file_path="/Users/michaeltisza/miniforge3/envs/metacerberus/lib/python3.12/site-packages/meta_cerberus/DB/PHROG.hmm.gz",
    outpath="../../data/annotation/hmms/metc_phrog_consensus.faa",
)

In [2]:
# Consensus FASTA for the Cenote-Taker2 rna_virus_rdrp_capsid_hmms1 set.
ct2_rna_cons = hmm_consensus_to_fasta(
    hmm_file_path="/Users/michaeltisza/mike_tisza/github_repos/Cenote-Taker2/hmmscan_DBs/rna_virus_rdrp_capsid_hmms1.h3m",
    outpath="../../data/annotation/hmms/ct2_rna_consensus.faa",
)

In [3]:
# Consensus FASTA for the Cenote-Taker2 useful_hmms_baits_and_not2a set.
ct2_useful_cons = hmm_consensus_to_fasta(
    hmm_file_path="/Users/michaeltisza/mike_tisza/github_repos/Cenote-Taker2/hmmscan_DBs/useful_hmms_baits_and_not2a.h3m",
    outpath="../../data/annotation/hmms/ct2_useful_consensus.faa",
)

In [4]:
# Consensus FASTA for the Cenote-Taker2 virus_specific_baits_plus_missed6a set.
ct2_specific_cons = hmm_consensus_to_fasta(
    hmm_file_path="/Users/michaeltisza/mike_tisza/github_repos/Cenote-Taker2/hmmscan_DBs/virus_specific_baits_plus_missed6a.h3m",
    outpath="../../data/annotation/hmms/ct2_specific_consensus.faa",
)