In [None]:
#Preferrably use High-RAM on colab, os.cpu_count() >= 8
import os
import subprocess
from google.colab import drive
drive.mount('/content/drive')
%pip install Bio
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
import shutil

# Running ***Cawlign***

In [None]:
# Install dependencies
!apt-get update -qq
!apt-get install -y cmake g++ make git

# Clone the repo
!git clone https:/github.com/veg/cawlign.git
%cd cawlign

# Build cawlign
!cmake ./
!make

# Install (optional, otherwise you can just call ./cawlign directly)
!make install


In [None]:
reference_dir = "/content/drive/MyDrive/Paper_Results/Reference_files/NT Files"


def remove_last_three_bases(reference_dir):
    """Trim a terminal stop codon from every FASTA reference in ``reference_dir``, in place.

    Each ``*.fasta`` file is read as a single record. The last three bases are
    removed only when they spell a stop codon (TAA, TAG or TGA; case-insensitive,
    with U treated as T). Files that do not end in a stop codon are left
    untouched, which makes repeated runs of this cell safe: the original
    unconditional slice shortened every reference by three more bases on each run.

    Args:
        reference_dir: Directory containing the single-record reference FASTA files.

    Raises:
        ValueError: Propagated from ``SeqIO.read`` when a file does not contain
            exactly one record.
    """
    stop_codons = {"TAA", "TAG", "TGA"}

    # Loop through all fasta files in the directory
    for file in os.listdir(reference_dir):
        if not file.endswith(".fasta"):
            continue
        file_path = os.path.join(reference_dir, file)

        record = SeqIO.read(file_path, "fasta")

        # Sequences shorter than three bases can never match a stop codon here.
        last_codon = str(record.seq[-3:]).upper().replace("U", "T")
        if last_codon not in stop_codons:
            print(f"No terminal stop codon, left unchanged: {file}")
            continue

        record.seq = record.seq[:-3]
        SeqIO.write(record, file_path, "fasta")
        print(f"Trimmed stop codon in place: {file}")


remove_last_three_bases(reference_dir)

In [None]:
# Paths
cawlign_exec = "cawlign"

scoring_matrix = "/content/cawlign/res/scoring/BLOSUM62_reso_copy"


input_dir = "/content/drive/MyDrive/Paper_Results/Input_files_Avian_All_NT"
output_dir = "/content/drive/MyDrive/Paper_Results/Cawligner_output_files/Avian"

#input_dir = "/content/drive/MyDrive/Paper_Results/Input_files_Mammal_All_NT"
#output_dir = "/content/drive/MyDrive/Paper_Results/Cawligner_output_files/Mammal"

#After Removing the stop codon
reference_dir = "/content/drive/MyDrive/Paper_Results/Reference_files/NT Files"

# Genes that share one segment-level file elsewhere in this notebook (the prior-clade
# lookup in the tn93 section maps NS1/NS2 -> NS and M1/M2 -> M).
segment_of_gene = {"NS1": "NS", "NS2": "NS", "M1": "M", "M2": "M"}


def find_input_fasta(input_dir, gene):
    """Return the path of the unaligned input FASTA for ``gene``, or None.

    The original cell used ``input_path`` without ever assigning it, so the
    loop failed with NameError on a fresh kernel.

    NOTE(review): the naming scheme of the files in ``input_dir`` is not
    visible in this notebook. This lookup assumes the gene name (or, for
    NS1/NS2/M1/M2, the segment name) appears as one of the "_"-separated
    tokens of the file name - confirm against the real file names.

    Args:
        input_dir: Directory holding the unaligned nucleotide FASTA files.
        gene: Gene name taken from the reference file name.

    Returns:
        The path of the single matching file, or None when nothing matches or
        the match is ambiguous (a message is printed in the ambiguous case).
    """
    fasta_files = sorted(f for f in os.listdir(input_dir) if f.endswith(".fasta"))
    # Try the gene name first, then the segment name; dict.fromkeys drops a duplicate.
    for wanted in dict.fromkeys((gene, segment_of_gene.get(gene, gene))):
        hits = [f for f in fasta_files if wanted in f[:-len(".fasta")].split("_")]
        if len(hits) == 1:
            return os.path.join(input_dir, hits[0])
        if len(hits) > 1:
            print(f"Ambiguous input for {gene}: {hits}")
            return None
    return None


# Ensure output folder exists
os.makedirs(output_dir, exist_ok=True)

# One alignment per reference file; the gene name is the text before the first "_"
for file in os.listdir(reference_dir):
    if not file.endswith(".fasta"):
        continue

    gene = file.split("_")[0]
    print("Gene:", gene)
    output_path = os.path.join(output_dir, f"{gene}_aligned.fasta")
    reference_path = os.path.join(reference_dir, f"{gene}_ref.fasta")

    # Check if reference exists (a file not named <gene>_ref.fasta fails this check)
    if not os.path.exists(reference_path):
        print(f"Skipping {gene} ‚Äî reference file not found: {reference_path}")
        continue

    input_path = find_input_fasta(input_dir, gene)
    if input_path is None:
        print(f"Skipping {gene} - no unambiguous input FASTA found in {input_dir}")
        continue

    # Build the command
    cmd = [
        cawlign_exec,
        "-r", reference_path,
        "-o", output_path,
        "-s", scoring_matrix,
        "-t", "codon",
        input_path
    ]

    print(f"Running cawlign for gene: {gene}")

    try:
        result = subprocess.run(
            cmd,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True  # returns strings instead of bytes
        )
        print("‚úÖ cawlign output:")
        print(result.stdout)

    except subprocess.CalledProcessError as e:
        # A failed gene is reported and the loop moves on to the next one.
        print("‚ùå cawlign failed!")
        print("Command:", e.cmd)
        print("Return code:", e.returncode)
        print("Stdout:\n", e.stdout)
        print("Stderr:\n", e.stderr)

print("Done")


# Running the ***tn93-cluster***

In [None]:
!git clone https:/github.com/veg/tn93.git
%cd tn93
!cmake ./
!make install

In [None]:
# === PATHS ===
tn93_exec = "tn93-cluster"   # <-- update this

species = "Mammal"
print(species)

# All folders hang off the same Drive root; the species name selects the sub-folder.
paper_root = "/content/drive/MyDrive/Paper_Results"
aligned_nt_dir = f"{paper_root}/Cawligner_output_files/{species}"
prior_clade_dir = f"{paper_root}/Input_files_{species}_Prior_Clades_Compiled_NT"
filtered_nt_dir = f"{aligned_nt_dir}/Prior_clades_filtered"
current_nt_dir = f"{aligned_nt_dir}/Current_clades_filtered"
tn93_output_dir = f"{paper_root}/TN93_results/{species}"
print(prior_clade_dir)

# tn93-cluster parameters
tn93_params = ["-l", "500", "-t", "0.005", "-f"]

# Ensure output folders exist
for _needed_dir in (filtered_nt_dir, aligned_nt_dir, tn93_output_dir, current_nt_dir):
    os.makedirs(_needed_dir, exist_ok=True)

# Set to False to only split each alignment into background/focal files and skip clustering.
# (The original cell had a stray `continue` that made the tn93 step unreachable.)
run_tn93 = True


def seq_filter(aligned_nt_fasta, prior_clade_fasta, filtered_fasta_path, current_fasta_path):
    """
    Split an aligned FASTA into background (prior-clade) and focal (current-clade) records.

    A record of ``aligned_nt_fasta`` is "background" when its ID also occurs in
    ``prior_clade_fasta``; every other record is "focal".

    Args:
        aligned_nt_fasta: Path of the aligned nucleotide FASTA to split.
        prior_clade_fasta: Path of the FASTA whose record IDs define the background set.
        filtered_fasta_path: Output path for the background records.
        current_fasta_path: Output path for the focal records.

    Returns:
        Tuple ``(filtered_nt_records, skipped_nt_records)``: the background and
        focal records, in input order.
    """
    # Only the IDs of the prior-clade file are needed, so keep a set rather than the sequences.
    prior_ids = {rec.id for rec in SeqIO.parse(prior_clade_fasta, "fasta")}

    filtered_nt_records = []
    skipped_nt_records = []
    for nt_record in SeqIO.parse(aligned_nt_fasta, "fasta"):
        if nt_record.id in prior_ids:
            filtered_nt_records.append(nt_record)
        else:
            skipped_nt_records.append(nt_record)

    # Save both: the background file is the input of the tn93-cluster step.
    SeqIO.write(filtered_nt_records, filtered_fasta_path, "fasta")
    SeqIO.write(skipped_nt_records, current_fasta_path, "fasta")

    count_kept = len(filtered_nt_records)
    count_skips = len(skipped_nt_records)
    # Every parsed record lands in exactly one list, so the total needs no second parse.
    total = count_kept + count_skips
    print(f"‚úÖ Done: Background Clade {count_kept}, Focal Clade {count_skips}, total in compiled file: {total}")
    return filtered_nt_records, skipped_nt_records


# === MAIN PIPELINE ===
for file in os.listdir(aligned_nt_dir):
    if not file.endswith(".fasta"):
        continue

    aligned_fasta_path = os.path.join(aligned_nt_dir, file)
    gene = file.split("_")[0]
    #if gene != "HA":  # <-- optional filter
    #    continue
    print(gene)

    # NS1/NS2 and M1/M2 share one segment-level prior-clade file.
    if gene == "NS1" or gene == "NS2":
        prior_clade_path = os.path.join(prior_clade_dir, f"Compiled_prior_NS.fasta")
    elif gene == "M1" or gene == "M2":
        prior_clade_path = os.path.join(prior_clade_dir, f"Compiled_prior_M.fasta")
    else:
        prior_clade_path = os.path.join(prior_clade_dir, f"Compiled_prior_{gene}.fasta")

    if not os.path.exists(prior_clade_path):
        print(f"Skipping {gene} - prior-clade file not found: {prior_clade_path}")
        print()
        continue

    filtered_fasta_path = os.path.join(filtered_nt_dir, f"Filtered_aligned_{gene}.fasta")
    current_fasta_path = os.path.join(current_nt_dir, f"Filtered_current_{gene}.fasta")

    # Step 1: split into background / focal records and save both files
    filtered_nt_records, skipped_nt_records = seq_filter(aligned_fasta_path, prior_clade_path,
                                                         filtered_fasta_path, current_fasta_path)

    if not run_tn93:
        continue
    if not filtered_nt_records:
        print(f"Skipping tn93-cluster for {gene} - no background sequences")
        print()
        continue

    # Step 2: run tn93-cluster on the filtered ones; its stdout is saved as the result file
    tn93_output_path = os.path.join(tn93_output_dir, f"{gene}_tn93.fasta")
    cmd = [tn93_exec] + tn93_params + [filtered_fasta_path]

    print(f"üöÄ Running tn93-cluster for {gene}")
    try:
        result = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=True
        )
        with open(tn93_output_path, "w") as out_f:
            out_f.write(result.stdout)

        print(f"‚úÖ {tn93_exec} finished successfully")

    except subprocess.CalledProcessError as e:
        print(f"‚ùå {tn93_exec} failed!")
        print("Command:", e.cmd)
        print("Return code:", e.returncode)
        print("Stdout:\n", e.stdout)
        print("Stderr:\n", e.stderr)
    print()
print("Done")

# Running ***HyPhy cln***

*Before running a HyPhy tool, make sure its input files have already been copied into the HyPhy directory (`/content/hyphy`).*

In [None]:
#Check out the hyphy github page to use the latest version
!git clone --branch 2.5.83 https:/github.com/veg/hyphy.git
%cd hyphy
!cmake ./
!make -j $(nproc)
!make install

In [None]:

species = "Avian"  # or "Mammal"
print("Species:", species)

# Input and output both live inside the local HyPhy directory.
hyphy_root = "/content/hyphy"
current_nt_dir = f"{hyphy_root}/{species}/Current_clades_filtered"
output_dir = f"{hyphy_root}/cln_results/{species}"

# make sure output dir exists
os.makedirs(output_dir, exist_ok=True)

for file in os.listdir(current_nt_dir):
    # only fasta files
    if not file.endswith(".fasta"):
        continue

    input_file = os.path.join(current_nt_dir, file)
    #gene = file.split("_")[0] #Filtered_current_HA.fasta

    # Filtered_current_HA.fasta -> Filtered_current_HA_cln.fasta
    cleaned_name = file.replace(".fasta", "_cln.fasta")
    output_file = os.path.join(output_dir, cleaned_name)

    print(f"Running hyphy cln on {file} ...")

    # `hyphy cln` is driven through stdin, one answer per line.
    # NOTE(review): "1" and "3" are menu choices of the interactive prompts
    # (presumably genetic code and filtering mode) - confirm against `hyphy cln`.
    prompt_answers = f"1\n{input_file}\n3\n{output_file}\n"
    cmd = ["hyphy", "cln"]
    proc = subprocess.run(cmd, input=prompt_answers, text=True, capture_output=True)

    if proc.returncode == 0:
        print(f"‚úÖ Finished {file}, output -> {output_file}")
    else:
        print(f"‚ùå Error on {file}: {proc.stderr}")


# ***Combining Files***

In [None]:
# Label focal-clade sequences: in every *_combined.fasta alignment, each record whose ID
# also appears in the matching hyphy-cln output gets the suffix "_2.3.3.4b".
# base_in and base_out are the same folder, so the alignments are rewritten in place.
folders = ["Avian", "Mammal"]

base_in = "/content/drive/MyDrive/Paper_Results/IQ_Tree_Input/"
base_filtered = "/content/drive/MyDrive/Paper_Results/cln_results/"
base_out = "/content/drive/MyDrive/Paper_Results/IQ_Tree_Input/"  # save new files here
os.makedirs(base_out, exist_ok=True)

for folder in folders:
    aln_dir = os.path.join(base_in, folder)
    filt_dir = os.path.join(base_filtered, folder)
    out_dir = os.path.join(base_out, folder)
    os.makedirs(out_dir, exist_ok=True)

    # only the combined FASTAs of the alignment input are processed
    combined_names = [name for name in os.listdir(aln_dir) if name.endswith("_combined.fasta")]
    for fname in combined_names:
        aln_path = os.path.join(aln_dir, fname)

        # the cln output that belongs to this alignment is located by gene name
        gene = fname.replace("_combined.fasta", "")
        filt_fname = f"Filtered_current_{gene}_cln.fasta"
        filt_path = os.path.join(filt_dir, filt_fname)

        if not os.path.exists(filt_path):
            print(f"‚ö†Ô∏è No filtered file for {folder}/{gene}, skipping")
            continue

        # IDs present in the cln output mark the records to label
        filtered_ids = set()
        for filt_rec in SeqIO.parse(filt_path, "fasta"):
            filtered_ids.add(filt_rec.id)

        # Read the whole alignment into memory before writing, because the
        # output path is the file being read.
        updated_records = list(SeqIO.parse(aln_path, "fasta"))
        for rec in updated_records:
            if rec.id not in filtered_ids:
                continue
            rec.id = rec.id + "_2.3.3.4b"
            rec.description = rec.id  # keep description in sync

        # save updated alignment FASTA
        out_path = os.path.join(out_dir, fname)
        SeqIO.write(updated_records, out_path, "fasta")
        print(f"‚úÖ {folder}/{gene}: saved labeled FASTA -> {out_path}")



# Running ***IQTree 2***

In [None]:
# Download IQ-TREE 2.4.0 for Linux (Intel)

!wget -q https:/github.com/iqtree/iqtree2/releases/download/v2.4.0/iqtree-2.4.0-Linux-intel.tar.gz -O iqtree2.tgz

# Extract
!tar -xzf iqtree2.tgz

# Move binary
!mv iqtree-2.4.0-Linux-intel/bin/iqtree2 /usr/local/bin/

# Make it executable
!chmod +x /usr/local/bin/iqtree2

In [None]:
#cd into hyphy to use the labeltree.bf script
#%cd hyphy/

In [None]:
import os
import subprocess
import time

species = ["Mammal", "Avian"]
hyphy_dir = "/content/hyphy"
base_out = "/content/hyphy/IQ_Tree_output"
os.makedirs(base_out, exist_ok=True)

# Detect available CPU cores (os.cpu_count() can return None, hence the fallback to 1)
threads = str(os.cpu_count() or 1)
print(f"Using {threads} threads for IQ-TREE")

start_total = time.time()

for folder in species:
    print(f"={folder}=")
    iqtree_input_path = f"/content/hyphy/IQ_Tree_Input/{folder}"
    iqtree_out_dir = os.path.join(base_out, "IQ_Tree_tree", folder)
    hyphy_out_dir = os.path.join(base_out, "Hyphy_labeled_tree", folder)
    os.makedirs(iqtree_out_dir, exist_ok=True)
    os.makedirs(hyphy_out_dir, exist_ok=True)

    # Drive destinations for this species (results are copied there gene by gene)
    drive_dest = f"/content/drive/MyDrive/Paper_Results/IQ_Tree_output/IQ_Tree_tree/{folder}"
    drive_dest_hyphy = f"/content/drive/MyDrive/Paper_Results/IQ_Tree_output/Hyphy_labeled_tree/{folder}"
    os.makedirs(drive_dest, exist_ok=True)
    os.makedirs(drive_dest_hyphy, exist_ok=True)

    for fname in os.listdir(iqtree_input_path):
        # Every step below belongs to one alignment, so other files are skipped up front.
        # (In the original, two debug `break`s and a mis-indented tail meant that
        # IQ-TREE and the labelling step never ran.)
        if not fname.endswith("_combined.fasta"):
            continue

        aln_path = os.path.join(iqtree_input_path, fname)
        gene = fname.replace("_combined.fasta", "")

        print(f"\n=== Processing {folder}/{gene} ===")

        start_gene = time.time()

        # Create per-gene output folders
        gene_iqtree_dir = os.path.join(iqtree_out_dir, gene)
        gene_hyphy_dir = os.path.join(hyphy_out_dir, gene)
        os.makedirs(gene_iqtree_dir, exist_ok=True)
        os.makedirs(gene_hyphy_dir, exist_ok=True)

        # === Step 1: Run IQ-TREE ===
        print("  ‚Üí Running IQ-TREE")
        prefix = os.path.join(gene_iqtree_dir, gene)
        try:
            subprocess.run([
                "iqtree2",
                "-s", aln_path,
                "-m", "GTR+I+G",
                "-T", threads,  # use the detected core count announced above
                "-pre", prefix
            ], check=True)
        except subprocess.CalledProcessError as e:
            # Without a tree there is nothing to label; move on to the next gene.
            print(f"IQ-TREE failed for {folder}/{gene} (return code {e.returncode}), skipping")
            continue

        print("  ‚Üí IQ-TREE done")

        # === Step 1.5: Copy this gene's IQ-TREE output folder to Drive ===
        subprocess.run([
            "cp", "-r",
            gene_iqtree_dir,
            drive_dest
        ], check=True)

        print(f"  ‚Üí Copied {gene_iqtree_dir} to {drive_dest}")

        treefile = prefix + ".treefile"
        labeled_tree = os.path.join(gene_hyphy_dir, gene + "_labeled.tree")

        # === Step 2: Run HyPhy LabelTree ===
        # The batch-file path is relative to the HyPhy checkout, so the command runs
        # with that folder as its working directory; all other paths are absolute.
        print("  ‚Üí Running HyPhy LabelTree")
        try:
            subprocess.run([
                "hyphy", "res/TemplateBatchFiles/lib/label-tree.bf",
                "--tree", treefile,
                "--regexp", "2.3.3.4b",
                "--label", "2.3.3.4b",
                "--internal-nodes", "All descendants",
                "--leaf-nodes", "Label",
                "--output", labeled_tree
            ],
            check=True,
            text=True,
            capture_output=True,
            cwd=hyphy_dir
            )
            print(f"‚úÖ {folder}/{gene}: labeled tree saved -> {labeled_tree}")
        except subprocess.CalledProcessError as e:
            print(f"‚ùå HyPhy failed for {folder}/{gene}")
            print("Return code:", e.returncode)
            print("Standard output:\n", e.stdout)
            print("Standard error:\n", e.stderr)

        # === Step 2.5: Copy this gene's HyPhy labeled results to Drive ===
        subprocess.run([
            "cp", "-r",
            gene_hyphy_dir,
            drive_dest_hyphy
        ], check=True)

        print(f"  ‚Üí Copied {gene_hyphy_dir} to {drive_dest_hyphy}")

        elapsed_gene = time.time() - start_gene
        print(f"‚è± Runtime for {gene}: {elapsed_gene:.2f} seconds\n")

elapsed_total = time.time() - start_total
print(f"\n=== All jobs finished in {elapsed_total/60:.2f} minutes ===")




# Running HyPhy ***BUSTED***

In [None]:
#Move Hyphy_labeled_tree to cpu
!cp -r "/content/drive/MyDrive/Paper_Results/IQ_Tree_output/Hyphy_labeled_tree" "/content/hyphy/"
#Move IQ_Tree_Input to cpu
!cp -r "/content/drive/MyDrive/Paper_Results/IQ_Tree_Input/" "/content/hyphy/"
#To move the alignment input from drive to cpu
!cp -r "/content/drive/MyDrive/Paper_Results/Busted_Alignment_Input/" "/content/hyphy"
!mkdir -p "/content/hyphy/Busted_results/"

In [None]:
# IQ-TREE replaces special characters in FASTA IDs with underscores. The same
# substitution is applied here (presumably so the alignment IDs keep matching the tree).
from Bio import SeqIO

fasta_in = "/content/hyphy/IQ_Tree_Input/Avian/PB2_combined.fasta"
fasta_out = "/content/hyphy/Busted_Alignment_Input/Avian/PB2_combined.fasta"

# Translation table turning each unwanted character into "_"
underscore_table = str.maketrans({unwanted: "_" for unwanted in [",", "(", ")", "'", '"']})
count_replaced = 0

with open(fasta_out, "w") as out:
    for record in SeqIO.parse(fasta_in, "fasta"):
        original_id = record.id
        cleaned_id = original_id.translate(underscore_table)
        # The ID changes exactly when it contained at least one unwanted character.
        if cleaned_id != original_id:
            record.id = cleaned_id
            count_replaced += 1
        # An empty description makes the header line carry the ID only.
        record.description = ""
        SeqIO.write(record, out, "fasta")

print(f"‚úÖ Done. {count_replaced} record IDs contained commas or parentheses and were replaced.")


In [None]:
!cp -r "/content/drive/MyDrive/Paper_Results/Busted_results/Avian/PB2.json" "/content/hyphy/Busted_results/Avian/"
!cp -r "/content/drive/MyDrive/Paper_Results/Busted_results/Avian/PB2_intermediate.json" "/content/hyphy/Busted_results/Avian/"

In [None]:
#Sample gene: run BUSTED on the Avian PB2 alignment with its labeled tree,
#testing the branches labeled "2.3.3.4b".
#--intermediate-fits names a JSON file for intermediate fits
!hyphy busted \
  --alignment "/content/hyphy/Busted_Alignment_Input/Avian/PB2_combined.fasta" \
  --tree "/content/hyphy/Hyphy_labeled_tree/Avian/PB2/PB2_labeled.tree" \
  --branches "2.3.3.4b" \
  --output "/content/hyphy/Busted_results/Avian/PB2.json" \
  --kill-zero-lengths Yes \
  --intermediate-fits "/content/hyphy/Busted_results/Avian/PB2_intermediate.json"


#Copy the Avian alignment input from the local disk back to Drive
!cp -r "/content/hyphy/Busted_Alignment_Input/Avian" "/content/drive/MyDrive/Paper_Results/Busted_Alignment_Input/"

#Copy the intermediate-fits JSON from the local disk to Drive
!cp "/content/hyphy/Busted_results/Avian/PB2_intermediate.json" "/content/drive/MyDrive/Paper_Results/Busted_results/Avian/"

#Copy the final BUSTED JSON output from the local disk to Drive
!cp "/content/hyphy/Busted_results/Avian/PB2.json" "/content/drive/MyDrive/Paper_Results/Busted_results/Avian/"

In [None]:
#These three commands repeat the copy commands at the end of the BUSTED cell above,
#so the results can be saved to Drive without re-running BUSTED.

#Copy the Avian alignment input from the local disk back to Drive
!cp -r "/content/hyphy/Busted_Alignment_Input/Avian" "/content/drive/MyDrive/Paper_Results/Busted_Alignment_Input/"

#Copy the intermediate-fits JSON from the local disk to Drive
!cp "/content/hyphy/Busted_results/Avian/PB2_intermediate.json" "/content/drive/MyDrive/Paper_Results/Busted_results/Avian/"

#Copy the final BUSTED JSON output from the local disk to Drive
!cp "/content/hyphy/Busted_results/Avian/PB2.json" "/content/drive/MyDrive/Paper_Results/Busted_results/Avian/"

# Running HyPhy ***MEME***

In [None]:
#Copy Hyphy_labeled_tree from Drive to the local Colab disk (cp leaves the Drive copy in place)
!cp -r "/content/drive/MyDrive/Paper_Results/IQ_Tree_output/Hyphy_labeled_tree" "/content/hyphy/"
#Copy IQ_Tree_Input from Drive to the local Colab disk
!cp -r "/content/drive/MyDrive/Paper_Results/IQ_Tree_Input/" "/content/hyphy/"
#Copy the alignment input from Drive to the local Colab disk
!cp -r "/content/drive/MyDrive/Paper_Results/Busted_Alignment_Input/" "/content/hyphy"
#Copy the existing MEME_results folder from Drive to the local Colab disk
!cp -r "/content/drive/MyDrive/Paper_Results/MEME_results/" "/content/hyphy/"

In [None]:
import os
import subprocess
import shutil

# === Paths ===
species = "Mammal"
alignment_dir = f"/content/hyphy/Busted_Alignment_Input/{species}"
tree_base_dir = f"/content/hyphy/Hyphy_labeled_tree/{species}"
output_base_dir = f"/content/hyphy/MEME_results/{species}"
drive_base_dir = f"/content/drive/MyDrive/Paper_Results/MEME_results/{species}"

# Ensure output directories exist. The local one gives --output a parent folder; the
# Drive one matters because shutil.copy2 to a missing folder would instead create a
# *file* with the folder's name.
os.makedirs(output_base_dir, exist_ok=True)
os.makedirs(drive_base_dir, exist_ok=True)

# Loop through all alignment files
for file in os.listdir(alignment_dir):
    if not file.endswith(".fasta"):
        continue

    gene_name = file.replace("_combined.fasta", "")
    alignment_path = os.path.join(alignment_dir, file)
    tree_path = os.path.join(tree_base_dir, gene_name, f"{gene_name}_labeled.tree")
    output_json = os.path.join(output_base_dir, f"{gene_name}.json")
    intermediate_json = os.path.join(output_base_dir, f"{gene_name}_intermediate.json")

    # Skip if tree does not exist
    if not os.path.exists(tree_path):
        print(f"‚ö†Ô∏è Skipping {gene_name} ‚Äî tree not found")
        continue

    print(f"\n=== Running MEME for {gene_name} ===")

    # Run MEME
    cmd = [
        "hyphy", "meme",
        "--alignment", alignment_path,
        "--tree", tree_path,
        "--branches", "2.3.3.4b",
        "--pvalue", "0.05",
        "--rates", "2",
        "--output", output_json,
        "--kill-zero-lengths", "Yes",
        "--intermediate-fits", intermediate_json
    ]

    # A failed gene is reported and skipped. A missing `hyphy` executable is not
    # caught here: it would fail identically for every gene, so it is left to surface.
    try:
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"‚ùå Error running MEME for {gene_name}")
        print("---- STDERR ----")
        print(e.stderr)
        print("---- STDOUT ----")
        print(e.stdout)
        continue

    print(result.stdout)
    print(f"‚úÖ MEME completed for {gene_name}")

    # Copy JSON to Google Drive; only genuine copy failures are reported as copy errors
    try:
        shutil.copy2(output_json, drive_base_dir)
        print(f"üìÅ Copied {gene_name}.json to Drive successfully.\n")
    except OSError as e:
        print(f"‚ö†Ô∏è Copy error for {gene_name}: {e}")

In [None]:
#Copy the whole local MEME_results folder back to Drive
!cp -r "/content/hyphy/MEME_results/" "/content/drive/MyDrive/Paper_Results/"

# Running HyPhy ***FEL***

In [None]:
#Copy Hyphy_labeled_tree from Drive to the local Colab disk (cp leaves the Drive copy in place)
!cp -r "/content/drive/MyDrive/Paper_Results/IQ_Tree_output/Hyphy_labeled_tree" "/content/hyphy/"
#Copy IQ_Tree_Input from Drive to the local Colab disk
!cp -r "/content/drive/MyDrive/Paper_Results/IQ_Tree_Input/" "/content/hyphy/"
#Copy the alignment input from Drive to the local Colab disk
!cp -r "/content/drive/MyDrive/Paper_Results/Busted_Alignment_Input/" "/content/hyphy"
#Create the local FEL output folder for the Mammal data set
!mkdir -p "/content/hyphy/FEL_results/Mammal"


In [None]:
# Single-gene FEL command (Mammal PB2) kept for reference only: it sits inside a
# string literal, so the shell line is not executed when this cell runs.
"""
!hyphy FEL \
  --alignment "/content/hyphy/Busted_Alignment_Input/Mammal/PB2_combined.fasta" \
  --tree "/content/hyphy/Hyphy_labeled_tree/Mammal/PB2/PB2_labeled.tree" \
  --branches "2.3.3.4b" \
  --output "/content/hyphy/FEL_results/Mammal/PB2.json" \
  --kill-zero-lengths Yes \
  --intermediate-fits "/content/hyphy/FEL_results/Mammal/PB2_intermediate.json"
"""

In [None]:
import os
import subprocess
import shutil

# === Paths ===
species = "Mammal"
alignment_dir = "/content/hyphy/Busted_Alignment_Input/"+species
tree_base_dir = "/content/hyphy/Hyphy_labeled_tree/"+species
output_base_dir = "/content/hyphy/FEL_results/"+species
drive_base_dir = "/content/drive/MyDrive/Paper_Results/FEL_results/"+species

# === Gene selection ===
# genes_to_run: only these genes are processed; set to None to process every gene with a tree.
# completed_genes: genes reported as already completed and skipped.
# These were hard-coded checks inside the loop; the values are unchanged.
genes_to_run = {"M1"}
completed_genes = {"HA"}

# === Ensure output directories exist ===
os.makedirs(output_base_dir, exist_ok=True)
os.makedirs(drive_base_dir, exist_ok=True)

# === Loop through all FASTA alignment files ===
for file in os.listdir(alignment_dir):
    if not file.endswith(".fasta"):
        continue

    gene_name = file.replace("_combined.fasta", "")
    alignment_path = os.path.join(alignment_dir, file)
    tree_path = os.path.join(tree_base_dir, gene_name, f"{gene_name}_labeled.tree")
    output_json = os.path.join(output_base_dir, f"{gene_name}.json")
    intermediate_json = os.path.join(output_base_dir, f"{gene_name}_intermediate.json")

    # === Check if tree exists ===
    if not os.path.exists(tree_path):
        print(f"‚ö†Ô∏è Skipping {gene_name} ‚Äî tree not found")
        continue

    # Genes outside the current selection are skipped silently.
    if genes_to_run is not None and gene_name not in genes_to_run:
        continue

    if gene_name in completed_genes:
        print(f"\n=== Skipping {species}/{gene_name} - Completed ===")
        continue

    print(f"\n=== Processing {species}/{gene_name} ===")

    # === Run HyPhy FEL ===
    cmd = [
        "hyphy", "FEL",
        "--alignment", alignment_path,
        "--tree", tree_path,
        "--branches", "2.3.3.4b",
        "--output", output_json,
        "--pvalue", "0.05",
        "--kill-zero-lengths", "Yes",
        "--intermediate-fits", intermediate_json
    ]

    # A failed gene is reported and skipped. A missing `hyphy` executable is not
    # caught here: it would fail identically for every gene, so it is left to surface.
    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError:
        print(f"‚ùå Error running FEL for {species}/{gene_name}")
        continue

    print(f"‚úÖ Completed FEL for {gene_name}")

    # === Copy results to Google Drive; only copy failures are reported as copy errors ===
    try:
        shutil.copy2(intermediate_json, drive_base_dir)
        shutil.copy2(output_json, drive_base_dir)
        print(f"üìÅ Moved {gene_name} results to Drive successfully.\n")
    except OSError as e:
        print(f"‚ö†Ô∏è Copy error for {species}/{gene_name}: {e}")


In [None]:
#Copy the alignment input from the local disk to Drive (disabled)
#!cp -r "/content/hyphy/Busted_Alignment_Input/Avian" "/content/drive/MyDrive/Paper_Results/Busted_Alignment_Input/"

#Copy the Mammal HA intermediate-fits JSON from the local disk to Drive
!cp "/content/hyphy/FEL_results/Mammal/HA_intermediate.json" "/content/drive/MyDrive/Paper_Results/FEL_results/Mammal/"

#Copy the final Mammal HA FEL JSON output from the local disk to Drive
!cp "/content/hyphy/FEL_results/Mammal/HA.json" "/content/drive/MyDrive/Paper_Results/FEL_results/Mammal/"

# Running HyPhy ***RELAX***

In [None]:
#Copy Hyphy_labeled_tree from Drive to the local Colab disk (cp leaves the Drive copy in place)
!cp -r "/content/drive/MyDrive/Paper_Results/IQ_Tree_output/Hyphy_labeled_tree" "/content/hyphy/"
#Copy IQ_Tree_Input from Drive to the local Colab disk
!cp -r "/content/drive/MyDrive/Paper_Results/IQ_Tree_Input/" "/content/hyphy/"
#Copy the alignment input from Drive to the local Colab disk
!cp -r "/content/drive/MyDrive/Paper_Results/Busted_Alignment_Input/" "/content/hyphy"
#Copy the Relax_results folder from Drive to the local Colab disk (disabled)
#!cp -r "/content/drive/MyDrive/Paper_Results/Relax_results/" "/content/hyphy/"

To find the RELAX results (p-value and K), search for "LRT" in the output JSON file.

In [None]:
import os
import subprocess
import shutil

# === Paths ===
species = "Mammal"
alignment_dir = f"/content/hyphy/Busted_Alignment_Input/{species}"
tree_base_dir = f"/content/hyphy/Hyphy_labeled_tree/{species}"
output_base_dir = f"/content/hyphy/Relax_results_2/{species}"
drive_base_dir = f"/content/drive/MyDrive/Paper_Results/Relax_results_2/{species}"

# Ensure output directories exist. No other cell creates Relax_results_2, so --output
# would have no parent folder, and shutil.copy2 to a missing Drive folder would
# create a *file* with the folder's name.
os.makedirs(output_base_dir, exist_ok=True)
os.makedirs(drive_base_dir, exist_ok=True)

# Loop through all alignment files
for file in os.listdir(alignment_dir):
    if not file.endswith(".fasta"):
        continue

    gene_name = file.replace("_combined.fasta", "")
    alignment_path = os.path.join(alignment_dir, file)
    tree_path = os.path.join(tree_base_dir, gene_name, f"{gene_name}_labeled.tree")
    output_json = os.path.join(output_base_dir, f"{gene_name}.json")
    intermediate_json = os.path.join(output_base_dir, f"{gene_name}_intermediate.json")

    # Skip if tree does not exist
    if not os.path.exists(tree_path):
        print(f"‚ö†Ô∏è Skipping {gene_name} ‚Äî tree not found")
        continue

    print(f"\n=== Running RELAX for {gene_name} ===")

    # Run Relax with the "2.3.3.4b"-labeled branches as the test set
    cmd = [
        "hyphy", "relax",
        "--alignment", alignment_path,
        "--tree", tree_path,
        "--test", "2.3.3.4b",
        "--output", output_json,
        "--intermediate-fits", intermediate_json
    ]

    # A failed gene is reported and skipped. A missing `hyphy` executable is not
    # caught here: it would fail identically for every gene, so it is left to surface.
    try:
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"‚ùå Error running Relax for {gene_name}")
        print("---- STDERR ----")
        print(e.stderr)
        print("---- STDOUT ----")
        print(e.stdout)
        continue

    print(result.stdout)
    print(f"‚úÖ Relax completed for {gene_name}")

    # Copy JSON to Google Drive. The copy is performed again so that the success
    # message below is only printed when the file really reached Drive.
    try:
        shutil.copy2(output_json, drive_base_dir)
        print(f"üìÅ Copied {gene_name}.json to Drive successfully.\n")
    except OSError as e:
        print(f"‚ö†Ô∏è Copy error for {gene_name}: {e}")

In [None]:
#To move the Relax_results folder from cpu to drive
!cp -r "/content/hyphy/Relax_results/" "/content/drive/MyDrive/Paper_Results/"