In [1]:
import pandas as pd
import os
import subprocess
from Comparative_Analysis import Utilities as util
from Bio import AlignIO
from tqdm.auto import tqdm

In [2]:
# Project locations. Paths are assembled with forward slashes and passed through
# util.wslname for use on the WSL side (presumably a Windows -> WSL path
# conversion; confirm in Comparative_Analysis.Utilities).
project_dir = 'D:/Project_Data/Project_7'
dataset_loc = project_dir + '/NCBI_Dataset_Corynebacteriales'
r_scape_output_loc = project_dir + '/R_Scape_Results'
merged_file_loc = dataset_loc + '/merged_file.txt'
intergenic_alignment_loc = project_dir + '/UTR_Alignments/Full_Ortholog'
wsl_merged_file_loc = util.wslname(merged_file_loc)

# Concatenate every '*genomic.fna' file found one level below dataset_loc into a
# single merged file that the later search steps use as their target database.
# The output encoding matches the input decoding so any non-ASCII character that
# survives decoding cannot fail to encode under the platform default.
with open(merged_file_loc, 'w', encoding="utf-8") as outfile:
    for genome_dir in util.list_dirs(dataset_loc):
        directory = dataset_loc + '/' + genome_dir
        for file_name in util.list_files(directory):
            if not file_name.endswith("genomic.fna"):
                continue
            with open(directory + '/' + file_name, encoding="utf-8", errors='ignore') as infile:
                # Stream line by line instead of loading the whole genome in memory.
                last_line = ''
                for last_line in infile:
                    outfile.write(last_line)
                # A source file lacking a final newline would otherwise glue the
                # next file's '>' header onto this file's last sequence line.
                if last_line and not last_line.endswith('\n'):
                    outfile.write('\n')

In [3]:
# Alignment IDs are the alignment file names with the '.fasta' extension removed.
# Files with any other extension are skipped: the processing loop rebuilds the
# file name as alignment_id + '.fasta', so a non-FASTA entry could never be read.
fasta_suffix = '.fasta'
alignment_ids = [
    file_name[:-len(fasta_suffix)]
    for file_name in util.list_files(intergenic_alignment_loc)
    if file_name.lower().endswith(fasta_suffix)
]

In [None]:
def run_in_wsl(wsl_directory, command):
    """Run ``command`` through WSL after changing into ``wsl_directory``.

    Args:
        wsl_directory: Working directory, given as a path valid inside WSL.
        command: Command line to execute once inside that directory.

    Returns:
        True if the ``wsl`` invocation exited with status 0, False otherwise.
    """
    completed = subprocess.run('wsl cd ' + wsl_directory + ' ; ' + command, shell=True)
    return completed.returncode == 0


r_scape_command = '~/rscape_v2.0.0.g/bin/R-scape'

for alignment_id in tqdm(alignment_ids):
    intergenic_file = alignment_id + '.fasta'
    # NOTE(review): the first 6 characters of the ID are dropped for the output
    # folder name - presumably a fixed file-name prefix; confirm against the
    # naming of the alignment files.
    analysis_directory = r_scape_output_loc + '/' + alignment_id[6:]
    wsl_analysis_directory = util.wslname(analysis_directory)
    os.makedirs(analysis_directory, exist_ok=True)

    # Convert the FASTA alignment to Stockholm format as the pipeline's seed.
    intergenic_region_alignment = intergenic_alignment_loc + '/' + intergenic_file
    alignment = AlignIO.read(intergenic_region_alignment, "fasta")
    AlignIO.write(alignment, analysis_directory + '/initial_align.sto', "stockholm")

    # Each step consumes the output file of the step before it.
    pipeline = [
        # Initial run with HMM (model built with --noss from the seed alignment)
        'cmbuild --noss -F initial_cm.cm initial_align.sto',
        'cmsearch -A initial_search.sto initial_cm.cm ' + wsl_merged_file_loc,
        # Second run with CM
        'cmbuild -F interim_cm.cm initial_search.sto',
        'cmcalibrate interim_cm.cm',
        'cmsearch -A interim_search.sto interim_cm.cm ' + wsl_merged_file_loc,
        r_scape_command + ' --cacofold --outname rscape_interim interim_search.sto',
        # Final run with CaCofold CM
        'cmbuild -F final_cm.cm rscape_interim.cacofold.sto',
        'cmcalibrate final_cm.cm',
        'cmsearch -A final_search.sto final_cm.cm ' + wsl_merged_file_loc,
        r_scape_command + ' --cacofold --outname rscape_final final_search.sto',
    ]

    for command in pipeline:
        if not run_in_wsl(wsl_analysis_directory, command):
            # Stop this alignment at the first failure: continuing would either
            # fail again or silently reuse stale files left by an earlier run.
            print('Skipping remaining steps for ' + alignment_id + '; command failed: ' + command)
            break