In [None]:
import subprocess
import os
import shutil
import time
import re
def prepend_env_path(variable, directory):
    """Put ``directory`` at the front of a path-list environment variable.

    Any existing occurrence of ``directory`` is dropped first, so re-running
    this cell (or another cell with the same setup) keeps the directory in
    first position without growing the variable with duplicate entries.

    Args:
        variable: Name of the environment variable, e.g. ``'PATH'``.
        directory: Directory that has to be searched first.
    """
    current = os.environ.get(variable, '')
    entries = current.split(os.pathsep) if current else []
    kept = [entry for entry in entries if entry != directory]
    os.environ[variable] = os.pathsep.join([directory] + kept)


# Make the local GROMACS 2024.2 install (executables and shared libraries)
# take precedence for every subprocess started from this notebook.
prepend_env_path('PATH', '/data/home/mrichte3/gromacs-2024.2/install/bin')
prepend_env_path('LD_LIBRARY_PATH', '/data/home/mrichte3/gromacs-2024.2/install/lib')

# GROMACS behaviour switches, inherited by the gmx child processes.
# Per the GROMACS environment-variable docs: -1 makes GROMACS overwrite
# output files without creating '#file#' backups ...
os.environ['GMX_MAXBACKUP'] = '-1'
# ... and -1 keeps mdrun from exiting after too many LINCS warnings.
os.environ['GMX_MAXCONSTRWARN'] = '-1'

def run_command(command, input_text=None, max_chars=100):
    """Run an external command and echo only the noteworthy lines of its output.

    stdout and stderr are captured and concatenated (stdout first). Every line
    containing "warning", "fatal" or "random" (case-insensitive) is printed,
    truncated to ``max_chars`` characters. A non-zero exit status is reported
    too, so a failure whose output matches none of the keywords is no longer
    dropped silently.

    Args:
        command: Argument list handed to ``subprocess.run`` (no shell).
        input_text: Optional text fed to the command's stdin, used to answer
            interactive prompts.
        max_chars: Maximum number of characters printed per echoed output line.

    Returns:
        int: The command's exit status. Callers that ignore the return value
        behave exactly as before.
    """
    result = subprocess.run(command, capture_output=True, text=True, input=input_text)
    keywords = ("warning", "fatal", "random")
    for line in (result.stdout + result.stderr).splitlines():
        lowered = line.lower()
        if any(keyword in lowered for keyword in keywords):
            print(line[:max_chars])
    if result.returncode != 0:
        program = command[0] if isinstance(command, (list, tuple)) else command
        print(f"Command {program!r} exited with status {result.returncode}")
    return result.returncode

def run_mini(command, input_text=None, expected_steps=5001):
    """Run a minimisation command and tell whether it reported ``expected_steps`` steps.

    stdout and stderr are captured and scanned line by line. Lines containing
    "steepest descents converged to", "steepest descents did not converge",
    "fatal" or "error" (case-insensitive) are printed in full. The first such
    line that also contains "<N> steps" decides the result.

    Args:
        command: Argument list handed to ``subprocess.run`` (no shell).
        input_text: Optional text fed to the command's stdin.
        expected_steps: Step count that makes the function return ``True``.
            The default of 5001 is presumably nsteps + 1 for a minimisation
            .mdp with nsteps = 5000 -- TODO confirm against the .mdp file.

    Returns:
        bool: ``True`` if the first reported step count equals
        ``expected_steps``; ``False`` if it differs or if no step count was
        reported at all.
    """
    result = subprocess.run(command, capture_output=True, text=True, input=input_text)
    markers = (
        "steepest descents converged to",
        "fatal",
        "error",
        "steepest descents did not converge",
    )
    for line in (result.stdout + result.stderr).splitlines():
        lowered = line.lower()
        if not any(marker in lowered for marker in markers):
            continue
        print(line)
        match = re.search(r'(\d+) steps', line)
        if match:
            return int(match.group(1)) == expected_steps
    return False
    
def run_stucture_setup(input_pdb):
    """Build a solvated, ion-neutralised system plus index file from one PDB.

    Works in the current directory and runs, in order: ``pdb2gmx`` ->
    ``editconf`` -> ``solvate`` -> ``grompp`` (ions.mdp) -> ``genion`` ->
    ``make_ndx``. On success the directory contains
    ``structure_solv_ions.gro``, ``topol.top``, ``posre.itp`` and ``index.ndx``.

    Every stage consumes the previous stage's output, so the pipeline stops at
    the first stage whose output file is missing instead of running the
    remaining tools against missing inputs. Callers detect failure by the
    absence of ``structure_solv_ions.gro``.

    Args:
        input_pdb: Path of the PDB structure to prepare.
    """
    # Remove leftovers of the previous structure (what `rm *.gro` / `rm *.tpr`
    # did before) so a stale file can never pass for this structure's output.
    # index.ndx is removed as well: otherwise a failed make_ndx would leave the
    # previous structure's index in place for the later grompp calls.
    for name in os.listdir('.'):
        is_leftover = name == 'index.ndx' or (
            name.endswith(('.gro', '.tpr')) and not name.startswith('.'))
        if is_leftover and os.path.isfile(name):
            os.remove(name)

    # (command, stdin answers, file the stage must produce)
    stages = [
        # "6" then "1" answer pdb2gmx's two interactive menus ("6 1 for custom");
        # presumably force field and water model -- TODO confirm the numbering.
        (["gmx", "pdb2gmx", "-f", f"{input_pdb}", "-o", "structure_processed.gro",
          "-p", "topol.top", "-i", "posre.itp"],
         "6\n1\n", "structure_processed.gro"),
        (["gmx", "editconf", "-f", "structure_processed.gro", "-o", "structure_box.gro",
          "-c", "-d", "1.0", "-bt", "cubic"],
         None, "structure_box.gro"),
        (["gmx", "solvate", "-cp", "structure_box.gro", "-cs", "spc216.gro",
          "-o", "structure_solv.gro", "-p", "topol.top"],
         None, "structure_solv.gro"),
        (["gmx", "grompp", "-f", "ions.mdp", "-c", "structure_solv.gro", "-p", "topol.top",
          "-o", "ions.tpr", "-maxwarn", "3"],
         None, "ions.tpr"),
        # "14" selects the group genion replaces with ions; presumably the
        # solvent group -- TODO confirm against genion's group listing.
        (["gmx", "genion", "-s", "ions.tpr", "-o", "structure_solv_ions.gro", "-p", "topol.top",
          "-pname", "NA", "-nname", "CL", "-neutral", "-conc", "0.15", "-seed", "12345"],
         "14\n", "structure_solv_ions.gro"),
        # make_ndx script: rename group 19 to SOLV, merge groups 1 and 12 into a
        # new group, name group 20 SOLU (presumably that new group), then quit.
        (["gmx", "make_ndx", "-f", "structure_solv_ions.gro", "-o", "index.ndx"],
         "name 19 SOLV\n1 | 12\nname 20 SOLU\nq\n", "index.ndx"),
    ]
    for command, stdin_text, produced in stages:
        run_command(command, stdin_text)
        if not os.path.exists(produced):
            print(f"gmx {command[1]} did not produce {produced}; skipping the remaining setup steps")
            return



# --- Select the structures to process -------------------------------------
pdb_directory = '/data/home/mrichte3/RNASeq/unmod/'
step5_directory = os.path.join(pdb_directory, 'step5')

pdb_files = sorted(f for f in os.listdir(pdb_directory) if f.endswith('.pdb'))
# A structure counts as done once <basename>.gro exists in the step5 directory.
completed_files = {os.path.splitext(f)[0] for f in os.listdir(step5_directory) if f.endswith('.gro')}
pdb_files = [f for f in pdb_files if os.path.splitext(f)[0] not in completed_files]
print(f"Number of pdb_files to process before truncate: {len(pdb_files)}")

# Keep only the files sorted before `truncate_file`; keep all if it is not pending.
truncate_file = 'ENSG00000105497.pdb'
truncate_index = pdb_files.index(truncate_file) if truncate_file in pdb_files else len(pdb_files)
pdb_files = pdb_files[:truncate_index]
print(f"Number of pdb_files to process after truncate: {len(pdb_files)}")

# Split the work in two halves: 1 -> first half, 2 -> second half,
# any other value -> the whole list.
process_number = 1
total_files = len(pdb_files)
half_size = total_files // 2
if process_number == 1:
    pdb_files = pdb_files[:half_size]
elif process_number == 2:
    pdb_files = pdb_files[half_size:]

print(f"Number of pdb_files to process: {len(pdb_files)}")

# --- Setup -> minimisation -> production for every selected structure ------
for input_pdb in pdb_files:
    print(f"Current input_pdb: {input_pdb}")
    start_time = time.time()
    run_stucture_setup(os.path.join(pdb_directory, input_pdb))
    if not os.path.exists('structure_solv_ions.gro'):
        # Setup failed for this structure; move on to the next one.
        continue

    # grompp goes through run_mini only so that its "fatal"/"error" lines are
    # printed; the return value is irrelevant here.
    command = ["gmx", "grompp", "-v", "-f", "step4.0_minimization.mdp", "-o", "step4.0_minimization.tpr",
               "-c", "structure_solv_ions.gro", "-r", "structure_solv_ions.gro",
               "-p", "topol.top", "-n", "index.ndx", "-maxwarn", "5"]
    run_mini(command)

    # Run the minimisation until run_mini reports True, at most three times;
    # the pipeline continues with production either way.
    command = ["gmx", "mdrun", "-v", "-deffnm", "step4.0_minimization", "-ntmpi", "1"]
    tries = 0
    while tries < 3 and not run_mini(command):
        tries += 1

    command_grompp = ["gmx", "grompp", "-f", "step5_production.mdp", "-o", "step5.tpr",
                      "-c", "step4.0_minimization.gro", "-p", "topol.top", "-n", "index.ndx", "-maxwarn", "5"]
    run_command(command_grompp)
    command_mdrun = ["gmx", "mdrun", "-v", "-deffnm", "step5", "-ntmpi", "1"]
    run_command(command_mdrun)

    elapsed_time = time.time() - start_time
    basename = os.path.splitext(os.path.basename(input_pdb))[0]
    # Only claim success (and archive the result) when production actually
    # wrote its final coordinates; before, a missing step5.gro was still
    # reported as "completed" and the failing `mv` went unnoticed.
    if os.path.exists('step5.gro'):
        shutil.move('step5.gro', os.path.join(step5_directory, f"{basename}.gro"))
        print(f"Process {input_pdb} completed in {elapsed_time:.2f} seconds.")
    else:
        print(f"Process {input_pdb} FAILED after {elapsed_time:.2f} seconds: step5.gro was not produced.")

    # Delete step*.pdb files left in the working directory (presumably
    # structure dumps mdrun writes on constraint warnings -- TODO confirm).
    for name in os.listdir('.'):
        if name.startswith('step') and name.endswith('.pdb') and os.path.isfile(name):
            os.remove(name)









In [None]:
import subprocess
import os
import shutil
import time
import re
def prepend_env_path(variable, directory):
    """Put ``directory`` at the front of a path-list environment variable.

    Any existing occurrence of ``directory`` is dropped first, so re-running
    this cell (or another cell with the same setup) keeps the directory in
    first position without growing the variable with duplicate entries.

    Args:
        variable: Name of the environment variable, e.g. ``'PATH'``.
        directory: Directory that has to be searched first.
    """
    current = os.environ.get(variable, '')
    entries = current.split(os.pathsep) if current else []
    kept = [entry for entry in entries if entry != directory]
    os.environ[variable] = os.pathsep.join([directory] + kept)


# Make the local GROMACS 2024.2 install (executables and shared libraries)
# take precedence for every subprocess started from this notebook.
prepend_env_path('PATH', '/data/home/mrichte3/gromacs-2024.2/install/bin')
prepend_env_path('LD_LIBRARY_PATH', '/data/home/mrichte3/gromacs-2024.2/install/lib')

# GROMACS behaviour switches, inherited by the gmx child processes.
# Per the GROMACS environment-variable docs: -1 makes GROMACS overwrite
# output files without creating '#file#' backups ...
os.environ['GMX_MAXBACKUP'] = '-1'
# ... and -1 keeps mdrun from exiting after too many LINCS warnings.
os.environ['GMX_MAXCONSTRWARN'] = '-1'

def run_command(command, input_text=None, max_chars=100):
    """Run an external command and echo only the noteworthy lines of its output.

    stdout and stderr are captured and concatenated (stdout first). Every line
    containing "warning", "fatal" or "random" (case-insensitive) is printed,
    truncated to ``max_chars`` characters. A non-zero exit status is reported
    too, so a failure whose output matches none of the keywords is no longer
    dropped silently.

    Args:
        command: Argument list handed to ``subprocess.run`` (no shell).
        input_text: Optional text fed to the command's stdin, used to answer
            interactive prompts.
        max_chars: Maximum number of characters printed per echoed output line.

    Returns:
        int: The command's exit status. Callers that ignore the return value
        behave exactly as before.
    """
    result = subprocess.run(command, capture_output=True, text=True, input=input_text)
    keywords = ("warning", "fatal", "random")
    for line in (result.stdout + result.stderr).splitlines():
        lowered = line.lower()
        if any(keyword in lowered for keyword in keywords):
            print(line[:max_chars])
    if result.returncode != 0:
        program = command[0] if isinstance(command, (list, tuple)) else command
        print(f"Command {program!r} exited with status {result.returncode}")
    return result.returncode

def run_mini(command, input_text=None, expected_steps=5001):
    """Run a minimisation command and tell whether it reported ``expected_steps`` steps.

    stdout and stderr are captured and scanned line by line. Lines containing
    "steepest descents converged to", "steepest descents did not converge",
    "fatal" or "error" (case-insensitive) are printed in full. The first such
    line that also contains "<N> steps" decides the result.

    Args:
        command: Argument list handed to ``subprocess.run`` (no shell).
        input_text: Optional text fed to the command's stdin.
        expected_steps: Step count that makes the function return ``True``.
            The default of 5001 is presumably nsteps + 1 for a minimisation
            .mdp with nsteps = 5000 -- TODO confirm against the .mdp file.

    Returns:
        bool: ``True`` if the first reported step count equals
        ``expected_steps``; ``False`` if it differs or if no step count was
        reported at all.
    """
    result = subprocess.run(command, capture_output=True, text=True, input=input_text)
    markers = (
        "steepest descents converged to",
        "fatal",
        "error",
        "steepest descents did not converge",
    )
    for line in (result.stdout + result.stderr).splitlines():
        lowered = line.lower()
        if not any(marker in lowered for marker in markers):
            continue
        print(line)
        match = re.search(r'(\d+) steps', line)
        if match:
            return int(match.group(1)) == expected_steps
    return False
    
def run_stucture_setup(input_pdb):
    """Build a solvated, ion-neutralised system plus index file from one PDB.

    Works in the current directory and runs, in order: ``pdb2gmx`` ->
    ``editconf`` -> ``solvate`` -> ``grompp`` (ions.mdp) -> ``genion`` ->
    ``make_ndx``. On success the directory contains
    ``structure_solv_ions.gro``, ``topol.top``, ``posre.itp`` and ``index.ndx``.

    Every stage consumes the previous stage's output, so the pipeline stops at
    the first stage whose output file is missing instead of running the
    remaining tools against missing inputs. Callers detect failure by the
    absence of ``structure_solv_ions.gro``.

    Args:
        input_pdb: Path of the PDB structure to prepare.
    """
    # Remove leftovers of the previous structure (what `rm *.gro` / `rm *.tpr`
    # did before) so a stale file can never pass for this structure's output.
    # index.ndx is removed as well: otherwise a failed make_ndx would leave the
    # previous structure's index in place for the later grompp calls.
    for name in os.listdir('.'):
        is_leftover = name == 'index.ndx' or (
            name.endswith(('.gro', '.tpr')) and not name.startswith('.'))
        if is_leftover and os.path.isfile(name):
            os.remove(name)

    # (command, stdin answers, file the stage must produce)
    stages = [
        # "6" then "1" answer pdb2gmx's two interactive menus ("6 1 for custom");
        # presumably force field and water model -- TODO confirm the numbering.
        (["gmx", "pdb2gmx", "-f", f"{input_pdb}", "-o", "structure_processed.gro",
          "-p", "topol.top", "-i", "posre.itp"],
         "6\n1\n", "structure_processed.gro"),
        (["gmx", "editconf", "-f", "structure_processed.gro", "-o", "structure_box.gro",
          "-c", "-d", "1.0", "-bt", "cubic"],
         None, "structure_box.gro"),
        (["gmx", "solvate", "-cp", "structure_box.gro", "-cs", "spc216.gro",
          "-o", "structure_solv.gro", "-p", "topol.top"],
         None, "structure_solv.gro"),
        (["gmx", "grompp", "-f", "ions.mdp", "-c", "structure_solv.gro", "-p", "topol.top",
          "-o", "ions.tpr", "-maxwarn", "3"],
         None, "ions.tpr"),
        # "14" selects the group genion replaces with ions; presumably the
        # solvent group -- TODO confirm against genion's group listing.
        (["gmx", "genion", "-s", "ions.tpr", "-o", "structure_solv_ions.gro", "-p", "topol.top",
          "-pname", "NA", "-nname", "CL", "-neutral", "-conc", "0.15", "-seed", "12345"],
         "14\n", "structure_solv_ions.gro"),
        # make_ndx script: rename group 19 to SOLV, merge groups 1 and 12 into a
        # new group, name group 20 SOLU (presumably that new group), then quit.
        (["gmx", "make_ndx", "-f", "structure_solv_ions.gro", "-o", "index.ndx"],
         "name 19 SOLV\n1 | 12\nname 20 SOLU\nq\n", "index.ndx"),
    ]
    for command, stdin_text, produced in stages:
        run_command(command, stdin_text)
        if not os.path.exists(produced):
            print(f"gmx {command[1]} did not produce {produced}; skipping the remaining setup steps")
            return



# --- Setup -> minimisation -> production for every pending structure -------
pdb_directory = '/data/home/mrichte3/RNASeq/unmod/'
step5_directory = os.path.join(pdb_directory, 'step5')

pdb_files = sorted(f for f in os.listdir(pdb_directory) if f.endswith('.pdb'))
# File names (with the .gro extension) of structures that already finished.
completed_files = {f for f in os.listdir(step5_directory) if f.endswith('.gro')}

for input_pdb in pdb_files:
    basename = os.path.splitext(os.path.basename(input_pdb))[0]
    if basename + '.gro' in completed_files:
        continue
    start_time = time.time()
    run_stucture_setup(os.path.join(pdb_directory, input_pdb))
    if not os.path.exists('structure_solv_ions.gro'):
        # Setup failed for this structure; move on to the next one.
        continue

    # grompp goes through run_mini only so that its "fatal"/"error" lines are
    # printed; the return value is irrelevant here.
    command = ["gmx", "grompp", "-v", "-f", "step4.0_minimization.mdp", "-o", "step4.0_minimization.tpr",
               "-c", "structure_solv_ions.gro", "-r", "structure_solv_ions.gro",
               "-p", "topol.top", "-n", "index.ndx", "-maxwarn", "5"]
    run_mini(command)

    # Run the minimisation until run_mini reports True, at most three times;
    # the pipeline continues with production either way.
    command = ["gmx", "mdrun", "-v", "-deffnm", "step4.0_minimization", "-ntmpi", "1"]
    tries = 0
    while tries < 3 and not run_mini(command):
        tries += 1

    command_grompp = ["gmx", "grompp", "-f", "step5_production.mdp", "-o", "step5.tpr",
                      "-c", "step4.0_minimization.gro", "-p", "topol.top", "-n", "index.ndx", "-maxwarn", "5"]
    run_command(command_grompp)
    command_mdrun = ["gmx", "mdrun", "-v", "-deffnm", "step5", "-ntmpi", "1"]
    run_command(command_mdrun)

    elapsed_time = time.time() - start_time
    # Only claim success (and archive the result) when production actually
    # wrote its final coordinates; before, a missing step5.gro was still
    # reported as "completed" and the failing `mv` went unnoticed.
    if os.path.exists('step5.gro'):
        shutil.move('step5.gro', os.path.join(step5_directory, f"{basename}.gro"))
        print(f"Process {input_pdb} completed in {elapsed_time:.2f} seconds.")
    else:
        print(f"Process {input_pdb} FAILED after {elapsed_time:.2f} seconds: step5.gro was not produced.")

    # Delete step*.pdb files left in the working directory (presumably
    # structure dumps mdrun writes on constraint warnings -- TODO confirm).
    for name in os.listdir('.'):
        if name.startswith('step') and name.endswith('.pdb') and os.path.isfile(name):
            os.remove(name)









In [None]:
##clean up
# Delete the step5 output (<basename>.gro) of every structure listed in
# `failed_files` from each of the three variant directories.
import os  # imported here so the cell also runs on its own

directories = [
    '/data/home/mrichte3/RNASeq/unmod/step5',
    '/data/home/mrichte3/RNASeq/gna/step5',
    '/data/home/mrichte3/RNASeq/amide/step5'
]

# `failed_files` is filled by the "code to check for failed files" cell. On a
# fresh kernel (Restart & Run All) that cell has not run yet, which used to
# end in a NameError here; treat that case as "nothing to clean up".
try:
    failed_files
except NameError:
    failed_files = []
    print("failed_files is not defined - run the failed-file check cell first; nothing to clean up.")

for dir_path in directories:
    for input_pdb in failed_files:
        base_name = os.path.splitext(input_pdb)[0]
        gro_file_to_delete = os.path.join(dir_path, base_name + '.gro')
        if os.path.exists(gro_file_to_delete):
            os.remove(gro_file_to_delete)
            print(f"Deleted: {gro_file_to_delete}")
        else:
            print(f"File not found: {gro_file_to_delete}")

In [None]:
##code to check for failed files
import subprocess
import os
import shutil
import time
import re
def prepend_env_path(variable, directory):
    """Put ``directory`` at the front of a path-list environment variable.

    Any existing occurrence of ``directory`` is dropped first, so re-running
    this cell (or another cell with the same setup) keeps the directory in
    first position without growing the variable with duplicate entries.

    Args:
        variable: Name of the environment variable, e.g. ``'PATH'``.
        directory: Directory that has to be searched first.
    """
    current = os.environ.get(variable, '')
    entries = current.split(os.pathsep) if current else []
    kept = [entry for entry in entries if entry != directory]
    os.environ[variable] = os.pathsep.join([directory] + kept)


# Make the local GROMACS 2024.2 install (executables and shared libraries)
# take precedence for every subprocess started from this notebook.
prepend_env_path('PATH', '/data/home/mrichte3/gromacs-2024.2/install/bin')
prepend_env_path('LD_LIBRARY_PATH', '/data/home/mrichte3/gromacs-2024.2/install/lib')

# GROMACS behaviour switches, inherited by the gmx child processes.
# Per the GROMACS environment-variable docs: -1 makes GROMACS overwrite
# output files without creating '#file#' backups ...
os.environ['GMX_MAXBACKUP'] = '-1'
# ... and -1 keeps mdrun from exiting after too many LINCS warnings.
os.environ['GMX_MAXCONSTRWARN'] = '-1'

def run_command(command, input_text=None, max_chars=100):
    """Run an external command and echo only the noteworthy lines of its output.

    stdout and stderr are captured and concatenated (stdout first). Every line
    containing "warning", "fatal" or "random" (case-insensitive) is printed,
    truncated to ``max_chars`` characters. A non-zero exit status is reported
    too, so a failure whose output matches none of the keywords is no longer
    dropped silently.

    Args:
        command: Argument list handed to ``subprocess.run`` (no shell).
        input_text: Optional text fed to the command's stdin, used to answer
            interactive prompts.
        max_chars: Maximum number of characters printed per echoed output line.

    Returns:
        int: The command's exit status. Callers that ignore the return value
        behave exactly as before.
    """
    result = subprocess.run(command, capture_output=True, text=True, input=input_text)
    keywords = ("warning", "fatal", "random")
    for line in (result.stdout + result.stderr).splitlines():
        lowered = line.lower()
        if any(keyword in lowered for keyword in keywords):
            print(line[:max_chars])
    if result.returncode != 0:
        program = command[0] if isinstance(command, (list, tuple)) else command
        print(f"Command {program!r} exited with status {result.returncode}")
    return result.returncode

def run_mini(command, input_text=None, expected_steps=5001):
    """Run a minimisation command and tell whether it reported ``expected_steps`` steps.

    stdout and stderr are captured and scanned line by line. Lines containing
    "steepest descents converged to", "steepest descents did not converge",
    "fatal" or "error" (case-insensitive) are printed in full. The first such
    line that also contains "<N> steps" decides the result.

    Args:
        command: Argument list handed to ``subprocess.run`` (no shell).
        input_text: Optional text fed to the command's stdin.
        expected_steps: Step count that makes the function return ``True``.
            The default of 5001 is presumably nsteps + 1 for a minimisation
            .mdp with nsteps = 5000 -- TODO confirm against the .mdp file.

    Returns:
        bool: ``True`` if the first reported step count equals
        ``expected_steps``; ``False`` if it differs or if no step count was
        reported at all.
    """
    result = subprocess.run(command, capture_output=True, text=True, input=input_text)
    markers = (
        "steepest descents converged to",
        "fatal",
        "error",
        "steepest descents did not converge",
    )
    for line in (result.stdout + result.stderr).splitlines():
        lowered = line.lower()
        if not any(marker in lowered for marker in markers):
            continue
        print(line)
        match = re.search(r'(\d+) steps', line)
        if match:
            return int(match.group(1)) == expected_steps
    return False
    
def run_stucture_setup(input_pdb):
    """Build a solvated, ion-neutralised system plus index file from one PDB.

    Works in the current directory and runs, in order: ``pdb2gmx`` ->
    ``editconf`` -> ``solvate`` -> ``grompp`` (ions.mdp) -> ``genion`` ->
    ``make_ndx``. On success the directory contains
    ``structure_solv_ions.gro``, ``topol.top``, ``posre.itp`` and ``index.ndx``.

    Every stage consumes the previous stage's output, so the pipeline stops at
    the first stage whose output file is missing instead of running the
    remaining tools against missing inputs. Callers detect failure by the
    absence of ``structure_solv_ions.gro``.

    Args:
        input_pdb: Path of the PDB structure to prepare.
    """
    # Remove leftovers of the previous structure (what `rm *.gro` / `rm *.tpr`
    # did before) so a stale file can never pass for this structure's output.
    # index.ndx is removed as well: otherwise a failed make_ndx would leave the
    # previous structure's index in place for the later grompp calls.
    for name in os.listdir('.'):
        is_leftover = name == 'index.ndx' or (
            name.endswith(('.gro', '.tpr')) and not name.startswith('.'))
        if is_leftover and os.path.isfile(name):
            os.remove(name)

    # (command, stdin answers, file the stage must produce)
    stages = [
        # "6" then "1" answer pdb2gmx's two interactive menus ("6 1 for custom");
        # presumably force field and water model -- TODO confirm the numbering.
        (["gmx", "pdb2gmx", "-f", f"{input_pdb}", "-o", "structure_processed.gro",
          "-p", "topol.top", "-i", "posre.itp"],
         "6\n1\n", "structure_processed.gro"),
        (["gmx", "editconf", "-f", "structure_processed.gro", "-o", "structure_box.gro",
          "-c", "-d", "1.0", "-bt", "cubic"],
         None, "structure_box.gro"),
        (["gmx", "solvate", "-cp", "structure_box.gro", "-cs", "spc216.gro",
          "-o", "structure_solv.gro", "-p", "topol.top"],
         None, "structure_solv.gro"),
        (["gmx", "grompp", "-f", "ions.mdp", "-c", "structure_solv.gro", "-p", "topol.top",
          "-o", "ions.tpr", "-maxwarn", "3"],
         None, "ions.tpr"),
        # "14" selects the group genion replaces with ions; presumably the
        # solvent group -- TODO confirm against genion's group listing.
        (["gmx", "genion", "-s", "ions.tpr", "-o", "structure_solv_ions.gro", "-p", "topol.top",
          "-pname", "NA", "-nname", "CL", "-neutral", "-conc", "0.15", "-seed", "12345"],
         "14\n", "structure_solv_ions.gro"),
        # make_ndx script: rename group 19 to SOLV, merge groups 1 and 12 into a
        # new group, name group 20 SOLU (presumably that new group), then quit.
        (["gmx", "make_ndx", "-f", "structure_solv_ions.gro", "-o", "index.ndx"],
         "name 19 SOLV\n1 | 12\nname 20 SOLU\nq\n", "index.ndx"),
    ]
    for command, stdin_text, produced in stages:
        run_command(command, stdin_text)
        if not os.path.exists(produced):
            print(f"gmx {command[1]} did not produce {produced}; skipping the remaining setup steps")
            return



# --- Find the structures whose system setup fails --------------------------
# Only run_stucture_setup is executed per structure; minimisation and
# production are deliberately not run in this check. The old loop body
# carried a copy of those steps after an unconditional `continue`, where it
# could never execute, so it has been removed.
pdb_directory = '/data/home/mrichte3/RNASeq/unmod/'
pdb_files = sorted([f for f in os.listdir(pdb_directory) if f.endswith('.pdb')])
# Not consulted below: the check re-runs setup even for completed structures.
completed_files = {f for f in os.listdir('/data/home/mrichte3/RNASeq/unmod/step5/') if f.endswith('.gro')}

# The check covers the files sorted before this one and stops when it is reached.
stop_at_file = 'ENSG00000010017.pdb'

failed_files = []
for input_pdb in pdb_files:
    if input_pdb == stop_at_file:
        break
    run_stucture_setup(os.path.join(pdb_directory, input_pdb))
    # Setup counts as failed when the final solvated/ionised structure is missing.
    if not os.path.exists('structure_solv_ions.gro'):
        failed_files.append(input_pdb)

# Always report the result; before, nothing was printed unless `stop_at_file`
# happened to be in the list.
print("Failed input files:", failed_files)







