In [1]:
import subprocess
import psutil
import sys
import resource

In [2]:
# Divisor applied to ru_maxrss so that memory figures come out in MB.
# macOS gets 1024 * 1024 instead of 1024 -- presumably because ru_maxrss is
# reported in bytes there rather than kilobytes (see getrusage(2)).
rusage_denom = 1024. * 1024. if sys.platform == "darwin" else 1024.

In [3]:
def runglsearch(executable, seq1_file, seq2_file):
    '''
    Run glsearch on two sequence files; return its memory figure and output.

    The child's stdout and stderr are captured together through one pipe and
    read to completion with communicate(), so the child cannot stall on a full
    pipe buffer while this function waits for it.

    Memory is sampled exactly once, after the child has exited and been
    reaped: getrusage(RUSAGE_CHILDREN) only accounts for children that have
    already been waited for, so a sample taken while the child is still
    running would not include it.  Be aware that ru_maxrss for RUSAGE_CHILDREN
    is a high-water mark over every child this Python process has reaped so
    far, which makes the figure an upper bound for this particular run.

    Parameters:
        executable: path of the glsearch binary.  The command line is split
            on whitespace, so the path must not contain any.
        seq1_file: path of the first sequence file (same restriction).
        seq2_file: path of the second sequence file (same restriction).

    Returns:
        A (mem, output) tuple.  mem is a one-element list holding ru_maxrss
        divided by the module-level rusage_denom.  output is the tuple
        returned by Popen.communicate(): (captured_bytes, None), because
        stderr is merged into stdout.  The caller searches captured_bytes for
        the "Total Scan time:" line.
    '''
    command = executable + ' -z -1 -f 0 -g -1 -r +1/-1 ' + seq1_file + ' ' + seq2_file
    args = command.split()
    proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    # communicate() drains the pipe and waits for exit in one step; polling
    # without reading could block forever once the pipe buffer fills up.
    output = proc.communicate()
    # The child has been reaped at this point, so it is now included in the
    # RUSAGE_CHILDREN accounting.
    peak = resource.getrusage(resource.RUSAGE_CHILDREN).ru_maxrss / rusage_denom
    return ([peak], output)

In [5]:
# Benchmark driver: for every sequence pair in input_sequences.txt, write the
# two sequences to scratch FASTA files, run glsearch on them, and print the
# memory figure and the scan time that glsearch reports.
with open("input_sequences.txt", "r") as fp:
    seq1 = ""
    seq2 = ""
    glsearch_exec = "/Users/pooja/Desktop/fasta-36.3.8g/bin/glsearch36"
    glsearch_file_seq1 = "/Users/pooja/Desktop/fasta-36.3.8g/test/input_seq1"
    glsearch_file_seq2 = "/Users/pooja/Desktop/fasta-36.3.8g/test/input_seq2"
    # Text in the glsearch output that precedes the scan time value.
    scan_marker = b'Total Scan time:'
    while True:
        # Expected file format: line 1 is seq1, line 2 is seq2, line 3 is
        # blank; this repeats for as many pairs as need to be aligned.
        line = fp.readline()
        if not line:
            break
        seq1 = line.strip()
        seq2 = fp.readline().strip()
        fp.readline()
        # A stray blank line or a truncated final record yields an empty
        # sequence; skip it instead of handing glsearch an empty input file.
        if not seq1 or not seq2:
            print('skipping incomplete sequence pair')
            continue
        # create input file 1 for glsearch
        with open(glsearch_file_seq1, "w") as in1:
            in1.write(">input_seq1\n")
            in1.write(seq1)
        # create input file 2 for glsearch
        with open(glsearch_file_seq2, "w") as in2:
            in2.write(">input_seq2\n")
            in2.write(seq2)
        mem, output = runglsearch(glsearch_exec, glsearch_file_seq1, glsearch_file_seq2)
        # find() returns -1 when the marker is missing; slicing with -1 would
        # silently keep only the last byte, so check for it explicitly.
        marker_at = output[0].find(scan_marker)
        fields = output[0][marker_at:].split() if marker_at >= 0 else []
        if len(fields) < 4:
            print('glsearch did not report a scan time; its output was:')
            print(output[0].decode('utf-8', 'replace'))
            continue
        # Guard against an empty sample list so the average cannot divide by zero.
        avg_mem = sum(mem) / float(len(mem)) if mem else float('nan')
        print('average mem usage (MB): ' + str(avg_mem))
        # fields is [b'Total', b'Scan', b'time:', <value>, ...]
        print('time taken: ' + str(float(fields[3])))

average mem usage (MB): 9.3203125
time taken: 0.01


References:  
- https://psutil.readthedocs.io/en/latest/#psutil.Process.memory_percent
- http://www.people.virginia.edu/~wrp/fasta/CURRENT/
- http://fa.bianp.net/blog/2013/different-ways-to-get-memory-consumption-or-lessons-learned-from-memory_profiler/
- https://docs.python.org/2/library/subprocess.html#subprocess.Popen.returncode
- https://docs.python.org/2/library/resource.html#resource.getrusage
- https://eli.thegreenplace.net/2017/interacting-with-a-long-running-child-process-in-python/