This notebook generates random DNA sequences, runs the NUPACK simulation on them with Spark, and validates the output against a reference file.

### Imports

In [2]:
import os
import subprocess as sub
import numpy as np
import random as rand

### Functions

In [3]:
# Alphabet used when drawing random bases
BASES = list('ATCG')

In [4]:
def generate_dna(length,number,seed):
    """Return a list of `number` random strings, each `length` bases long.

    The module-level `random` generator is re-seeded with `seed` on every
    call, so identical arguments always produce the identical list.
    """
    rand.seed(seed)
    sequences = []
    for _ in range(number):
        picked = []
        for _ in range(length):
            # randint is inclusive on both ends: indices 0..3 into BASES
            picked.append(BASES[rand.randint(0,3)])
        sequences.append(''.join(picked))
    return sequences

In [None]:
def nupack_spark(filename,mfe,pairs,prob,output):
    """Run the ``NupackSim`` class from ``nps.jar`` through ``spark-submit``.

    Blocks until the Spark process exits; this may take a while depending on
    the input and settings.

    Parameters
    ----------
    filename : str
        Path handed to the job after ``-i``.
    mfe, pairs, prob : bool
        When true, add the ``-mfe``, ``-pp`` and ``-pr`` flags respectively.
    output : str
        Path handed to the job after ``-o``.

    Returns
    -------
    tuple
        ``(stdout, stderr)`` exactly as captured from the process.
    """
    command = ['spark-submit',
              '--class','NupackSim',
              '--master','local[*]',
              'nps.jar']
    if mfe:
        command.append('-mfe')
    if pairs:
        command.append('-pp')
    if prob:
        command.append('-pr')
    # NOTE(review): `output` is passed here as a bare positional argument and
    # again after '-o' below. Kept as-is because the jar's argument parser is
    # not visible from this notebook -- confirm whether this one is needed.
    command.append(output)
    command.extend(['-i', filename, '-o', output])

    p = sub.Popen(command,stdin=sub.PIPE,stdout=sub.PIPE,stderr=sub.PIPE)
    # Use separate names so the `output` path argument is not clobbered.
    stdout_data, stderr_data = p.communicate()
    # Spark writes its regular log to stderr, so content here does not by
    # itself indicate a failure; it is printed for inspection only.
    print(stderr_data)
    # Return the captured streams so callers can unpack `stdout, stderr`.
    return stdout_data, stderr_data

### Main

Generate a file containing random sequences of DNA

In [5]:
# Data-set parameters: length of each sequence, number of sequences, RNG seed
l, n, s = 32, 100000, 42

# File the generated sequences are written to
filename = 'dna_small.txt'

In [6]:
# Draw the sequences, then store them one per line
random_dna = generate_dna(l, n, s)
with open(filename, 'w') as f:
    for seq in random_dna:
        f.write(seq + "\n")

Now you can run a simulation on it

In [None]:
# NOTE(review): the cells visible below hard-code 'dna_sim_output.txt' rather than using this name
outputFile = "{}_sim_output.txt".format(filename)

In [None]:
# Only the -mfe flag is enabled for this run
_, error = nupack_spark(
    filename,
    mfe=True,
    pairs=False,
    prob=False,
    output='dna_sim_output.txt',
)

Validate Output

In [None]:
output_file_path = "dna_sim_output.txt"

# Read the simulation results, one entry per line with surrounding whitespace removed.
# (The path variable opened here must be the one defined above.)
with open(output_file_path) as f:
    output = [line.strip() for line in f]

Load validation file

In [None]:
val_file_path = "../DataSets/dna_small_sim_output.txt"

# Reference results: one whitespace-stripped entry per line
with open(val_file_path) as f:
    val_output = [line.strip() for line in f.readlines()]

In [None]:
Z = zip(output,val_output)

# Line-by-line equality of simulated vs. reference output
comparison = [o == v for o,v in Z]
# zip() stops at the shorter list, so a truncated or over-long output would
# otherwise pass unnoticed: count every unpaired trailing line as a mismatch.
comparison += [False] * abs(len(output) - len(val_output))

Find the indices of any lines that differ between the two outputs (the result should be empty)

In [None]:
# Indices of the entries that are False, i.e. lines that did not match;
# an empty index array means every compared line was equal.
np.where(np.logical_not(comparison))