In [40]:
# read fasta sequence and convert to string

from Bio import SeqIO

def get_seq(filename):
    """Read a FASTA file and return its sequence as a plain string.

    Args:
        filename: Passed unchanged as the first argument of ``SeqIO.read``.

    Returns:
        ``str()`` of the ``seq`` attribute of the record that was read.
    """
    # NOTE(review): SeqIO.read is documented to expect exactly one record in
    # the file — confirm the inputs are single-record FASTA files.
    return str(SeqIO.read(filename, "fasta").seq)

# divide string into list of sliding windows of appropriate length

def get_sliding_windows(full_seq, step, window):
    """Cut a sequence into fixed-length windows taken every ``step`` positions.

    Windows start at offsets 0, step, 2*step, ... Only full-length windows are
    kept, so positions after the end of the last full window are not covered
    by any returned window.

    Args:
        full_seq: Sliceable sequence (e.g. a string) to split.
        step: Distance between consecutive window starts; must be positive.
        window: Length of every returned window; must be positive.

    Returns:
        List of slices of ``full_seq``, each exactly ``window`` long, in
        left-to-right order. Empty when ``full_seq`` is shorter than
        ``window``.

    Raises:
        ValueError: If ``step`` or ``window`` is not positive.
    """
    # Reject values that previously produced silent nonsense (an empty list,
    # or a list of empty strings for window == 0).
    if step <= 0:
        raise ValueError("step must be a positive integer, got {!r}".format(step))
    if window <= 0:
        raise ValueError("window must be a positive integer, got {!r}".format(window))

    # The last start that still leaves room for a full window is
    # len(full_seq) - window, so bounding the range there removes the need
    # for a per-slice length check.
    last_start = len(full_seq) - window
    return [full_seq[start:start + window] for start in range(0, last_start + 1, step)]

# run bpps on each window in the list

import arnie
from arnie.bpps import bpps
from arnie.pfunc import pfunc 

def get_bpps_list(windows, package='contrafold', linear=True, threshknot=True):
    """Run arnie's ``bpps`` on every window and collect the results in order.

    The keyword arguments default to the settings this notebook has always
    used, so existing ``get_bpps_list(windows)`` calls behave as before.

    Args:
        windows: Iterable of sequences; one ``bpps`` call is made per item.
        package: Forwarded to ``bpps`` as ``package``.
        linear: Forwarded to ``bpps`` as ``linear``.
        threshknot: Forwarded to ``bpps`` as ``threshknot``.

    Returns:
        List holding one ``bpps`` result per window, in the same order as
        ``windows`` (presumably base-pair probability matrices — confirm
        against the arnie documentation).
    """
    return [
        bpps(seq, package=package, linear=linear, threshknot=threshknot)
        for seq in windows
    ]

# run MEA on each bpps result in the list to convert it to a dot-bracket structure

from arnie.mea.mea import MEA
from arnie.mea.mea_utils import *

def get_dotbracket_list(bpps_list, theta=0.3, run_probknot_heuristic=True):
    """Build an ``MEA`` object per bpps result and collect its ``structure``.

    The keyword arguments default to the values this notebook has always
    used, so existing ``get_dotbracket_list(bpps_list)`` calls behave as
    before.

    Args:
        bpps_list: Iterable of ``bpps`` results, e.g. the output of
            ``get_bpps_list``.
        theta: Forwarded to ``MEA`` as ``theta``.
        run_probknot_heuristic: Forwarded to ``MEA`` as
            ``run_probknot_heuristic``.

    Returns:
        List with the ``structure`` attribute of each ``MEA`` object, in the
        same order as ``bpps_list`` (dot-bracket structures, per the comment
        above this function).
    """
    return [
        MEA(
            bpps_result,
            run_probknot_heuristic=run_probknot_heuristic,
            theta=theta,
        ).structure
        for bpps_result in bpps_list
    ]

# TODO: add a function that goes over each dot-bracket structure and determines whether it contains a pseudoknot



In [29]:
# Reference-sequence run: load the FASTA, cut it into windows of length 300
# taken every 100 positions, then compute bpps for each window.
# NOTE(review): this cell's execution count (In [29]) is lower than that of the
# definitions cell (In [40]), so its stored results may predate the current
# function definitions — re-run in order on a fresh kernel to confirm.
SARS_seq = get_seq("SARS_CoV2_reference.fasta")
SARS_windows = get_sliding_windows(SARS_seq, step=100, window=300)
# One bpps call is made per window; presumably the slow step for a long
# sequence — consider caching the result.
SARS_bpps_list = get_bpps_list(SARS_windows)

In [41]:
# Same windowing and bpps steps applied to "SARS_last_1000.fasta", carried one
# step further to a structure per window.
# NOTE(review): presumably this file holds the last 1000 nt of the reference
# (3' end) — confirm how it was produced.
SARS_seq_3UTR = get_seq("SARS_last_1000.fasta")
SARS_3UTR_windows = get_sliding_windows(SARS_seq_3UTR, step = 100, window = 300)
SARS_3UTR_bpps = get_bpps_list(SARS_3UTR_windows)
# One structure per window, in the same order as SARS_3UTR_windows.
SARS_3UTR_dotbracket = get_dotbracket_list(SARS_3UTR_bpps)