In [34]:
# Helper function to compute the score of a base pair (Ri, Rj)
def computePairScores(Rj, Ri, i, j, probL, probR, probP):
  # set of possible rna base pairs
  possibleBasePairs = set(['AU', 'UA', 'GC', 'CG', 'GU', 'UG'])

  if Ri + Rj in possibleBasePairs:
    return probL[i] + probR[j]
  else:
    return probP[i] + probP[j]


def MaximumProbabilitySum(probL, probR, probP, seq):
  n = len(seq)
  N = [[0 for i in range(n)] for j in range(n)]
  for i in range(n):
    for j in range(n):
      if i == j:
        N[i][j] = probP[i]
      elif j < i:
        N[i][j] = 0
      else:
        N[i][j] = max(
          N[i + 1][j] + probP[i],
          N[i][j - 1] + probP[j], 
          N[i + 1][j - 1] + computePairScores(seq[i], seq[j], i, j, probL, probR, probP),
          max([N[i][k] + N[k + 1][j] for k in range(i, j)]))
  return N

# Backtracking function to find the optimal dot bracket representation of the RNA sequence
# from the DP table
def BackTrack(i, j, fold, N, seq):
  if i >= j:
    return
  elif N[i][j] == N[i + 1][j] + probP[i]:
    print("DOT")
    BackTrack(i + 1, j, fold, N, seq)
  elif N[i][j] == N[i][j - 1] + probP[j]:
    
    print("DOT")

    BackTrack(i, j - 1, fold, N, seq)
  elif N[i][j] == (N[i + 1][j - 1] + computePairScores(seq[i], seq[j], i, j, probL, probR, probP)):
    fold[i] = '('
    fold[j] = ')'
    print("LEFT RIGHT")
    BackTrack(i + 1, j - 1, fold, N, seq)
  else:
    for k in range(i + 1, j - 1):
      if N[i][j] == N[i][k] + N[k + 1][j]:
        BackTrack(i, k, fold, N, seq)
        BackTrack(k + 1, j, fold, N, seq)
        break

  return fold

# Main function to run the Enhanced Nussinov algorithm
def EnhancedNussinov(probL, probR, probP, seq):
  n = len(seq)
  N = MaximumProbabilitySum(probL, probR, probP, seq)
  fold = ['.' for i in range(n)]
  print(N)
  fold = BackTrack(0, n - 1, fold, N, seq)
  return ''.join(fold)


In [2]:
from keras.layers import Conv2D, Dense, MaxPooling2D, Flatten
import keras

In [4]:
model = keras.models.load_model("82.h5")

In [10]:
import numpy as np
import math

def encode(seq):
    n = len(seq)
    l = [[0 for i in range(n)] for j in range(n)]
    for i in range(n):
        for j in range(n):
            if((seq[i] == 'A' and seq[j] == 'U') or (seq[j] == 'A' and seq[i] == 'U')):
                l[i][j] = 2
            elif((seq[i] == 'G' and seq[j] == 'C') or (seq[j] == 'G' and seq[i] == 'C')):
                l[i][j] = 3
            elif((seq[i] == 'G' and seq[j] == 'U') or (seq[j] == 'U' and seq[i] == 'G')):
                l[i][j] = 0.8
            else:
                l[i][j] = 0
    return l

def Gaussian(x):
    return math.exp(-0.5*(x*x))

def matrix_calc(data, sliding_param):
    mat = np.zeros([len(data)+sliding_param,len(data)])
    for i in range(len(data[0])):
        for j in range(len(data[0])):
            coefficient = 0
            for add in range(30):
                if i - add >= 0 and j + add <len(data):
                    score = data[i-add][j+add]
                    if score == 0:
                        break
                    else:
                        coefficient = coefficient + score * Gaussian(add)
                else:
                    break
            if coefficient > 0:
                for add in range(1,30):
                    if i + add < len(data) and j - add >= 0:
                        score = data[i+add][j-add]
                        if score == 0:
                            break
                        else:
                            coefficient = coefficient + score * Gaussian(add)
                    else:
                        break
            mat[[i+int(sliding_param/2)],[j]] = coefficient
    return mat

def slidingWindow(numpy_M, sliding_param, n):
    zeros = np.zeros((sliding_param, n), dtype=int)
    numpy_M = np.append(numpy_M, zeros, axis=0)
    sliding_mats = [numpy_M[i: i + sliding_param, :].tolist() for i in range(n)]
    return sliding_mats

def scale(im, nR, nC):
    nR0 = len(im) 
    nC0 = len(im[0]) 
    arr =  [[ im[int(nR0 * r / nR)][int(nC0 * c / nC)]  
             for c in range(nC)] for r in range(nR)]
    return np.array(arr)

In [22]:
seq = "GGGUCUGUAGCUCAGGUGGUUAGAGCGCACCCCUGAUAAGGGUGAGGUCGGUGGUUCGAGUCCUCCCAGACCCACCA"
windows = slidingWindow(matrix_calc(encode(seq), 19), 19, len(seq))
scaled = np.zeros(shape=(len(seq), 19, 120))
for i in range(len(windows)):
    scaled[i] = scale(windows[i], 19, 120)

In [23]:
print(scaled)

[[[0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  ...
  [0.         0.         0.         ... 0.         0.         2.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 3.         3.         0.        ]]

 [[0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  ...
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 3.         3.         0.        ]
  [3.         3.         3.         ... 0.         0.         0.        ]]

 [[0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.      

In [28]:
x = model.predict(scaled)



In [35]:
print(x)
probL = []
probR = []
probP = []
for i in range(len(x)):
    probL.append(x[i][0])
    probR.append(x[i][1])
    probP.append(x[i][2])
print(EnhancedNussinov(probL, probR, probP, seq))

[[9.99614716e-01 2.95145401e-08 3.85289663e-04]
 [9.99042928e-01 1.42397172e-09 9.57045879e-04]
 [9.99930143e-01 6.47505771e-10 6.98664371e-05]
 [9.99621153e-01 6.41575637e-09 3.78838304e-04]
 [9.94665742e-01 1.10595727e-06 5.33318846e-03]
 [9.97775376e-01 4.14702939e-08 2.22465466e-03]
 [9.96256590e-01 1.67818962e-06 3.74179543e-03]
 [9.03329719e-03 1.10503461e-03 9.89861608e-01]
 [5.72563615e-03 1.78468984e-03 9.92489636e-01]
 [9.94771302e-01 5.59635664e-05 5.17272949e-03]
 [9.97660518e-01 1.59990377e-05 2.32342794e-03]
 [9.98861670e-01 3.62238529e-06 1.13474159e-03]
 [9.91262555e-01 2.72756210e-04 8.46471637e-03]
 [3.05868477e-01 1.75166577e-01 5.18964887e-01]
 [3.26569915e-01 7.59790186e-03 6.65832162e-01]
 [4.86410409e-02 1.70781894e-03 9.49651062e-01]
 [1.35865598e-03 5.36935928e-04 9.98104334e-01]
 [3.13317659e-03 1.12213707e-02 9.85645473e-01]
 [2.48486991e-04 4.68071557e-05 9.99704659e-01]
 [7.89714250e-05 5.23602532e-04 9.99397397e-01]
 [2.03215259e-05 4.05527477e-04 9.995741