# 1.2


In [5]:
import random

def generate_constrained_sequence(length=20):
    """
    Generate a random RNA sequence of 'length' nucleotides satisfying:
    1. Exactly length/4 of each nucleotide (A, C, G, U).
    2. No more than 3 identical nucleotides consecutively.

    Args:
        length: Total sequence length. Must be a non-negative multiple of 4
            so that the four bases can be represented equally.

    Returns:
        The sequence as a string over the alphabet "ACGU".

    Raises:
        ValueError: If 'length' is negative or not divisible by 4. Without
            this check the result would silently be shorter than requested.
    """
    if length < 0 or length % 4 != 0:
        raise ValueError(
            f"length must be a non-negative multiple of 4, got {length}"
        )

    alphabet = "ACGU"
    pool = list(alphabet) * (length // 4)  # length/4 copies of each base

    # Rejection sampling: reshuffle until no base occurs 4 times in a row.
    while True:
        random.shuffle(pool)
        sequence = "".join(pool)
        if not any(base * 4 in sequence for base in alphabet):
            return sequence

def get_pair_score(b1, b2):
    """Return 1 if b1 and b2 form an A-U or G-C pair (either order), else 0."""
    scoring_pairs = ({'A', 'U'}, {'G', 'C'})
    return 1 if {b1, b2} in scoring_pairs else 0

def nussinov_algorithm(seq):
    """
    Build the Nussinov dynamic-programming score table for 'seq'.

    Cell [i][j] (with i < j) holds the best total pair score obtainable on
    the subsequence seq[i..j], where each pair is scored by get_pair_score.
    Cells on and below the main diagonal are left at 0.

    Returns:
        An n x n list of lists of ints, where n == len(seq).
    """
    size = len(seq)
    table = [[0] * size for _ in range(size)]

    # Fill by increasing span so every sub-interval is solved before use.
    for span in range(1, size):
        for lo in range(size - span):
            hi = lo + span

            skip_lo = table[lo + 1][hi]        # lo left unpaired
            skip_hi = table[lo][hi - 1]        # hi left unpaired
            paired = table[lo + 1][hi - 1] + get_pair_score(seq[lo], seq[hi])

            # Best way to cut [lo, hi] into two independent pieces; the
            # leading 0 is the floor used when no interior cut point exists.
            split_best = max(
                [0]
                + [
                    table[lo][mid] + table[mid + 1][hi]
                    for mid in range(lo + 1, hi)
                ]
            )

            table[lo][hi] = max(skip_lo, skip_hi, paired, split_best)

    return table

def backtrack(dp, seq, i, j, pairs, path_log):
    """
    Recursively trace one optimal structure back through the DP table.

    Starting from cell (i, j), decides which move explains the stored score
    and recurses into the corresponding sub-interval(s). Results are written
    into the caller-supplied lists:
      - pairs: receives an (i, j) tuple for every paired position.
      - path_log: receives one human-readable line per step taken.

    The checks run in a fixed priority (i unpaired, j unpaired, i-j paired,
    bifurcation), so when several optimal structures exist the first
    matching move decides which one is reported.
    """
    if i >= j:
        return

    here = dp[i][j]

    # Move down: i is unpaired.
    if here == dp[i+1][j]:
        path_log.append(f"Cell ({i},{j}) -> ({i+1},{j}) : i ({seq[i]}) is unpaired")
        backtrack(dp, seq, i+1, j, pairs, path_log)
        return

    # Move left: j is unpaired.
    if here == dp[i][j-1]:
        path_log.append(f"Cell ({i},{j}) -> ({i},{j-1}) : j ({seq[j]}) is unpaired")
        backtrack(dp, seq, i, j-1, pairs, path_log)
        return

    # Move diagonal: i and j are paired.
    if here == dp[i+1][j-1] + get_pair_score(seq[i], seq[j]):
        pairs.append((i, j))
        path_log.append(f"Cell ({i},{j}) -> ({i+1},{j-1}) : Match {seq[i]}-{seq[j]}")
        backtrack(dp, seq, i+1, j-1, pairs, path_log)
        return

    # Bifurcation: take the first cut point whose two halves sum to the score.
    split = next(
        (m for m in range(i+1, j) if here == dp[i][m] + dp[m+1][j]),
        None,
    )
    if split is not None:
        path_log.append(f"Cell ({i},{j}) -> Bifurcation split at {split}")
        backtrack(dp, seq, i, split, pairs, path_log)
        backtrack(dp, seq, split+1, j, pairs, path_log)

def print_report():
    """
    Generate a random constrained RNA sequence, fold it, and print a report.

    The report printed to stdout contains, in order: the sequence with its
    length and base counts, the full DP table, the traceback path, the
    dot-bracket structure, and a per-index listing of pairing partners.
    """
    # 1. Generate Sequence
    seq = generate_constrained_sequence(20)
    print(f"### 1. Generated RNA Sequence (S)\n{seq}\n")
    print(f"Length: {len(seq)}")
    print(f"Counts: A={seq.count('A')}, C={seq.count('C')}, G={seq.count('G')}, U={seq.count('U')}")
    print("-" * 50)

    # 2. Run Nussinov
    dp_table = nussinov_algorithm(seq)

    # 3. Output DP Table
    print("\n### 2(i). Full DP Table")
    # Each row starts with a 3-character label (base + two spaces), so the
    # header needs a 3-space prefix for the letters to sit above their columns.
    print("   " + "  ".join(f"{s:>2}" for s in seq))  # Column headers
    for base, row in zip(seq, dp_table):
        print(f"{base}  " + "  ".join(f"{val:>2}" for val in row))

    # 4. Perform Traceback
    pairs = []
    path_log = []
    backtrack(dp_table, seq, 0, len(seq)-1, pairs, path_log)
    pairs.sort()  # Sort by first index

    print("\n### 2(ii). Traceback Path")
    for step in path_log:
        print(step)

    # 5. Dot-Bracket & Visuals
    # Opening bracket at the 5' partner, closing bracket at the 3' partner.
    structure = ['.'] * len(seq)
    for (i, j) in pairs:
        structure[i] = '('
        structure[j] = ')'
    dot_bracket = "".join(structure)

    print("\n### 2(iii). Predicted Structure")
    print("Dot-Bracket Representation:")
    print(seq)
    print(dot_bracket)

    print("\nVisual Marking of Base Pairs:")
    print(f"{'Idx':<4} {'Base':<5} {'Pair'}")
    print("-" * 20)
    # Symmetric lookup so both members of a pair can find their partner.
    pair_map = dict(pairs)
    pair_map.update({v: k for k, v in pairs})

    for i, base in enumerate(seq):
        if i in pair_map:
            partner = pair_map[i]
            status = f"Pairs with index {partner} ({seq[partner]})"
        else:
            status = "-"
        print(f"{i:<4} {base:<5} {status}")

# Run the full report only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    print_report()

### 1. Generated RNA Sequence (S)
ACAUGGAUUGAACUUGCGCC

Length: 20
Counts: A=5, C=5, G=5, U=5
--------------------------------------------------

### 2(i). Full DP Table
      A   C   A   U   G   G   A   U   U   G   A   A   C   U   U   G   C   G   C   C
A   0   0   0   1   2   2   2   3   4   4   4   4   5   6   6   6   7   7   8   9
C   0   0   0   1   2   2   2   3   3   3   4   4   5   5   5   5   6   6   7   8
A   0   0   0   1   1   1   1   2   2   2   3   3   4   4   4   4   5   5   6   7
U   0   0   0   0   0   0   1   1   1   1   2   3   3   3   3   4   4   5   5   6
G   0   0   0   0   0   0   0   1   1   1   2   2   3   3   3   3   4   4   5   6
G   0   0   0   0   0   0   0   1   1   1   2   2   3   3   3   3   4   4   5   6
A   0   0   0   0   0   0   0   1   1   1   2   2   2   3   3   3   4   4   5   6
U   0   0   0   0   0   0   0   0   0   0   1   2   2   2   2   3   3   4   4   5
U   0   0   0   0   0   0   0   0   0   0   1   1   1   2   2   2   3   3   4   5
G   0   