In [299]:
import RNA
import random
import networkx as nx
import matplotlib.pyplot as plt
from pyvis.network import Network
from IPython.display import clear_output

In [33]:
def generateRNA(length):
    """Build a random RNA string containing ``length`` bases.

    Every position is drawn independently with ``random.choice`` from the
    four-letter alphabet A, C, G, U. A non-positive ``length`` yields an
    empty string because ``range`` produces no iterations.
    """
    alphabet = 'ACGU'
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)

In [99]:
def mutateRNASequence(sequence, n=1):
    """Return an upper-cased copy of ``sequence`` with ``n`` point substitutions.

    ``n`` distinct positions are sampled without replacement, and each one is
    overwritten with a base chosen at random from A/C/G/U excluding the base
    currently at that position. The result therefore differs from
    ``sequence.upper()`` at exactly ``n`` positions.

    Raises:
        ValueError: propagated from ``random.sample`` when ``n`` is negative
            or larger than the sequence length.
    """
    alphabet = 'ACGU'
    chars = list(sequence.upper())
    targets = random.sample(range(len(chars)), n)

    for index in targets:
        current = chars[index]
        # Exclude the current base so the position is guaranteed to change.
        alternatives = [base for base in alphabet if base != current]
        chars[index] = random.choice(alternatives)

    return ''.join(chars)

In [100]:
def isOneCharDiff(s1, s2):
    """Tell whether ``s1`` and ``s2`` differ at exactly one position.

    Strings of unequal length are never considered one character apart.
    Scanning stops as soon as a second mismatch is seen.
    """
    if len(s1) != len(s2):
        return False

    found_mismatch = False
    for left, right in zip(s1, s2):
        if left == right:
            continue
        if found_mismatch:
            # This is the second mismatch, so the answer is already known.
            return False
        found_mismatch = True
    return found_mismatch

In [133]:
def character_difference(s1, s2):
    """Count the positions at which ``s1`` and ``s2`` hold different characters.

    The strings are walked in lockstep with ``zip``, so when their lengths
    differ only the overlapping prefix is compared.
    """
    mismatches = 0
    for left, right in zip(s1, s2):
        if left != right:
            mismatches += 1
    return mismatches

In [171]:
# Early prototype of the seeding step: 10 random 100-base sequences, each
# followed by 10 single-substitution variants of itself.
# A stray, colon-less `def runSimulation()` header used to sit between the
# assignment and the loop; it was a syntax error that kept this cell from
# running at all. The actual function is defined in a later cell.
seqList = []
for i in range(10):
    s = generateRNA(100)
    seqList.append(s)
    for j in range(10):
        s_ = mutateRNASequence(s, 1)
        seqList.append(s_)
        

In [279]:
def runSimulation(pool=None, n_seeds=10, seq_length=100, mutants_per_seed=10):
    """Produce the next generation of sequences for the mutation walk.

    With a ``pool``, every sequence in it contributes one single-substitution
    mutant and the pool itself is carried over. Without one, ``n_seeds``
    random sequences of ``seq_length`` bases are generated, and each
    contributes ``mutants_per_seed`` single-substitution mutants plus itself.

    Args:
        pool: Iterable of RNA strings from the previous round, or ``None`` to
            start from fresh random seeds.
        n_seeds: Number of random seed sequences (used only when ``pool`` is
            ``None``).
        seq_length: Length of each random seed sequence (used only when
            ``pool`` is ``None``).
        mutants_per_seed: Mutants derived from each random seed (used only
            when ``pool`` is ``None``).

    Returns:
        list: The distinct sequences, in first-seen order.
    """
    candidates = []
    if pool is not None:
        # Materialise once so a one-shot iterable is not exhausted by the
        # mutation pass before it can be carried over.
        parents = list(pool)
        for parent in parents:
            candidates.append(mutateRNASequence(parent, 1))
        candidates.extend(parents)
    else:
        for _seed_no in range(n_seeds):
            seed = generateRNA(seq_length)
            for _mutant_no in range(mutants_per_seed):
                candidates.append(mutateRNASequence(seed, 1))
            candidates.append(seed)

    # dict.fromkeys de-duplicates while keeping insertion order. The previous
    # list(set(...)) ordered the result by string hash, which varies between
    # interpreter runs and made the walk irreproducible even with a fixed
    # random seed.
    return list(dict.fromkeys(candidates))

In [296]:
ROUNDS = 7

# The first call receives None, so runSimulation starts from fresh random
# seeds; every later round is fed the previous round's output.
seqList = None
for round_idx in range(ROUNDS):
    seqList = runSimulation(pool=seqList)

In [297]:
# Size of the pool after the final round (runSimulation returns de-duplicated sequences).
len(seqList)

6630

In [306]:
# Fold every sequence in the pool and keep it next to the value stored as its MFE.
seqCards = []
total = len(seqList)
for done, seq in enumerate(seqList, start=1):
    # Replace the previous progress line instead of accumulating one per sequence.
    clear_output(wait=True)
    print(f"{done}/{total}")
    # Index 1 of RNA.fold's result is what this notebook records under "mfe".
    seqCards.append({"seq": seq, "mfe": RNA.fold(seq)[1]})

6630/6630


In [314]:
def _single_substitution(first, second):
    """Return ``(index, base_in_second)`` when the two strings differ at exactly one position, else ``None``."""
    if len(first) != len(second):
        # zip would silently ignore the longer tail; unequal lengths are never neighbours.
        return None
    found = None
    for index, (a, b) in enumerate(zip(first, second)):
        if a != b:
            if found is not None:
                # Second mismatch: no need to scan the rest.
                return None
            found = (index, b)
    return found


def createGraphFromList(seq_list):
    """Build an undirected graph linking sequences that are one substitution apart.

    Args:
        seq_list: Sequence of dicts, each with a ``'seq'`` string and an
            ``'mfe'`` value convertible to ``float``.

    Returns:
        networkx.Graph: One node per distinct ``'seq'`` (attribute ``mfe`` as
        a float) and one edge per pair of equal-length sequences that differ
        at exactly one position. Each edge carries a ``difference`` string of
        the form ``"<base>,<index>"``.

    Note:
        The previous body called ``len()`` and ``.items()`` on the result of
        ``character_difference``, which returns an ``int``, so it raised
        ``TypeError`` on the first pair. The difference is now computed here.
        NOTE(review): ``<base>`` is taken from the later sequence of the pair
        (the one further down ``seq_list``); the mapping the old code expected
        is not visible in this notebook -- confirm this is the intended base.
    """
    G = nx.Graph()

    # Add nodes for each sequence in the list, ensuring mfe is a float
    for item in seq_list:
        G.add_node(item['seq'], mfe=float(item['mfe']))

    # Create edges based on single character difference
    sequences = [item['seq'] for item in seq_list]
    for i in range(len(sequences)):
        for j in range(i + 1, len(sequences)):
            hit = _single_substitution(sequences[i], sequences[j])
            if hit is None:
                continue
            idx, char = hit
            # Store the difference as a string
            G.add_edge(sequences[i], sequences[j], difference=f"{char},{idx}")

    return G

In [315]:
# Build the networkx graph from the {"seq", "mfe"} records collected in seqCards.
G = createGraphFromList(seqCards)

In [317]:
# Fresh pyvis Network with default options; a later cell fills it from G.
net = Network()

In [325]:
# Mirror G into the pyvis network: each node's hover title is its mfe value.
for seq_id, node_attrs in G.nodes(data=True):
    net.add_node(seq_id, title=f"{node_attrs['mfe']}")

# Each edge is labelled with the stored difference string, repeated in the hover title.
for left, right, edge_attrs in G.edges(data=True):
    label = f"{edge_attrs['difference']}"
    net.add_edge(left, right, label=label, title=f"Difference: {label}")

In [267]:
# net.toggle_physics(True)
# net.show_buttons(filter_=['physics'])

In [316]:
# Export G (nodes with mfe, edges with difference) as GraphML; the path is relative to the working directory.
nx.write_graphml_lxml(G, "mRNA.graphml")

In [326]:
# Render the pyvis network to sequence_graph.html.
# NOTE(review): notebook=False presumably targets a standalone page rather than inline display -- confirm against the pyvis docs.
net.show("sequence_graph.html", notebook=False)

sequence_graph.html
