In [1]:
from Bio.Seq import Seq
from Bio.SeqUtils import MeltingTemp as mt
from itertools import product
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from nupack import *
model1 = Model(material='dna', celsius=16, magnesium=0, sodium=0.05)

In [2]:
# group complementary pairs together
def rc(sequence):
    complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
    return "".join(complement[base] for base in sequence[::-1])

# Your sequences
unique_8_mers = """
TAACGGACG
  TCAATCCGG
  CGGTCGTTA
  TGAGTGTCG
  TACGTGAGC
  TGTGTACGG
  GGCATGGTA
  GCTCACGTA
  ACTGTAGCG
  CCGAACGTA
  TATTCGGCC
  CGCTACAGT
  GGCCACATA
  TGCATCTCC
  CGCCTAAGT
  TACAGTCGC
  TACCATGCC
  CGTCCGTTA
  CGCGAATTC
  GATGGAGCA
  GCGTGACTA
  TATGGCACC
  TGCTCCATC
  ACTTAGGCG
  GTGCGAGTA
  GGCCGAATA
  GCGGAACTA
  TGACTACGC
  TAGTCACGC
  GGAGATGCA
  TACGTTCGG
  GAATTCGCG
  CCGGATTGA
  GTTGCACAC
  CGTGGATCA
  GGTGCCATA
  GCGTAGTCA
  TAGTTCCGC
  TATGTGGCC
  TAGCGTCAC
  GCGTTATGC
  GCGACTGTA
  TACTCGCAC
  GCATAACGC
  CCGTACACA
  TGATCCACG
  CGACACTCA
  GTGACGCTA
  GTGTGCAAC
  TAACGACCG
""".split()

# List to hold the sequences and their complements in order
ordered_list = []

# Iterate over the unique 8-mers
for seq in unique_8_mers:
    # Get the reverse complement
    rev_comp = rc(seq)
    # If reverse complement is in the list and not in the ordered_list already
    if rev_comp in unique_8_mers and seq not in ordered_list and rev_comp not in ordered_list:
        # Add the sequence and its reverse complement to the list
        ordered_list.append(seq)
        ordered_list.append(rev_comp)

# Print the ordered list
for sequence in ordered_list:
    print(sequence)
len(ordered_list)


TAACGGACG
CGTCCGTTA
TCAATCCGG
CCGGATTGA
CGGTCGTTA
TAACGACCG
TGAGTGTCG
CGACACTCA
TACGTGAGC
GCTCACGTA
TGTGTACGG
CCGTACACA
GGCATGGTA
TACCATGCC
ACTGTAGCG
CGCTACAGT
CCGAACGTA
TACGTTCGG
TATTCGGCC
GGCCGAATA
GGCCACATA
TATGTGGCC
TGCATCTCC
GGAGATGCA
CGCCTAAGT
ACTTAGGCG
TACAGTCGC
GCGACTGTA
CGCGAATTC
GAATTCGCG
GATGGAGCA
TGCTCCATC
GCGTGACTA
TAGTCACGC
TATGGCACC
GGTGCCATA
GTGCGAGTA
TACTCGCAC
GCGGAACTA
TAGTTCCGC
TGACTACGC
GCGTAGTCA
GTTGCACAC
GTGTGCAAC
CGTGGATCA
TGATCCACG
TAGCGTCAC
GTGACGCTA
GCGTTATGC
GCATAACGC


50

In [5]:
rc('CTAATAGGAAAAAAAACTAAGAAC')

'GTTCTTAGTTTTTTTTCCTATTAG'

In [6]:
# checking melt temp

# Your sequences as a multiline string
sequences_str = """
TAACGGACG
CGTCCGTTA
TCAATCCGG
CCGGATTGA
CGGTCGTTA
TAACGACCG
TGAGTGTCG
CGACACTCA
TACGTGAGC
GCTCACGTA
TGTGTACGG
CCGTACACA
GGCATGGTA
TACCATGCC
ACTGTAGCG
CGCTACAGT
CCGAACGTA
TACGTTCGG
TATTCGGCC
GGCCGAATA
GGCCACATA
TATGTGGCC
TGCATCTCC
GGAGATGCA
CGCCTAAGT
ACTTAGGCG
TACAGTCGC
GCGACTGTA
CGCGAATTC
GAATTCGCG
GATGGAGCA
TGCTCCATC
GCGTGACTA
TAGTCACGC
TATGGCACC
GGTGCCATA
GTGCGAGTA
TACTCGCAC
GCGGAACTA
TAGTTCCGC
TGACTACGC
GCGTAGTCA
GTTGCACAC
GTGTGCAAC
CGTGGATCA
TGATCCACG
TAGCGTCAC
GTGACGCTA
GCGTTATGC
GCATAACGC
"""

sequences = sequences_str.strip().split()

# Adjusted function to calculate melting temperatures for sequences and their reverse complements
def melting_temps_with_reverse(sequences):
    seq_temp_list = []
    seen = set()  # Track sequences to avoid duplicates
    for seq in sequences:
        if seq not in seen:  # Only process if not seen
            temp = mt.Tm_NN(seq, Mg=40, Na=5, saltcorr=4)
            reverse_seq = str(Seq(seq).reverse_complement())
            reverse_temp = mt.Tm_NN(reverse_seq, Mg=40, Na=5, saltcorr=4)
            seen.add(seq)
            seen.add(reverse_seq)  # Mark the reverse sequence as seen to avoid processing it as a forward sequence later
            # Store forward and reverse sequences together, considering them as a processed pair
            seq_temp_list.append(((seq, temp), (reverse_seq, reverse_temp)))
    return seq_temp_list

# Function to sort pairs and flatten the list
def sort_and_pair_sequences(seq_temp_pairs):
    # Sort pairs by the minimum temperature in each pair and then flatten while keeping pairs together
    sorted_pairs = sorted(seq_temp_pairs, key=lambda pair: min(pair[0][1], pair[1][1]))
    sorted_seq_temp = [seq for pair in sorted_pairs for seq in sorted(pair, key=lambda x: x[1])]
    return sorted_seq_temp

seq_temp_pairs = melting_temps_with_reverse(sequences)
sorted_seq_temp = sort_and_pair_sequences(seq_temp_pairs)

# Print the sorted sequences and their melting temperatures without duplicates
for sequence, temp in sorted_seq_temp:
    print(f"{sequence} : {temp:.2f}")

# Calculating and printing min, max, and delta temperatures
temperatures = [temp for _, temp in sorted_seq_temp]
min_temp, max_temp = min(temperatures), max(temperatures)
delta_temp = max_temp - min_temp
print(f"\nMin: {min_temp:.2f}, Max: {max_temp:.2f}, Delta: {delta_temp:.2f}")

GGCATGGTA : 28.51
TACCATGCC : 28.51
TATGGCACC : 28.51
GGTGCCATA : 28.51
GGCCACATA : 28.51
TATGTGGCC : 28.51
TGCATCTCC : 28.77
GGAGATGCA : 28.77
GATGGAGCA : 28.77
TGCTCCATC : 28.77
TCAATCCGG : 28.94
CCGGATTGA : 28.94
TATTCGGCC : 29.21
GGCCGAATA : 29.21
CGCCTAAGT : 29.32
ACTTAGGCG : 29.32
CGTGGATCA : 29.36
TGATCCACG : 29.36
TGTGTACGG : 29.38
CCGTACACA : 29.38
GCGGAACTA : 29.48
TAGTTCCGC : 29.48
TGAGTGTCG : 29.63
CGACACTCA : 29.63
ACTGTAGCG : 29.73
CGCTACAGT : 29.73
GCGTGACTA : 29.89
TAGTCACGC : 29.89
GTGCGAGTA : 29.89
TACTCGCAC : 29.89
TACGTGAGC : 29.89
GCTCACGTA : 29.89
GCGACTGTA : 29.89
TACAGTCGC : 29.89
TGACTACGC : 29.89
GCGTAGTCA : 29.89
TAGCGTCAC : 29.89
GTGACGCTA : 29.89
CGTCCGTTA : 30.05
TAACGGACG : 30.05
CGGTCGTTA : 30.05
TAACGACCG : 30.05
CCGAACGTA : 30.05
TACGTTCGG : 30.05
GTTGCACAC : 30.41
GTGTGCAAC : 30.41
GCGTTATGC : 30.65
GCATAACGC : 30.65
GAATTCGCG : 31.02
CGCGAATTC : 31.02

Min: 28.51, Max: 31.02, Delta: 2.51


In [None]:
CTATATGCG
CGCATATAG
CGATACATG
CATGTATCG
GATCAATGG
CCATTGATC
CTTGCTTAG
CTAAGCAAG
GTATCTTCG
CGAAGATAC

In [None]:
#Implemented the function to input the length of the polyT and the location, in this example I used 4 polyT and added to the 5' end of the sequences

sequences = """
TAACGGACG
CGTCCGTTA
TCAATCCGG
CCGGATTGA
CGGTCGTTA
TAACGACCG
TGAGTGTCG
CGACACTCA
TACGTGAGC
GCTCACGTA
TGTGTACGG
CCGTACACA
GGCATGGTA
TACCATGCC
ACTGTAGCG
CGCTACAGT
CCGAACGTA
TACGTTCGG
TATTCGGCC
GGCCGAATA
GGCCACATA
TATGTGGCC
TGCATCTCC
GGAGATGCA
CGCCTAAGT
ACTTAGGCG
TACAGTCGC
GCGACTGTA
CGCGAATTC
GAATTCGCG
GATGGAGCA
TGCTCCATC
GCGTGACTA
TAGTCACGC
TATGGCACC
GGTGCCATA
GTGCGAGTA
TACTCGCAC
GCGGAACTA
TAGTTCCGC
TGACTACGC
GCGTAGTCA
GTTGCACAC
GTGTGCAAC
CGTGGATCA
TGATCCACG
TAGCGTCAC
GTGACGCTA
GCGTTATGC
GCATAACGC
""".split()

# take out
# TAGGCGAAT : ATTCGCCTA
# TAACAGCCA:TGGCTGTTA
# AGCGATGTA:TACATCGCT
# TGCCTTGTA:TACAAGGCA
# TGCAACCTA:TAGGTTGCA

def calculate_energy(sequences, model, tailing_direction=None, polyT_length=0):
    t_tail = "T" * polyT_length
    if tailing_direction == '5prime':
        sequences = [t_tail + seq for seq in sequences]
    elif tailing_direction == '3prime':
        sequences = [seq + t_tail for seq in sequences]
    else:
        sequences = sequences

    free_energy = []
    dot_parens = []
    for seq1 in sequences:
        row1 = []
        row2 = []
        for seq2 in sequences:
            energy = pfunc([seq1, seq2], model=model)[1] #calculate free energy
            row1.append(round(energy, 2))

            structure = sample([seq1, seq2], num_sample=1, model=model)[0] #calculate dot parens structure
            row2.append(structure)

        free_energy.append(row1)
        dot_parens.append(row2)

    df1 = pd.DataFrame(free_energy, index=sequences, columns=sequences)
    df2 = pd.DataFrame(dot_parens, index=sequences, columns=sequences)
    
    return df1.style.highlight_min(axis=0, color='lightblue'), df2.style

#Select the option to input the length of the polyT and the location 
free_energy, dot_parens = calculate_energy(sequences=sequences, model=model1, tailing_direction='5prime', polyT_length=0)

#print the tables
print("Table 1 Free Energy Complex:")
display(free_energy)

print("\nTable 2 Dot Parens Plus:")
display(dot_parens)

len(sequences)


In [10]:
# Function to get the reverse complement of a DNA sequence
def get_reverse_complement(sequence):
    complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
    return "".join(complement[base] for base in reversed(sequence))

# Your sequences
sequences = """
TAGCACGAT
ATCGTGCTA
TGTTCTGGT
ACCAGAACA
TACGCATCT
AGATGCGTA
TGTGGCTTA
TAAGCCACA
TGAGTGTGT
ACACACTCA
TGTTGAGGT
ACCTCAACA
TGCTTGGTA
TACCAAGCA
""".split()

# Check each sequence against all other sequences
for i in range(len(sequences)):
    for j in range(i+1, len(sequences)):
        if sequences[i] == get_reverse_complement(sequences[j]):
            print(f"Sequences {sequences[i]} and {sequences[j]} are complementary.")
        elif sequences[j] == get_reverse_complement(sequences[i]):
            print(f"Sequences {sequences[j]} and {sequences[i]} are complementary.")


Sequences TAGCACGAT and ATCGTGCTA are complementary.
Sequences TGTTCTGGT and ACCAGAACA are complementary.
Sequences TACGCATCT and AGATGCGTA are complementary.
Sequences TGTGGCTTA and TAAGCCACA are complementary.
Sequences TGAGTGTGT and ACACACTCA are complementary.
Sequences TGTTGAGGT and ACCTCAACA are complementary.
Sequences TGCTTGGTA and TACCAAGCA are complementary.


In [11]:
# Your sequences as a multiline string
sequences_str = """
TTCCCAATTCTTTTGAAAATCTCCACCTTTATTTTTTTTTTTTTTTTGTGTGTC
ATTGCTTTTTTCCTTTTGATATAGATACATTTTTTTTCGCAAATGGTCATTTTTTTTTTTTTTTTGTGTGTC
ATCAAAAAGATTCTAAATCGGTTGATAGCGTTTTTTTTTTTTTTTTTGTGTGTC
TTTGCTTTTTTAAACAACTTTGATACCGATAGTTTTTTTGCGCCGACAATTTTTTTTTTTTTTTTACATCGC
TATCAGCTTGCTTGGTTGCTTTGAATGGGATTTTTTTTTTTTTTTTTACATCGC
CACCAGTACAATGCACCGTAATCAAGGTGTATTTTTTTTTTTTTTTTACATCGC
TAGGGTTTTTCGCTGGCAAGTGGAACGGTACGTTTTTCCAGAATCCTGATTTTTTTTTTTTTTTTATGTCGC
GGAGCTAAACATAATATGCAACTACGGGCGCTTTTTTTTTTTTTTTTATGTCGC
AATTATTTTTACCGTTGTAGCGATAGGGTTGATTTTTGTGTTGTTTTTTTTTTTTTTTTATGTCGC
ATTTTTTTTTGCACCCAGCTAAACATAAAAACTTTTTAGGGAATTTTTTTTTTTTTTTCTTTCGGT
ATGCAAATCCATCATCACCTTGCTCTTAGAATTTTTTTTTTTTTTTCTTTCGGT
TCCTTTTTTTGAAAACATAGCTTTTTCAAATATTTTTTATTTTAGTTAATTTTTTTTTTTTTTTCTTTCGGT
TCCCGCCAAAATAACCTACCATATGAAGTATTTTTTTTTTTTTTTTTGGAGCTA
TGGCAAATCAATGGTGCTTGTTACGCAGAAGTTTTTTTTTTTTTTTTGGAGCTA
TAGACTTTTTTTTACAAACAACGCCCTGGAGTTTTTTGACTCTATGATATTTTTTTTTTTTTTTTGGAGCTA
TAGATTTTCAGTCTAACGGAACAATTATCATTTTTTTTTTTTTTTTTAGAGGCA
CAGAACGAGTATGAAGCCCTTTTTACCAGAGTTTTTTTTTTTTTTTTAGAGGCA
CATATTTTTTTCCTGATTATCCAGTACCTTTTTTTTTACATCGGGAGAATTTTTTTTTTTTTTTTAGAGGCA






TTCCCAATTCTTTTGAAAATCTCCACCTTTATTTTTTTTTTTTTTTTAGCTTGC
ATTGCTTTTTTCCTTTTGATATAGATACATTTTTTTTCGCAAATGGTCATTTTTTTTTTTTTTTTAGCTTGC
ATCAAAAAGATTCTAAATCGGTTGATAGCGTTTTTTTTTTTTTTTTTAGCTTGC
TTTGCTTTTTTAAACAACTTTGATACCGATAGTTTTTTTGCGCCGACAATTTTTTTTTTTTTTTGCGATGTA
TATCAGCTTGCTTGGTTGCTTTGAATGGGATTTTTTTTTTTTTTTTGCGATGTA
CACCAGTACAATGCACCGTAATCAAGGTGTATTTTTTTTTTTTTTTGCGATGTA
TAGGGTTTTTCGCTGGCAAGTGGAACGGTACGTTTTTCCAGAATCCTGATTTTTTTTTTTTTTTCGGATTGT
GGAGCTAAACATAATATGCAACTACGGGCGCTTTTTTTTTTTTTTTCGGATTGT
AATTATTTTTACCGTTGTAGCGATAGGGTTGATTTTTGTGTTGTTTTTTTTTTTTTTTCGGATTGT
ATTTTTTTTTGCACCCAGCTAAACATAAAAACTTTTTAGGGAATTTTTTTTTTTTTTTCGGAACAT
ATGCAAATCCATCATCACCTTGCTCTTAGAATTTTTTTTTTTTTTTCGGAACAT
TCCTTTTTTTGAAAACATAGCTTTTTCAAATATTTTTTATTTTAGTTAATTTTTTTTTTTTTTTCGGAACAT
TCCCGCCAAAATAACCTACCATATGAAGTATTTTTTTTTTTTTTTTTAGCTCCA
TGGCAAATCAATGGTGCTTGTTACGCAGAAGTTTTTTTTTTTTTTTTAGCTCCA
TAGACTTTTTTTTACAAACAACGCCCTGGAGTTTTTTGACTCTATGATATTTTTTTTTTTTTTTTAGCTCCA
TAGATTTTCAGTCTAACGGAACAATTATCATTTTTTTTTTTTTTTTACTTGACG
CAGAACGAGTATGAAGCCCTTTTTACCAGAGTTTTTTTTTTTTTTTACTTGACG
CATATTTTTTTCCTGATTATCCAGTACCTTTTTTTTTACATCGGGAGAATTTTTTTTTTTTTTTACTTGACG






TTCCCAATTCTTTTGAAAATCTCCACCTTTATTTTTTTTTTTTTTTCTCGTTGT
ATTGCTTTTTTCCTTTTGATATAGATACATTTTTTTTCGCAAATGGTCATTTTTTTTTTTTTTTCTCGTTGT
ATCAAAAAGATTCTAAATCGGTTGATAGCGTTTTTTTTTTTTTTTTCTCGTTGT
TTTGCTTTTTTAAACAACTTTGATACCGATAGTTTTTTTGCGCCGACAATTTTTTTTTTTTTTTACAATCCG
TATCAGCTTGCTTGGTTGCTTTGAATGGGATTTTTTTTTTTTTTTTACAATCCG
CACCAGTACAATGCACCGTAATCAAGGTGTATTTTTTTTTTTTTTTACAATCCG
TAGGGTTTTTCGCTGGCAAGTGGAACGGTACGTTTTTCCAGAATCCTGATTTTTTTTTTTTTTTGACACACA
GGAGCTAAACATAATATGCAACTACGGGCGCTTTTTTTTTTTTTTTGACACACA
AATTATTTTTACCGTTGTAGCGATAGGGTTGATTTTTGTGTTGTTTTTTTTTTTTTTTGACACACA
ATTTTTTTTTGCACCCAGCTAAACATAAAAACTTTTTAGGGAATTTTTTTTTTTTTTTCCGCTTTA
ATGCAAATCCATCATCACCTTGCTCTTAGAATTTTTTTTTTTTTTTCCGCTTTA
TCCTTTTTTTGAAAACATAGCTTTTTCAAATATTTTTTATTTTAGTTAATTTTTTTTTTTTTTTCCGCTTTA
TCCCGCCAAAATAACCTACCATATGAAGTATTTTTTTTTTTTTTTTCGTCAAGT
TGGCAAATCAATGGTGCTTGTTACGCAGAAGTTTTTTTTTTTTTTTCGTCAAGT
TAGACTTTTTTTTACAAACAACGCCCTGGAGTTTTTTGACTCTATGATATTTTTTTTTTTTTTTCGTCAAGT
TAGATTTTCAGTCTAACGGAACAATTATCATTTTTTTTTTTTTTTTACCGAAAG
CAGAACGAGTATGAAGCCCTTTTTACCAGAGTTTTTTTTTTTTTTTACCGAAAG
CATATTTTTTTCCTGATTATCCAGTACCTTTTTTTTTACATCGGGAGAATTTTTTTTTTTTTTTACCGAAAG






TTCCCAATTCTTTTGAAAATCTCCACCTTTATTTTTTTTTTTTTTTGCAAGCTA
ATTGCTTTTTTCCTTTTGATATAGATACATTTTTTTTCGCAAATGGTCATTTTTTTTTTTTTTTGCAAGCTA
ATCAAAAAGATTCTAAATCGGTTGATAGCGTTTTTTTTTTTTTTTTGCAAGCTA
TTTGCTTTTTTAAACAACTTTGATACCGATAGTTTTTTTGCGCCGACAATTTTTTTTTTTTTTTACAACGAG
TATCAGCTTGCTTGGTTGCTTTGAATGGGATTTTTTTTTTTTTTTTACAACGAG
CACCAGTACAATGCACCGTAATCAAGGTGTATTTTTTTTTTTTTTTACAACGAG
TAGGGTTTTTCGCTGGCAAGTGGAACGGTACGTTTTTCCAGAATCCTGATTTTTTTTTTTTTTTGCGACATA
GGAGCTAAACATAATATGCAACTACGGGCGCTTTTTTTTTTTTTTTGCGACATA
AATTATTTTTACCGTTGTAGCGATAGGGTTGATTTTTGTGTTGTTTTTTTTTTTTTTTGCGACATA
ATTTTTTTTTGCACCCAGCTAAACATAAAAACTTTTTAGGGAATTTTTTTTTTTTTTTATGTTCCG
ATGCAAATCCATCATCACCTTGCTCTTAGAATTTTTTTTTTTTTTTATGTTCCG
TCCTTTTTTTGAAAACATAGCTTTTTCAAATATTTTTTATTTTAGTTAATTTTTTTTTTTTTTTATGTTCCG
TCCCGCCAAAATAACCTACCATATGAAGTATTTTTTTTTTTTTTTTTAAAGCGG
TGGCAAATCAATGGTGCTTGTTACGCAGAAGTTTTTTTTTTTTTTTTAAAGCGG
TAGACTTTTTTTTACAAACAACGCCCTGGAGTTTTTTGACTCTATGATATTTTTTTTTTTTTTTTAAAGCGG
TAGATTTTCAGTCTAACGGAACAATTATCATTTTTTTTTTTTTTTTTGCCTCTA
CAGAACGAGTATGAAGCCCTTTTTACCAGAGTTTTTTTTTTTTTTTTGCCTCTA
CATATTTTTTTCCTGATTATCCAGTACCTTTTTTTTTACATCGGGAGAATTTTTTTTTTTTTTTTGCCTCTA
"""

# Split the string into a list of sequences
sequences = sequences_str.split()

# Set to hold unique 8-mers
unique_8_mers = set()

# Iterate over the sequences
for seq in sequences:
    # Get the last 8 characters
    last_8_mer = seq[-8:]

    # Add it to the set (automatically handles duplicates)
    unique_8_mers.add(last_8_mer)

# Print the unique 8-mers
for mer in unique_8_mers:
    print(mer)


ACAACGAG
ATGTTCCG
CGGATTGT
CCGCTTTA
ACCGAAAG
CGGAACAT
CTTTCGGT
TAGCTCCA
TGTGTGTC
TATGTCGC
TAAAGCGG
GCGATGTA
CTCGTTGT
TAGCTTGC
TGGAGCTA
GACACACA
GCAAGCTA
TAGAGGCA
ACAATCCG
ACTTGACG
CGTCAAGT
TGCCTCTA
GCGACATA
TACATCGC


In [12]:
# Your sequences as a multiline string
sequences_str = """
ATGCAAATCCATCATCACCTTGCTCTTAGAAttttttttttTAAGCCACA
TCCTTTTTTTGAAAACATAGCTTTTTCAAATATTTTTTATTTTAGTTAAttttttttttTAAGCCACA
ATTTTTTTTTGCACCCAGCTAAACATAAAAACTTTTTAGGGAAttttttttttTAAGCCACA
CATATTTTTTTCCTGATTATCCAGTACCTTTTTTTTTACATCGGGAGAAttttttttttTAGCACGAT
CAGAACGAGTATGAAGCCCTTTTTACCAGAGttttttttttTAGCACGAT
CCGCCTTTTTGCCAGCATTGATCAACTTTAATTTTTTCATTGTGAATTAttttttttttTAGCACGAT
AATAATTTTTATCCTCATTAAAACCTATTATTTTTTTCTGAAAttttttttttTAGCACGAT
CATGAGAAGTTTCCATTTTTTTAAACGGGTAAAttttttttttTAGCACGAT
ACTAATTTTTAACACTCATCTAAAGAGGACAGTTTTTATGAACGGTGTAttttttttttTAGCACGAT
AATTCTTACCATAAAGGGCGACATAACGCGAttttttttttACCAGAACA
GGCGTTTTTTTTTAGCGAACCCTTAATTGAGATTTTTATCGCCATATTTttttttttttACCAGAACA
TATAATTTTTAGTACCGACAATCCTTATCATTTTTTTCCAAGAACGGGTttttttttttACCAGAACA
CACCAGTACAATGCACCGTAATCAAGGTGTAttttttttttTGAGTGTGT
TCACCTTTTTGTACTCAGGAGCAGCCCTCATATTTTTGTTAGCGTAACGttttttttttTGAGTGTGT
TTTGCTTTTTTAAACAACTTTGATACCGATAGTTTTTTTGCGCCGACAAttttttttttTGAGTGTGT
"""

# Split the string into a list of sequences
sequences = sequences_str.split()

# Join the sequences with commas
sequences_comma_separated = ", ".join(sequences)

# Print the comma-separated sequences
print(sequences_comma_separated)


ATGCAAATCCATCATCACCTTGCTCTTAGAAttttttttttTAAGCCACA, TCCTTTTTTTGAAAACATAGCTTTTTCAAATATTTTTTATTTTAGTTAAttttttttttTAAGCCACA, ATTTTTTTTTGCACCCAGCTAAACATAAAAACTTTTTAGGGAAttttttttttTAAGCCACA, CATATTTTTTTCCTGATTATCCAGTACCTTTTTTTTTACATCGGGAGAAttttttttttTAGCACGAT, CAGAACGAGTATGAAGCCCTTTTTACCAGAGttttttttttTAGCACGAT, CCGCCTTTTTGCCAGCATTGATCAACTTTAATTTTTTCATTGTGAATTAttttttttttTAGCACGAT, AATAATTTTTATCCTCATTAAAACCTATTATTTTTTTCTGAAAttttttttttTAGCACGAT, CATGAGAAGTTTCCATTTTTTTAAACGGGTAAAttttttttttTAGCACGAT, ACTAATTTTTAACACTCATCTAAAGAGGACAGTTTTTATGAACGGTGTAttttttttttTAGCACGAT, AATTCTTACCATAAAGGGCGACATAACGCGAttttttttttACCAGAACA, GGCGTTTTTTTTTAGCGAACCCTTAATTGAGATTTTTATCGCCATATTTttttttttttACCAGAACA, TATAATTTTTAGTACCGACAATCCTTATCATTTTTTTCCAAGAACGGGTttttttttttACCAGAACA, CACCAGTACAATGCACCGTAATCAAGGTGTAttttttttttTGAGTGTGT, TCACCTTTTTGTACTCAGGAGCAGCCCTCATATTTTTGTTAGCGTAACGttttttttttTGAGTGTGT, TTTGCTTTTTTAAACAACTTTGATACCGATAGTTTTTTTGCGCCGACAAttttttttttTGAGTGTGT


In [13]:
len(sequences_comma_separated)

948

In [19]:
rc('ACCAGAACA')

'TGTTCTGGT'

In [17]:
sequences = """
TGAGTGTGT
ACCAGAACA
TAAGCCACA
TAGCACGAT
""".split()

reverse = [rc(s) for s in sequences]


for r in reverse:
    print(r)


ACACACTCA
TGTTCTGGT
TGTGGCTTA
ATCGTGCTA


In [21]:
# block particles B from snub

blocking = """
ACACACTCAAAAAAATAACAGCCA
ATCGTGCTAAAAAAAAGCGATGTA
TGTGGCTTAAAAAAATGCCTTGTA
TGTTCTGGTAAAAAATGCAACCTA
""".split()

reverse = [rc(b) for b in blocking]


for r in reverse:
    print(r)

# TAACAGCCA:TGGCTGTTA
# AGCGATGTA:TACATCGCT
# TGCCTTGTA:TACAAGGCA
# TGCAACCTA:TAGGTTGCA

TGGCTGTTATTTTTTTGAGTGTGT
TACATCGCTTTTTTTTAGCACGAT
TACAAGGCATTTTTTTAAGCCACA
TAGGTTGCATTTTTTACCAGAACA


In [22]:
# # Check each sequence against all other sequences
# for i in range(len(sequences)):
#     for j in range(i+1, len(sequences)):
#         if sequences[i] == get_reverse_complement(sequences[j]):
#             print(f"Sequences {sequences[i]} and {sequences[j]} are complementary.")
#         elif sequences[j] == get_reverse_complement(sequences[i]):
#             print(f"Sequences {sequences[j]} and {sequences[i]} are complementary.")

Sequences ACACACTCAAAAAAATAACAGCCA and TGGCTGTTATTTTTTTGAGTGTGT are complementary.
Sequences ATCGTGCTAAAAAAAAGCGATGTA and TACATCGCTTTTTTTTAGCACGAT are complementary.
Sequences TGTGGCTTAAAAAAATGCCTTGTA and TACAAGGCATTTTTTTAAGCCACA are complementary.
Sequences TGTTCTGGTAAAAAATGCAACCTA and TAGGTTGCATTTTTTACCAGAACA are complementary.
