In [0]:
!pip install biopython
import Bio
print(Bio.__version__)

from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
from Bio.SeqRecord import SeqRecord

In [0]:
class ModifiedOligo:
    '''
    A class to facilitate viewing of modified bases present in an oligonucleotide. It does this by
        (1) Replacing the modified base position(s) with a user-defined symbol.
        (2) Colorizing each DNA base (and its modified base symbol) with the same color.

    Attributes
    ----------
    SeqRecord: Bio.SeqRecord
        A Biopython SeqRecord object. Any sequence that yields one base per
        iteration step and supports integer indexing is handled the same way.
    modifications: dict, optional
        A dictionary of the form {(symbol, name):(positions)}. It maps a user-defined symbol and the
        modified base name to the base position(s) in which the modified base occurs. Positions are
        zero-based (consistent with the SeqRecord numbering convention in Biopython) and may be given
        as a tuple/list of ints or as a single int. For example, suppose we have an oligo (say
        ATGTCAGTC) in which the T's at the second and eighth positions are deoxyUracil (dU) bases,
        and we wish to represent them using the pound '#' symbol. We would specify that as
        {('#', 'dU'):(1, 7)}. When omitted (None), the oligo has no modifications.

    Notes
    -----
    A symbol whose positions fall on different canonical bases is reported on stdout and skipped;
    the remaining modifications are still applied. Bases that are not A, G, T or C are shown
    without color.
    '''

    # ANSI escape sequences used to color the terminal/notebook output.
    _ANSI_RESET = '\033[0m'
    _ANSI_COLORS = {
        'A': '\033[0;31m',  # red
        'G': '\033[0;32m',  # green
        'T': '\033[0;34m',  # blue
        'C': '\033[0;33m',  # yellow
    }

    #############| class constructor |#############
    def __init__(self, SeqRecord, modifications=None):
        self.__seq_rec = SeqRecord
        # The default of None means "no modifications"; normalise so the loops below are no-ops.
        self.__modifications = modifications if modifications is not None else {}
        self.__oligo = [base for base in self.__seq_rec]
        self.__info_table = {}

        # Map each modified base's symbol and its position(s) to the canonical base.
        # The new dict has structure {(symbol, name, canonical_base):(positions)}
        for key, positions in self.__modifications.items():
            # Accept a bare int so a forgotten trailing comma in a singleton does not crash.
            positions = (positions,) if isinstance(positions, int) else tuple(positions)
            if not positions:
                continue  # nothing to mark for this symbol

            # Upper-case so that lower-case sequences still resolve to a color.
            canonical_bases = {str(self.__seq_rec[i]).upper() for i in positions}
            if len(canonical_bases) > 1:
                # ensure symbol does NOT map to multiple (different) canonical bases
                print("ERROR!: \'{}\' is assigned to multiple canonical bases {}".format(key[0], canonical_bases))
                continue
            self.__info_table[(key[0], key[1], canonical_bases.pop())] = positions

        # reconstruct oligo object: replacing modified base positions with user-defined symbols
        for key, positions in self.__info_table.items():
            for i in positions:
                self.__oligo[i] = key[0]

    #############| private class methods |#############
    def __colorize(self, base):
        '''Return `base` wrapped in the ANSI color of its canonical base.

        A plain A/G/T/C (either case) is colored directly. Any other character is
        treated as a modification symbol and takes the color of the canonical base it
        replaces. Characters that resolve to no color are returned unchanged.
        '''
        canonical = base.upper()
        if canonical not in self._ANSI_COLORS:
            # Not a plain base: look the symbol up among the registered modifications.
            canonical = next((key[2] for key in self.__info_table if key[0] == base), None)
        color = self._ANSI_COLORS.get(canonical)
        if color is None:
            return base
        return ''.join([color, base, self._ANSI_RESET])

    def __legend(self):
        '''Return one "symbol = name" line per modification, colored like its canonical base.'''
        lines = []
        for symbol, name, canonical in self.__info_table:
            label = ' = '.join([symbol, name])
            color = self._ANSI_COLORS.get(canonical)
            lines.append(''.join([color, label, self._ANSI_RESET]) if color else label)
        return '\n'.join(lines)

    def __render(self, modified, showLegend, reverse):
        '''Build the colored strand shared by view53 and view35.'''
        # whether to view the unmodified (self.__seq_rec) or modified (self.__oligo) oligo
        bases = self.__oligo if modified else self.__seq_rec
        colored = [self.__colorize(base) for base in bases]
        if reverse:
            colored.reverse()
        left, right = ("3' ", " 5'") if reverse else ("5' ", " 3'")
        strand = ''.join([left, ''.join(colored), right])
        legend = self.__legend() if showLegend else ''
        # Only prepend the legend when there is something to show.
        return '\n'.join([legend, strand]) if legend else strand

    #############| instance methods |#############
    def view53(self, modified=True, showLegend = False):
        '''Return the colored oligo in 5'->3' orientation.

        modified:   show modification symbols (True) or the original bases (False).
        showLegend: prepend the "symbol = name" legend, one entry per line.
        '''
        return self.__render(modified, showLegend, reverse=False)

    def view35(self, modified=True, showLegend = False):
        '''Return the colored oligo in 3'->5' orientation (reversed, NOT reverse-complemented).

        modified:   show modification symbols (True) or the original bases (False).
        showLegend: prepend the "symbol = name" legend, one entry per line.
        '''
        return self.__render(modified, showLegend, reverse=True)

# Usage
### 1. Creating a `ModifiedOligo` object
A `ModifiedOligo` object has two attributes: a DNA sequence and a dictionary mapping each modified base to its position(s).

The DNA sequence is wrapped in a Biopython `Bio.SeqRecord` object as follows:

In [0]:
# Raw oligo sequence, typed as unambiguous IUPAC DNA
dna = Seq("ATCGAGTTTACCATATCTAGAATGCAT", alphabet=IUPAC.unambiguous_dna)

# Wrap the sequence in a SeqRecord, which is what ModifiedOligo consumes
seq_rec = SeqRecord(seq=dna)

While it is not necessary to specify `IUPAC.unambiguous_dna`, doing so enforces the fact that the created `Bio.Seq` object is a DNA alphabet (and not, for example, a peptide).

Now create a dictionary of the form {(symbol, name):(positions)} to map modified bases to their positions. Note that the base numbering system is __zero-based__.

In [0]:
# Modified bases keyed by (symbol, name); each value holds the zero-based positions in the oligo.
mods = {
    ('#', 'dU'): (1, 26),
    ('^', "mC5"): (11, 24),
    ('$', "Super G"): (19,),  # a singleton tuple MUST end with a comma
}

Create ModifiedOligo object

In [0]:
# Combine the sequence record and the modification table into a viewable oligo
oligo = ModifiedOligo(seq_rec, mods)

### 2. Viewing
To view the `ModifiedOligo` object, use `view53()` and wrap it within a `print()` statement. Below, we view the unmodified oligo by setting `modified=False`.

In [6]:
# 5'->3' view of the original bases (no modification symbols, no legend)
print(oligo.view53(modified=False, showLegend=False))

5' [0;31mA[0m[0;34mT[0m[0;33mC[0m[0;32mG[0m[0;31mA[0m[0;32mG[0m[0;34mT[0m[0;34mT[0m[0;34mT[0m[0;31mA[0m[0;33mC[0m[0;33mC[0m[0;31mA[0m[0;34mT[0m[0;31mA[0m[0;34mT[0m[0;33mC[0m[0;34mT[0m[0;31mA[0m[0;32mG[0m[0;31mA[0m[0;31mA[0m[0;34mT[0m[0;32mG[0m[0;33mC[0m[0;31mA[0m[0;34mT[0m 3'


Notice the oligo has been padded with `5'` and `3'`.

To view the modified version, set `modified=True` (the default). Pass `showLegend=True` to add the legend.

In [7]:
# 5'->3' view with modification symbols, preceded by the symbol legend
print(oligo.view53(modified=True, showLegend=True))

[0;34m# = dU[0m
[0;33m^ = mC5[0m
[0;32m$ = Super G[0m
5' [0;31mA[0m[0;34m#[0m[0;33mC[0m[0;32mG[0m[0;31mA[0m[0;32mG[0m[0;34mT[0m[0;34mT[0m[0;34mT[0m[0;31mA[0m[0;33mC[0m[0;33m^[0m[0;31mA[0m[0;34mT[0m[0;31mA[0m[0;34mT[0m[0;33mC[0m[0;34mT[0m[0;31mA[0m[0;32m$[0m[0;31mA[0m[0;31mA[0m[0;34mT[0m[0;32mG[0m[0;33m^[0m[0;31mA[0m[0;34m#[0m 3'


Notice the symbols **#** (at positions 1 and 26), **^** (at positions 11 and 24), and **$** (at position 19) appear at the zero-based positions specified in the dictionary.


Notice also they are color-matched to the canonical base which they represent. For example, 
- **#** = T = blue
- **^** = C = yellow
- **$** = G = green

Sometimes, it is desirable to view the oligo in the 3' to 5' orientation (NOT reverse complement. Just reverse!).

In [8]:
# 3'->5' view of the original bases, preceded by the legend
print(oligo.view35(showLegend=True, modified=False))

# 3'->5' view with modification symbols and no legend
print(oligo.view35(modified=True, showLegend=False))

[0;34m# = dU[0m
[0;33m^ = mC5[0m
[0;32m$ = Super G[0m
3' [0;34mT[0m[0;31mA[0m[0;33mC[0m[0;32mG[0m[0;34mT[0m[0;31mA[0m[0;31mA[0m[0;32mG[0m[0;31mA[0m[0;34mT[0m[0;33mC[0m[0;34mT[0m[0;31mA[0m[0;34mT[0m[0;31mA[0m[0;33mC[0m[0;33mC[0m[0;31mA[0m[0;34mT[0m[0;34mT[0m[0;34mT[0m[0;32mG[0m[0;31mA[0m[0;32mG[0m[0;33mC[0m[0;34mT[0m[0;31mA[0m 5'
3' [0;34m#[0m[0;31mA[0m[0;33m^[0m[0;32mG[0m[0;34mT[0m[0;31mA[0m[0;31mA[0m[0;32m$[0m[0;31mA[0m[0;34mT[0m[0;33mC[0m[0;34mT[0m[0;31mA[0m[0;34mT[0m[0;31mA[0m[0;33m^[0m[0;33mC[0m[0;31mA[0m[0;34mT[0m[0;34mT[0m[0;34mT[0m[0;32mG[0m[0;31mA[0m[0;32mG[0m[0;33mC[0m[0;34m#[0m[0;31mA[0m 5'
