## Easy visualization of protein sequences in a Jupyter notebook

In [1]:
import numpy as np
import pandas as pd
import os
from IPython.display import display, HTML
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from Bio import SeqIO

In [2]:
# Read every record of the FASTA file into two parallel lists:
# headers[i] is the record id belonging to sequences[i].
headers = []
sequences = []
# The context manager guarantees the file handle is closed. The records are
# consumed inside the `with` block because the parser reads from the handle
# while it is being iterated.
with open('proteins.fasta') as fasta_file:
    fasta_sequences = SeqIO.parse(fasta_file, 'fasta')
    for fasta in fasta_sequences:
        sequences.append(str(fasta.seq))
        headers.append(fasta.id)

In [3]:
# Write your favorite color coding scheme here.
# Every scheme maps a one-letter amino-acid code to a CSS color string.

# Shared palette: one color per residue class.
small, hydrophobe, aromat = '#f4aa42', '#66b77b', '#30f2f2'
polar, negative, positive = '#ff6df0', '#8682ff', '#ff4949'

# Each scheme is assembled from groups of residues that share a color.
# The groups are listed in the order the keys should appear in the dict.
aa_color_salma = {
    **dict.fromkeys('RKH', positive),
    **dict.fromkeys('ED', negative),
    **dict.fromkeys('FWY', aromat),
    **dict.fromkeys('ILMV', hydrophobe),
}

aa_color_lesk = {
    **dict.fromkeys('GAST', small),
    **dict.fromkeys('CVILPM', hydrophobe),
    **dict.fromkeys('FWY', aromat),
    **dict.fromkeys('NQH', polar),
    **dict.fromkeys('DE', negative),
    **dict.fromkeys('KR', positive),
}

# This scheme mixes the shared palette with colors of its own.
aa_color_rasmol = {
    **dict.fromkeys('DE', negative),
    **dict.fromkeys('KR', positive),
    **dict.fromkeys('FY', '#3232aa'),
    'G': '#ebebeb',
    'A': '#c8c8c8',
    'H': '#8282d2',
    **dict.fromkeys('CM', '#e6e600'),
    **dict.fromkeys('ST', '#fa9600'),
    **dict.fromkeys('NQ', '#00dcdc'),
    **dict.fromkeys('LVI', hydrophobe),
    'W': '#b45ab4',
    'P': '#dc9682',
}

In [4]:
#write out the sequences in html format according to the chosen coloring pattern

def display_seq(color_code):
    """Display all loaded sequences as color-coded HTML in the notebook.

    A legend for the selected scheme is emitted first. Then, for every
    sequence, its header is written followed by the upper-cased residues,
    80 per line, each on the background color the scheme assigns to it.
    Residues the scheme does not list get a white background.

    Parameters
    ----------
    color_code : str
        'Lesk' or 'RasMol' select those schemes; any other value
        (e.g. 'Salma') selects the Salma scheme.

    Returns
    -------
    None
        The HTML is shown via ``display``; nothing is returned.

    Notes
    -----
    Reads the module-level ``headers`` and ``sequences`` lists and the
    ``aa_color_*`` dictionaries. The dictionaries are only read, never
    modified, so unknown residue symbols cannot leak into a later legend.
    """
    from html import escape  # local import keeps this cell self-contained

    span = '<span style="background-color: %s">%s</span>'
    fallback = '#ffffff'  # background for residues missing from the scheme

    # Collect fragments and join once instead of repeated string +=.
    parts = ['<p style="font-family:monospace;font-size:12pt">']

    if color_code == 'Lesk':
        aa_color = aa_color_lesk
        legend = [
            ('small', small),
            ('polar', polar),
            ('aromatic', aromat),
            ('positive', positive),
            ('negative', negative),
            ('hydrophobe', hydrophobe),
        ]
        parts.extend(span % (color, label) + '<br />' for label, color in legend)
        parts.append('<br />')

    elif color_code == 'RasMol':
        aa_color = aa_color_rasmol
        # This legend is per residue: one colored letter per dictionary key.
        parts.extend(span % (color, key) for key, color in aa_color_rasmol.items())
        parts.append('<br /><br />')

    else:
        aa_color = aa_color_salma
        legend = [
            ('aromatic', aromat),
            ('positive', positive),
            ('negative', negative),
            ('hydrophobe', hydrophobe),
        ]
        parts.extend(span % (color, label) + '<br />' for label, color in legend)
        parts.append('<br />')

    for header, seq in zip(headers, sequences):
        # Headers come straight from the input file; escape them so that
        # characters such as '<' or '&' cannot break the markup.
        parts.append('%s<br />' % escape(header))

        for j, amino in enumerate(seq):
            aa = amino.upper()
            # Wrap every 80 residues. This also fires at j == 0, which
            # leaves a blank line between the header and the residues.
            if j % 80 == 0:
                parts.append('<br />')
            # .get() instead of .setdefault(): looking up an unknown symbol
            # must not insert it into the shared scheme dictionary.
            parts.append(span % (aa_color.get(aa, fallback), escape(aa)))

        parts.append('<br /><br />')

    parts.append('</p>')

    display(HTML(''.join(parts)))

In [5]:
# Build a dropdown offering the three scheme names and call display_seq with
# the selected one; the first entry ('Salma') is the initial selection.
interact(display_seq, color_code=['Salma','Lesk', 'RasMol'])

interactive(children=(Dropdown(description='color_code', options=('Salma', 'Lesk', 'RasMol'), value='Salma'), …

<function __main__.display_seq(color_code)>