In [7]:
%matplotlib inline
import matplotlib
matplotlib.rcParams['figure.figsize'] = (10, 6)
import matplotlib.pyplot as plt
import base64
import os
import pandas as pd
from Bio import motifs
import subprocess
import shutil
import cPickle as pickle
from Bio.motifs.jaspar.db import JASPAR5
from IPython.display import display, Image, HTML
# Taxonomy lookup table downloaded from the JASPAR site. The file has no header
# row; the first column becomes the index ('ID') and the remaining column is
# named 'SpecieName'. Later cells look names up with
# specie_mapping.loc[int(tax_id), 'SpecieName'].
specie_mapping = pd.read_table('http://jaspar.genereg.net/html/DOWNLOAD/database/TAX.txt', 
                               header=None, index_col=0,
                              names= ['ID','SpecieName'])

In [3]:
def job_connect(host='vm5.cmmt.ubc.ca', name='JASPAR_2016',
                user='jaspar_r', password=''):
    """Create a JASPAR5 database handle.

    Called with no arguments this behaves exactly as before; the keyword
    arguments allow pointing the notebook at a different server or release
    without editing the function.

    Parameters
    ----------
    host : str
        Host name of the JASPAR database server.
    name : str
        Name of the database to open.
    user : str
        Account used to connect.
    password : str
        Password for ``user``.

    Returns
    -------
    JASPAR5
        The object returned by ``JASPAR5(host=..., name=..., user=..., password=...)``.
    """
    return JASPAR5(host=host, name=name, user=user, password=password)

In [5]:
# Open the JASPAR database handle using the settings in job_connect().
jdb =  job_connect()
# fetch_motifs() is called with no filters; the result is kept in all_motifs
# and pickled to disk by a later cell.
all_motifs = jdb.fetch_motifs()

In [8]:
# Cache the fetched motifs on disk so later cells can reload them without
# querying the database again. pickle.dump() returns None, so its result is
# not assigned; the `with` block guarantees the file is flushed and closed.
with open('all_motifs.pickle', 'wb') as pickle_file:
    pickle.dump(all_motifs, pickle_file)

In [12]:
def _motif_logo_data_uri(motif):
    """Render one motif's logo to '<name>-<species>.png' and return it as a data URI."""
    species = '-'.join(specie_mapping.loc[int(s), 'SpecieName'] for s in motif.species)
    label = '{}-{}'.format(motif.name, species)
    logo_fn = label + '.png'
    # Use the bare label as the title (no leftover '.' from the file extension).
    motif.weblogo(logo_fn, logo_title=label)
    with open(logo_fn, 'rb') as image_file:
        # b64encode returns bytes on Python 3; decode so the concatenation
        # below works on both Python 2 and Python 3.
        encoded = base64.b64encode(image_file.read()).decode('ascii')
    return 'data:image/png;base64,' + encoded


def display_motifs(motifs=()):
    """Display sequence logos for the given motifs, two per table row.

    For every motif a PNG logo named '<name>-<species>.png' is written to the
    current directory via ``motif.weblogo`` and then embedded in an HTML table
    as a base64 data URI. Motifs are shown in consecutive pairs; if the number
    of motifs is odd, the last one is shown alone in its own row. An empty
    input displays nothing.

    Parameters
    ----------
    motifs : sequence of motif objects
        Each item must provide ``name``, ``species`` (taxonomy ids present in
        ``specie_mapping``) and ``weblogo``. Note that inside this function
        the parameter name shadows the imported ``Bio.motifs`` module.

    Returns
    -------
    None
    """
    motif_list = list(motifs)
    for start in range(0, len(motif_list), 2):
        cells = ''.join(
            '<td><img src="%s" style="width: 400px;"/></td>\n' % _motif_logo_data_uri(m)
            for m in motif_list[start:start + 2]
        )
        display(HTML('<center><table>\n<tr>\n%s</tr></table></center>' % cells))

In [13]:
def write_motifs(in_motifs):
    """Write each motif to '<name>-<species>.meme' in the current directory.

    Every motif is first written in 'pfm' format with ``motifs.write`` and then
    converted by the external ``jaspar2meme`` program, whose standard output is
    captured into the ``.meme`` file. Spaces in the species part of the file
    name are replaced with underscores.

    The intermediate ``.pfm`` file is created in a private temporary directory,
    so ``jaspar2meme -pfm <dir>`` only sees that single motif (unrelated
    ``.pfm`` files in the working directory are neither converted nor mixed
    into the output). The temporary directory is removed even if writing or
    conversion fails.

    Parameters
    ----------
    in_motifs : iterable of motif objects
        Each item must provide ``name`` and ``species`` (taxonomy ids present
        in ``specie_mapping``) and be writable by ``motifs.write``.

    Returns
    -------
    None
    """
    import tempfile  # local import: not part of the notebook's import cell

    for motif in in_motifs:
        specie = '-'.join(specie_mapping.loc[int(s), 'SpecieName'] for s in motif.species)
        base_name = '{}-{}'.format(motif.name, specie.replace(' ', '_'))
        out_fn = base_name + '.meme'
        work_dir = tempfile.mkdtemp()
        try:
            with open(os.path.join(work_dir, base_name + '.pfm'), 'w') as pfm_file:
                pfm_file.write(motifs.write([motif], 'pfm'))
            # The exit status is ignored, as in the original call.
            with open(out_fn, 'w') as meme_file:
                subprocess.call(['jaspar2meme', '-pfm', work_dir],
                                cwd=os.getcwd(), stdout=meme_file)
        finally:
            shutil.rmtree(work_dir)

In [None]:
#ctcf_motifs = jdb.fetch_motifs_by_name('CTCF')
#display_motifs(ctcf_motifs)
#write_motifs(ctcf_motifs)

In [14]:
# Reload the motifs cached in 'all_motifs.pickle'.
# NOTE: unpickling can execute arbitrary code; only load a pickle file that
# this notebook (or another trusted source) produced.
with open('all_motifs.pickle', 'rb') as pickle_file:
    all_motifs_load = pickle.load(pickle_file)

# Distinct motif names found in the loaded collection.
set_tf = {motif_m.name for motif_m in all_motifs_load}

In [15]:
# Group the loaded motifs by name: {motif name: [motifs carrying that name]}.
# Every name in set_tf gets an entry; set_tf was built from these same motifs,
# so each motif's name is already a key.
tf_wise_motifs = {tf_name: [] for tf_name in set_tf}
for motif_m in all_motifs_load:
    tf_wise_motifs[motif_m.name].append(motif_m)

{'gt',
 'dl(var.2)',
 'MNT',
 'HOXC10',
 'PHDP',
 'GMEB2',
 'Foxa2',
 'snpc-4',
 'OTX2',
 'DAL82',
 'DAL80',
 'DAL81',
 'LHX2',
 'Prrx2',
 'REB1',
 'WRKY18',
 'LHX6',
 'dsc-1',
 'GSM1',
 'LHX9',
 'DOF2.5',
 'DOF2.4',
 'SPT',
 'EN2',
 'YAP3',
 'SIP4',
 'NEUROD2',
 'Klf4',
 'YPR015C',
 'Klf1',
 'AGL3',
 'Mitf',
 'Hes1',
 'Hes2',
 'PDR1',
 'SP8',
 'Arntl',
 'WRKY15',
 'pal-1',
 'GSC2',
 'SP1',
 'SP2',
 'SP3',
 'SP4',
 'abi4',
 'EHF',
 'ERF105',
 'Mlxip',
 'GATA8',
 'GATA9',
 'Nr1h3::Rxra',
 'GATA5',
 'GATA2',
 'GATA3',
 'ZMS1',
 'PHYPADRAFT_173530',
 'ERF039',
 'Ascl2',
 'che-1',
 'YRM1',
 'eve',
 'ERF',
 'PHD1',
 'OdsH',
 'WRKY57',
 'unc-62',
 'ACE2',
 'CTCF',
 'WRKY38',
 'SUT2',
 'Bgb::run',
 'HINFP',
 'POU6F2',
 'WRKY30',
 'EMX2',
 'bZIP68',
 'hth',
 'ATF7',
 'CG4328-RA',
 'PHYPADRAFT_140773',
 'TP73',
 'MYB4',
 'EMX1',
 'odd',
 'Atf1',
 'GIS1',
 'Ddit3::Cebpa',
 'RXRA::VDR',
 'BZR2',
 'TCF3',
 'BZR1',
 'Su(H)',
 'GSX1',
 'Six3',
 'DREB2C',
 'ZIC4',
 'ATHB-12',
 'VSX1',
 'ZIC3',
 'NFIL

In [16]:
# Pick the motif at index 1 of the reloaded list for a closer look.
m1 = all_motifs_load[1]


In [17]:
# Show the consensus sequence of the selected motif.
m1.consensus

Seq('TGCGTG', IUPACUnambiguousDNA())

In [19]:
# Show the degenerate consensus of the selected motif (the recorded output uses the ambiguous DNA alphabet).
m1.degenerate_consensus

Seq('YGCGTG', IUPACAmbiguousDNA())