In [39]:
import pandas as pd

import plotly.graph_objects as go
# Example input for the plot below: transcript and exon data for human BRCA2.
# NOTE(review): get_exon_transcript_information is defined in a later cell of
# this notebook, so that cell has to be executed before this one.
transcripts, exons = get_exon_transcript_information(
    symbol="BRCA2",
    species="homo_sapiens",
)

# No primers in this example. The plotting cell reads primers[name][0] and
# primers[name][1] as the start and end x-coordinates of each primer.
primers = dict()


In [40]:
# Draw one horizontal track per transcript: a thin line spanning the
# transcript and one filled rectangle per exon, coloured by transcript.
# Reads `transcripts` (transcript id -> list of exon ids), `exons`
# (exon id -> (start, end)) and `primers` (name -> (start, end)).

# palette used for the transcripts
lCol = ['#e41a1c', '#377eb8', '#4daf4a', '#984ea3', '#ff7f00', '#ffff33', '#a65628', '#f781bf']

# Assign colours by cycling through the palette. A plain zip() stops at the
# shorter input, so with more transcripts than palette entries the lookup
# colors[transcript] below would fail with a KeyError.
colors = {transcript: lCol[idx % len(lCol)] for idx, transcript in enumerate(transcripts)}

# define spacing between exon boxes
box_spacing = 0.5

# create the figure
fig = go.Figure()

# loop over transcripts; transcript number i occupies the band y in [i, i + 0.5]
for i, transcript in enumerate(transcripts):
    exon_ids = transcripts[transcript]
    if not exon_ids:
        # nothing to draw, and min()/max() below would raise on an empty sequence
        continue

    # transcript line through the middle of the band, from first start to last end
    fig.add_shape(type='line',
                  x0=min(exons[e][0] for e in exon_ids),
                  y0=i + 0.25,
                  x1=max(exons[e][1] for e in exon_ids),
                  y1=i + 0.25,
                  line=dict(color='black', width=2))

    # one filled box per exon of this transcript
    for exon in exon_ids:
        x0 = exons[exon][0]
        x1 = exons[exon][1]
        fig.add_shape(type='rect',
                      x0=x0,
                      y0=i,
                      x1=x1,
                      y1=i + 0.5,
                      fillcolor=colors[transcript],
                      opacity=1)

# set x-axis range from the smallest exon start to the largest exon end;
# the right-hand padding grows with the number of transcripts
exon_starts = [exons[e][0] for t in transcripts.values() for e in t]
exon_ends = [exons[e][1] for t in transcripts.values() for e in t]
if exon_starts:
    x_range = [min(exon_starts) - 1,
               max(exon_ends) + len(transcripts) * (box_spacing + 1)]
    fig.update_xaxes(range=x_range)
else:
    # no exons at all: leave the axis on plotly's automatic range
    x_range = None

# Add primers: x is in data coordinates, y is a fraction of the plot height
# (yref='paper'), so primers stay at the same height whatever the y-range is
for primer in primers:
    fig.add_shape(type='line',
                  x0=primers[primer][0],
                  y0=0.2,
                  x1=primers[primer][1],
                  y1=0.2,
                  line=dict(color='black', width=3),
                  yref='paper')
    fig.add_annotation(text=primer,
                       x=(primers[primer][0]+primers[primer][1])/2,
                       y=0.1,
                       showarrow=False,
                       yref='paper')

# set layout properties
fig.update_layout(
    title='Exon Plot',
    xaxis_title='Position',
    yaxis_title='Transcript',
    showlegend=False,
    height=500,
    width=800,
    template="plotly_white",
)

# label each track with its transcript id, centred on the track (i + 0.25);
# the range starts at -1 to leave room below the first track
fig.update_layout(yaxis=dict(tickmode='array',
                             tickvals=[pos + 0.25 for pos in range(len(transcripts))],
                             ticktext=list(transcripts.keys()),
                             range=[-1, len(transcripts)-0.4]))
# show the figure
fig.show()

In [32]:
from ExonSurfer.ensembl import ensembl


def get_exon_transcript_information(species = None, symbol=None,release=108):
    """
    Look up a gene by symbol and return its transcripts dictionary.

    NOTE(review): a function with the same name is defined again in a later
    cell; whichever definition is executed last is the one in effect.

    Args:
        species (string): Species name
        symbol (string): Gene symbol
        release (int): Ensembl release
    Returns:
        The value produced by ensembl.get_transcripts_dict for the gene,
        called with exclude_noncoding=True (a single object, returned as is).
    """
    # build the Ensembl data handle for the requested release and species
    ensembl_data = ensembl.create_ensembl_data(release, species)

    # resolve the gene symbol to a gene object
    gene = ensembl.get_gene_by_symbol(symbol, ensembl_data)

    return ensembl.get_transcripts_dict(gene, exclude_noncoding=True)

In [36]:
def get_transcripts_exons_dict(gene):
    """
    Build the transcript and exon lookup tables for a gene.

    Only the transcripts kept by ensembl.get_coding_transcript are included.

    Args:
        gene (gene object): Gene object
    Returns:
        transcript_exons (dict): transcript ID -> result of
            ensembl.get_exons_from_transcript for that transcript
        exon_positions (dict): exon ID -> (start, end) tuple, collected from
            the exons of every included transcript
    """
    transcript_exons = {}
    exon_positions = {}

    # every transcript of the gene, then the filtered subset to iterate over
    candidates = ensembl.get_transcript_from_gene(gene)
    selected = ensembl.get_coding_transcript(candidates)

    for transcript in selected:
        transcript_exons[transcript.id] = ensembl.get_exons_from_transcript(transcript)

        # an exon shared by several transcripts is simply written again
        exon_positions.update(
            {exon.id: (exon.start, exon.end) for exon in transcript.exons}
        )

    return transcript_exons, exon_positions

def get_exon_transcript_information(species = None, symbol=None,release=108):
    """
    Fetch the transcript -> exons mapping and the exon positions of a gene.

    The gene is looked up by symbol in the Ensembl data for the given release
    and species, then passed to get_transcripts_exons_dict.

    Args:
        species (string): Species name
        symbol (string): Gene symbol
        release (int): Ensembl release
    Returns:
        transcripts (dictionary): Dictionary with the exons of each transcript
        exons (dictionary): Dictionary with exons positions
    """
    # build the Ensembl data handle for the requested release and species
    ensembl_data = ensembl.create_ensembl_data(release, species)

    # resolve the gene symbol to a gene object
    gene = ensembl.get_gene_by_symbol(symbol, ensembl_data)

    # the helper yields exactly two dictionaries; hand both back as a tuple
    transcript_exons, exon_positions = get_transcripts_exons_dict(gene)
    return transcript_exons, exon_positions

({'ENST00000544455': ['ENSE00003856928',
   'ENSE00001484009',
   'ENSE00003666217',
   'ENSE00003659301',
   'ENSE00003739878',
   'ENSE00003747332',
   'ENSE00003749714',
   'ENSE00003714754',
   'ENSE00003731761',
   'ENSE00000939167',
   'ENSE00000939168',
   'ENSE00000939169',
   'ENSE00000939171',
   'ENSE00000939173',
   'ENSE00000939174',
   'ENSE00000939175',
   'ENSE00001394102',
   'ENSE00000939177',
   'ENSE00000939178',
   'ENSE00000939180',
   'ENSE00003461148',
   'ENSE00000939183',
   'ENSE00000939185',
   'ENSE00000939187',
   'ENSE00000939189',
   'ENSE00003560258',
   'ENSE00003717596'],
  'ENST00000530893': ['ENSE00002143308',
   'ENSE00003339705',
   'ENSE00003563161',
   'ENSE00003857483',
   'ENSE00003739878',
   'ENSE00003747332',
   'ENSE00003749714',
   'ENSE00003714754',
   'ENSE00003731761',
   'ENSE00002163180'],
  'ENST00000380152': ['ENSE00001184784',
   'ENSE00001484009',
   'ENSE00003666217',
   'ENSE00003659301',
   'ENSE00003739878',
   'ENSE000037473