In [1]:
import pandas as pd
import altair as alt

In [2]:
# Read the per-residue summary table; the 'pk' column becomes the row index.
summary_path = 'sc2_summary.csv'
sc2data = pd.read_csv(summary_path, index_col='pk')
# Preview the first rows as the cell's rich output.
sc2data.head()

Unnamed: 0_level_0,name,position,residue,score,assignment,conservation,conservation_highlight,variants,shannon_entropy,structure,chain,contact_number,propensity_score,discotope_score
pk,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,E envelope protein,1,M,0.245,.,,,0,0.0,,,,,
2,E envelope protein,2,Y,0.304,.,,,0,0.0,,,,,
3,E envelope protein,3,S,0.377,.,,,0,0.0,,,,,
4,E envelope protein,4,F,0.441,.,,,0,0.0,,,,,
5,E envelope protein,5,V,0.494,.,,,0,0.0,,,,,


In [5]:
# Plot settings shared by every protein figure.
TRACK_HEIGHT = 60.0          # pixel height of the three main tracks; SE/variants use half of it
WIDTH_PER_RESIDUE = 1.3      # pixel width allotted to each sequence position
TICK_STEP = 20               # spacing (in positions) of the x-axis tick values / grid lines
BEPIPRED_THRESHOLD = 0.55    # bepipred2 scores above this are redrawn in dark green
DISCOTOPE_THRESHOLD = -2.5   # discotope2 scores are shifted so that this value sits at zero
CONSERVATION_FILL = 0.3      # stand-in for missing conservation; equals the y-axis minimum


def _position_x(max_x, title=None):
    """Return the 'position:O' x encoding used by the bar tracks.

    Args:
        max_x: Largest position of the protein; tick values run from 0 up to
            (but not including) this value in steps of TICK_STEP.
        title: When None, ticks and labels are hidden and the axis title is
            empty (upper tracks). When given, ticks and labels keep their
            defaults and ``title`` is used as the axis title (bottom track).

    Returns:
        An ``alt.X`` encoding with grid lines enabled.
    """
    tick_values = list(range(0, max_x, TICK_STEP))
    if title is None:
        axis = alt.Axis(grid=True, values=tick_values, ticks=False, labels=False, title='')
    else:
        axis = alt.Axis(grid=True, values=tick_values, title=title)
    return alt.X('position:O', axis=axis)


def protein_tracks(protein_df, protein):
    """Build the five vertically stacked per-residue tracks for one protein.

    Tracks, top to bottom: bepipred2 score (with threshold rule and highlight),
    conservation, discotope2 score, Shannon entropy ('SE') and variant count
    ('var'). Only the bottom track shows x-axis labels; its axis title is the
    protein name.

    Args:
        protein_df: Rows of the summary table for a single protein. The columns
            read are 'position', 'score', 'conservation',
            'conservation_highlight', 'discotope_score', 'shannon_entropy' and
            'variants'. The frame is not modified.
        protein: Protein name, used as the x-axis title of the bottom track.

    Returns:
        The concatenated Altair chart (not yet displayed).
    """
    # Plain Python int so range() and the chart width do not carry numpy scalars.
    max_x = int(protein_df['position'].max())
    width_x = WIDTH_PER_RESIDUE * max_x

    # Derive the plotting columns on a NEW frame. The original code assigned into a
    # boolean-mask slice of sc2data with .loc, which triggers SettingWithCopyWarning.
    tracks_df = protein_df.assign(
        conservation=lambda d: d['conservation'].fillna(CONSERVATION_FILL),
        # shift so that DISCOTOPE_THRESHOLD maps to zero
        adjusted_discotope_score=lambda d: d['discotope_score'] - DISCOTOPE_THRESHOLD,
    )

    # --- bepipred2 linear epitopes -------------------------------------------
    bepipred_bars = alt.Chart(tracks_df).mark_bar(clip=True, size=1, color="lightgrey").encode(
        x=_position_x(max_x),
        y=alt.Y('score:Q',
                axis=alt.Axis(title='bepipred2', grid=True),
                scale=alt.Scale(domain=[0.3, 0.7])),
    )
    # Redraw only the part of each bar that lies above the threshold.
    bepipred_highlight = alt.Chart(tracks_df).mark_bar(clip=True, size=1, color="darkgreen").encode(
        x=alt.X('position:O'),
        y='baseline:Q',
        y2=alt.Y2('score'),
    ).transform_filter(
        alt.datum.score > BEPIPRED_THRESHOLD
    ).transform_calculate("baseline", str(BEPIPRED_THRESHOLD))
    threshold_rule = alt.Chart(
        pd.DataFrame([{"threshold": BEPIPRED_THRESHOLD}])
    ).mark_rule().encode(
        y='threshold:Q'
    )
    bepipred_chart = (bepipred_bars + bepipred_highlight + threshold_rule).properties(
        height=TRACK_HEIGHT, width=width_x)

    # --- conservation vs HCoVs -----------------------------------------------
    conservation_chart = alt.Chart(tracks_df, height=TRACK_HEIGHT, width=width_x).mark_bar(size=1).encode(
        _position_x(max_x),
        alt.Y('conservation:Q',
              axis=alt.Axis(title='epitope cons', grid=True, tickMinStep=0.20, tickCount=3),
              scale=alt.Scale(domain=[0.30, 1.00])),
        color=alt.condition(
            alt.datum.conservation_highlight == 'struct_epitope',
            alt.value('gray'),        # rows flagged 'struct_epitope'
            alt.value('steelblue'),   # everything else
        ),
    )

    # --- discotope2 structural epitopes --------------------------------------
    discotope_chart = alt.Chart(tracks_df, height=TRACK_HEIGHT, width=width_x).mark_bar(
        clip=True, size=1
    ).encode(
        _position_x(max_x),
        alt.Y('adjusted_discotope_score:Q',
              axis=alt.Axis(tickMinStep=1.0, tickCount=3, grid=True,
                            format=".1f", title='discotope2'),
              scale=alt.Scale(domain=[-2.00, 5.00])),
        color=alt.condition(
            alt.datum.adjusted_discotope_score > 0.0,
            alt.value('black'),   # above the (shifted) threshold
            alt.value('grey'),
        ),
    )

    # The two GISAID tracks are half-height. They use the SAME width as the tracks
    # above: the original passed width=max_x here, which gave these tracks a
    # different pixel step per residue, so bars and grid lines did not line up
    # with the upper tracks.
    half_height = TRACK_HEIGHT / 2

    # --- GISAID shannon entropy ----------------------------------------------
    se_chart = alt.Chart(tracks_df, height=half_height, width=width_x).mark_bar(
        size=1, color='magenta'
    ).encode(
        _position_x(max_x),
        alt.Y('shannon_entropy:Q',
              axis=alt.Axis(grid=True, tickCount=2, format=".1f", title='SE'),
              scale=alt.Scale(domain=[0, 1])),
    )

    # --- GISAID variants (bottom track: carries the visible x axis) -----------
    variants_chart = alt.Chart(tracks_df, height=half_height, width=width_x).mark_bar(
        size=1, color='purple'
    ).encode(
        _position_x(max_x, title=protein),
        alt.Y('variants:Q',
              axis=alt.Axis(grid=True, tickCount=2, title='var'),
              scale=alt.Scale(domain=[0, 4])),
    )

    return alt.vconcat(
        bepipred_chart,
        conservation_chart,
        discotope_chart,
        se_chart,
        variants_chart,
        spacing=1,
    ).resolve_scale(x='shared')


# One figure per protein, in order of first appearance in the table.
# groupby(sort=False) keeps that order, walks the frame once instead of re-masking
# it per protein, and skips rows whose name is missing.
for protein, protein_df in sc2data.groupby('name', sort=False):
    protein_tracks(protein_df, protein).display()