## REVEL Heatmap

In [129]:
import altair as alt
import itertools
import pandas as pd
import numpy as np
import yaml
import os
import math

# Lift Altair's row cap so the whole table can be handed to a chart.
alt.data_transformers.disable_max_rows()

from vega_datasets import data

# Read the notebook settings; the entries used below are input file paths.
with open('config.yaml') as config_file:
    config = yaml.safe_load(config_file)

# Tab-separated REVEL table located through the 'revel_duplicate_avg' setting.
revel_path = config['revel_duplicate_avg']
revel = pd.read_csv(revel_path, sep='\t')

revel


Unnamed: 0,AA_Substitution,chr,Pos,ref,alt,aaref,aaalt,Ensembl_transcriptid,Codon,Site,Ref_Codon,codon_site,Alt_Codon,Amino_Acid_Ref,Amino_Acid_Alt,Mutation_type,REVEL
0,A109D,22,23793652,C,A,A,D,ENST00000417137;ENST00000344921;ENST0000026312...,109,326,GCT,2,GAT,A,D,M,0.675
1,A109G,22,23793652,C,G,A,G,ENST00000417137;ENST00000344921;ENST0000026312...,109,326,GCT,2,GGT,A,G,M,0.554
2,A109P,22,23793651,G,C,A,P,ENST00000417137;ENST00000344921;ENST0000026312...,109,325,GCT,1,CCT,A,P,M,0.697
3,A109S,22,23793651,G,T,A,S,ENST00000417137;ENST00000344921;ENST0000026312...,109,325,GCT,1,TCT,A,S,M,0.549
4,A109T,22,23793651,G,A,A,T,ENST00000417137;ENST00000344921;ENST0000026312...,109,325,GCT,1,ACT,A,T,M,0.586
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2269,Y81D,22,23793567,T,G,Y,D,ENST00000417137;ENST00000344921;ENST0000026312...,81,241,TAC,1,GAC,Y,D,M,0.602
2270,Y81F,22,23793568,A,T,Y,F,ENST00000417137;ENST00000344921;ENST0000026312...,81,242,TAC,2,TTC,Y,F,M,0.490
2271,Y81H,22,23793567,T,C,Y,H,ENST00000417137;ENST00000344921;ENST0000026312...,81,241,TAC,1,CAC,Y,H,M,0.514
2272,Y81N,22,23793567,T,A,Y,N,ENST00000417137;ENST00000344921;ENST0000026312...,81,241,TAC,1,AAC,Y,N,M,0.559


In [130]:
# Source column -> display name for the columns the heatmap needs.
revel_to_keep = {
    'REVEL': 'REVEL',
    'Codon': 'Residue',
    'Amino_Acid_Ref': 'Wildtype_Amino_Acid',
    'AA_Substitution': 'Substitution',
    'Amino_Acid_Alt': 'Amino Acid',
}

# Keep only those columns, in the order listed, then apply the display names.
revel_selected = revel[list(revel_to_keep)]
revel_final = revel_selected.rename(columns=revel_to_keep)

revel_final



Unnamed: 0,REVEL,Residue,Wildtype_Amino_Acid,Substitution,Amino Acid
0,0.675,109,A,A109D,D
1,0.554,109,A,A109G,G
2,0.697,109,A,A109P,P
3,0.549,109,A,A109S,S
4,0.586,109,A,A109T,T
...,...,...,...,...,...
2269,0.602,81,Y,Y81D,D
2270,0.490,81,Y,Y81F,F
2271,0.514,81,Y,Y81H,H
2272,0.559,81,Y,Y81N,N


In [131]:
# Colour-scale limits that DMS_heatmaps reads when it is called.
minimum_domain, maximum_domain = 0, 1

In [132]:
def DMS_heatmaps(data, metric, domain=None, domain_mid=0.5, height=250, selector=None):
    """Build an interactive residue-by-amino-acid heatmap coloured by `metric`.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide a ``Residue`` column (x axis), an ``Amino Acid`` column
        (y axis) and the `metric` column. Every other column is shown in
        the tooltip.
    metric : str
        Column in `data` with values to color by. With underscores replaced
        by spaces it is also used as the chart title.
    domain : sequence of two numbers, optional
        Colour-scale limits. When omitted, the notebook-level
        ``minimum_domain`` and ``maximum_domain`` are read at call time.
    domain_mid : number, optional
        Value placed at the middle of the diverging colour scheme.
    height : int, optional
        Chart height in pixels.
    selector : altair selection parameter, optional
        Selection that switches on the black cell outline. When omitted,
        the notebook-level ``cell_selector`` is read at call time.

    Returns
    -------
    altair.Chart
    """
    # Fall back to the notebook globals so existing two-argument calls are unchanged.
    if domain is None:
        domain = [minimum_domain, maximum_domain]
    if selector is None:
        selector = cell_selector

    aa_order = ['A','I','L','M','V','C','G','P','S','T','N','Q','K','R','H','D','E','F','W','Y']
    # The x-axis field (and 'wildtype_code', when present) is left out of the tooltip.
    tooltips = [c for c in data.columns if c not in
                {'Residue', 'wildtype_code'}]

    # everything is site v mutant
    base = (alt.Chart(data)
            .encode(x=alt.X('Residue:O',
                             axis=alt.Axis(values=[0,50,100,150,200,250,300,350])),
                    y=alt.Y('Amino Acid:O',
                            sort=aa_order,
                            axis=alt.Axis(labelFontSize=12,
                                          titleFontSize=15))
                   )
           )
    heatmap = (base
               .mark_rect()
               .encode(color=alt.Color(metric).scale(scheme="redblue", domain=list(domain),
                       domainMid=domain_mid, clamp=True, reverse=True),
                       stroke=alt.value('black'),
                       # The outline is only visible for cells matched by the selection.
                       strokeWidth=alt.condition(selector,
                                                 alt.value(0.5),
                                                 alt.value(0)),
                       tooltip=tooltips
                      )
               .properties(
                width=1000)
              )

    return (heatmap
            .interactive()
            .add_params(selector)  # mouse over highlighting
            .properties(height=height, title=' '.join(metric.split('_'))))


In [133]:
# Hover selection that DMS_heatmaps uses to outline the cell under the pointer.
# `empty=False` means nothing is outlined while no cell is hovered; it replaces the
# string form 'none', which Altair 5 accepts only with a deprecation warning.
cell_selector = alt.selection_point(on='mouseover',
                                     empty=False)


revel_heatmap = DMS_heatmaps(revel_final, 'REVEL')

revel_heatmap

  stacklevel=1,


In [134]:
# Destination of the interactive REVEL heatmap.
# Note: this rebinds `config`, replacing the settings loaded from config.yaml.
config = dict(
    interactive_heatmap='./plots/computational_predictor_heatmaps/REVEL_v1.3_SMARCB1_heatmap.html'
)

# Report the destination before writing.
print(f"Saving chart to {config['interactive_heatmap']}")

# Create the output folder if needed, then write the chart.
output_dir = os.path.dirname(config['interactive_heatmap'])
os.makedirs(output_dir, exist_ok=True)
revel_heatmap.save(config['interactive_heatmap'])

Saving chart to ./plots/computational_predictor_heatmaps/REVEL_v1.3_SMARCB1_heatmap.html


## CADD Heatmap

In [135]:
import altair as alt
import itertools
import pandas as pd
import numpy as np
import yaml
import os
import math

# Lift Altair's row cap so the whole table can be handed to a chart.
alt.data_transformers.disable_max_rows()

from vega_datasets import data

# Read the notebook settings; the entries used below are input file paths.
with open('config.yaml') as config_file:
    config = yaml.safe_load(config_file)

# Tab-separated CADD table located through the 'cadd_duplicate_avg' setting.
cadd_path = config['cadd_duplicate_avg']
cadd = pd.read_csv(cadd_path, sep='\t')

cadd

Unnamed: 0,AA_Substitution,#Chrom,Pos,Ref,Alt,RawScore,Site,Codon,Ref_Codon,codon_site,Alt_Codon,Amino_Acid_Ref,Amino_Acid_Alt,Mutation_type,PHRED
0,A109A,22,23793653,T,A,0.155873,327,109,GCT,3,GCA,A,A,S,2.005333
1,A109D,22,23793652,C,A,4.928915,326,109,GCT,2,GAT,A,D,M,27.500000
2,A109G,22,23793652,C,G,4.050355,326,109,GCT,2,GGT,A,G,M,24.200000
3,A109P,22,23793651,G,C,5.253825,325,109,GCT,1,CCT,A,P,M,29.500000
4,A109S,22,23793651,G,T,3.796234,325,109,GCT,1,TCT,A,S,M,23.500000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2758,Y81H,22,23793567,T,C,2.818137,241,81,TAC,1,CAC,Y,H,M,20.400000
2759,Y81N,22,23793567,T,A,3.018291,241,81,TAC,1,AAC,Y,N,M,21.200000
2760,Y81S,22,23793568,A,C,2.857949,242,81,TAC,2,TCC,Y,S,M,20.600000
2761,Y81X,22,23793569,C,A,6.353594,243,81,TAC,3,TAA,Y,Stop,Stop,34.000000


In [136]:
# Source column -> display name for the columns the heatmap needs.
cadd_to_keep = {
    'PHRED': 'PHRED',
    'Codon': 'Residue',
    'Amino_Acid_Ref': 'Wildtype_Amino_Acid',
    'AA_Substitution': 'Substitution',
    'Amino_Acid_Alt': 'Amino Acid',
}

# Select those columns in the listed order and rename them in one pass.
cadd_columns = list(cadd_to_keep)
cadd_final = cadd.loc[:, cadd_columns].rename(columns=cadd_to_keep)

cadd_final


Unnamed: 0,PHRED,Residue,Wildtype_Amino_Acid,Substitution,Amino Acid
0,2.005333,109,A,A109A,A
1,27.500000,109,A,A109D,D
2,24.200000,109,A,A109G,G
3,29.500000,109,A,A109P,P
4,23.500000,109,A,A109S,S
...,...,...,...,...,...
2758,20.400000,81,Y,Y81H,H
2759,21.200000,81,Y,Y81N,N
2760,20.600000,81,Y,Y81S,S
2761,34.000000,81,Y,Y81X,Stop


In [137]:
# Colour-scale limits that DMS_heatmaps reads when it is called.
minimum_domain, maximum_domain = 0, 45

In [100]:
def DMS_heatmaps(data, metric):
    """Return an interactive heatmap of `metric` per residue and amino acid.

    Reads the notebook-level names ``minimum_domain``, ``maximum_domain``
    and ``cell_selector`` when called.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs ``Residue`` and ``Amino Acid`` columns plus the `metric` column.
    metric : str
        Column in `data` with values to color by; also the chart title once
        underscores are replaced by spaces.

    Returns
    -------
    altair.Chart
        500 px tall, 1000 px wide.
    """
    # Axes: residue position across, substituted amino acid down.
    residue_axis = alt.X('Residue:O',
                         axis=alt.Axis(values=[0,50,100,150,200,250,300,350]))
    amino_acid_axis = alt.Y('Amino Acid:O',
                            sort=['A','I','L','M','V','C','G','P','S','T',
                                  'N','Q','K','R','H','D','E','F','W','Y'],
                            axis=alt.Axis(labelFontSize=12, titleFontSize=15))

    # Reversed, clamped diverging scale centred on 20.
    fill = alt.Color(metric).scale(scheme="redblue",
                                   domain=[minimum_domain, maximum_domain],
                                   domainMid=20, clamp=True, reverse=True)

    # Black outline that is only visible on the cell matched by the selection.
    hover_outline = alt.condition(cell_selector, alt.value(0.5), alt.value(0))

    # Every column except the x-axis field is shown on hover.
    tooltip_fields = [column for column in data.columns if column != 'Residue']

    chart = alt.Chart(data).encode(x=residue_axis, y=amino_acid_axis)
    chart = chart.mark_rect().encode(color=fill,
                                     stroke=alt.value('black'),
                                     strokeWidth=hover_outline,
                                     tooltip=tooltip_fields)
    chart = chart.properties(width=1000)
    chart = chart.interactive().add_params(cell_selector)
    return chart.properties(height=500, title=metric.replace('_', ' '))


In [101]:


# Hover selection that DMS_heatmaps uses to outline the cell under the pointer.
# `empty=False` means nothing is outlined while no cell is hovered; it replaces the
# string form 'none', which Altair 5 accepts only with a deprecation warning.
cell_selector = alt.selection_point(on='mouseover',
                                     empty=False)


cadd_heatmap = DMS_heatmaps(cadd_final, 'PHRED')

cadd_heatmap

  stacklevel=1,


In [102]:
# Destination of the interactive CADD heatmap.
# Note: this rebinds `config`, replacing the settings loaded from config.yaml.
config = dict(
    interactive_heatmap='./plots/computational_predictor_heatmaps/CADD_v1.7_SMARCB1_heatmap.html'
)

# Report the destination before writing.
print(f"Saving chart to {config['interactive_heatmap']}")

# Create the output folder if needed, then write the chart.
output_dir = os.path.dirname(config['interactive_heatmap'])
os.makedirs(output_dir, exist_ok=True)
cadd_heatmap.save(config['interactive_heatmap'])

Saving chart to ./plots/computational_predictor_heatmaps/CADD_v1.7_SMARCB1_heatmap.html


In [138]:
def DMS_heatmaps(data, metric):
    """Assemble the 250 px tall interactive residue-by-amino-acid heatmap.

    The notebook-level ``minimum_domain``, ``maximum_domain`` and
    ``cell_selector`` are looked up when the function runs.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs ``Residue`` and ``Amino Acid`` columns plus the `metric` column.
    metric : str
        Column in `data` with values to color by; underscores become spaces
        in the chart title.

    Returns
    -------
    altair.Chart
    """
    amino_acids = list('AILMVCGPSTNQKRHDEFWY')
    shown_on_hover = [name for name in data.columns if name != 'Residue']
    color_scale = dict(scheme="redblue",
                       domain=[minimum_domain, maximum_domain],
                       domainMid=20,
                       clamp=True,
                       reverse=True)

    heatmap = (
        alt.Chart(data)
        .mark_rect()
        .encode(
            x=alt.X('Residue:O',
                    axis=alt.Axis(values=[0,50,100,150,200,250,300,350])),
            y=alt.Y('Amino Acid:O',
                    sort=amino_acids,
                    axis=alt.Axis(labelFontSize=12, titleFontSize=15)),
            color=alt.Color(metric).scale(**color_scale),
            stroke=alt.value('black'),
            # Outline width is non-zero only for the cell matched by the selection.
            strokeWidth=alt.condition(cell_selector, alt.value(0.5), alt.value(0)),
            tooltip=shown_on_hover,
        )
        .properties(width=1000)
    )

    # Attach interactivity first, then register the hover selection.
    interactive_heatmap = heatmap.interactive().add_params(cell_selector)
    return interactive_heatmap.properties(height=250, title=metric.replace('_', ' '))


In [139]:


# Hover selection that DMS_heatmaps uses to outline the cell under the pointer.
# `empty=False` means nothing is outlined while no cell is hovered; it replaces the
# string form 'none', which Altair 5 accepts only with a deprecation warning.
cell_selector = alt.selection_point(on='mouseover',
                                     empty=False)


cadd_heatmap = DMS_heatmaps(cadd_final, 'PHRED')

cadd_heatmap

  stacklevel=1,


In [140]:
# Destination of the wide variant of the interactive CADD heatmap.
# Note: this rebinds `config`, replacing the settings loaded from config.yaml.
config = dict(
    interactive_heatmap='./plots/computational_predictor_heatmaps/CADD_v1.7_SMARCB1_heatmap_wide.html'
)

# Report the destination before writing.
print(f"Saving chart to {config['interactive_heatmap']}")

# Create the output folder if needed, then write the chart.
output_dir = os.path.dirname(config['interactive_heatmap'])
os.makedirs(output_dir, exist_ok=True)
cadd_heatmap.save(config['interactive_heatmap'])

Saving chart to ./plots/computational_predictor_heatmaps/CADD_v1.7_SMARCB1_heatmap_wide.html


In [103]:
import altair as alt
import itertools
import pandas as pd
import numpy as np
import yaml
import os
import math

# Lift Altair's row cap so the whole table can be handed to a chart.
alt.data_transformers.disable_max_rows()

from vega_datasets import data

# Read the notebook settings; the entries used below are input file paths.
with open('config.yaml') as config_file:
    config = yaml.safe_load(config_file)

# Tab-separated AlphaMissense table located through the 'alphamissense' setting.
alphamissense_path = config['alphamissense']
alphamissense = pd.read_csv(alphamissense_path, sep='\t')

alphamissense

Unnamed: 0,uniprot_id,protein_variant,am_pathogenicity,am_class,Residue,Amino_Acid_Alt,Mutation_type
0,Q12824,M1A,0.1541,benign,1,A,M
1,Q12824,M1C,0.1262,benign,1,C,M
2,Q12824,M1D,0.5555,ambiguous,1,D,M
3,Q12824,M1E,0.3386,benign,1,E,M
4,Q12824,M1F,0.1195,benign,1,F,M
...,...,...,...,...,...,...,...
7310,Q12824,W385R,0.9817,pathogenic,385,R,M
7311,Q12824,W385S,0.6411,pathogenic,385,S,M
7312,Q12824,W385T,0.7140,pathogenic,385,T,M
7313,Q12824,W385V,0.6735,pathogenic,385,V,M


In [104]:
# Source column -> display name for the columns the heatmap needs.
alphamissense_to_keep = {
    'am_pathogenicity': 'Score',
    'Residue': 'Residue',
    'protein_variant': 'Substitution',
    'Amino_Acid_Alt': 'Amino Acid',
}

# Keep only those columns, in the order listed, then apply the display names.
alphamissense_selected = alphamissense[list(alphamissense_to_keep)]
alphamissense_final = alphamissense_selected.rename(columns=alphamissense_to_keep)

alphamissense_final


Unnamed: 0,Score,Residue,Substitution,Amino Acid
0,0.1541,1,M1A,A
1,0.1262,1,M1C,C
2,0.5555,1,M1D,D
3,0.3386,1,M1E,E
4,0.1195,1,M1F,F
...,...,...,...,...
7310,0.9817,385,W385R,R
7311,0.6411,385,W385S,S
7312,0.7140,385,W385T,T
7313,0.6735,385,W385V,V


In [105]:
# Colour-scale limits that DMS_heatmaps reads when it is called.
minimum_domain, maximum_domain = 0, 1

In [106]:
def DMS_heatmaps(data, metric):
    """Create the interactive residue-by-amino-acid heatmap for one score column.

    ``minimum_domain``, ``maximum_domain`` and ``cell_selector`` are taken
    from the notebook namespace at call time.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs ``Residue`` and ``Amino Acid`` columns plus the `metric` column.
    metric : str
        Column in `data` with values to color by; underscores become spaces
        in the chart title.

    Returns
    -------
    altair.Chart
    """
    row_order = ['A','I','L','M','V','C','G','P','S','T',
                 'N','Q','K','R','H','D','E','F','W','Y']

    encodings = {
        # Residue position across, substituted amino acid down.
        'x': alt.X('Residue:O',
                   axis=alt.Axis(values=[0,50,100,150,200,250,300,350])),
        'y': alt.Y('Amino Acid:O',
                   sort=row_order,
                   axis=alt.Axis(labelFontSize=12, titleFontSize=15)),
        # Reversed, clamped diverging scale centred on 0.5.
        'color': alt.Color(metric).scale(scheme="redblue",
                                         domain=[minimum_domain, maximum_domain],
                                         domainMid=0.5, clamp=True, reverse=True),
        'stroke': alt.value('black'),
        # Outline width is non-zero only for the cell matched by the selection.
        'strokeWidth': alt.condition(cell_selector, alt.value(0.5), alt.value(0)),
        # Every column except the x-axis field is shown on hover.
        'tooltip': [field for field in data.columns if field != 'Residue'],
    }

    heatmap = alt.Chart(data).mark_rect().encode(**encodings).properties(width=1000)

    chart_title = metric.replace('_', ' ')
    return (heatmap
            .interactive()
            .add_params(cell_selector)
            .properties(height=250, title=chart_title))

In [107]:


# Hover selection that DMS_heatmaps uses to outline the cell under the pointer.
# `empty=False` means nothing is outlined while no cell is hovered; it replaces the
# string form 'none', which Altair 5 accepts only with a deprecation warning.
cell_selector = alt.selection_point(on='mouseover',
                                     empty=False)


alphamissense_heatmap = DMS_heatmaps(alphamissense_final, 'Score')

alphamissense_heatmap

  stacklevel=1,


In [108]:
# Destination of the interactive AlphaMissense heatmap.
# Note: this rebinds `config`, replacing the settings loaded from config.yaml.
config = dict(
    interactive_heatmap='./plots/computational_predictor_heatmaps/alphamissense_SMARCB1_heatmap.html'
)

# Report the destination before writing.
print(f"Saving chart to {config['interactive_heatmap']}")

# Create the output folder if needed, then write the chart.
output_dir = os.path.dirname(config['interactive_heatmap'])
os.makedirs(output_dir, exist_ok=True)
alphamissense_heatmap.save(config['interactive_heatmap'])

Saving chart to ./plots/computational_predictor_heatmaps/alphamissense_SMARCB1_heatmap.html


# Part 2: Plotting Residue Averaged Scores for all three Computational Predictors

## First we do it for REVEL

In [109]:
# Lift Altair's row cap so the whole table can be handed to a chart.
alt.data_transformers.disable_max_rows()

from vega_datasets import data

# Read the notebook settings; the entries used below are input file paths.
with open('config.yaml') as config_file:
    config = yaml.safe_load(config_file)

# Tab-separated per-residue REVEL averages located through the 'revel_res_avg' setting.
revel_res_avg_path = config['revel_res_avg']
revel_res_avg = pd.read_csv(revel_res_avg_path, sep='\t')

revel_res_avg

Unnamed: 0,Codon,res_avg_REVEL,rank
0,188,0.971167,1
1,293,0.937222,2
2,281,0.931800,3
3,351,0.931000,4
4,349,0.930600,5
...,...,...,...
380,72,0.288333,381
381,71,0.284500,382
382,76,0.269200,383
383,183,0.230500,384


In [110]:
minimum_domain = 0
maximum_domain = 1

# The default `domain` below is bound once, when this cell runs, from the two
# values assigned just above (0 and 1).
def DMS_heatmaps(data, metric, x_field='Codon',
                 domain=(minimum_domain, maximum_domain), domain_mid=0.5, width=800):
    """Draw a single-row heat strip of a per-residue score.

    One rectangle is drawn per value of `x_field` and coloured by `metric`
    on a reversed, clamped "redblue" scale. The chart has no y encoding,
    tooltip or selection.

    Parameters
    ----------
    data : pandas.DataFrame
        Main dataframe; must contain `x_field` and `metric`.
    metric : str
        Column in `data` with values to color by.
    x_field : str, optional
        Column used, as an ordinal field, for the x axis.
    domain : sequence of two numbers, optional
        Colour-scale limits.
    domain_mid : number, optional
        Value placed at the middle of the diverging colour scheme.
    width : int, optional
        Chart width in pixels.

    Returns
    -------
    altair.Chart
    """
    base = (alt.Chart(data)
            .encode(x=alt.X(f'{x_field}:O',
                             axis=alt.Axis(values=[0,50,100,150,200,250,300,350]))
                   )
           )
    heatmap = (base
               .mark_rect()
               .encode(color=alt.Color(metric).scale(scheme="redblue", domain=list(domain),
                       domainMid=domain_mid, clamp=True, reverse=True),
                      )
               .properties(
                width=width)
              )

    return heatmap





In [111]:
# Heat strip of the per-residue REVEL average.
REVEL_res_avg_heatmap = DMS_heatmaps(data=revel_res_avg, metric='res_avg_REVEL')

REVEL_res_avg_heatmap

In [112]:
# Specify the file path where the histogram will be saved
config = {
    'av_heatmap': 'plots/computational_predictor_heatmaps/revel_residue_average_heatmap.html'
}

# Print the file path where the chart will be saved
print(f"Saving chart to {config['av_heatmap']}")

# Ensure the directory exists
os.makedirs(os.path.dirname(config['av_heatmap']), exist_ok=True)

# Save the histogram to the specified file path
REVEL_res_avg_heatmap.save(config['av_heatmap'])

Saving chart to plots/computational_predictor_heatmaps/revel_residue_average_heatmap.html


## Next we do it for CADD

In [113]:
# Lift Altair's row cap so the whole table can be handed to a chart.
alt.data_transformers.disable_max_rows()

from vega_datasets import data

# Read the notebook settings; the entries used below are input file paths.
with open('config.yaml') as config_file:
    config = yaml.safe_load(config_file)

# Tab-separated per-residue CADD averages located through the 'cadd_res_avg' setting.
cadd_res_avg_path = config['cadd_res_avg']
cadd_res_avg = pd.read_csv(cadd_res_avg_path, sep='\t')

cadd_res_avg

Unnamed: 0,Codon,res_avg_PHRED,rank
0,31,35.250000,1
1,216,34.250000,2
2,281,33.600000,3
3,316,33.400000,4
4,373,33.133333,5
...,...,...,...
381,103,19.445143,382
382,171,19.302786,383
383,91,18.955000,384
384,79,17.228786,385


In [114]:

def DMS_heatmaps(data, metric, x_field='Codon', domain=(0, 35), domain_mid=25, width=800):
    """Draw a single-row heat strip of a per-residue score.

    One rectangle is drawn per value of `x_field` and coloured by `metric`
    on a reversed, clamped "redblue" scale. The chart has no y encoding,
    tooltip or selection.

    Parameters
    ----------
    data : pandas.DataFrame
        Main dataframe; must contain `x_field` and `metric`.
    metric : str
        Column in `data` with values to color by.
    x_field : str, optional
        Column used, as an ordinal field, for the x axis.
    domain : sequence of two numbers, optional
        Colour-scale limits.
    domain_mid : number, optional
        Value placed at the middle of the diverging colour scheme.
    width : int, optional
        Chart width in pixels.

    Returns
    -------
    altair.Chart
    """
    base = (alt.Chart(data)
            .encode(x=alt.X(f'{x_field}:O',
                             axis=alt.Axis(values=[0,50,100,150,200,250,300,350]))
                   )
           )
    heatmap = (base
               .mark_rect()
               .encode(color=alt.Color(metric).scale(scheme="redblue", domain=list(domain),
                       domainMid=domain_mid, clamp=True, reverse=True),
                      )
               .properties(
                width=width)
              )

    return heatmap





In [115]:
# Heat strip of the per-residue CADD PHRED average.
CADD_res_avg_heatmap = DMS_heatmaps(data=cadd_res_avg, metric='res_avg_PHRED')

CADD_res_avg_heatmap

In [116]:
# Specify the file path where the histogram will be saved
config = {
    'av_heatmap': 'plots/computational_predictor_heatmaps/cadd_residue_average_heatmap.html'
}

# Print the file path where the chart will be saved
print(f"Saving chart to {config['av_heatmap']}")

# Ensure the directory exists
os.makedirs(os.path.dirname(config['av_heatmap']), exist_ok=True)

# Save the histogram to the specified file path
CADD_res_avg_heatmap.save(config['av_heatmap'])

Saving chart to plots/computational_predictor_heatmaps/cadd_residue_average_heatmap.html


## Next we do it for AlphaMissense

In [117]:
# Lift Altair's row cap so the whole table can be handed to a chart.
alt.data_transformers.disable_max_rows()

from vega_datasets import data

# Read the notebook settings; the entries used below are input file paths.
with open('config.yaml') as config_file:
    config = yaml.safe_load(config_file)

# Tab-separated per-residue AlphaMissense averages located through the
# 'alphamissense_res_avg' setting.
am_res_avg_path = config['alphamissense_res_avg']
am_res_avg = pd.read_csv(am_res_avg_path, sep='\t')

am_res_avg

Unnamed: 0,Residue,res_avg_AM_score,rank
0,370,0.999874,1
1,33,0.999868,2
2,373,0.999700,3
3,41,0.999647,4
4,377,0.999632,5
...,...,...,...
380,183,0.170937,381
381,75,0.130547,382
382,74,0.117326,383
383,76,0.112784,384


In [118]:

def DMS_heatmaps(data, metric, x_field='Residue', domain=(0, 1), domain_mid=0.5, width=800):
    """Draw a single-row heat strip of a per-residue score.

    One rectangle is drawn per value of `x_field` and coloured by `metric`
    on a reversed, clamped "redblue" scale. The chart has no y encoding,
    tooltip or selection.

    Parameters
    ----------
    data : pandas.DataFrame
        Main dataframe; must contain `x_field` and `metric`.
    metric : str
        Column in `data` with values to color by.
    x_field : str, optional
        Column used, as an ordinal field, for the x axis.
    domain : sequence of two numbers, optional
        Colour-scale limits.
    domain_mid : number, optional
        Value placed at the middle of the diverging colour scheme.
    width : int, optional
        Chart width in pixels.

    Returns
    -------
    altair.Chart
    """
    base = (alt.Chart(data)
            .encode(x=alt.X(f'{x_field}:O',
                             axis=alt.Axis(values=[0,50,100,150,200,250,300,350]))
                   )
           )
    heatmap = (base
               .mark_rect()
               .encode(color=alt.Color(metric).scale(scheme="redblue", domain=list(domain),
                       domainMid=domain_mid, clamp=True, reverse=True),
                      )
               .properties(
                width=width)
              )

    return heatmap




In [119]:
# Heat strip of the per-residue AlphaMissense average.
AM_res_avg_heatmap = DMS_heatmaps(data=am_res_avg, metric='res_avg_AM_score')

AM_res_avg_heatmap

In [120]:
# Specify the file path where the histogram will be saved
config = {
    'av_heatmap': 'plots/computational_predictor_heatmaps/alphamissense_residue_average_heatmap.html'
}

# Print the file path where the chart will be saved
print(f"Saving chart to {config['av_heatmap']}")

# Ensure the directory exists
os.makedirs(os.path.dirname(config['av_heatmap']), exist_ok=True)

# Save the histogram to the specified file path
AM_res_avg_heatmap.save(config['av_heatmap'])

Saving chart to plots/computational_predictor_heatmaps/alphamissense_residue_average_heatmap.html


In [143]:
import altair as alt
import itertools
import pandas as pd
import numpy as np
import yaml
import os
import math

# Lift Altair's row cap so the whole table can be handed to a chart.
alt.data_transformers.disable_max_rows()

from vega_datasets import data

# Read the notebook settings; the entries used below are input file paths.
with open('config.yaml') as config_file:
    config = yaml.safe_load(config_file)

# Tab-separated SIFT table located through the 'SIFT_SMARCB1' setting.
sift_path = config['SIFT_SMARCB1']
sift = pd.read_csv(sift_path, sep='\t')

sift

Unnamed: 0,#Position,Ref_allele,New_allele,Transcript_id,Gene_id,Gene_name,Region,Ref_amino_acid,New_amino_acid,Position_of_amino_acid_substitution,SIFT_score,SIFT_median_sequence_info,Num_seqs_at_position,dbSNP_id
0,23787170,A,C,ENST00000263121,ENSG00000099956,SMARCB1,CDS,M,L,1,0.566,4.32,6,novel
1,23787170,A,G,ENST00000263121,ENSG00000099956,SMARCB1,CDS,M,V,1,0.055,4.32,6,rs367768260
2,23787170,A,T,ENST00000263121,ENSG00000099956,SMARCB1,CDS,M,L,1,0.566,4.32,6,novel
3,23787171,T,A,ENST00000263121,ENSG00000099956,SMARCB1,CDS,M,K,1,0.007,4.32,6,novel
4,23787171,T,C,ENST00000263121,ENSG00000099956,SMARCB1,CDS,M,T,1,0.012,4.32,6,novel
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2551,23834175,T,G,ENST00000263121,ENSG00000099956,SMARCB1,CDS,W,G,385,0.000,3.86,16,novel
2552,23834176,G,C,ENST00000263121,ENSG00000099956,SMARCB1,CDS,W,S,385,0.000,3.86,16,novel
2553,23834176,G,T,ENST00000263121,ENSG00000099956,SMARCB1,CDS,W,L,385,0.000,3.86,16,novel
2554,23834177,G,C,ENST00000263121,ENSG00000099956,SMARCB1,CDS,W,C,385,0.000,3.86,16,novel


In [146]:
# Colour-scale limits that DMS_heatmaps reads when it is called.
minimum_domain, maximum_domain = 0, 1

def DMS_heatmaps(data, metric):
    """Return an interactive position-by-amino-acid heatmap of a SIFT table.

    ``minimum_domain``, ``maximum_domain`` and ``cell_selector`` are taken
    from the notebook namespace at call time.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs ``Position_of_amino_acid_substitution`` (x axis) and
        ``New_amino_acid`` (y axis) columns plus the `metric` column.
    metric : str
        Column in `data` with values to color by; underscores become spaces
        in the chart title.

    Returns
    -------
    altair.Chart
    """
    # NOTE(review): SIFT scores near 0 are conventionally the damaging ones, whereas the
    # other predictors in this notebook score pathogenic variants high - confirm that
    # sharing the same reversed colour scale is intended.
    position_field = 'Position_of_amino_acid_substitution'
    amino_acids = list('AILMVCGPSTNQKRHDEFWY')

    # Every column except the x-axis field is shown on hover.
    hover_fields = [name for name in data.columns if name != position_field]

    x_encoding = alt.X(f'{position_field}:O',
                       axis=alt.Axis(values=[0,50,100,150,200,250,300,350]))
    y_encoding = alt.Y('New_amino_acid:O',
                       sort=amino_acids,
                       axis=alt.Axis(labelFontSize=12, titleFontSize=15))
    color_encoding = alt.Color(metric).scale(scheme="redblue",
                                             domain=[minimum_domain, maximum_domain],
                                             domainMid=0.5, clamp=True, reverse=True)

    heatmap = (
        alt.Chart(data)
        .mark_rect()
        .encode(x=x_encoding,
                y=y_encoding,
                color=color_encoding,
                stroke=alt.value('black'),
                # Outline width is non-zero only for the cell matched by the selection.
                strokeWidth=alt.condition(cell_selector, alt.value(0.5), alt.value(0)),
                tooltip=hover_fields)
        .properties(width=1000)
    )

    with_hover = heatmap.interactive().add_params(cell_selector)
    return with_hover.properties(height=250, title=metric.replace('_', ' '))

In [147]:
# Hover selection that DMS_heatmaps uses to outline the cell under the pointer.
# `empty=False` means nothing is outlined while no cell is hovered; it replaces the
# string form 'none', which Altair 5 accepts only with a deprecation warning.
cell_selector = alt.selection_point(on='mouseover',
                                     empty=False)


sift_heatmap = DMS_heatmaps(sift, 'SIFT_score')

sift_heatmap

In [124]:
# Destination of the interactive SIFT heatmap.
# Note: this rebinds `config`, replacing the settings loaded from config.yaml.
config = dict(
    interactive_heatmap='./plots/computational_predictor_heatmaps/SIFT_sift4g_SMARCB1_heatmap.html'
)

# Report the destination before writing.
print(f"Saving chart to {config['interactive_heatmap']}")

# Create the output folder if needed, then write the chart.
output_dir = os.path.dirname(config['interactive_heatmap'])
os.makedirs(output_dir, exist_ok=True)
sift_heatmap.save(config['interactive_heatmap'])

Saving chart to ./plots/computational_predictor_heatmaps/SIFT_sift4g_SMARCB1_heatmap.html


## PolyPhen-2 Input

In [125]:
import altair as alt
import itertools
import pandas as pd
import numpy as np
import yaml
import os
import math

# Lift Altair's row cap so the whole table can be handed to a chart.
alt.data_transformers.disable_max_rows()

from vega_datasets import data

# Read the notebook settings; the entries used below are input file paths.
with open('config.yaml') as f:
    config = yaml.safe_load(f)


polyphen = pd.read_csv(config['polyphen_SMARCB1'], sep='\t')


# Remove all spaces from every string cell; non-string cells pass through unchanged.
# Spaces inside a value are removed too, not just surrounding padding, and column
# names are not touched.
# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the old name is
# deprecated, so prefer `map` when this pandas version provides it.
elementwise = polyphen.map if hasattr(polyphen, 'map') else polyphen.applymap
polyphen_no_spaces = elementwise(lambda x: x.replace(' ', '') if isinstance(x, str) else x)


polyphen_no_spaces

Unnamed: 0,#o_acc,o_pos,o_aa1,o_aa2,rsid,acc,pos,aa1,aa2,prediction,pph2_prob,pph2_FPR,pph2_TPR
0,Q12824,1,M,A,?,Q12824,1,M,A,benign,0.000,1.00000,1.00000
1,Q12824,1,M,C,?,Q12824,1,M,C,benign,0.057,0.16200,0.93900
2,Q12824,1,M,D,?,Q12824,1,M,D,benign,0.001,0.85200,0.99400
3,Q12824,1,M,E,?,Q12824,1,M,E,benign,0.000,1.00000,1.00000
4,Q12824,1,M,F,?,Q12824,1,M,F,benign,0.002,0.70400,0.98700
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7310,Q12824,385,W,R,?,Q12824,385,W,R,probablydamaging,0.999,0.00574,0.13600
7311,Q12824,385,W,S,?,Q12824,385,W,S,probablydamaging,0.999,0.00574,0.13600
7312,Q12824,385,W,T,?,Q12824,385,W,T,probablydamaging,1.000,0.00026,0.00018
7313,Q12824,385,W,V,?,Q12824,385,W,V,probablydamaging,1.000,0.00026,0.00018


In [126]:
minimum_domain = 0
maximum_domain = 1

def DMS_heatmaps(data, metric):
    """Return an interactive position-by-amino-acid heatmap of a PolyPhen table.

    Column names in `data` and the `metric` name are stripped of surrounding
    whitespace before use, so padded names such as ' o_pos' or ' pph2_prob'
    and their unpadded forms are both accepted.

    ``minimum_domain``, ``maximum_domain`` and ``cell_selector`` are taken
    from the notebook namespace at call time.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs ``o_pos`` (x axis) and ``o_aa2`` (y axis) columns plus the
        `metric` column, with or without surrounding whitespace in the names.
    metric : str
        Column in `data` with values to color by; underscores become spaces
        in the chart title.

    Returns
    -------
    altair.Chart
    """
    # Headers may carry padding spaces (this notebook has addressed them as ' o_pos'
    # and ' pph2_prob'). Normalising once here keeps the x field, the tooltip filter
    # and the title consistent; previously the filter looked for 'o_pos' while the
    # axis used ' o_pos', so the position column was never dropped from the tooltip.
    data = data.rename(columns=lambda name: name.strip() if isinstance(name, str) else name)
    metric = metric.strip()

    aa_order = ['A','I','L','M','V','C','G','P','S','T','N','Q','K','R','H','D','E','F','W','Y']
    tooltips = [c for c in data.columns if c not in
                {'o_pos'}]

    # everything is site v mutant
    base = (alt.Chart(data)
            .encode(x=alt.X('o_pos:O',
                             axis=alt.Axis(values=[0,50,100,150,200,250,300,350])),
                    y=alt.Y('o_aa2:O',
                            sort=aa_order,
                            axis=alt.Axis(labelFontSize=12,
                                          titleFontSize=15))
                   )
           )
    heatmap = (base
               .mark_rect()
               .encode(color=alt.Color(metric).scale(scheme="redblue", domain=[minimum_domain, maximum_domain],
                       domainMid=0.5, clamp=True, reverse=True),
                       stroke=alt.value('black'),
                       # Outline width is non-zero only for the cell matched by the selection.
                       strokeWidth=alt.condition(cell_selector,
                                                 alt.value(0.5),
                                                 alt.value(0)),
                       tooltip=tooltips
                      )
               .properties(
                width=1000)
              )

    return (heatmap
            .interactive()
            .add_params(cell_selector)  # mouse over highlighting
            .properties(height=250, title=' '.join(metric.split('_'))))

In [127]:
# Hover selection that DMS_heatmaps uses to outline the cell under the pointer.
# `empty=False` means nothing is outlined while no cell is hovered; it replaces the
# string form 'none', which Altair 5 accepts only with a deprecation warning.
cell_selector = alt.selection_point(on='mouseover',
                                     empty=False)


polyphen_heatmap = DMS_heatmaps(polyphen_no_spaces, ' pph2_prob')

polyphen_heatmap

  stacklevel=1,


In [128]:
# Destination of the interactive PolyPhen heatmap.
# Note: this rebinds `config`, replacing the settings loaded from config.yaml.
config = dict(
    interactive_heatmap='./plots/computational_predictor_heatmaps/PolyPhen_SMARCB1_heatmap.html'
)

# Report the destination before writing.
print(f"Saving chart to {config['interactive_heatmap']}")

# Create the output folder if needed, then write the chart.
output_dir = os.path.dirname(config['interactive_heatmap'])
os.makedirs(output_dir, exist_ok=True)
polyphen_heatmap.save(config['interactive_heatmap'])

Saving chart to ./plots/computational_predictor_heatmaps/PolyPhen_SMARCB1_heatmap.html
