In [2]:
import altair as alt
import itertools
import pandas as pd
import numpy as np
import yaml
import os
import math
from scipy.stats import gaussian_kde

# Let Altair embed data frames of any size in the chart specs.
alt.data_transformers.disable_max_rows()

from vega_datasets import data

# Parse the project configuration; its entries are used below as file paths.
with open('config.yaml') as config_file:
    config = yaml.safe_load(config_file)

# Tab-separated CADD table located via the 'cadd_duplicate_avg' config entry.
cadd_path = config['cadd_duplicate_avg']
cadd = pd.read_csv(cadd_path, sep='\t')

# Display the loaded table.
cadd

Unnamed: 0,AA_Substitution,#Chrom,Pos,Ref,Alt,RawScore,Site,Codon,Ref_Codon,codon_site,Alt_Codon,Amino_Acid_Ref,Amino_Acid_Alt,Mutation_type,PHRED
0,A109A,22,23793653,T,A,0.155873,327,109,GCT,3,GCA,A,A,S,2.005333
1,A109D,22,23793652,C,A,4.928915,326,109,GCT,2,GAT,A,D,M,27.500000
2,A109G,22,23793652,C,G,4.050355,326,109,GCT,2,GGT,A,G,M,24.200000
3,A109P,22,23793651,G,C,5.253825,325,109,GCT,1,CCT,A,P,M,29.500000
4,A109S,22,23793651,G,T,3.796234,325,109,GCT,1,TCT,A,S,M,23.500000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2758,Y81H,22,23793567,T,C,2.818137,241,81,TAC,1,CAC,Y,H,M,20.400000
2759,Y81N,22,23793567,T,A,3.018291,241,81,TAC,1,AAC,Y,N,M,21.200000
2760,Y81S,22,23793568,A,C,2.857949,242,81,TAC,2,TCC,Y,S,M,20.600000
2761,Y81X,22,23793569,C,A,6.353594,243,81,TAC,3,TAA,Y,Stop,Stop,34.000000


In [3]:
import numpy as np
import scipy.stats as stats
import altair as alt
import pandas as pd
from scipy.ndimage import gaussian_filter1d

# Stacked, peak-normalised density curves of CADD PHRED scores, one row per
# mutation type. Reads the `cadd` DataFrame (columns 'Mutation_type', 'PHRED').

# Height in pixels of each stacked row
step = 50

# Top-to-bottom order of the mutation-type rows
facet_order = ['Stop', 'M', 'S']

mutation_types = ['Stop', 'M', 'S']

# Fill colour of each row
colors = {
    'Stop': 'red',
    'M': 'gray',
    'S': 'black'
}

# x-axis range shared by every row; the KDE grid is confined to this range
phred_domain = [0, 99]

# Store 'Mutation_type' as an ordered categorical following facet_order
cadd['Mutation_type'] = pd.Categorical(cadd['Mutation_type'], categories=facet_order, ordered=True)

# Build one area chart per mutation type
charts = []
for i, mutation_type in enumerate(mutation_types):
    df_subset = cadd[cadd['Mutation_type'] == mutation_type]

    # A kernel density estimate needs at least two observations
    if df_subset.empty or len(df_subset['PHRED']) < 2:
        print(f"No data for mutation type: {mutation_type}")
        continue

    kde = gaussian_kde(df_subset['PHRED'], bw_method=0.3)

    # Pad the evaluation grid by 5 units on each side so the tails are drawn,
    # but keep it inside the axis domain. Clamping x values after evaluating
    # the KDE would pile every out-of-range grid point onto x = 0 with
    # differing densities, which draws a spurious vertical edge.
    x_min = max(df_subset['PHRED'].min() - 5, phred_domain[0])
    x_max = min(df_subset['PHRED'].max() + 5, phred_domain[1])
    x_vals = np.linspace(x_min, x_max, 10000)
    y_vals = kde(x_vals)

    # Scale each curve to a peak of 1 so rows are comparable in height
    y_vals_normalized = y_vals / np.max(y_vals)

    plot_data = pd.DataFrame({'PHRED': x_vals, 'density_normalized': y_vals_normalized})

    chart = alt.Chart(plot_data, width=300, height=step).mark_area().encode(
        alt.X(
            "PHRED:Q",
            scale=alt.Scale(domain=phred_domain),  # Set the x-axis range
            # Only the last listed mutation type shows x-axis tick labels
            axis=alt.Axis(title=None, labels=(i == len(mutation_types) - 1))
        ),
        alt.Y("density_normalized:Q", scale=alt.Scale(domain=[0, 1])),
        color=alt.value(colors[mutation_type])
    )
    charts.append(chart)

# Stack the rows vertically without gaps, grid lines or view borders
combined_chart_CADD = alt.vconcat(*charts, spacing=0).configure_axis(
    grid=False
).configure_view(
    stroke=None
)


combined_chart_CADD

In [4]:
# Output location of the CADD density chart. The path is merged into the
# configuration loaded from config.yaml rather than replacing it, so the
# dataset paths it holds remain available to any cell run afterwards.
config = {
    **config,
    'histogram': './plots/DMS_plots/density_plots/histogram_CADD_mutants.html'
}

# Report where the chart is about to be written
print(f"Saving chart to {config['histogram']}")

# Create the target directory if it does not exist yet
os.makedirs(os.path.dirname(config['histogram']), exist_ok=True)

# Write the chart to the HTML file
combined_chart_CADD.save(config['histogram'])

Saving chart to ./plots/DMS_plots/density_plots/histogram_CADD_mutants.html


In [15]:
import altair as alt
import itertools
import pandas as pd
import numpy as np
import yaml
import os
import math

# Let Altair embed data frames of any size in the chart specs.
alt.data_transformers.disable_max_rows()

from vega_datasets import data

# Parse the project configuration; its entries are used below as file paths.
with open('config.yaml') as config_file:
    config = yaml.safe_load(config_file)

# Tab-separated AlphaMissense table located via the 'alphamissense' config entry.
am_path = config['alphamissense']
am = pd.read_csv(am_path, sep='\t')

# Display the loaded table.
am

Unnamed: 0,uniprot_id,protein_variant,am_pathogenicity,am_class,Residue,Amino_Acid_Alt,Mutation_type
0,Q12824,M1A,0.1541,benign,1,A,M
1,Q12824,M1C,0.1262,benign,1,C,M
2,Q12824,M1D,0.5555,ambiguous,1,D,M
3,Q12824,M1E,0.3386,benign,1,E,M
4,Q12824,M1F,0.1195,benign,1,F,M
...,...,...,...,...,...,...,...
7310,Q12824,W385R,0.9817,pathogenic,385,R,M
7311,Q12824,W385S,0.6411,pathogenic,385,S,M
7312,Q12824,W385T,0.7140,pathogenic,385,T,M
7313,Q12824,W385V,0.6735,pathogenic,385,V,M


In [16]:
import numpy as np
import scipy.stats as stats
import altair as alt
import pandas as pd
from scipy.ndimage import gaussian_filter1d

# Peak-normalised density curve of AlphaMissense pathogenicity scores, one row
# per mutation type. Reads the `am` DataFrame (columns 'Mutation_type',
# 'am_pathogenicity').

# Height in pixels of each stacked row
step = 50

# Top-to-bottom order of the mutation-type rows
facet_order = ['M']

mutation_types = ['M']

# Fill colour of each row
colors = {
    'M': 'gray'
}

# x-axis range of the chart; the KDE grid is confined to this range
score_domain = [0, 1]

# Store 'Mutation_type' as an ordered categorical following facet_order
am['Mutation_type'] = pd.Categorical(am['Mutation_type'], categories=facet_order, ordered=True)

# Build one area chart per mutation type
charts = []
for i, mutation_type in enumerate(mutation_types):
    df_subset = am[am['Mutation_type'] == mutation_type]

    # A kernel density estimate needs at least two observations
    if df_subset.empty or len(df_subset['am_pathogenicity']) < 2:
        print(f"No data for mutation type: {mutation_type}")
        continue

    scores = df_subset['am_pathogenicity']

    # Use this cell's own `scipy.stats as stats` import so the cell does not
    # depend on `gaussian_kde` having been imported in another section.
    kde = stats.gaussian_kde(scores, bw_method=0.1)

    # Evaluate the density only inside the axis domain. Starting the grid 5
    # units below the minimum and clamping afterwards would spend most of the
    # 10000 grid points below 0 and pile them onto x = 0.
    x_max = min(scores.max(), score_domain[1])
    x_vals = np.linspace(score_domain[0], x_max, 10000)
    y_vals = kde(x_vals)

    # Scale the curve to a peak of 1
    y_vals_normalized = y_vals / np.max(y_vals)

    plot_data = pd.DataFrame({'am_pathogenicity': x_vals, 'density_normalized': y_vals_normalized})

    chart = alt.Chart(plot_data, width=300, height=step).mark_area().encode(
        alt.X(
            "am_pathogenicity:Q",
            scale=alt.Scale(domain=score_domain),  # Set the x-axis range
            # Only the last listed mutation type shows x-axis tick labels
            axis=alt.Axis(title=None, labels=(i == len(mutation_types) - 1))
        ),
        alt.Y("density_normalized:Q", scale=alt.Scale(domain=[0, 1])),
        color=alt.value(colors[mutation_type])
    )
    charts.append(chart)

# Stack the rows vertically without gaps, grid lines or view borders
combined_chart_AM = alt.vconcat(*charts, spacing=0).configure_axis(
    grid=False
).configure_view(
    stroke=None
)


combined_chart_AM

In [17]:
# Output location of the AlphaMissense density chart. The path is merged into
# the configuration loaded from config.yaml rather than replacing it, so the
# dataset paths it holds remain available to any cell run afterwards.
config = {
    **config,
    'histogram': './plots/DMS_plots/density_plots/histogram_alphamissense_mutants.html'
}

# Report where the chart is about to be written
print(f"Saving chart to {config['histogram']}")

# Create the target directory if it does not exist yet
os.makedirs(os.path.dirname(config['histogram']), exist_ok=True)

# Write the chart to the HTML file
combined_chart_AM.save(config['histogram'])

Saving chart to ./plots/DMS_plots/density_plots/histogram_alphamissense_mutants.html


In [20]:
import altair as alt
import itertools
import pandas as pd
import numpy as np
import yaml
import os
import math
from scipy.stats import gaussian_kde

# Let Altair embed data frames of any size in the chart specs.
alt.data_transformers.disable_max_rows()

from vega_datasets import data

# Parse the project configuration; its entries are used below as file paths.
with open('config.yaml') as config_file:
    config = yaml.safe_load(config_file)

# Tab-separated REVEL table located via the 'revel_duplicate_avg' config entry.
revel_path = config['revel_duplicate_avg']
revel = pd.read_csv(revel_path, sep='\t')

# Display the loaded table.
revel

Unnamed: 0,AA_Substitution,chr,Pos,ref,alt,aaref,aaalt,Ensembl_transcriptid,Codon,Site,Ref_Codon,codon_site,Alt_Codon,Amino_Acid_Ref,Amino_Acid_Alt,Mutation_type,REVEL
0,A109D,22,23793652,C,A,A,D,ENST00000417137;ENST00000344921;ENST0000026312...,109,326,GCT,2,GAT,A,D,M,0.675
1,A109G,22,23793652,C,G,A,G,ENST00000417137;ENST00000344921;ENST0000026312...,109,326,GCT,2,GGT,A,G,M,0.554
2,A109P,22,23793651,G,C,A,P,ENST00000417137;ENST00000344921;ENST0000026312...,109,325,GCT,1,CCT,A,P,M,0.697
3,A109S,22,23793651,G,T,A,S,ENST00000417137;ENST00000344921;ENST0000026312...,109,325,GCT,1,TCT,A,S,M,0.549
4,A109T,22,23793651,G,A,A,T,ENST00000417137;ENST00000344921;ENST0000026312...,109,325,GCT,1,ACT,A,T,M,0.586
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2269,Y81D,22,23793567,T,G,Y,D,ENST00000417137;ENST00000344921;ENST0000026312...,81,241,TAC,1,GAC,Y,D,M,0.602
2270,Y81F,22,23793568,A,T,Y,F,ENST00000417137;ENST00000344921;ENST0000026312...,81,242,TAC,2,TTC,Y,F,M,0.490
2271,Y81H,22,23793567,T,C,Y,H,ENST00000417137;ENST00000344921;ENST0000026312...,81,241,TAC,1,CAC,Y,H,M,0.514
2272,Y81N,22,23793567,T,A,Y,N,ENST00000417137;ENST00000344921;ENST0000026312...,81,241,TAC,1,AAC,Y,N,M,0.559


In [21]:
import numpy as np
import scipy.stats as stats
import altair as alt
import pandas as pd
from scipy.ndimage import gaussian_filter1d

# Peak-normalised density curve of REVEL scores, one row per mutation type.
# Reads the `revel` DataFrame (columns 'Mutation_type', 'REVEL').

# Height in pixels of each stacked row
step = 50

# Mutation types to draw, in top-to-bottom order
mutation_types = ['M']

# Fill colour of each row
colors = {
    'M': 'gray'
}

# Store 'Mutation_type' as an ordered categorical. The categories come from
# this cell's own `mutation_types`; the cell never defines `facet_order`, so
# relying on it would only work with state left behind by another cell.
revel['Mutation_type'] = pd.Categorical(revel['Mutation_type'], categories=mutation_types, ordered=True)

# Build one area chart per mutation type
charts = []
for i, mutation_type in enumerate(mutation_types):
    df_subset = revel[revel['Mutation_type'] == mutation_type]

    # A kernel density estimate needs at least two observations
    if df_subset.empty or len(df_subset['REVEL']) < 2:
        print(f"No data for mutation type: {mutation_type}")
        continue

    kde = gaussian_kde(df_subset['REVEL'], bw_method=0.1)

    # Evaluate the density across the observed score range
    x_vals = np.linspace(df_subset['REVEL'].min(), df_subset['REVEL'].max(), 10000)
    y_vals = kde(x_vals)

    # Scale the curve to a peak of 1
    y_vals_normalized = y_vals / np.max(y_vals)

    plot_data = pd.DataFrame({'REVEL': x_vals, 'density_normalized': y_vals_normalized})

    chart = alt.Chart(plot_data, width=300, height=step).mark_area().encode(
        alt.X(
            "REVEL:Q",
            scale=alt.Scale(domain=[0, 1]),  # Set the x-axis range
            # Only the last listed mutation type shows x-axis tick labels
            axis=alt.Axis(title=None, labels=(i == len(mutation_types) - 1))
        ),
        alt.Y("density_normalized:Q", scale=alt.Scale(domain=[0, 1])),
        color=alt.value(colors[mutation_type])
    )
    charts.append(chart)

# Stack the rows vertically without gaps, grid lines or view borders
combined_chart_REVEL = alt.vconcat(*charts, spacing=0).configure_axis(
    grid=False
).configure_view(
    stroke=None
)


combined_chart_REVEL

In [22]:
# Output location of the REVEL density chart. The path is merged into the
# configuration loaded from config.yaml rather than replacing it, so the
# dataset paths it holds remain available to any cell run afterwards.
config = {
    **config,
    'histogram': './plots/DMS_plots/density_plots/histogram_REVEL_mutants.html'
}

# Report where the chart is about to be written
print(f"Saving chart to {config['histogram']}")

# Create the target directory if it does not exist yet
os.makedirs(os.path.dirname(config['histogram']), exist_ok=True)

# Write the chart to the HTML file
combined_chart_REVEL.save(config['histogram'])

Saving chart to ./plots/DMS_plots/density_plots/histogram_REVEL_mutants.html
