In [4]:
import os
import sys
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.io as pio

sys.path.insert(1, f"./../")
from db import session, Protein, Pk_sim, SequenceAlign, Pk, Residue_props, Residue, db

In [55]:
# Collect every non-null simulated isoelectric point.
# `session` is presumably a SQLAlchemy session, in which case `!= None` is the
# column-level NULL test and must not be rewritten as `is not None`.
pis = (
    session.query(Pk_sim.isoelectric_point)
    .filter(Pk_sim.isoelectric_point != None)  # noqa: E711
    .all()
)

In [71]:
# Unit-wide bins with edges at 3.5, 4.5, ..., 11.5; each bin is labelled with
# the integer at its centre (4 ... 11).
bins = [edge + 3.5 for edge in range(9)]
bins_labels = list(range(4, 12))

# np.histogram returns (values, edges). With density=True the values are
# normalised so the histogram integrates to 1 over the binned range.
y, x = np.histogram(pis, density=True, bins=bins)

In [72]:
# One row per bin: `x` is the bin label, `y` the density estimated for that bin.
df_pis = pd.DataFrame(dict(x=bins_labels, y=y))

In [73]:
# Bar chart of the isoelectric-point density.
# NOTE: assigning pio.templates.default changes the template for every figure
# created afterwards, not just this one.
pio.templates.default = "simple_white"

fig = px.bar(
    df_pis,
    x='x',
    y='y',
    labels={'x': 'Isoelectric Point', 'y': 'Probability Density'},
)

# Slate-grey bars separated by thin white outlines.
fig.update_traces(
    marker_color='SlateGrey',
    marker_line_color='white',
    marker_line_width=1,
    opacity=1,
)

# Explicit tick at every integer from 3 to 11, plus a larger font.
fig.update_layout(
    xaxis=dict(tickmode='array', tickvals=list(range(3, 12))),
    font=dict(size=18),
)

fig.show()

In [44]:
# Pair each Pk.dpk value with the Residue_props.sasa_r of the same residue
# (rows are matched on equal `resid`), capped at 100000 rows.
# NOTE(review): there is no order_by before limit(), so which rows are returned
# is presumably left to the database — confirm this sampling is acceptable.
dpk_sasa = (
    session.query(Pk.dpk, Residue_props.sasa_r)
    .filter(Pk.resid == Residue_props.resid)
    .limit(100000)
    .all()
)

In [45]:
# Tabulate the (dpk, sasa_r) query rows under short column names.
df_dpk_sasa = pd.DataFrame(
    data=dpk_sasa,
    columns=['dpk', 'sasa'],
)

In [46]:
# Magnitude of the shift; this is the column plotted on the y axis below.
df_dpk_sasa['dpk_abs'] = df_dpk_sasa['dpk'].abs()

In [53]:
# Scatter of |dpk| against sasa, one small slate-grey marker per residue.
# NOTE: assigning pio.templates.default changes the template for every figure
# created afterwards, not just this one.
pio.templates.default = "none"

fig = px.scatter(
    df_dpk_sasa,
    x='sasa',
    y='dpk_abs',
    labels={'sasa': 'SASA<sub>r</sub>', 'dpk_abs': '|ΔpK<sub>a</sub>|'},
)

# Marker fill and outline share one colour, so each point reads as a solid dot.
fig.update_traces(
    marker_color='SlateGrey',
    marker_size=1,
    marker_line_color='SlateGrey',
    marker_line_width=1,
)

fig.update_layout(font=dict(size=18))

fig.show()

In [26]:
# Load the experimental table (the file name suggests wild-type pKa values).
# The path is relative to the notebook's working directory.
df_exp = pd.read_csv(filepath_or_buffer='../../initial/WT_pka.csv')

In [27]:
# Lower-case the PDB codes: `pdbids` is the per-row merge key, `ids` the
# distinct codes used to filter the database query.
df_exp['pdbids'] = [code.lower() for code in df_exp['PDB ID']]
ids = [code.lower() for code in df_exp['PDB ID'].unique()]

In [28]:
# Fetch the residues of every protein whose idcode appears in `ids`.
# At this point the name holds the raw query rows; the next cell rebinds it
# to a DataFrame.
df_exp_pkpdb = (
    session.query(
        Protein.idcode,
        Residue.resid,
        Residue.chain,
        Residue.residue_type,
        Residue.residue_number,
    )
    .join(Protein, Residue.pid == Protein.pid)
    .filter(Protein.idcode.in_(ids))
    .all()
)

In [29]:
# Rebind the query rows as a DataFrame; column order mirrors the query's
# select list.
df_exp_pkpdb = pd.DataFrame(
    data=df_exp_pkpdb,
    columns=['idcode', 'resid', 'chain', 'residue_type', 'residue_number'],
)

In [30]:
# Inner-join database residues to experimental rows on
# (PDB code, chain, residue type, residue number). Rows without a match on
# either side are dropped.
merged = pd.merge(
    df_exp_pkpdb,
    df_exp,
    how='inner',
    left_on=['idcode', 'chain', 'residue_type', 'residue_number'],
    right_on=['pdbids', 'Chain', 'Res Name', 'Res ID'],
)

In [84]:
# sasa_r for the residues matched to experimental data.
# `.tolist()` hands in_() a plain list of native Python values instead of a
# pandas Series of NumPy scalars, which not every database driver / ORM version
# accepts as bind parameters.
exp_sasa = (
    session.query(Residue_props.sasa_r)
    .filter(Residue_props.resid.in_(merged['resid'].tolist()))
    .filter(Residue_props.sasa_r != None)  # noqa: E711 - column-level NULL test
    .all()
)

# sasa_r for every residue that also has a non-null Pk.dpk.
all_sasa = (
    session.query(Residue_props.sasa_r)
    .join(Pk, Pk.resid == Residue_props.resid)
    .filter(Residue_props.sasa_r != None)  # noqa: E711 - column-level NULL test
    .filter(Pk.dpk != None)  # noqa: E711 - column-level NULL test
    .all()
)

In [93]:
# Ten bins of width 0.1 covering [0, 1]; each is labelled by its midpoint.
bins = [i * 0.1 for i in range(11)]
bins_labels = [i * 0.1 + 0.05 for i in range(10)]

# `y` is the fraction (0-1) of all fetched values that falls in each bin.
# Values outside the bin range are not counted by np.histogram but still
# contribute to the denominator, so the fractions may sum to less than 1.
y, x = np.histogram(exp_sasa, bins=bins, density=False)
df_sasa_exp = pd.DataFrame({'x': bins_labels, 'y': y / len(exp_sasa), 'z': 'Experimental'})

y, x = np.histogram(all_sasa, bins=bins, density=False)
df_sasa_all = pd.DataFrame({'x': bins_labels, 'y': y / len(all_sasa), 'z': 'PKPDB'})

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two frames
# the same way (original indices are kept, as append did by default).
df_sasa = pd.concat([df_sasa_exp, df_sasa_all])

In [94]:
# Grouped bar chart comparing the two sasa_r distributions.
# NOTE: assigning pio.templates.default changes the template for every figure
# created afterwards, not just this one.
pio.templates.default = "simple_white"

# df_sasa['y'] holds fractions (bin count / number of values), not
# percentages, so the axis is titled 'Probability' rather than 'Probability (%)'.
fig = px.bar(
    df_sasa,
    x='x',
    y='y',
    color='z',
    barmode='group',
    labels={'x': 'SASA<sub>r</sub>', 'y': 'Probability', 'z': ''},
    color_discrete_sequence=['SlateGrey', 'black'],
)

# Ticks sit on the bin edges; the bars themselves are centred on bin midpoints.
fig.update_layout(
    xaxis=dict(tickmode='array', tickvals=bins),
    font=dict(size=18),
)

fig.show()

In [81]:
# hsecn for the residues matched to experimental data.
# `.tolist()` hands in_() a plain list of native Python values instead of a
# pandas Series of NumPy scalars, which not every database driver / ORM version
# accepts as bind parameters.
exp_hse = (
    session.query(Residue_props.hsecn)
    .filter(Residue_props.resid.in_(merged['resid'].tolist()))
    .filter(Residue_props.hsecn != None)  # noqa: E711 - column-level NULL test
    .all()
)

# hsecn for every residue that also has a non-null Pk.dpk.
all_hse = (
    session.query(Residue_props.hsecn)
    .join(Pk, Pk.resid == Residue_props.resid)
    .filter(Residue_props.hsecn != None)  # noqa: E711 - column-level NULL test
    .filter(Pk.dpk != None)  # noqa: E711 - column-level NULL test
    .all()
)

In [97]:
# Twelve bins of width 5 covering [0, 60]; each is labelled by its midpoint.
bins = [i * 5 for i in range(13)]
bins_labels = [i * 5 + 2.5 for i in range(12)]

# `y` is the fraction (0-1) of all fetched values that falls in each bin.
# Values outside the bin range are not counted by np.histogram but still
# contribute to the denominator, so the fractions may sum to less than 1.
y, x = np.histogram(exp_hse, bins=bins, density=False)
df_hse_exp = pd.DataFrame({'x': bins_labels, 'y': y / len(exp_hse), 'z': 'Experimental'})

y, x = np.histogram(all_hse, bins=bins, density=False)
df_hse_all = pd.DataFrame({'x': bins_labels, 'y': y / len(all_hse), 'z': 'PKPDB'})

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two frames
# the same way (original indices are kept, as append did by default).
df_hse = pd.concat([df_hse_exp, df_hse_all])


In [98]:
pio.templates.default = "simple_white"

fig = px.bar(df_hse, x='x', y='y', color='z', barmode='group', labels={'x': 'HSE<sub>CN</sub>', 'y': 'Probability (%)', 'z': ''}, color_discrete_sequence =['SlateGrey', 'black'])

fig.update_layout(
    xaxis = dict(
        tickmode = 'array',
        tickvals = bins
    )
)
    
fig.update_layout(
    font=dict(
        size=18,        
    )
)


fig.show()