In [8]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from scipy.stats import ttest_ind

In [9]:
# Input tables. `df` is read as tab-separated text, `mut` as a regular CSV.
# Later cells read the columns `uniprot_id` / `site_residue` from `df` and
# `uniprot_id` / `pos` / `from` / `dist` from `mut`.
asd_release_path = "/scratch/ASD_Release_201909_AP.txt"
gpcr_mutations_path = "/scratch/mutations_gpcr.csv"

df = pd.read_csv(asd_release_path, delimiter="\t")
mut = pd.read_csv(gpcr_mutations_path)

In [10]:
# Identifiers to keep; rows of `df` whose `uniprot_id` is not in this list
# are discarded. (Presumably UniProt accessions of the receptors under study
# -- confirm against the data source.)
uniprot_ids = [
    'Q14832', 'P51686', 'Q14416', 'Q13467',
    'P21730', 'P16473', 'P47871', 'Q14831',
    'Q9UBS5', 'O75473', 'P25116', 'P43220',
    'O00222', 'P28222', 'Q9BXB1', 'P61073',
    'P25024', 'Q96RI0', 'P41143', 'P25103',
    'P29274', 'P07550',
]

# Boolean mask first, then select, so the filter condition has a name.
is_selected_protein = df["uniprot_id"].isin(uniprot_ids)
df = df.loc[is_selected_protein]

In [11]:
# Expand every row of `df` into one record per annotated residue.
#
# `site_residue` holds ';'-separated residue entries. Each entry is itself
# comma-separated; prefixed with the row's `uniprot_id` it must split into
# exactly the five columns named below.
#
# The records are collected in a single pass instead of summing a Series of
# lists: summing lists concatenates them pairwise (quadratic in the number of
# residues) and does not produce a list of strings when `df` has no rows.
site_rows = [
    f"{uniprot_id},{residue}".split(",")
    for uniprot_id, residues in zip(df["uniprot_id"], df["site_residue"])
    # A missing annotation is read by pandas as NaN (a float), which has no
    # `.split`; such rows contribute no residues.
    if isinstance(residues, str)
    for residue in residues.split(";")
]

# One row per (protein, residue); all values are strings at this point.
data = pd.DataFrame(
    site_rows,
    columns=["uniprot_id", "from_big", "from", "pos", "chain"],
)

In [12]:
# Both frames must carry `pos` as a string so the join keys compare equal.
data = data.astype({"pos": str})
mut = mut.astype({"pos": str})

# Only `uniprot_id`, `from` and `pos` of the site table take part in the join.
site_lookup = data.drop(columns=["from_big", "chain"])

# Left join keeps every mutation. Because both sides have a `from` column,
# pandas suffixes them: `from_x` comes from `mut`, `from_y` from the site
# table (NaN when the position has no annotated site). Exact duplicate rows
# produced by the join are removed.
merge = pd.merge(
    mut,
    site_lookup,
    how="left",
    on=["pos", "uniprot_id"],
).drop_duplicates()

In [13]:
# Split the merged mutations into two labelled groups.
#
# * allosteric:     the position matched a site (`from_y` present) AND the
#                   residue recorded for the mutation (`from_x`) equals the
#                   residue recorded for the site (`from_y`).
# * non allosteric: the position matched no site at all (`from_y` missing).
#
# Rows that matched a site position but with a different residue fall into
# neither group.
#
# `.assign` returns a new frame, so the `type` column is never written onto a
# boolean-filtered slice of `merge` (which raises SettingWithCopyWarning in
# pandas < 3).
has_site = merge["from_y"].notna()
same_residue = merge["from_x"] == merge["from_y"]

allosteric_sites = merge[has_site & same_residue].assign(type="allosteric")
non_allosteric_sites = merge[~has_site].assign(type="non allosteric")

In [14]:
# Density curves of `dist` for the two mutation groups. Histogram bars and
# rug marks are switched off, so only the curves are drawn.
col = ['#f7d908', '#f4080b']
group_labels = ["allosteric", "non allosteric"]
dist_samples = [allosteric_sites["dist"], non_allosteric_sites["dist"]]

fig = ff.create_distplot(
    dist_samples,
    group_labels,
    colors=col,
    show_hist=False,
    show_rug=False,
)

# Title, legend pinned to the top-right corner of the plot area, tight
# margins, and fully transparent paper/plot backgrounds (alpha = 0).
fig.update_layout(
    title="Mutations in allosteric sites",
    legend={"yanchor": "top", "y": 0.99, "xanchor": "right", "x": 0.99},
    margin={"l": 20, "r": 20, "t": 40, "b": 20},
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
)

# Restrict the visible x range to [0, 0.5].
fig.update_xaxes(range=[0, 0.5])
fig.show()
#fig.write_image("allosteric.png", scale=5)
