In [1]:
import pandas as pd
import altair as alt
import numpy as np
import scipy.stats as stats
import sbml_sim_helper as sim

In [104]:
def prepare_df(df):
    prot = 'sp|P01024|CO3_HUMAN'
    df = df[[col for col in df.columns if 'MonoHydro_' in col or 'XL_' in col]]
    df = pd.melt(df)
    df['variable'] = df['variable'].str.replace('\[|\]','', regex=True)
    split = df['variable'].str.split("_", expand=True)
    df["link_type"] = split[0]
    split[1] = split[1].map({'C3': prot})
    split[3] = split[3].map({'C3': prot})
    df['uID'] = np.where(df['link_type'] == 'XL', split[1] + ':' + split[2] + ':' + split[3] +':' + split[4], split[1] + ':' + split[2])
    df['uID_rev'] = np.where(df['link_type'] == 'XL', split[3] + ':' + split[4] + ':' + split[1] +':' + split[2], split[1] + ':' + split[2])
    return df


In [81]:
def get_log2_df(df_c3, df_c3b):
    df_merge = pd.merge(df_c3, df_c3b, on=['variable', 'link_type', 'uID', 'uID_rev'], suffixes=['_c3', '_c3b'])
    df_merge['log2ratio'] = np.log2(df_merge['value_c3b']/df_merge['value_c3'])
    df_merge = df_merge.replace([np.inf, -np.inf], np.nan).dropna().reset_index(drop=True)
    return df_merge


In [82]:
def get_delta_dist(x, ref_exp='c3', exp='c3b', metric='SASD'):
    if len(x) == 2:
        d_ref_exp = x[x['exp_name'] == ref_exp][metric].values[0]
        d_exp = x[x['exp_name'] == exp][metric].values[0]
        return d_exp - d_ref_exp
    else:
        return None

In [83]:
df_c3 = pd.read_csv('../output/c3_final_frame_asa.csv')
df_c3b = pd.read_csv('../output/c3b_final_frame_asa.csv')
df_dist = pd.read_csv('../input/jwalk_dist_combined.csv')
df_xtract = pd.read_csv('/home/kai/Projects/c3_v2/xtract_results_ld28/KK_c3_v2.analyzer.quant.xls', delimiter='\t').rename(columns={'type': 'link_type'})
df_xtract =df_xtract[df_xtract['sign'] == '==']
df_xtract = pd.read_csv('/home/kai/Projects/c3_v2/xtract_results_ld28/xtract_out_from_bagcontainer_sky.csv')

In [6]:
rr_c3 = sim.load_model('c3', 'asa')
rr_c3b = sim.load_model('c3b', 'asa')

Loading c3 model
Loading c3b model


In [7]:
rr_c3.resetToOrigin()
rr_c3b.resetToOrigin()

In [8]:
rr_c3.kh = 0.00001
#rr_c3.klys_C3_408 = 0
rr_c3b.kh = 0.00001
#rr_c3.klys_C3b_408 = 0
rr_c3.Crosslinker = 50
rr_c3b.Crosslinker = 50

In [22]:
l = rr_c3.getFloatingSpeciesIds()
l = [e for e in l if 'LYS_' in e]
for lys in l:
    setattr(rr_c3, lys, 0.5)
l = rr_c3b.getFloatingSpeciesIds()
l = [e for e in l if 'LYS_' in e]
for lys in l:
    setattr(rr_c3b, lys, 0.5)

In [23]:
res_c3 = sim.simulate(rr_c3)
res_c3b = sim.simulate(rr_c3b)

Starting Simulation
convergence 2.248428495734978e-18
Starting Simulation
convergence 9.557500213315073e-19


In [24]:
df_c3 = sim.get_final_frame(res_c3)
df_c3b = sim.get_final_frame(res_c3b)

In [105]:
df_c3_melt = prepare_df(df_c3)
df_c3b_melt = prepare_df(df_c3b)

In [106]:
df_c3_melt['exp'] = 'c3'
df_c3b_melt['exp'] = 'c3b'
df_concat = pd.concat([df_c3_melt, df_c3b_melt])

In [107]:
df_concat.tail()

Unnamed: 0,variable,value,link_type,uID,uID_rev,exp
569,XL_C3_1595_C3_1644,0.153445,XL,sp|P01024|CO3_HUMAN:1595:sp|P01024|CO3_HUMAN:1644,sp|P01024|CO3_HUMAN:1644:sp|P01024|CO3_HUMAN:1595,c3b
570,XL_C3_1599_C3_1600,0.007571,XL,sp|P01024|CO3_HUMAN:1599:sp|P01024|CO3_HUMAN:1600,sp|P01024|CO3_HUMAN:1600:sp|P01024|CO3_HUMAN:1599,c3b
571,XL_C3_1599_C3_1644,0.101762,XL,sp|P01024|CO3_HUMAN:1599:sp|P01024|CO3_HUMAN:1644,sp|P01024|CO3_HUMAN:1644:sp|P01024|CO3_HUMAN:1599,c3b
572,XL_C3_1600_C3_1644,0.169129,XL,sp|P01024|CO3_HUMAN:1600:sp|P01024|CO3_HUMAN:1644,sp|P01024|CO3_HUMAN:1644:sp|P01024|CO3_HUMAN:1600,c3b
573,XL_C3_1615_C3_1624,0.088264,XL,sp|P01024|CO3_HUMAN:1615:sp|P01024|CO3_HUMAN:1624,sp|P01024|CO3_HUMAN:1624:sp|P01024|CO3_HUMAN:1615,c3b


In [36]:
alt.Chart(df_concat).mark_point(size=50).encode(
    x='variable',
    y=alt.Y('value'),
    row='link_type',
    color='exp'
).resolve_scale(x='independent', y='independent')

In [31]:
df_merge = get_log2_df(df_c3_melt, df_c3b_melt)
df_merge.head()

Unnamed: 0,variable,value_c3,link_type,uID,uID_rev,exp_c3,value_c3b,exp_c3b,log2ratio
0,MonoHydro_C3_65,0.016176,MonoHydro,sp|P01024|CO3_HUMAN:65,sp|P01024|CO3_HUMAN:65,c3,0.02592,c3b,0.680244
1,MonoHydro_C3_66,0.058598,MonoHydro,sp|P01024|CO3_HUMAN:66,sp|P01024|CO3_HUMAN:66,c3,0.026332,c3b,-1.154002
2,MonoHydro_C3_73,0.001175,MonoHydro,sp|P01024|CO3_HUMAN:73,sp|P01024|CO3_HUMAN:73,c3,0.034585,c3b,4.878947
3,MonoHydro_C3_97,0.465751,MonoHydro,sp|P01024|CO3_HUMAN:97,sp|P01024|CO3_HUMAN:97,c3,0.5,c3b,0.102371
4,MonoHydro_C3_100,0.465751,MonoHydro,sp|P01024|CO3_HUMAN:100,sp|P01024|CO3_HUMAN:100,c3,0.5,c3b,0.102371


In [32]:
log2_filter = 1

In [60]:
alt.Chart(df_merge[(df_merge['log2ratio'] > log2_filter) | (df_merge['log2ratio'] < -log2_filter)]).mark_bar().encode(
    x='variable',
    y=alt.Y('log2ratio', stack='zero'),
    column='link_type',
).resolve_scale(x='independent', y='independent')

In [107]:
df_merge

Unnamed: 0,variable,value_c3,link_type,uID,uID_rev,exp_c3,value_c3b,exp_c3b,log2ratio
0,MonoHydro_C3_65,0.020814,MonoHydro,sp|P01024|CO3_HUMAN:65,sp|P01024|CO3_HUMAN:65,c3,0.000471,c3b,-5.464507
1,MonoHydro_C3_66,0.098760,MonoHydro,sp|P01024|CO3_HUMAN:66,sp|P01024|CO3_HUMAN:66,c3,0.000492,c3b,-7.648854
2,MonoHydro_C3_73,0.001247,MonoHydro,sp|P01024|CO3_HUMAN:73,sp|P01024|CO3_HUMAN:73,c3,0.000537,c3b,-1.216652
3,MonoHydro_C3_97,0.946121,MonoHydro,sp|P01024|CO3_HUMAN:97,sp|P01024|CO3_HUMAN:97,c3,0.859718,c3b,-0.138162
4,MonoHydro_C3_100,0.946121,MonoHydro,sp|P01024|CO3_HUMAN:100,sp|P01024|CO3_HUMAN:100,c3,0.859718,c3b,-0.138162
...,...,...,...,...,...,...,...,...,...
446,XL_C3_1595_C3_1644,0.232895,XL,sp|P01024|CO3_HUMAN:1595:x:sp|P01024|CO3_HUMAN...,sp|P01024|CO3_HUMAN:1644:x:sp|P01024|CO3_HUMAN...,c3,0.168462,c3b,-0.467252
447,XL_C3_1599_C3_1600,0.002262,XL,sp|P01024|CO3_HUMAN:1599:x:sp|P01024|CO3_HUMAN...,sp|P01024|CO3_HUMAN:1600:x:sp|P01024|CO3_HUMAN...,c3,0.008416,c3b,1.895437
448,XL_C3_1599_C3_1644,0.158422,XL,sp|P01024|CO3_HUMAN:1599:x:sp|P01024|CO3_HUMAN...,sp|P01024|CO3_HUMAN:1644:x:sp|P01024|CO3_HUMAN...,c3,0.141549,c3b,-0.162467
449,XL_C3_1600_C3_1644,0.241588,XL,sp|P01024|CO3_HUMAN:1600:x:sp|P01024|CO3_HUMAN...,sp|P01024|CO3_HUMAN:1644:x:sp|P01024|CO3_HUMAN...,c3,0.179727,c3b,-0.426739


In [108]:
df_xtract.head()

Unnamed: 0,uID,experiment,link_type,ms1_area_sum,ms1_area_sum_ref,log2ratio,log2avg,referenceexperiment,pvalue,FDR
0,sp|P01024|CO3_HUMAN:1036,c3b,monolink,22.996,32.5336,-9.53754,27.7648,c3,1.3e-05,7e-05
1,sp|P01024|CO3_HUMAN:1041:x:sp|P01024|CO3_HUMAN...,c3b,xlink,32.5444,31.4862,1.05819,32.0153,c3,0.000404,0.001121
2,sp|P01024|CO3_HUMAN:1041:x:sp|P01024|CO3_HUMAN...,c3b,xlink,23.7509,22.5223,1.22864,23.1366,c3,0.018367,0.034208
3,sp|P01024|CO3_HUMAN:104:x:sp|P01024|CO3_HUMAN:66,c3b,xlink,26.792,28.4879,-1.6959,27.6399,c3,1.9e-05,9.1e-05
4,sp|P01024|CO3_HUMAN:104:x:sp|P01024|CO3_HUMAN:97,c3b,xlink,24.9138,23.476,1.43778,24.1949,c3,0.044861,0.076832


In [38]:
dfm = pd.merge(df_merge[['uID', 'log2ratio']], df_xtract[['uID', 'log2ratio', 'link_type']], on=['uID'], suffixes=['_sim', '_exp'])
dfm_rev = pd.merge(df_merge[['uID_rev', 'log2ratio']].rename(columns={'uID_rev': 'uID'}), df_xtract[['uID', 'log2ratio', 'link_type']], on=['uID'], suffixes=['_sim', '_exp'])
dfm = pd.concat([dfm, dfm_rev]).drop_duplicates().reset_index(drop=True)

In [39]:
dfm.head()

Unnamed: 0,uID,log2ratio_sim,log2ratio_exp,link_type
0,sp|P01024|CO3_HUMAN:65,0.680244,0.32095,monolink
1,sp|P01024|CO3_HUMAN:66,-1.154002,0.348402,monolink
2,sp|P01024|CO3_HUMAN:155,5.405193,-0.557699,monolink
3,sp|P01024|CO3_HUMAN:176,4.716654,-3.85708,monolink
4,sp|P01024|CO3_HUMAN:205,7.13947,-0.941455,monolink


In [40]:
dfmm = pd.melt(dfm, value_vars=['log2ratio_exp', 'log2ratio_sim'], id_vars=['uID', 'link_type'])

In [41]:
dfmm.head()

Unnamed: 0,uID,link_type,variable,value
0,sp|P01024|CO3_HUMAN:65,monolink,log2ratio_exp,0.32095
1,sp|P01024|CO3_HUMAN:66,monolink,log2ratio_exp,0.348402
2,sp|P01024|CO3_HUMAN:155,monolink,log2ratio_exp,-0.557699
3,sp|P01024|CO3_HUMAN:176,monolink,log2ratio_exp,-3.85708
4,sp|P01024|CO3_HUMAN:205,monolink,log2ratio_exp,-0.941455


In [46]:
alt.Chart(dfmm).mark_point(size=50).encode(
    x='uID',
    y=alt.Y('value', stack='zero'),
    color='variable',
    row='link_type'
).resolve_scale(x='independent', y='independent')

In [50]:
fil_mono = ((dfm['link_type'] == 'monolink') & ((dfm['log2ratio_sim'] > 0.5) | (dfm['log2ratio_sim'] < -0.5))) 
fil_xl = (dfm['link_type'] == 'xlink')
c = alt.Chart(dfm[fil_mono | fil_xl]).mark_point().encode(
    x='log2ratio_sim',
    y=alt.Y('log2ratio_exp'),
)
regression = c.transform_regression('log2ratio_sim', 'log2ratio_exp').mark_line()
params = c.transform_regression(
    'log2ratio_sim', 'log2ratio_exp', params=True
).mark_text(align='left').encode(
    x=alt.value(20),  # pixels from left
    y=alt.value(20),  # pixels from top
    text=alt.Text('rSquared:N', format='.2e')
)
c += regression + params
c.facet(
    row='link_type'
).resolve_scale(x='independent', y='independent')



In [44]:
stats.linregress(dfm[fil_mono]['log2ratio_sim'], dfm[fil_mono]['log2ratio_exp'])

LinregressResult(slope=-0.008794321742067707, intercept=-0.5640573018182282, rvalue=-0.008496559339736897, pvalue=0.9411461546926183, stderr=0.1187235249679711)

In [116]:
# & (dfm['log2ratio_exp'] < 6)  & (dfm['log2ratio_exp'] > -4)
stats.linregress(dfm[fil_xl]['log2ratio_sim'], dfm[fil_xl]['log2ratio_exp'])

LinregressResult(slope=0.1556839381731902, intercept=0.26802473614602185, rvalue=0.06866772904848345, pvalue=0.6428257462138459, stderr=0.3334923542849706)

In [117]:
df_delta_dist = df_dist.groupby('uxID').apply(get_delta_dist).dropna()
df_delta_dist = pd.DataFrame(df_delta_dist).reset_index()
df_delta_dist = df_delta_dist.rename(columns={'uxID': 'uID', 0: 'delta_dist'})


In [118]:
len(df_delta_dist)

1793

In [119]:
dfdd = pd.merge(df_delta_dist, df_xtract[['uID', 'log2ratio', 'link_type']], on=['uID'])

In [120]:
c = alt.Chart(dfdd).mark_point().encode(
    x='delta_dist',
    y=alt.Y('log2ratio'),
)
c = c + c.transform_regression('delta_dist', 'log2ratio').mark_line()
c.facet(
    row='link_type'
).resolve_scale(x='independent', y='independent')

In [121]:
stats.linregress(dfdd['delta_dist'], dfdd['log2ratio'])

LinregressResult(slope=-0.25343897930792686, intercept=-0.4821110909612746, rvalue=-0.3123945815855421, pvalue=0.03064094001902646, stderr=0.11362996213937218)

In [122]:
df_concat.head(1)

Unnamed: 0,variable,value,link_type,uID,uID_rev,exp
0,MonoHydro_C3_65,0.020814,MonoHydro,sp|P01024|CO3_HUMAN:65,sp|P01024|CO3_HUMAN:65,c3


In [123]:
df_xtract.head(1)

Unnamed: 0,uID,experiment,link_type,ms1_area_sum,ms1_area_sum_ref,log2ratio,log2avg,referenceexperiment,pvalue,FDR
0,sp|P01024|CO3_HUMAN:1036,c3b,monolink,22.996,32.5336,-9.53754,27.7648,c3,1.3e-05,7e-05


In [72]:
dfs = pd.merge(df_concat[['uID', 'value', 'exp']], df_xtract[['uID', 'link_type', 'ms1_area_sum', 'ms1_area_sum_ref']], on=['uID'])
dfs_rev = pd.merge(df_concat[['uID_rev', 'value', 'exp']].rename(columns={'uID_rev': 'uID'}), df_xtract[['uID', 'link_type', 'ms1_area_sum', 'ms1_area_sum_ref']], on=['uID'])
dfs = pd.concat([dfs, dfs_rev]).drop_duplicates().reset_index(drop=True)
mask_c3 = dfs['exp'] == 'c3'
dfs.loc[mask_c3, 'ms1_area_sum_exp'] = dfs['ms1_area_sum_ref']
dfs.loc[~mask_c3, 'ms1_area_sum_exp'] = dfs['ms1_area_sum']
#dfs = dfs[~(dfs['value'] >= 0.99)].reset_index(drop=True)

In [73]:
dfs_c3 = dfs[dfs['exp'] == 'c3']
dfs_c3b = dfs[dfs['exp'] == 'c3b']
dfs_c3.head()

Unnamed: 0,uID,value,exp,link_type,ms1_area_sum,ms1_area_sum_ref,ms1_area_sum_exp
0,sp|P01024|CO3_HUMAN:65,0.016176,c3,monolink,30.4247,30.1038,30.1038
2,sp|P01024|CO3_HUMAN:66,0.058598,c3,monolink,30.3158,29.9674,29.9674
4,sp|P01024|CO3_HUMAN:155,0.005198,c3,monolink,33.0811,33.6388,33.6388
6,sp|P01024|CO3_HUMAN:176,0.005546,c3,monolink,23.3706,27.2277,27.2277
8,sp|P01024|CO3_HUMAN:205,0.000529,c3,monolink,27.0118,27.9533,27.9533


In [79]:
c = alt.Chart(dfs).mark_point().encode(
    x='value',
    y=alt.Y('ms1_area_sum_exp'),
    #color='exp'
)#.transform_calculate(y=alt.expr.pow(alt.datum.t))
regr = c.transform_regression('value', 'ms1_area_sum_exp').mark_line()
params = c.transform_regression(
    'value', 'ms1_area_sum_exp', params=True
).mark_text(align='left').encode(
    x=alt.value(20),  # pixels from left
    y=alt.value(20),  # pixels from top
    text=alt.Text('rSquared:N', format='.2e')
)
c += regr + params
c.facet(
    row='link_type',
    column='exp'
).resolve_scale(x='independent', y='independent')


In [81]:
fil = (dfs_c3['link_type'] == 'xlink')
stats.linregress(dfs_c3[fil]['value'], dfs_c3[fil]['ms1_area_sum_ref'])

LinregressResult(slope=-0.24411251287113778, intercept=28.99014474416058, rvalue=-0.004679875302966587, pvalue=0.9748160759619993, stderr=7.690810106709166)

In [82]:
fil = (dfs_c3b['link_type'] == 'xlink')
stats.linregress(dfs_c3b[fil]['value'], dfs_c3b[fil]['ms1_area_sum_ref'])

LinregressResult(slope=3.474930115211977, intercept=28.24315892858036, rvalue=0.07312269963201544, pvalue=0.5922514308142315, stderr=6.449600166243533)