In [1]:
import pandas as pd
import altair as alt
import numpy as np
import scipy.stats as stats

In [2]:
def prepare_df(df, protein_id='sp|P01024|CO3_HUMAN'):
    """Melt the link columns of ``df`` to long format and derive link IDs.

    Only columns whose name contains ``'MonoHydro_'`` or ``'XL_'`` are kept.
    They are melted into ``variable``/``value`` pairs, square brackets are
    stripped from ``variable``, and the name is split on ``'_'``:
    field 0 becomes ``link_type``, field 2 is the (first) position and, for
    rows whose ``link_type`` is ``'XL'``, field 4 is the second position.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide frame with string column names.
    protein_id : str, optional
        Identifier placed in front of every position (``'<protein_id>:<pos>'``).
        Defaults to the identifier that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Long frame with columns ``variable``, ``value``, ``link_type``,
        ``uID`` and ``uID_rev``. For ``XL`` rows ``uID`` is
        ``'<a>:x:<b>'`` and ``uID_rev`` is ``'<b>:x:<a>'``; for all other rows
        both hold the single-site ID.
    """
    link_columns = [col for col in df.columns if 'MonoHydro_' in col or 'XL_' in col]
    long_df = pd.melt(df[link_columns])

    # Raw string: '\[' in a plain literal is an invalid escape sequence.
    long_df['variable'] = long_df['variable'].str.replace(r'\[|\]', '', regex=True)
    parts = long_df['variable'].str.split('_', expand=True)
    long_df['link_type'] = parts[0]

    site_a = protein_id + ':' + parts[2]
    # Field 4 only exists when some name has at least five '_'-separated
    # fields, so the guard must match the index that is actually read.
    if parts.shape[1] > 4:
        site_b = protein_id + ':' + parts[4]
        is_xl = long_df['link_type'] == 'XL'
        long_df['uID'] = np.where(is_xl, site_a + ':x:' + site_b, site_a)
        long_df['uID_rev'] = np.where(is_xl, site_b + ':x:' + site_a, site_a)
    else:
        long_df['uID'] = site_a
        long_df['uID_rev'] = site_a
    return long_df


In [3]:
def get_log2_df(df_c3, df_c3b):
    """Join two long frames and add the log2 ratio of their ``value`` columns.

    The frames are inner-joined on ``variable``, ``link_type``, ``uID``,
    ``uID_rev`` and ``react``; remaining overlapping columns get the suffixes
    ``_c3`` (left frame) and ``_c3b`` (right frame). ``log2ratio`` is
    ``log2(value_c3b / value_c3)``.

    Infinite entries are turned into NaN and every row that contains a NaN in
    any column is dropped before the index is renumbered from 0.
    """
    join_keys = ['variable', 'link_type', 'uID', 'uID_rev', 'react']
    paired = df_c3.merge(df_c3b, on=join_keys, suffixes=['_c3', '_c3b'])

    ratio = paired['value_c3b'] / paired['value_c3']
    paired['log2ratio'] = np.log2(ratio)

    without_inf = paired.replace([np.inf, -np.inf], np.nan)
    return without_inf.dropna().reset_index(drop=True)


In [4]:
def get_delta_dist(x, ref_exp='c3', exp='c3b', metric='SASD'):
    """Return ``metric`` of ``exp`` minus ``metric`` of ``ref_exp`` for a row pair.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame with an ``exp_name`` column and a ``metric`` column.
    ref_exp : str, optional
        ``exp_name`` value of the reference row.
    exp : str, optional
        ``exp_name`` value of the row compared against the reference.
    metric : str, optional
        Name of the column whose difference is returned.

    Returns
    -------
    scalar or None
        ``x[metric]`` for ``exp`` minus ``x[metric]`` for ``ref_exp``.
        ``None`` when ``x`` does not have exactly two rows, or when either
        ``ref_exp`` or ``exp`` is absent from ``exp_name``.
    """
    if len(x) != 2:
        return None

    ref_values = x.loc[x['exp_name'] == ref_exp, metric].values
    exp_values = x.loc[x['exp_name'] == exp, metric].values
    # Two rows do not guarantee one of each experiment; indexing [0] on an
    # empty selection would raise IndexError.
    if len(ref_values) == 0 or len(exp_values) == 0:
        return None
    return exp_values[0] - ref_values[0]

In [5]:
# Load the four CSV exports: the C3 and C3b files with the `_asa_` infix,
# followed by the C3 and C3b files without it (bound to the `_equi` names).
df_c3_asa, df_c3b_asa, df_c3_equi, df_c3b_equi = map(
    pd.read_csv,
    (
        '../output/c3_final_frame_asa_mono_only.csv',
        '../output/c3b_final_frame_asa_mono_only.csv',
        '../output/c3_final_frame_mono_only.csv',
        '../output/c3b_final_frame_mono_only.csv',
    ),
)


In [6]:
# Reshape every loaded frame to long format with link IDs via prepare_df,
# keeping the same C3/C3b x asa/equi order as the inputs.
df_c3_asa_melt, df_c3b_asa_melt, df_c3_equi_melt, df_c3b_equi_melt = (
    prepare_df(raw_frame)
    for raw_frame in (df_c3_asa, df_c3b_asa, df_c3_equi, df_c3b_equi)
)

In [7]:
# Tag each long frame in place with its experiment (`exp`) and condition
# (`react`) labels; `react` is also a join key used by get_log2_df.
for frame, exp_label, react_label in (
    (df_c3_asa_melt, 'c3', 'asa'),
    (df_c3b_asa_melt, 'c3b', 'asa'),
    (df_c3_equi_melt, 'c3', 'equi'),
    (df_c3b_equi_melt, 'c3b', 'equi'),
):
    frame['exp'] = exp_label
    frame['react'] = react_label

# Stack the four labelled frames (original row indices are kept).
df_concat = pd.concat(
    [df_c3_asa_melt, df_c3b_asa_melt, df_c3_equi_melt, df_c3b_equi_melt]
)

In [14]:
# Preview the first five rows of the stacked long frame.
df_concat.head(n=5)

Unnamed: 0,variable,value,link_type,uID,uID_rev,exp,react
0,MonoHydro_C3_65,0.955172,MonoHydro,sp|P01024|CO3_HUMAN:65,sp|P01024|CO3_HUMAN:65,c3,asa
1,MonoHydro_C3_66,0.446621,MonoHydro,sp|P01024|CO3_HUMAN:66,sp|P01024|CO3_HUMAN:66,c3,asa
2,MonoHydro_C3_73,0.874082,MonoHydro,sp|P01024|CO3_HUMAN:73,sp|P01024|CO3_HUMAN:73,c3,asa
3,MonoHydro_C3_97,0.955172,MonoHydro,sp|P01024|CO3_HUMAN:97,sp|P01024|CO3_HUMAN:97,c3,asa
4,MonoHydro_C3_100,0.955172,MonoHydro,sp|P01024|CO3_HUMAN:100,sp|P01024|CO3_HUMAN:100,c3,asa


In [18]:
# Point plot of `value` per `variable`, faceted by link type (rows) and
# experiment (columns); colour and marker shape both encode `react`.
(
    alt.Chart(df_concat)
    .mark_point(size=50)
    .encode(
        x=alt.X('variable'),
        y='value',
        row=alt.Row('link_type'),
        column=alt.Column('exp'),
        color=alt.Color('react', legend=alt.Legend(orient='top')),
        shape=alt.Shape('react', legend=alt.Legend(orient='top')),
    )
)

In [18]:
# For each condition, pair the C3 frame with its C3b counterpart and compute
# the log2 ratio through get_log2_df.
df_asa_merge, df_equi_merge = (
    get_log2_df(c3_frame, c3b_frame)
    for c3_frame, c3b_frame in (
        (df_c3_asa_melt, df_c3b_asa_melt),
        (df_c3_equi_melt, df_c3b_equi_melt),
    )
)
# Show the first merged row as a quick check of the resulting columns.
df_equi_merge.head(n=1)

Unnamed: 0,variable,value_c3,link_type,uID,uID_rev,exp_c3,react,value_c3b,exp_c3b,log2ratio
0,MonoHydro_C3_65,0.439804,MonoHydro,sp|P01024|CO3_HUMAN:65,sp|P01024|CO3_HUMAN:65,c3,equi,0.442717,c3b,0.009527


In [19]:
# Stack the asa and equi log2-ratio frames into one table for plotting.
df_merge = pd.concat(objs=[df_asa_merge, df_equi_merge])

In [20]:
# Point plot of `log2ratio` per `variable`, one facet row per link type, with
# independent x and y scales per facet; colour and shape encode `react`.
(
    alt.Chart(df_merge)
    .mark_point(size=50)
    .encode(
        x=alt.X('variable'),
        y='log2ratio',
        row=alt.Row('link_type'),
        color=alt.Color('react', legend=alt.Legend(orient='top')),
        shape=alt.Shape('react', legend=alt.Legend(orient='top')),
    )
    .resolve_scale(x='independent', y='independent')
)