In [1]:
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
import os
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from tqdm import tqdm
# Default four-colour palette for every plot in this notebook.
sns.set_palette(['#376CA8', '#25A37B', '#E44574', '#796E9C'])

# Custom axes style, one rcParam per line and grouped by the element it affects.
sns.set_style({
    # Axes and figure background
    'axes.axisbelow': True,
    'axes.edgecolor': '.15',
    'axes.facecolor': 'white',
    'axes.labelcolor': '.15',
    'axes.linewidth': 1.25,
    'figure.facecolor': 'white',
    # Only the left and bottom spines are drawn
    'axes.spines.right': False,
    'axes.spines.top': False,
    # Dotted, semi-transparent grid
    'axes.grid': True,
    'grid.color': '.15',
    'grid.linestyle': ':',
    'grid.alpha': 0.5,
    # Text, images and lines
    'font.family': ['sans-serif'],
    'text.color': '.15',
    'image.cmap': 'Greys',
    'lines.solid_capstyle': 'round',
    # Legend
    'legend.frameon': False,
    'legend.numpoints': 1,
    'legend.scatterpoints': 1,
    # Ticks: outward-facing, on the left and bottom sides only
    'xtick.top': False,
    'xtick.color': '.15',
    'xtick.direction': 'out',
    'xtick.major.size': 6,
    'xtick.minor.size': 3,
    'ytick.right': False,
    'ytick.color': '.15',
    'ytick.direction': 'out',
    'ytick.major.size': 6,
    'ytick.minor.size': 3,
})

# Scale fonts and line widths using seaborn's 'paper' context.
sns.set_context('paper')

# Use font type 42 (TrueType) for both the PDF and PS backends, following
# http://phyletica.org/matplotlib-fonts/
import matplotlib
matplotlib.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

In [2]:
from snapanalysis.config import OUTPUT_DIRECTORY as MAIN_OUTPUT_DIRECTORY
# Everything this notebook writes goes into the 'ptm-response' sub-directory
# of the project-wide output directory.
OUTPUT_DIRECTORY = os.path.join(MAIN_OUTPUT_DIRECTORY, 'ptm-response')

# exist_ok=True keeps the cell re-runnable and removes the check-then-create
# race of a separate isdir() test; it still raises if the path exists but is
# not a directory.
os.makedirs(OUTPUT_DIRECTORY, exist_ok=True)

In [3]:
from snapanalysis.models.ptm_response.main import OUTPUT_FILE as PTM_RESPONSE_FILE

from snapanalysis.models.ptm_response.main import PREDICTOR_ORDER
# Predictor names, in the order defined by the ptm_response model module.
predictors = PREDICTOR_ORDER

# Joint limma statistics; reset_index() exposes 'predictor' as a column
# regardless of whether it is stored as an index level.
limma_results = pd.read_hdf(PTM_RESPONSE_FILE, '/ptm_stats/joint_limma_stats')
predictors_from_limma = limma_results.reset_index()['predictor'].unique()

# The configured predictors and the ones present in the limma table must be
# exactly the same set. On failure, report which names differ so the mismatch
# can be fixed without further digging.
_expected_predictors = set(predictors)
_observed_predictors = set(predictors_from_limma)
assert _expected_predictors == _observed_predictors, (
    'Predictor mismatch between PREDICTOR_ORDER and the limma results: '
    f'missing from limma results: {sorted(_expected_predictors - _observed_predictors, key=str)}; '
    f'not in PREDICTOR_ORDER: {sorted(_observed_predictors - _expected_predictors, key=str)}'
)

# Read everything needed from the HDF5 file in one read-only session.
with pd.HDFStore(PTM_RESPONSE_FILE, 'r') as hdf:
    enrichment_complexes = hdf['/ptm_stats/joint_camera_complexes']

    # One long-format matrix per predictor, keyed by predictor name.
    long_matrices = {
        name: hdf[f'/ptm_stats/{name}/long_matrix']
        for name in predictors
    }

# Sorted unique pull-down IDs found in each predictor's long matrix.
informative_pds = {
    name: sorted(frame['Pull-Down ID'].unique())
    for name, frame in long_matrices.items()
}
            

We only export the table here because the rest should be handled by Mara.

Specifically, we will export the `logFC` estimates for proteins that have a 
statistically significant (and large) estimate for at least one predictor.

In [4]:
# Name of the limma_results column whose estimates are exported below.
values = 'logFC'
# Name of the limma_results column used to decide which rows are kept.
significant = 'significant_and_large_fc'
# Move the 'predictor' index level into the columns, so that each statistic
# can be selected as a rows-by-predictor table via matrix_wide[<column name>].
matrix_wide = limma_results.unstack('predictor')

In [5]:
import palettable

In [6]:
# Rows flagged as significant for at least one predictor; a missing flag
# (NaN) is treated as "not significant".
_has_significant_hit = matrix_wide[significant].fillna(False).any(axis=1)

# Effect estimates restricted to those rows ...
matrix_for_clustering = matrix_wide[values][_has_significant_hit]
# ... and the matching significance flags, aligned on the same index.
matrix_for_clustering_mask = matrix_wide[significant].loc[matrix_for_clustering.index]


In [7]:
# Size of the filtered matrix: (retained rows, predictor columns).
matrix_for_clustering.shape

(352, 15)

In [8]:
# Export the filtered logFC matrix; pandas infers gzip compression from the
# '.gz' suffix of the file name.
_feature_matrix_path = os.path.join(
    OUTPUT_DIRECTORY,
    'feature_effect_matrix_logfc_significant_and_large_fc.csv.gz',
)
matrix_for_clustering.to_csv(_feature_matrix_path)

In [9]:
# Preview the first rows of the exported matrix.
matrix_for_clustering.head()

predictor,DNA Methylation,H2A.Z,H3K27ac,H3K27me2,H3K27me3,H3K4me1,H3K4me3,H3K9acK14ac,H3K9me2,H3K9me3,H3ac,H4K16ac,H4K20me2,H4K20me3,H4ac
Gene label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
ABRAXAS1,0.163045,-0.044655,-0.025077,-0.108107,-0.028023,-0.027805,-0.615761,0.279407,-0.317315,-0.031382,0.651735,0.128807,-1.54228,-1.030552,0.326409
ACTB,-0.320098,-0.329969,0.892759,0.025224,-0.018707,0.038612,0.158745,1.394101,0.082701,0.063382,1.551879,0.222802,0.400986,-0.018131,0.749387
ACTG1,-0.311329,-0.317037,0.869404,-0.175051,-0.003373,0.062595,0.184102,1.329178,0.114768,-0.002173,1.496575,0.165277,0.32924,-0.092759,0.76568
ACTL6A,-0.549713,-0.674949,1.122702,-0.04841,-0.016357,0.031056,0.165403,1.222988,-0.115009,-0.00526,1.299292,0.209469,0.296015,-0.003559,0.810472
ACTL8,,-0.279445,,,-0.205593,-0.515323,-1.282069,0.853441,,2.254572,1.273351,0.139838,0.220932,-0.005635,0.395663
