/
2_3_significant.py
64 lines (50 loc) · 2.07 KB
/
2_3_significant.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Copyright (C) 2020 Nicolàs Palacio
#
# Contact: nicolas.palacio@bioquant.uni-heidelberg.de
#
# GNU-GPLv3:
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# A full copy of the GNU General Public License can be found on
# http://www.gnu.org/licenses/.
#
# Significant differential phosphorylation
# ========================================
#
# This script subsets the differential expression results and saves
# a table for each contrast containing only the significantly
# differentially expressed p-sites.
import os
import pandas as pd
from data_tools.databases import up_map
from data_tools.iterables import chunk_this
#----------------------------------- INPUT -----------------------------------#
parent_dir = 'results'
#-----------------------------------------------------------------------------#
# Every sub-folder of `parent_dir` whose name starts with '2_diff_exp' is
# processed.
usedirs = [os.path.join(parent_dir, d) for d in os.listdir(parent_dir)
           if d.startswith('2_diff_exp')]

for dir_ in usedirs:
    # Input tables end in '_ttop.csv'; files starting with 'sig_' are the
    # outputs written by this script and must not be processed again.
    files = [f for f in os.listdir(dir_)
             if f.endswith('_ttop.csv') and not f.startswith('sig_')]

    for f in files:
        df = pd.read_csv(os.path.join(dir_, f), index_col=0)

        # Keep only rows with |logFC| >= 1 and P.Value <= 0.05. The mask is
        # passed as a plain array so that selection is positional.
        sig = (df['logFC'].abs() >= 1) & (df['P.Value'] <= 0.05)
        df = df.loc[sig.values, :]

        # Index labels are split at the first underscore: the leading part
        # is the accession sent to `up_map`, the remainder is kept verbatim.
        ups = list({i.partition('_')[0] for i in df.index})

        # Build the ACC -> GENENAME lookup in chunks of 1000 accessions
        # (presumably to keep each mapping request small -- TODO confirm
        # against `up_map`/`chunk_this`).
        mapper = dict()

        for ch in chunk_this(ups, 1000):
            aux = up_map(ch)
            mapper.update(aux.set_index('ACC').to_dict()['GENENAME'])

        # Swap the accession for its gene name. Accessions missing from the
        # mapping result keep their original label instead of raising a
        # KeyError and aborting the remaining files.
        new_index = []

        for i in df.index:
            acc, _, rest = i.partition('_')
            new_index.append('_'.join([mapper.get(acc, acc), rest]))

        df.index = new_index

        df.to_csv(os.path.join(dir_, 'sig_' + f))