In [1]:
import gspread
from pathlib import Path
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from time import time, gmtime, sleep
import sys
sys.path.append("/home/jovyan/HSE-Bioinformatics")
sys.path.append("/home/jovyan/diploma_scripts/scripts")
from bio import *
import dcona_runs
import dcona

In [2]:
SHARED_PATH = Path('/home/jovyan/shared')
TCGA_PATH = SHARED_PATH / 'TCGA_data'
OUTPUT_PATH = SHARED_PATH / 'narek/outputs'
BRCA_DATA = SHARED_PATH / 'narek/Diplom2/data_BRCA'

annotation = rt(BRCA_DATA/'annotation.tsv')

In [67]:
def adjust_permute(df):
    df = df.copy()
    df['AdjPermutePvalue'] = df['PermutePvalue'] / round(df.rank(axis=0)["PermutePvalue"]) * df.shape[0]
    print('Pairs:', df.shape[0], '| after cut:', pvalue_cutoff(df, 'AdjPermutePvalue').shape[0])
    return df.sort_values('AdjPermutePvalue')

def pvalue_cutoff(df, by: str, alpha: float = 0.05):
    return df.loc[df[by] <= alpha]

def scatter_gene_pairs(ztest_df, row_ind, expr_matrix, annot):
    annot = annot.copy(); expr_matrix = expr_matrix.copy()
    gene1, gene2 = ztest_df.loc[row_ind, ['Source', 'Target']]
    annot[gene1] = expr_matrix.loc[gene1, annot['Sample']].to_numpy()
    annot[gene2] = expr_matrix.loc[gene2, annot['Sample']].to_numpy()
    return sns.lmplot(data=annot, x=gene1, y=gene2, hue="Group")

def wrapped_hypergeom(df):
    df = df.copy()
    df['Pvalue'] = df['PermutePvalue']
    df['AdjPvalue'] = df['AdjPermutePvalue']
    df_hyper = dcona.hypergeom(df).sort_values('Diff', ascending=False)
    return df_hyper

# BRCA

## AGO2 (ENST00000220592)

### Normal (exp 1)

In [68]:
ts_n = rc(OUTPUT_PATH/'experiments/1/ztest_ts.csv')
ts_n = adjust_permute(ts_n)

Pairs: 488477 | after cut: 0


In [5]:
pvalue_cutoff(ts_n, 'AdjPermutePvalue')

Unnamed: 0,Source,Target,RefCorr,RefPvalue,ExpCorr,ExpPvalue,Statistic,Pvalue,AdjPvalue,PermutePvalue,AdjPermutePvalue


In [69]:
rna22_n = rc(OUTPUT_PATH/'experiments/1/ztest_rna22.csv')
rna22_n = adjust_permute(rna22_n)

Pairs: 513659 | after cut: 0


In [7]:
pvalue_cutoff(rna22_n, 'AdjPermutePvalue')

Unnamed: 0,Source,Target,RefCorr,RefPvalue,ExpCorr,ExpPvalue,Statistic,Pvalue,AdjPvalue,PermutePvalue,AdjPermutePvalue


### Luminal A (exp 2)

#### TargetScan

In [70]:
ts_a = rc(OUTPUT_PATH/'experiments/2/ztest_ts.csv')
ts_a = adjust_permute(ts_a)

Pairs: 455889 | after cut: 766


In [51]:
pvalue_cutoff(ts_a, 'AdjPermutePvalue').head()

Unnamed: 0,Source,Target,RefCorr,RefPvalue,ExpCorr,ExpPvalue,Statistic,Pvalue,AdjPvalue,PermutePvalue,AdjPermutePvalue
738,hsa-miR-93-5p|+3,ENST00000372088,0.675655,5.203258e-08,-0.316405,0.019925,1.148722,1e-06,0.560617,0.0,0.0
18,hsa-miR-93-5p|+3,ENST00000300146,0.639292,4.003818e-07,-0.339918,0.012218,1.110976,3e-06,0.24741,0.0,0.0
15,hsa-miR-93-5p|+3,ENST00000225388,0.623995,8.738832e-07,-0.295899,0.029742,1.036541,1.2e-05,0.23842,0.0,0.0
22,hsa-miR-93-5p|+3,ENST00000295951,0.638515,4.170186e-07,-0.361312,0.007607,1.134056,2e-06,0.254892,0.0,0.0
6,hsa-miR-93-5p|+3,ENST00000264033,0.598911,2.885452e-06,-0.393096,0.003567,1.106905,3e-06,0.224219,0.0,0.0


In [47]:
ts_a_hyper = pvalue_cutoff(wrapped_hypergeom(ts_a), 'AdjPvalue')
ts_a_hyper.to_csv(OUTPUT_PATH/'hypergeom/2_ts.csv', index=False)
ts_a_hyper

Unnamed: 0,Molecule,Diff,Total,Proportion,Pvalue,AdjPvalue
0,hsa-miR-93-5p|+3,215,3927,0.054749,0.0,0.0
6,hsa-miR-23a-3p|+1,167,4386,0.038076,0.0,0.0
2,hsa-miR-140-3p|+1,51,4703,0.010844,0.0,0.0
7,hsa-miR-140-3p|0,46,4246,0.010834,0.0,0.0
9,hsa-miR-23b-3p|+1,36,4386,0.008208,1.909584e-14,2.100542e-13
13,hsa-miR-22-3p|+1,30,3424,0.008762,5.789813e-13,5.307329e-12
3,hsa-miR-26b-5p|0,24,3864,0.006211,8.175053e-08,6.423256e-07
5,hsa-miR-128-3p|0,21,4445,0.004724,3.180566e-05,0.0002186639
4,hsa-miR-17-3p|0,19,4762,0.00399,0.0005959461,0.003277703
15,hsa-miR-361-3p|0,19,4180,0.004545,0.0001216044,0.0007431377


#### RNA22

In [71]:
rna22_a = rc(OUTPUT_PATH/'experiments/2/ztest_rna22.csv')
rna22_a = adjust_permute(rna22_a)

Pairs: 483291 | after cut: 848


In [61]:
rna22_a

Unnamed: 0,Source,Target,RefCorr,RefPvalue,ExpCorr,ExpPvalue,Statistic,Pvalue,AdjPvalue,PermutePvalue,AdjPermutePvalue
0,hsa-miR-93-5p|+3,ENST00000298068,0.747537,3.287528e-10,-0.408407,0.002418,1.401047,3.292676e-09,0.001591,0.000000,0.000000
1,hsa-miR-93-5p|+3,ENST00000465503,0.530788,4.748792e-05,-0.544088,0.000034,1.201186,3.929732e-07,0.063307,0.000000,0.000000
28,hsa-miR-361-3p|0,ENST00000620918,-0.420599,1.585994e-03,0.562201,0.000017,-1.084465,4.662105e-06,0.204832,0.000000,0.000000
4,hsa-miR-93-5p|+3,ENST00000454497,0.549391,2.339158e-05,-0.467327,0.000461,1.124154,2.064119e-06,0.142510,0.000000,0.000000
6,hsa-miR-93-5p|+3,ENST00000295951,0.638515,4.170186e-07,-0.361312,0.007607,1.134056,1.677328e-06,0.162127,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
480564,hsa-miR-181b-5p|0,ENST00000351193,0.022621,8.679694e-01,0.022625,0.869166,-0.000004,9.999881e-01,0.999998,0.999988,0.999998
483288,hsa-miR-134-5p|0,ENST00000593035,0.201841,1.365443e-01,0.201845,0.140221,-0.000005,9.999840e-01,1.000000,0.999982,0.999999
483284,hsa-miR-29a-3p|-1,ENST00000319854,0.060021,6.590726e-01,0.060014,0.662083,0.000007,9.999764e-01,1.000000,0.999974,0.999999
483286,hsa-miR-30a-3p|+1,ENST00000430952,-0.186803,1.684917e-01,-0.186808,0.172476,0.000005,9.999834e-01,1.000000,0.999982,1.000001


In [60]:
pvalue_cutoff(rna22_a, 'AdjPermutePvalue').head()

Unnamed: 0,Source,Target,RefCorr,RefPvalue,ExpCorr,ExpPvalue,Statistic,Pvalue,AdjPvalue,PermutePvalue,AdjPermutePvalue
0,hsa-miR-93-5p|+3,ENST00000298068,0.747537,3.287528e-10,-0.408407,0.002418,1.401047,3.292676e-09,0.001591,0.0,0.0
1,hsa-miR-93-5p|+3,ENST00000465503,0.530788,4.748792e-05,-0.544088,3.4e-05,1.201186,3.929732e-07,0.063307,0.0,0.0
28,hsa-miR-361-3p|0,ENST00000620918,-0.420599,0.001585994,0.562201,1.7e-05,-1.084465,4.662105e-06,0.204832,0.0,0.0
4,hsa-miR-93-5p|+3,ENST00000454497,0.549391,2.339158e-05,-0.467327,0.000461,1.124154,2.064119e-06,0.14251,0.0,0.0
6,hsa-miR-93-5p|+3,ENST00000295951,0.638515,4.170186e-07,-0.361312,0.007607,1.134056,1.677328e-06,0.162127,0.0,0.0


In [13]:
rna22_a_hyper = pvalue_cutoff(wrapped_hypergeom(rna22_a), 'AdjPvalue')
rna22_a_hyper.to_csv(OUTPUT_PATH/'hypergeom/2_rna22.csv', index=False)
rna22_a_hyper

Unnamed: 0,Molecule,Diff,Total,Proportion,Pvalue,AdjPvalue
0,hsa-miR-93-5p|+3,355,6280,0.056529,0.0,0.0
5,hsa-miR-23a-3p|+1,88,2865,0.030716,0.0,0.0
4,hsa-miR-22-3p|+1,41,4295,0.009546,0.0,0.0
2,hsa-let-7g-5p|0,34,6267,0.005425,1.461551e-08,1.558988e-07
19,hsa-let-7d-5p|0,30,6935,0.004326,9.3103e-06,5.958592e-05
8,hsa-miR-128-3p|0,27,5286,0.005108,1.354451e-06,1.083561e-05
6,hsa-miR-140-3p|0,26,2297,0.011319,1.94067e-13,3.105072e-12
7,hsa-miR-140-3p|+1,25,2928,0.008538,2.064986e-10,2.643182e-09
12,hsa-miR-23b-3p|+1,18,2741,0.006567,2.826794e-06,2.010165e-05
1,hsa-miR-361-3p|0,17,3871,0.004392,0.000646351,0.003760588


### Luminal B (exp 3)

#### TargetScan

In [72]:
ts_b = rc(OUTPUT_PATH/'experiments/3/ztest_ts.csv')
ts_b = adjust_permute(ts_b)

Pairs: 446066 | after cut: 0


In [15]:
pvalue_cutoff(ts_b, 'AdjPermutePvalue')

Unnamed: 0,Source,Target,RefCorr,RefPvalue,ExpCorr,ExpPvalue,Statistic,Pvalue,AdjPvalue,PermutePvalue,AdjPermutePvalue


#### RNA22

In [73]:
rna22_b = rc(OUTPUT_PATH/'experiments/3/ztest_rna22.csv')
rna22_b = adjust_permute(rna22_b)

Pairs: 470265 | after cut: 0


In [17]:
pvalue_cutoff(rna22_b, 'AdjPermutePvalue')

Unnamed: 0,Source,Target,RefCorr,RefPvalue,ExpCorr,ExpPvalue,Statistic,Pvalue,AdjPvalue,PermutePvalue,AdjPermutePvalue


### Basal-like (exp 4)

TargetScan

In [74]:
ts_bl = rc(OUTPUT_PATH/'experiments/4/ztest_ts.csv')
ts_bl = adjust_permute(ts_bl)

Pairs: 429421 | after cut: 0


In [19]:
pvalue_cutoff(ts_bl, 'AdjPermutePvalue')

Unnamed: 0,Source,Target,RefCorr,RefPvalue,ExpCorr,ExpPvalue,Statistic,Pvalue,AdjPvalue,PermutePvalue,AdjPermutePvalue


RNA22

In [75]:
rna22_bl = rc(OUTPUT_PATH/'experiments/4/ztest_rna22.csv')
rna22_bl = adjust_permute(rna22_bl)

Pairs: 448281 | after cut: 1


In [21]:
pvalue_cutoff(rna22_bl, 'AdjPermutePvalue')

Unnamed: 0,Source,Target,RefCorr,RefPvalue,ExpCorr,ExpPvalue,Statistic,Pvalue,AdjPvalue,PermutePvalue,AdjPermutePvalue
0,hsa-miR-93-5p|+3,ENST00000382363,-0.687055,1e-05,0.513276,0.001879,-1.409525,2e-06,0.933916,0.0,0.0


## DROSHA (ENST00000513349)

### Normal (exp 5)

TargetScan

In [76]:
ts_n = rc(OUTPUT_PATH/'experiments/5/ztest_ts.csv')
ts_n = adjust_permute(ts_n)

Pairs: 488477 | after cut: 0


In [23]:
pvalue_cutoff(ts_n, 'AdjPermutePvalue')

Unnamed: 0,Source,Target,RefCorr,RefPvalue,ExpCorr,ExpPvalue,Statistic,Pvalue,AdjPvalue,PermutePvalue,AdjPermutePvalue


RNA22

In [77]:
rna22_bl = rc(OUTPUT_PATH/'experiments/5/ztest_rna22.csv')
rna22_bl = adjust_permute(rna22_bl)

Pairs: 513659 | after cut: 0


In [25]:
pvalue_cutoff(rna22_bl, 'AdjPermutePvalue')

Unnamed: 0,Source,Target,RefCorr,RefPvalue,ExpCorr,ExpPvalue,Statistic,Pvalue,AdjPvalue,PermutePvalue,AdjPermutePvalue


### Luminal A (exp 6)

TargetScan

In [78]:
ts_n = rc(OUTPUT_PATH/'experiments/6/ztest_ts.csv')
ts_n = adjust_permute(ts_n)

Pairs: 455889 | after cut: 236


In [27]:
pvalue_cutoff(ts_n, 'AdjPermutePvalue')

Unnamed: 0,Source,Target,RefCorr,RefPvalue,ExpCorr,ExpPvalue,Statistic,Pvalue,AdjPvalue,PermutePvalue,AdjPermutePvalue
15,hsa-miR-23a-3p|+1,ENST00000353379,-0.647340,2.742449e-09,0.127795,0.277517,-0.899204,0.000011,0.602310,0.000000,0.000000
102,hsa-miR-23a-3p|+1,ENST00000288828,-0.565036,4.546116e-07,0.322873,0.005605,-0.975051,0.000002,0.812144,0.000000,0.000000
49,hsa-miR-140-3p|0,ENST00000246802,0.675847,3.198984e-10,-0.020370,0.862770,0.841801,0.000037,0.679102,0.000000,0.000000
83,hsa-miR-361-3p|0,ENST00000512944,0.570213,3.427588e-07,-0.254139,0.030007,0.907672,0.000009,0.795187,0.000000,0.000000
9,hsa-miR-93-5p|+3,ENST00000605895,-0.551209,9.456138e-07,0.249474,0.033204,-0.874968,0.000018,0.591190,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
763,hsa-miR-140-3p|+1,ENST00000312579,0.502532,9.915333e-06,-0.092091,0.434224,0.645041,0.001577,0.927935,0.000025,0.049138
484,hsa-miR-1307-3p|0,ENST00000221922,0.390185,7.512160e-04,-0.325491,0.005221,0.749795,0.000240,0.880547,0.000025,0.049142
59,hsa-miR-23a-3p|+1,ENST00000319004,-0.569331,3.597662e-07,0.141280,0.229790,-0.788765,0.000111,0.770037,0.000025,0.049359
454,hsa-miR-140-3p|+1,ENST00000327761,0.599147,6.474714e-08,0.016216,0.890555,0.675597,0.000934,0.872388,0.000025,0.049359


In [28]:
ts_n_hyper = pvalue_cutoff(wrapped_hypergeom(ts_n), 'AdjPvalue')
ts_n_hyper.to_csv(OUTPUT_PATH/'hypergeom/6_ts.csv', index=False)
ts_n_hyper

Unnamed: 0,Molecule,Diff,Total,Proportion,Pvalue,AdjPvalue
1,hsa-miR-140-3p|0,42,4246,0.009892,0.0,0.0
4,hsa-miR-16-5p|0,30,4347,0.006901,0.0,0.0
3,hsa-miR-93-5p|+3,29,3927,0.007385,0.0,0.0
0,hsa-miR-23a-3p|+1,27,4386,0.006156,0.0,0.0
6,hsa-miR-1307-3p|0,19,1815,0.010468,0.0,0.0
14,hsa-miR-140-3p|+1,18,4703,0.003827,8.375611e-11,3.589548e-10
5,hsa-miR-191-5p|0,12,906,0.013245,1.102451e-13,5.512257e-13
2,hsa-miR-361-3p|0,11,4180,0.002632,1.449172e-05,5.434396e-05
11,hsa-miR-17-3p|0,10,4762,0.0021,0.0002214371,0.0007381237
15,hsa-let-7b-5p|0,5,2604,0.00192,0.01200448,0.03601343


RNA22

In [79]:
rna22_la = rc(OUTPUT_PATH/'experiments/6/ztest_rna22.csv')
rna22_la = adjust_permute(rna22_la)

Pairs: 483291 | after cut: 822


In [30]:
pvalue_cutoff(rna22_la, 'AdjPermutePvalue')

Unnamed: 0,Source,Target,RefCorr,RefPvalue,ExpCorr,ExpPvalue,Statistic,Pvalue,AdjPvalue,PermutePvalue,AdjPermutePvalue
19,hsa-miR-1307-3p|0,ENST00000391759,0.535022,2.145514e-06,-0.356216,0.002174,0.969700,2.028749e-06,0.326825,0.000000,0.000000
2,hsa-miR-93-5p|+3,ENST00000177648,0.565462,4.442301e-07,-0.295676,0.011362,0.945601,3.613085e-06,0.218271,0.000000,0.000000
114,hsa-miR-361-3p|0,ENST00000394780,-0.458492,6.368733e-05,0.469246,0.000041,-1.004504,8.608383e-07,0.416035,0.000000,0.000000
109,hsa-miR-191-5p|0,ENST00000383811,-0.458862,6.275934e-05,0.447568,0.000098,-0.977523,1.677256e-06,0.405301,0.000000,0.000000
5,hsa-miR-93-5p|+3,ENST00000440600,0.678208,2.648289e-10,-0.142532,0.225674,0.969297,2.048624e-06,0.247520,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
787,hsa-miR-191-5p|0,ENST00000325167,-0.513257,6.078106e-06,0.107624,0.360648,-0.675185,9.405946e-04,0.569650,0.000084,0.049878
1408,hsa-miR-99b-5p|+1,ENST00000381793,0.390469,7.442373e-04,-0.217724,0.063505,0.633620,1.908657e-03,0.655605,0.000085,0.049939
758,hsa-miR-1307-3p|0,ENST00000299275,-0.544467,1.336678e-06,0.070555,0.549221,-0.681155,8.469939e-04,0.566175,0.000085,0.049939
1193,hsa-miR-342-3p|0,ENST00000529064,-0.116529,3.222037e-01,0.481081,0.000025,-0.641451,1.675377e-03,0.640075,0.000085,0.050000


In [31]:
rna22_la_hyper = pvalue_cutoff(wrapped_hypergeom(rna22_la), 'AdjPvalue')
rna22_la_hyper.to_csv(OUTPUT_PATH/'hypergeom/6_rna22.csv', index=False)
rna22_la_hyper

Unnamed: 0,Molecule,Diff,Total,Proportion,Pvalue,AdjPvalue
0,hsa-miR-1307-3p|0,183,7043,0.025983,0.0,0.0
1,hsa-miR-93-5p|+3,127,6280,0.020223,0.0,0.0
8,hsa-let-7b-5p|0,72,7553,0.009533,0.0,0.0
6,hsa-miR-23a-3p|+1,59,2865,0.020593,0.0,0.0
5,hsa-miR-140-3p|0,41,2297,0.017849,0.0,0.0
11,hsa-miR-140-3p|+1,28,2928,0.009563,6.044054e-13,5.612336e-12
12,hsa-miR-17-3p|0,26,4416,0.005888,8.606815e-08,5.59443e-07
4,hsa-miR-99b-5p|+1,25,1897,0.013179,1.021405e-14,1.106522e-13
13,hsa-let-7i-5p|0,22,7176,0.003066,0.006829917,0.02611439
10,hsa-miR-16-5p|0,18,1825,0.009863,4.96445e-09,3.585436e-08


## DICER1 (ENST00000393063)

### Normal (exp 7)

TargetScan

In [80]:
ts_n = rc(OUTPUT_PATH/'experiments/7/ztest_ts.csv')
ts_n = adjust_permute(ts_n)

Pairs: 488477 | after cut: 0


In [33]:
pvalue_cutoff(ts_n, 'AdjPermutePvalue')

Unnamed: 0,Source,Target,RefCorr,RefPvalue,ExpCorr,ExpPvalue,Statistic,Pvalue,AdjPvalue,PermutePvalue,AdjPermutePvalue


RNA22

In [81]:
rna22_n = rc(OUTPUT_PATH/'experiments/7/ztest_rna22.csv')
rna22_n = adjust_permute(rna22_n)

Pairs: 513659 | after cut: 16


In [35]:
pvalue_cutoff(rna22_n, 'AdjPermutePvalue')

Unnamed: 0,Source,Target,RefCorr,RefPvalue,ExpCorr,ExpPvalue,Statistic,Pvalue,AdjPvalue,PermutePvalue,AdjPermutePvalue
518,hsa-miR-1307-5p|0,ENST00000413266,-0.30121,0.107666,0.802016,3.722291e-07,-1.415087,1.5e-05,1.0,0.0,0.0
549,hsa-miR-1307-5p|0,ENST00000420936,-0.232661,0.215917,0.768548,2.275446e-06,-1.253772,0.000128,1.0,0.0,0.0
6,hsa-miR-1307-5p|0,ENST00000261772,-0.356452,0.055758,0.829839,5.957517e-08,-1.560433,2e-06,0.959528,0.0,0.0
514,hsa-miR-15a-5p|0,ENST00000013807,-0.610887,0.000559,0.63629,0.0002851596,-1.462251,8e-06,1.0,9.734084e-07,0.041667
523,hsa-miR-1307-5p|0,ENST00000564370,-0.378226,0.041906,0.732661,1.131262e-05,-1.332437,4.7e-05,1.0,9.734084e-07,0.041667
515,hsa-miR-1307-5p|0,ENST00000373053,-0.367339,0.048432,0.790323,7.305377e-07,-1.457634,8e-06,1.0,9.734084e-07,0.041667
7,hsa-miR-196a-5p|0,ENST00000356970,0.491129,0.007209,-0.682258,7.141023e-05,1.370873,2.8e-05,0.963336,9.734084e-07,0.041667
527,hsa-miR-1307-5p|0,ENST00000300737,-0.4125,0.025873,0.704435,3.334066e-05,-1.314671,5.9e-05,1.0,1.460113e-06,0.041667
0,hsa-miR-196a-5p|0,ENST00000554015,0.745968,6e-06,-0.401613,0.03029203,1.389372,2.2e-05,0.865243,1.16809e-06,0.042857
517,hsa-miR-1307-5p|0,ENST00000610179,-0.355242,0.056624,0.785484,9.522953e-07,-1.43096,1.2e-05,1.0,1.265431e-06,0.043333


In [36]:
rna22_n_hyper = pvalue_cutoff(wrapped_hypergeom(rna22_n), 'AdjPvalue')
rna22_n_hyper.to_csv(OUTPUT_PATH/'hypergeom/7_rna22.csv', index=False)
rna22_n_hyper

Unnamed: 0,Molecule,Diff,Total,Proportion,Pvalue,AdjPvalue
0,hsa-miR-1307-5p|0,9,3281,0.002743,2.220446e-16,8.881784e-16
1,hsa-miR-15a-5p|0,3,2557,0.001173,6.573344e-05,0.0001314669
2,hsa-miR-196a-5p|0,3,4698,0.000639,0.0003916503,0.0005222003


### Luminal A (exp 8)

TargetScan

In [82]:
ts_n = rc(OUTPUT_PATH/'experiments/8/ztest_ts.csv')
ts_n = adjust_permute(ts_n)

Pairs: 455889 | after cut: 0


In [38]:
pvalue_cutoff(ts_n, 'AdjPermutePvalue')

Unnamed: 0,Source,Target,RefCorr,RefPvalue,ExpCorr,ExpPvalue,Statistic,Pvalue,AdjPvalue,PermutePvalue,AdjPermutePvalue


RNA22

In [83]:
rna22_n = rc(OUTPUT_PATH/'experiments/8/ztest_rna22.csv')
rna22_n = adjust_permute(rna22_n)

Pairs: 483291 | after cut: 0


In [40]:
pvalue_cutoff(rna22_n, 'AdjPermutePvalue')

Unnamed: 0,Source,Target,RefCorr,RefPvalue,ExpCorr,ExpPvalue,Statistic,Pvalue,AdjPvalue,PermutePvalue,AdjPermutePvalue


## DGCR8 (ENST00000351989)

### Luminal A (exp 9)

TargetScan

In [84]:
ts_n = rc(OUTPUT_PATH/'experiments/9/ztest_ts.csv')
ts_n = adjust_permute(ts_n)

Pairs: 455889 | after cut: 743


In [42]:
pvalue_cutoff(ts_n, 'AdjPermutePvalue')

Unnamed: 0,Source,Target,RefCorr,RefPvalue,ExpCorr,ExpPvalue,Statistic,Pvalue,AdjPvalue,PermutePvalue,AdjPermutePvalue
0,hsa-miR-140-3p|0,ENST00000308683,0.661223,9.917929e-10,-0.276444,0.018097,1.078812,1.256515e-07,0.057283,0.000000,0.000000
38,hsa-miR-361-3p|0,ENST00000373316,0.460854,5.797451e-05,-0.406202,0.000439,0.929449,5.279833e-06,0.300877,0.000000,0.000000
2,hsa-miR-23a-3p|+1,ENST00000319004,-0.616387,2.218929e-08,0.279431,0.016866,-1.006222,8.245699e-07,0.093978,0.000000,0.000000
3,hsa-miR-140-3p|0,ENST00000373314,0.661081,1.002455e-09,-0.233855,0.046082,1.032995,4.178905e-07,0.095256,0.000000,0.000000
32,hsa-miR-191-5p|0,ENST00000245539,-0.542049,1.510759e-06,0.308250,0.008254,-0.925663,5.765717e-06,0.292059,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
690,hsa-miR-16-5p|0,ENST00000326195,-0.403499,4.814341e-04,0.248506,0.033902,-0.681642,8.397433e-04,0.562985,0.000081,0.049932
601,hsa-miR-193a-5p|0,ENST00000376573,-0.370811,1.394332e-03,0.293144,0.012099,-0.691365,7.066572e-04,0.535145,0.000081,0.049932
677,hsa-miR-10b-5p|+1,ENST00000342374,0.314054,7.093165e-03,-0.342475,0.003248,0.681931,8.354805e-04,0.561779,0.000081,0.049933
426,hsa-miR-140-3p|0,ENST00000217402,0.478663,2.796023e-05,-0.196188,0.094806,0.720013,4.197741e-04,0.476046,0.000081,0.049933


In [43]:
ts_n_hyper = pvalue_cutoff(wrapped_hypergeom(ts_n), 'AdjPvalue')
ts_n_hyper.to_csv(OUTPUT_PATH/'hypergeom/9_ts.csv', index=False)
ts_n_hyper

Unnamed: 0,Molecule,Diff,Total,Proportion,Pvalue,AdjPvalue
0,hsa-miR-140-3p|0,169,4246,0.039802,0.0,0.0
4,hsa-miR-16-5p|0,76,4347,0.017483,0.0,0.0
9,hsa-miR-93-5p|+3,66,3927,0.016807,0.0,0.0
10,hsa-miR-140-3p|+1,58,4703,0.012333,0.0,0.0
6,hsa-miR-17-3p|0,42,4762,0.00882,0.0,0.0
1,hsa-miR-361-3p|0,38,4180,0.009091,0.0,0.0
8,hsa-miR-20a-5p|0,37,4863,0.007608,3.164136e-14,2.214895e-13
2,hsa-miR-23a-3p|+1,36,4386,0.008208,7.660539e-15,6.032674e-14
3,hsa-miR-191-5p|0,23,906,0.025386,0.0,0.0
13,hsa-miR-192-5p|0,15,3386,0.00443,0.0005769757,0.003304497


RNA22

In [None]:
rna22_n = rc(OUTPUT_PATH/'experiments/9/ztest_rna22.csv')
rna22_n = adjust_permute(rna22_n)

In [None]:
pvalue_cutoff(rna22_n, 'AdjPermutePvalue')

### Normal (exp 10)

TargetScan

In [86]:
ts_n = rc(OUTPUT_PATH/'experiments/10/ztest_ts.csv')
ts_n = adjust_permute(ts_n)

Pairs: 488477 | after cut: 193


In [88]:
pvalue_cutoff(ts_n, 'AdjPermutePvalue')

Unnamed: 0,Source,Target,RefCorr,RefPvalue,ExpCorr,ExpPvalue,Statistic,Pvalue,AdjPvalue,PermutePvalue,AdjPermutePvalue
78,hsa-miR-151a-3p|0,ENST00000377893,-0.647177,2.097337e-04,0.508871,0.005202,-1.331632,0.000047,0.192856,0.000000e+00,0.000000
31,hsa-miR-200b-3p|0,ENST00000263923,-0.901210,4.159900e-11,-0.044758,0.812752,-1.433835,0.000012,0.144639,4.094359e-07,0.033333
16,hsa-miR-203a-3p|+1,ENST00000371158,0.867742,2.360165e-09,-0.235081,0.211125,1.563425,0.000002,0.124570,9.212307e-07,0.034615
46,hsa-miR-151a-3p|0,ENST00000263636,0.611694,5.480214e-04,-0.602823,0.000685,1.409192,0.000017,0.166352,9.212307e-07,0.034615
37,hsa-miR-17-3p|0,ENST00000411734,0.590726,9.188853e-04,-0.633468,0.000308,1.425967,0.000013,0.150200,9.212307e-07,0.034615
...,...,...,...,...,...,...,...,...,...,...,...
260,hsa-miR-16-5p|0,ENST00000253462,-0.642742,2.380361e-04,0.460484,0.012234,-1.260757,0.000117,0.235819,2.139302e-05,0.050000
194,hsa-miR-200b-3p|0,ENST00000539065,0.792339,6.526293e-07,-0.200806,0.286194,1.281256,0.000091,0.225960,2.190482e-05,0.050000
64,hsa-miR-203a-3p|0,ENST00000251691,0.910484,9.843397e-12,0.190322,0.312297,1.337675,0.000044,0.190863,1.924348e-05,0.050000
1631,hsa-miR-140-3p|0,ENST00000441159,0.429839,1.994564e-02,-0.528629,0.003550,1.047939,0.001367,0.410701,1.924348e-05,0.050000


In [89]:
ts_n_hyper = pvalue_cutoff(wrapped_hypergeom(ts_n), 'AdjPvalue')
ts_n_hyper.to_csv(OUTPUT_PATH/'hypergeom/10_ts.csv', index=False)
ts_n_hyper

Unnamed: 0,Molecule,Diff,Total,Proportion,Pvalue,AdjPvalue
7,hsa-miR-140-3p|0,35,4523,0.007738,0.0,0.0
10,hsa-miR-205-5p|0,19,4036,0.004708,5.77316e-15,6.542914e-14
0,hsa-miR-151a-3p|0,16,2423,0.006603,5.218048e-15,8.870682e-14
8,hsa-miR-203a-3p|0,15,5086,0.002949,2.661335e-09,2.262135e-08
3,hsa-miR-17-3p|0,13,5027,0.002586,1.408111e-07,9.575154e-07
2,hsa-miR-203a-3p|+1,12,4941,0.002429,8.259282e-07,4.68026e-06
15,hsa-miR-140-3p|+1,11,4981,0.002208,5.83906e-06,2.836115e-05
4,hsa-miR-22-3p|+1,8,3567,0.002243,0.0001003333,0.0004264166
5,hsa-miR-200c-3p|0,8,4311,0.001856,0.000357019,0.001277752
1,hsa-miR-200b-3p|0,8,4311,0.001856,0.000357019,0.001277752


RNA22

In [87]:
rna22_n = rc(OUTPUT_PATH/'experiments/10/ztest_rna22.csv')
rna22_n = adjust_permute(rna22_n)

Pairs: 513659 | after cut: 118


In [None]:
pvalue_cutoff(rna22_n, 'AdjPermutePvalue')

In [90]:
rna22_n_hyper = pvalue_cutoff(wrapped_hypergeom(rna22_n), 'AdjPvalue')
rna22_n_hyper.to_csv(OUTPUT_PATH/'hypergeom/10_rna22.csv', index=False)
rna22_n_hyper

Unnamed: 0,Molecule,Diff,Total,Proportion,Pvalue,AdjPvalue
0,hsa-miR-151a-3p|0,39,4919,0.007928,0.0,0.0
2,hsa-miR-205-5p|0,15,6065,0.002473,1.373157e-11,1.92242e-10
1,hsa-miR-140-3p|0,10,2464,0.004058,3.861773e-10,3.604322e-09
7,hsa-miR-17-3p|0,7,4652,0.001505,0.0001093004,0.0007651029
5,hsa-miR-200a-5p|0,6,4596,0.001305,0.0007181284,0.004021519
3,hsa-miR-140-3p|+1,4,3128,0.001279,0.006080639,0.02432255
14,hsa-miR-200a-3p|0,3,1474,0.002035,0.004925605,0.02298616
