In [1]:
import numpy as np
import pandas as pd
import adafdr.method as md
import adafdr.data_loader as dl
import matplotlib.pyplot as plt
import pickle
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
# Folder holding intermediate results; the stability pickle
# ('res_stability.pickle') is written to and read from here.
output_folder = "/home/martin/NeuralFDR2/AdaFDRpaper/experiments_v1/temp_result"
# Presumably the destination for figures -- it is not referenced by the
# cells visible in this notebook section.
output_folder_fig = "/home/martin/NeuralFDR2/figures/fig_alg"

In [3]:
def _load_csv_dataset(file_path, covariate_cols):
    """Read one comma-separated file and return ``(p, x)`` as NumPy arrays.

    Parameters
    ----------
    file_path : str
        Path of the CSV file; it must contain a ``'p_val'`` column.
    covariate_cols : str or list of str
        Column(s) to use as covariates. A single column name yields a 1-D
        array; a list of names yields a 2-D array with one column per name.

    Returns
    -------
    tuple
        ``(p, x)`` where ``p`` is the ``'p_val'`` column and ``x`` holds the
        selected covariate column(s).
    """
    df = pd.read_csv(file_path, sep=',')
    # `.values` replaces `.as_matrix()`, which was removed in pandas 1.0.
    return (df['p_val'].values, df[covariate_cols].values)


# Maps dataset name -> whatever the loader returns; later cells read the
# p-values from index 0 and the covariates from index 1.
data = {}

# small GTEx (chr21 loader): Adipose_Subcutaneous and Adipose_Visceral_Omentum
for data_name in ('Adipose_Subcutaneous', 'Adipose_Visceral_Omentum'):
    data['small_gtex_' + data_name] = dl.data_small_gtex_chr21(opt=data_name)

# RNAseq: bottomly, pasilla, airway
data['bottomly'] = dl.data_bottomly()
data['pasilla'] = dl.data_pasilla()
data['airway'] = dl.data_airway()

# CSV-backed datasets: (dataset name, path relative to data_path, covariates).
# Listed in the original insertion order so that list(data.keys()) is unchanged.
data_path = '/data3/martin/AdaFDRpaper_data'
csv_datasets = [
    ('microbiome_enigma_ph', '/microbiome/microbiome_enigma_ph', ['ubiquity', 'mean_abun']),
    ('microbiome_enigma_al', '/microbiome/microbiome_enigma_al', ['ubiquity', 'mean_abun']),
    ('proteomics', '/proteomics/proteomics', 'x'),
    ('fmri_auditory', '/fmri/fmri_auditory', 'B_label'),
    ('fmri_imagination', '/fmri/fmri_imagination', 'B_label'),
]
for csv_name, csv_rel_path, csv_covariates in csv_datasets:
    data[csv_name] = _load_csv_dataset(data_path + csv_rel_path, csv_covariates)

In [7]:
# Dataset names, in the insertion order of `data`.
data_list = list(data)

# Per-dataset `alpha` value handed to md.adafdr_test.
alpha_dic = {
    'small_gtex_Adipose_Subcutaneous': 0.01,
    'small_gtex_Adipose_Visceral_Omentum': 0.01,
    'bottomly': 0.1,
    'pasilla': 0.1,
    'airway': 0.1,
    'microbiome_enigma_ph': 0.2,
    'microbiome_enigma_al': 0.2,
    'proteomics': 0.1,
    'fmri_auditory': 0.1,
    'fmri_imagination': 0.1,
}

# Datasets for which md.adafdr_test is called with fast_mode=True.
fast_list = [
    'microbiome_enigma_ph',
    'microbiome_enigma_al',
    'proteomics',
    'fmri_auditory',
    'fmri_imagination',
]

# Computation

In [5]:
# Number of repeated runs per dataset; repetition i is run with random_state=i.
n_rep = 10
# Maps dataset name -> boolean matrix of shape (n_rep, n_sample), one row of
# decisions per repetition.
decision_dic = {}
for data_name in data_list:
    # Index explicitly instead of unpacking: only entries 0 and 1 are used.
    p, x = data[data_name][0], data[data_name][1]
    alpha = alpha_dic[data_name]
    fast_mode = data_name in fast_list
    n_sample = p.shape[0]
    decision = np.zeros((n_rep, n_sample), dtype=bool)
    for i_rep in range(n_rep):
        print('%s: rep %d'%(data_name,i_rep))
        res = md.adafdr_test(
            p, x,
            fast_mode=fast_mode,
            alpha=alpha,
            random_state=i_rep,
        )
        decision[i_rep] = res['decision']
    decision_dic[data_name] = decision

small_gtex_Adipose_Subcutaneous: rep 0
## total rejection: 1469

small_gtex_Adipose_Subcutaneous: rep 1
## total rejection: 1562

small_gtex_Adipose_Subcutaneous: rep 2
## total rejection: 1446

small_gtex_Adipose_Subcutaneous: rep 3
## total rejection: 1469

small_gtex_Adipose_Subcutaneous: rep 4
## total rejection: 1426

small_gtex_Adipose_Subcutaneous: rep 5
## total rejection: 1515

small_gtex_Adipose_Subcutaneous: rep 6
## total rejection: 1570

small_gtex_Adipose_Subcutaneous: rep 7
## total rejection: 1444

small_gtex_Adipose_Subcutaneous: rep 8
## total rejection: 1469

small_gtex_Adipose_Subcutaneous: rep 9
## total rejection: 1469

small_gtex_Adipose_Visceral_Omentum: rep 0
## total rejection: 1360

small_gtex_Adipose_Visceral_Omentum: rep 1
## total rejection: 1367

small_gtex_Adipose_Visceral_Omentum: rep 2
## total rejection: 1427

small_gtex_Adipose_Visceral_Omentum: rep 3
## total rejection: 1400

small_gtex_Adipose_Visceral_Omentum: rep 4
## total rejection: 1358

small

KeyboardInterrupt: 

In [None]:
# Persist the per-dataset decision matrices. The context manager closes the
# file even if pickle.dump raises, which the manual open/close did not.
# Note: this writes whatever is currently in `decision_dic`, including a
# partial result if the computation cell was interrupted.
with open(output_folder+'/res_stability.pickle', 'wb') as fil:
    pickle.dump(decision_dic, fil)

# Analysis

In [4]:
# Reload the saved decision matrices. The context manager closes the file
# even if unpickling fails.
# NOTE: pickle.load executes arbitrary code from the file -- only load
# pickles produced by this notebook / a trusted source.
with open(output_folder+'/res_stability.pickle', 'rb') as fil:
    decision_dic = pickle.load(fil)

In [5]:
# Human-readable label for each dataset key (some labels span two lines).
name_dic = {
    # small GTEx
    'small_gtex_Adipose_Subcutaneous': 'small GTEx:\nAdipose_Subcutaneous',
    'small_gtex_Adipose_Visceral_Omentum': 'small GTEx:\nAdipose_Visceral_Omentum',
    # RNA-Seq
    'bottomly': 'RNA-Seq: Bottomly',
    'pasilla': 'RNA-Seq: Pasilla',
    'airway': 'RNA-Seq: airway',
    # microbiome
    'microbiome_enigma_ph': 'microbiome: enigma_ph',
    'microbiome_enigma_al': 'microbiome: enigma_al',
    # proteomics
    'proteomics': 'proteomics',
    # fMRI
    'fmri_auditory': 'fMRI: auditory',
    'fmri_imagination': 'fMRI: imagination',
}

In [32]:
# Stability summary: repetitions are paired as (0, 1), (2, 3), ... and, for
# each pair, the rejections of the first run are compared with those shared
# by both runs.
# At most this many pairs are used per dataset (the first 100 repetitions).
n_pair_max = 50
for data_name in data_list:
    decision_mat = decision_dic[data_name]
    # Derive the pair count from the stored matrix instead of assuming 100
    # repetitions; a hard-coded np.arange(50)*2 raises IndexError when fewer
    # repetitions were saved (e.g. n_rep = 10).
    n_pair = min(n_pair_max, decision_mat.shape[0] // 2)
    if n_pair == 0:
        raise ValueError('%s: need at least 2 repetitions, got %d'
                         % (data_name, decision_mat.shape[0]))
    temp_ind = np.arange(n_pair)*2
    rep_1 = decision_mat[temp_ind, :]
    rep_2 = decision_mat[temp_ind+1, :]
    # Rejections of the first run in each pair.
    v_rej = np.sum(rep_1, axis=1)
    # Hypotheses rejected by both runs of a pair (decisions are boolean).
    v_overlap = np.sum(rep_1 & rep_2, axis=1)

    print(data_name)
    print('Rep1 reject=%d, std=%d'%(np.mean(v_rej),np.std(v_rej)))
    # Percentages are relative to the mean number of first-run rejections.
    print('Overlap=%d, std=%d, percentage=%0.1f, std percent=%0.1f'
          %(np.mean(v_overlap),np.std(v_overlap), 100*np.mean(v_overlap)/np.mean(v_rej),
            100*np.std(v_overlap)/np.mean(v_rej)))
    print()

small_gtex_Adipose_Subcutaneous
Rep1 reject=1491, std=41
Overlap=1408, std=25, percentage=94.5, std percent=1.7

small_gtex_Adipose_Visceral_Omentum
Rep1 reject=1396, std=96
Overlap=1250, std=87, percentage=89.5, std percent=6.2

bottomly
Rep1 reject=2147, std=38
Overlap=2010, std=36, percentage=93.6, std percent=1.7

pasilla
Rep1 reject=830, std=15
Overlap=784, std=14, percentage=94.4, std percent=1.7

airway
Rep1 reject=6041, std=33
Overlap=5871, std=46, percentage=97.2, std percent=0.8

microbiome_enigma_ph
Rep1 reject=119, std=8
Overlap=104, std=5, percentage=87.8, std percent=4.8

microbiome_enigma_al
Rep1 reject=480, std=46
Overlap=396, std=42, percentage=82.4, std percent=8.8

proteomics
Rep1 reject=408, std=18
Overlap=366, std=14, percentage=89.6, std percent=3.5

fmri_auditory
Rep1 reject=1066, std=10
Overlap=1033, std=11, percentage=96.9, std percent=1.1

fmri_imagination
Rep1 reject=2233, std=12
Overlap=2178, std=13, percentage=97.6, std percent=0.6

