# Running MCCV to Predict Classes from Biomarker Distributions

In [1]:
import os
import time

import mccv
import numpy as np
import pandas as pd

In [2]:
# Show the package name reported by the imported `mccv` module.
mccv.__package__

'mccv'

## Import datasets

In [3]:
# Settings that identify which simulated-data CSV files to read.
N = 50
Nparam = 500
dists = ['normal','t','beta']

# Read one CSV per distribution; the first CSV column becomes the index.
mccv_data = {}
for dist in dists:
    mccv_data[dist] = pd.read_csv(
        f'data/mccv_{N}subjects_{Nparam}parameters_{dist}_data.csv',
        index_col=0,
    )

In [4]:
# Display the table stored under the 'normal' key of mccv_data.
mccv_data['normal']

Unnamed: 0,class,distribution,biomarker,seed,mu,sigma
subject0,1,normal,-3.640308,0,-3.960959,2.550307
subject1,1,normal,-4.297867,0,-3.960959,2.550307
subject2,1,normal,-2.327685,0,-3.960959,2.550307
subject3,1,normal,-3.693431,0,-3.960959,2.550307
subject4,1,normal,-5.327080,0,-3.960959,2.550307
...,...,...,...,...,...,...
subject95,0,normal,-4.781341,499,-4.733145,0.105035
subject96,0,normal,-4.745675,499,-4.733145,0.105035
subject97,0,normal,-4.701267,499,-4.733145,0.105035
subject98,0,normal,-4.652756,499,-4.733145,0.105035


## Timing mccv procedure on 1 dataset

In [5]:
# Time one MCCV run (real labels, then permuted labels) on a single
# seed of the 'normal' data, then write every result table to CSV.
t0 = time.time()

dist='normal'
i=1
mccv_obj = mccv.mccv(num_bootstraps=200)

mccv_obj.model_names = ['Logistic Regression']

# Filter to the chosen seed once and reuse the subset for X and Y.
seed_rows = mccv_data[dist].query('seed==@i')
X = seed_rows.loc[:,['biomarker']]
Y = seed_rows.loc[:,['class']]

mccv_obj.set_X(X)
mccv_obj.set_Y(Y)

mccv_obj.run_mccv()

t1 = time.time()

print(str(np.round(((t1-t0)/60),2)) +
      ' minutes to run mccv')

mccv_obj.run_permuted_mccv()

t2 = time.time()

print(str(np.round(((t2-t1)/60),2)) +
      ' minutes to run permuted mccv')

print(str(np.round(((t2-t0)/60),2)) +
      ' minutes to run mccv procedures')

# Create the output folder up front so that the writes below do not fail
# on a fresh checkout where it does not exist yet.
out_dir = 'data/mccv_test_output/'
os.makedirs(out_dir, exist_ok=True)

# The real and permuted results are saved the same way; only the file-name
# infix differs, so both are handled by one loop.
result_sets = (
    (mccv_obj.mccv_data, '_data_mccv_'),
    (mccv_obj.mccv_permuted_data, '_data_mccv_permuted_'),
)
for tables, infix in result_sets:
    for key in tables.keys():
        table = tables[key]
        # Tag each table with the distribution and seed it came from.
        table['distribution'] = dist
        table['seed'] = i
        table.to_csv(out_dir+dist+infix+key.lower().replace(' ','_')+'.csv')

0.15 minutes to run mccv
0.1 minutes to run permuted mccv
0.25 minutes to run mccv proocedures


## Dictionary of datasets

In [6]:
# Sample sizes, parameter counts and distributions that select the input files.
Ns = [50]
Nparams = [500]
dists = ['normal','t','beta']

# For each distribution, keep a list of (seed, rows) pairs obtained by
# grouping the file's rows on the 'seed' column.
# NOTE: entries are keyed by distribution only, so if Ns or Nparams held
# more than one value, files read later would replace those read earlier.
mccv_data = {}
for N in Ns:
    for Nparam in Nparams:
        for dist in dists:
            mccv_data[dist] = list(
                pd.read_csv(
                    f'data/mccv_{N}subjects_{Nparam}parameters_{dist}_data.csv',
                    index_col=0,
                ).groupby('seed')
            )

## Running MCCV algorithm on datasets

In [8]:
# Settings applied to every MCCV run in this sweep.
model_names = ['Logistic Regression']
metrics = ['roc_auc','average_precision']
nboots = 200

# Run real and permuted MCCV for every (seed, dataset) pair of every
# distribution and write each result table to its own CSV file.
for N in Ns:
    for Nparam in Nparams:
        for dist in dists:
            print('\n'+dist+'\n')

            # Create both output folders for this distribution up front so
            # that the CSV writes below do not fail on a fresh checkout.
            real_dir = 'data/mccv_output/'+dist+'/real/'
            permuted_dir = 'data/mccv_output/'+dist+'/permuted/'
            os.makedirs(real_dir, exist_ok=True)
            os.makedirs(permuted_dir, exist_ok=True)

            # Each entry of mccv_data[dist] unpacks into a seed and its dataset.
            for seed, dataset in mccv_data[dist]:
                t0 = time.time()
                print('biomarker'+str(seed))

                mccv_obj = mccv.mccv(num_bootstraps=nboots)

                mccv_obj.model_names = model_names
                mccv_obj.metrics = metrics
                mccv_obj.n_jobs = 4

                X = dataset.loc[:,['biomarker']]
                Y = dataset.loc[:,['class']]

                mccv_obj.set_X(X)
                mccv_obj.set_Y(Y)

                mccv_obj.run_mccv()
                mccv_obj.run_permuted_mccv()

                # Only the combined duration of both runs is reported.
                t2 = time.time()

                print('\t'+str(np.round(((t2-t0)/60),2)) +
                      ' minutes to run mccv procedures')

                # The real and permuted results are saved the same way; only
                # the target folder and file-name infix differ.
                result_sets = (
                    (mccv_obj.mccv_data, real_dir, 'data_mccv_'),
                    (mccv_obj.mccv_permuted_data, permuted_dir,
                     'data_mccv_permuted_'),
                )
                for tables, out_dir, infix in result_sets:
                    for key in tables.keys():
                        table = tables[key]
                        # Tag each table with its distribution and seed.
                        table['distribution'] = dist
                        table['seed'] = seed
                        table.to_csv(out_dir+
                                     str(seed)+infix+
                                     key.lower().replace(' ','_')+'.csv')


normal

biomarker0
	0.15 minutes to run mccv procedures
biomarker1
	0.16 minutes to run mccv procedures
biomarker2
	0.16 minutes to run mccv procedures
biomarker3
	0.17 minutes to run mccv procedures
biomarker4
	0.18 minutes to run mccv procedures
biomarker5
	0.18 minutes to run mccv procedures
biomarker6
	0.18 minutes to run mccv procedures
biomarker7
	0.17 minutes to run mccv procedures
biomarker8
	0.2 minutes to run mccv procedures
biomarker9
	0.19 minutes to run mccv procedures
biomarker10
	0.19 minutes to run mccv procedures
biomarker11
	0.18 minutes to run mccv procedures
biomarker12
	0.19 minutes to run mccv procedures
biomarker13
	0.19 minutes to run mccv procedures
biomarker14
	0.19 minutes to run mccv procedures
biomarker15
	0.17 minutes to run mccv procedures
biomarker16
	0.16 minutes to run mccv procedures
biomarker17
	0.16 minutes to run mccv procedures
biomarker18
	0.17 minutes to run mccv procedures
biomarker19
	0.18 minutes to run mccv procedures
biomarker20
	0.18 minu

	0.18 minutes to run mccv procedures
biomarker167
	0.16 minutes to run mccv procedures
biomarker168
	0.18 minutes to run mccv procedures
biomarker169
	0.17 minutes to run mccv procedures
biomarker170
	0.17 minutes to run mccv procedures
biomarker171
	0.19 minutes to run mccv procedures
biomarker172
	0.18 minutes to run mccv procedures
biomarker173
	0.18 minutes to run mccv procedures
biomarker174
	0.18 minutes to run mccv procedures
biomarker175
	0.2 minutes to run mccv procedures
biomarker176
	0.18 minutes to run mccv procedures
biomarker177
	0.19 minutes to run mccv procedures
biomarker178
	0.18 minutes to run mccv procedures
biomarker179
	0.19 minutes to run mccv procedures
biomarker180
	0.19 minutes to run mccv procedures
biomarker181
	0.18 minutes to run mccv procedures
biomarker182
	0.19 minutes to run mccv procedures
biomarker183
	0.19 minutes to run mccv procedures
biomarker184
	0.21 minutes to run mccv procedures
biomarker185
	0.19 minutes to run mccv procedures
biomarker186
	

	0.21 minutes to run mccv procedures
biomarker332
	0.21 minutes to run mccv procedures
biomarker333
	0.21 minutes to run mccv procedures
biomarker334
	0.17 minutes to run mccv procedures
biomarker335
	0.18 minutes to run mccv procedures
biomarker336
	0.19 minutes to run mccv procedures
biomarker337
	0.18 minutes to run mccv procedures
biomarker338
	0.17 minutes to run mccv procedures
biomarker339
	0.19 minutes to run mccv procedures
biomarker340
	0.21 minutes to run mccv procedures
biomarker341
	0.19 minutes to run mccv procedures
biomarker342
	0.18 minutes to run mccv procedures
biomarker343
	0.18 minutes to run mccv procedures
biomarker344
	0.18 minutes to run mccv procedures
biomarker345
	0.19 minutes to run mccv procedures
biomarker346
	0.2 minutes to run mccv procedures
biomarker347
	0.19 minutes to run mccv procedures
biomarker348
	0.2 minutes to run mccv procedures
biomarker349
	0.19 minutes to run mccv procedures
biomarker350
	0.19 minutes to run mccv procedures
biomarker351
	0

	0.18 minutes to run mccv procedures
biomarker497
	0.18 minutes to run mccv procedures
biomarker498
	0.19 minutes to run mccv procedures
biomarker499
	0.19 minutes to run mccv procedures

t

biomarker0
	0.18 minutes to run mccv procedures
biomarker1
	0.18 minutes to run mccv procedures
biomarker2
	0.18 minutes to run mccv procedures
biomarker3
	0.2 minutes to run mccv procedures
biomarker4
	0.2 minutes to run mccv procedures
biomarker5
	0.19 minutes to run mccv procedures
biomarker6
	0.21 minutes to run mccv procedures
biomarker7
	0.2 minutes to run mccv procedures
biomarker8
	0.19 minutes to run mccv procedures
biomarker9
	0.17 minutes to run mccv procedures
biomarker10
	0.17 minutes to run mccv procedures
biomarker11
	0.18 minutes to run mccv procedures
biomarker12
	0.18 minutes to run mccv procedures
biomarker13
	0.18 minutes to run mccv procedures
biomarker14
	0.18 minutes to run mccv procedures
biomarker15
	0.17 minutes to run mccv procedures
biomarker16
	0.19 minutes to run mccv 

	0.18 minutes to run mccv procedures
biomarker164
	0.17 minutes to run mccv procedures
biomarker165
	0.19 minutes to run mccv procedures
biomarker166
	0.18 minutes to run mccv procedures
biomarker167
	0.19 minutes to run mccv procedures
biomarker168
	0.17 minutes to run mccv procedures
biomarker169
	0.18 minutes to run mccv procedures
biomarker170
	0.18 minutes to run mccv procedures
biomarker171
	0.19 minutes to run mccv procedures
biomarker172
	0.19 minutes to run mccv procedures
biomarker173
	0.17 minutes to run mccv procedures
biomarker174
	0.18 minutes to run mccv procedures
biomarker175
	0.18 minutes to run mccv procedures
biomarker176
	0.19 minutes to run mccv procedures
biomarker177
	0.17 minutes to run mccv procedures
biomarker178
	0.18 minutes to run mccv procedures
biomarker179
	0.19 minutes to run mccv procedures
biomarker180
	0.19 minutes to run mccv procedures
biomarker181
	0.17 minutes to run mccv procedures
biomarker182
	0.18 minutes to run mccv procedures
biomarker183


	0.17 minutes to run mccv procedures
biomarker328
	0.17 minutes to run mccv procedures
biomarker329
	0.18 minutes to run mccv procedures
biomarker330
	0.18 minutes to run mccv procedures
biomarker331
	0.17 minutes to run mccv procedures
biomarker332
	0.17 minutes to run mccv procedures
biomarker333
	0.18 minutes to run mccv procedures
biomarker334
	0.17 minutes to run mccv procedures
biomarker335
	0.16 minutes to run mccv procedures
biomarker336
	0.17 minutes to run mccv procedures
biomarker337
	0.17 minutes to run mccv procedures
biomarker338
	0.17 minutes to run mccv procedures
biomarker339
	0.17 minutes to run mccv procedures
biomarker340
	0.17 minutes to run mccv procedures
biomarker341
	0.18 minutes to run mccv procedures
biomarker342
	0.17 minutes to run mccv procedures
biomarker343
	0.17 minutes to run mccv procedures
biomarker344
	0.17 minutes to run mccv procedures
biomarker345
	0.17 minutes to run mccv procedures
biomarker346
	0.18 minutes to run mccv procedures
biomarker347


	0.18 minutes to run mccv procedures
biomarker492
	0.18 minutes to run mccv procedures
biomarker493
	0.17 minutes to run mccv procedures
biomarker494
	0.19 minutes to run mccv procedures
biomarker495
	0.18 minutes to run mccv procedures
biomarker496
	0.18 minutes to run mccv procedures
biomarker497
	0.17 minutes to run mccv procedures
biomarker498
	0.17 minutes to run mccv procedures
biomarker499
	0.18 minutes to run mccv procedures

beta

biomarker0
	0.17 minutes to run mccv procedures
biomarker1
	0.17 minutes to run mccv procedures
biomarker2
	0.17 minutes to run mccv procedures
biomarker3
	0.17 minutes to run mccv procedures
biomarker4
	0.16 minutes to run mccv procedures
biomarker5
	0.15 minutes to run mccv procedures
biomarker6
	0.17 minutes to run mccv procedures
biomarker7
	0.16 minutes to run mccv procedures
biomarker8
	0.17 minutes to run mccv procedures
biomarker9
	0.16 minutes to run mccv procedures
biomarker10
	0.16 minutes to run mccv procedures
biomarker11
	0.16 minutes t

	0.17 minutes to run mccv procedures
biomarker158
	0.16 minutes to run mccv procedures
biomarker159
	0.17 minutes to run mccv procedures
biomarker160
	0.17 minutes to run mccv procedures
biomarker161
	0.18 minutes to run mccv procedures
biomarker162
	0.16 minutes to run mccv procedures
biomarker163
	0.17 minutes to run mccv procedures
biomarker164
	0.17 minutes to run mccv procedures
biomarker165
	0.18 minutes to run mccv procedures
biomarker166
	0.16 minutes to run mccv procedures
biomarker167
	0.17 minutes to run mccv procedures
biomarker168
	0.18 minutes to run mccv procedures
biomarker169
	0.16 minutes to run mccv procedures
biomarker170
	0.16 minutes to run mccv procedures
biomarker171
	0.17 minutes to run mccv procedures
biomarker172
	0.17 minutes to run mccv procedures
biomarker173
	0.18 minutes to run mccv procedures
biomarker174
	0.16 minutes to run mccv procedures
biomarker175
	0.17 minutes to run mccv procedures
biomarker176
	0.17 minutes to run mccv procedures
biomarker177


	0.17 minutes to run mccv procedures
biomarker322
	0.17 minutes to run mccv procedures
biomarker323
	0.17 minutes to run mccv procedures
biomarker324
	0.18 minutes to run mccv procedures
biomarker325
	0.17 minutes to run mccv procedures
biomarker326
	0.17 minutes to run mccv procedures
biomarker327
	0.17 minutes to run mccv procedures
biomarker328
	0.17 minutes to run mccv procedures
biomarker329
	0.19 minutes to run mccv procedures
biomarker330
	0.16 minutes to run mccv procedures
biomarker331
	0.17 minutes to run mccv procedures
biomarker332
	0.17 minutes to run mccv procedures
biomarker333
	0.18 minutes to run mccv procedures
biomarker334
	0.16 minutes to run mccv procedures
biomarker335
	0.17 minutes to run mccv procedures
biomarker336
	0.17 minutes to run mccv procedures
biomarker337
	0.19 minutes to run mccv procedures
biomarker338
	0.16 minutes to run mccv procedures
biomarker339
	0.17 minutes to run mccv procedures
biomarker340
	0.17 minutes to run mccv procedures
biomarker341


	0.17 minutes to run mccv procedures
biomarker486
	0.19 minutes to run mccv procedures
biomarker487
	0.17 minutes to run mccv procedures
biomarker488
	0.18 minutes to run mccv procedures
biomarker489
	0.18 minutes to run mccv procedures
biomarker490
	0.19 minutes to run mccv procedures
biomarker491
	0.2 minutes to run mccv procedures
biomarker492
	0.17 minutes to run mccv procedures
biomarker493
	0.18 minutes to run mccv procedures
biomarker494
	0.18 minutes to run mccv procedures
biomarker495
	0.19 minutes to run mccv procedures
biomarker496
	0.17 minutes to run mccv procedures
biomarker497
	0.18 minutes to run mccv procedures
biomarker498
	0.18 minutes to run mccv procedures
biomarker499
	0.19 minutes to run mccv procedures
