## Profiling of FFSR Functions

In [14]:
import ffsr as f

In [15]:
import numpy as np
import pandas as pd

# Build a reproducible toy regression data set for the profiling runs below.
np.random.seed(1234)

n_obs, n_vars = 100, 15

# Covariates: n_obs draws from a zero-mean normal with identity covariance.
X = np.random.multivariate_normal(np.zeros(n_vars), np.eye(n_vars), n_obs)

# Coefficient column vector; only V3, V4, V7 and V11 are non-zero.
beta = np.zeros((n_vars, 1), dtype=int)
beta[[2, 3, 6, 10], 0] = [5, 6, 4, 5]

# Outcome is the exact linear combination (no noise term is added).
Y = X.dot(beta)

# Assemble one frame with the outcome in the first column, as ffsr expects.
Y2 = pd.DataFrame(Y)
X2 = pd.DataFrame(X, columns=["V%d" % i for i in range(1, n_vars + 1)])
d = pd.concat([Y2, X2], axis=1)

In [16]:
%%time
# Wall-clock / CPU timing of a single ffsr call on the simulated frame (g0 = 0.05).
fres = f.ffsr(d,0.05)

CPU times: user 20.2 ms, sys: 1.43 ms, total: 21.6 ms
Wall time: 21.4 ms


In [17]:
# Function-level profile of ffsr: -r returns the stats object, -q suppresses the pager.
pstats = %prun -r -q f.ffsr(d, 0.05)

 

In [18]:
# Show the 10 entries with the largest internal time; the trailing ';' hides the returned object.
pstats.sort_stats('time').print_stats(10);

         21289 function calls (21226 primitive calls) in 0.060 seconds

   Ordered by: internal time
   List reduced from 356 to 10 due to restriction <10>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
       23    0.005    0.000    0.007    0.000 functions.py:95(__call__)
     3254    0.005    0.000    0.008    0.000 numeric.py:1910(isscalar)
     1601    0.004    0.000    0.020    0.000 common.py:261(notnull)
     1618    0.004    0.000    0.011    0.000 common.py:132(_isnull_new)
     4157    0.003    0.000    0.004    0.000 {isinstance}
        2    0.003    0.002    0.042    0.021 common.py:282(convert_to_r_dataframe)
     1618    0.002    0.000    0.006    0.000 {pandas.lib.isscalar}
     1618    0.002    0.000    0.013    0.000 common.py:111(isnull)
        2    0.002    0.001    0.002    0.001 {method 'rcall' of 'rpy2.rinterface.SexpClosure' objects}
       90    0.001    0.000    0.002    0.000 vectors.py:230(__init__)




In [19]:
# Function-level profile of bagfsr: -r returns the stats object, -q suppresses the pager.
pstats = %prun -r -q f.bagfsr(d, 0.05)

 

In [20]:
# Show the 10 entries with the largest internal time; the trailing ';' hides the returned object.
pstats.sort_stats('time').print_stats(10);

         4844418 function calls (4824089 primitive calls) in 12.109 seconds

   Ordered by: internal time
   List reduced from 1457 to 10 due to restriction <10>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
   656043    0.893    0.000    1.386    0.000 numeric.py:1910(isscalar)
   930418    0.788    0.000    0.922    0.000 {isinstance}
   320001    0.688    0.000    3.639    0.000 common.py:261(notnull)
     4600    0.643    0.000    0.958    0.000 functions.py:95(__call__)
   326408    0.618    0.000    2.124    0.000 common.py:132(_isnull_new)
      400    0.500    0.001    7.503    0.019 common.py:282(convert_to_r_dataframe)
   326408    0.474    0.000    1.168    0.000 {pandas.lib.isscalar}
      400    0.408    0.001    0.408    0.001 {method 'rcall' of 'rpy2.rinterface.SexpClosure' objects}
   326408    0.404    0.000    2.527    0.000 common.py:111(isnull)
    13800    0.225    0.000    0.527    0.000 __init__.py:61(sexpvector_to_ro)




In [21]:
# Register the line_profiler extension, which provides the %lprun magic used below.
%load_ext line_profiler

The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler


In [22]:
# Line-by-line timing of f.ffsr: -f selects the function to instrument, -r returns the profiler object.
lstats = %lprun -r -f f.ffsr f.ffsr(d, 0.05)

In [15]:
# Print the per-line timing report held in lstats.
lstats.print_stats()

Timer unit: 1e-06 s

Total time: 0.030589 s
File: ffsr.py
Function: ffsr at line 385

Line #      Hits         Time  Per Hit   % Time  Line Contents
   385                                           def ffsr(dat,g0=0.05,betaout=False,gs=None,max_size=None,var_incl=None,prec_f='.4f'):
   386                                               
   387                                               """
   388                                               ### Purpose:
   389                                               #   Perform the Fast False Selection Rate procedure with linear regression.
   390                                               
   391                                               ### NOTE: Outcome variable must be in FIRST column of dataset 'dat'
   392                                               
   393                                               ### NOTE: If bagging necessary with FFSR, use function 'bagfsr()' in this module.
   394                                        

In [16]:
lstats = %lprun -r -f bagfsr bagfsr(d, 0.05)

In [17]:
# Print the per-line timing report held in lstats.
lstats.print_stats()

Timer unit: 1e-06 s

Total time: 6.77536 s
File: ffsr.py
Function: bagfsr at line 556

Line #      Hits         Time  Per Hit   % Time  Line Contents
   556                                           def bagfsr(dat,g0,B=200,max_s=None,v_incl=None,prec=4):
   557                                               
   558                                               """
   559                                               ### Purpose:
   560                                               #   Perform bagging with Fast False Selection Rate procedure to allow for more accurate predictions.
   561                                               
   562                                               ### NOTE: appropriate covariate transformations are expected to have been applied prior 
   563                                               ###       to utilization of this FSR algorithm.
   564                                                
   565                                               ### Input

In [33]:
# Register the memory_profiler extension, which provides the %memit and %mprun magics used below.
%load_ext memory_profiler

In [35]:
# Report peak memory and the increment attributable to one ffsr call.
%memit f.ffsr(d,0.05)

peak memory: 120.65 MiB, increment: 0.01 MiB


In [36]:
mstats = %mprun -f f.ffsr f.ffsr(d,0.05)

('',)


In [37]:
# Report peak memory and the increment attributable to one bagfsr call.
%memit f.bagfsr(d,0.05)

peak memory: 120.70 MiB, increment: 0.00 MiB


In [38]:
mstats = %mprun -f f.bagfsr f.bagfsr(d,0.05)

('',)
