Merge pull request #247 from ljchang/tests
refactored tests
ljchang committed Jul 29, 2018
2 parents f8e9b43 + b483ee6 commit 88ee7c1
Showing 26 changed files with 855 additions and 1,258 deletions.
2 changes: 2 additions & 0 deletions .travis.yml
@@ -25,6 +25,8 @@ install:
- pip install -r requirements.txt
- pip install -r optional-dependencies.txt
- python setup.py install
- cp nltools/tests/matplotlibrc .


script: coverage run --source nltools -m py.test

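The new install step copies a matplotlibrc into the build directory so plotting tests can run on headless Travis workers. The file's contents aren't shown in this diff; typically such a file just pins a non-interactive backend, which has the same effect as this Python sketch (assumed intent):

```python
# Equivalent runtime fix (assumption; the repo's actual matplotlibrc isn't shown):
import matplotlib
matplotlib.use('Agg')  # non-interactive backend, safe on displayless CI workers
import matplotlib.pyplot as plt  # import pyplot only after the backend is set
```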
9 changes: 5 additions & 4 deletions README.md
@@ -1,20 +1,21 @@
[![PyPI version](https://badge.fury.io/py/nltools.svg)](https://badge.fury.io/py/nltools)
[![Build Status](https://api.travis-ci.org/ljchang/nltools.png)](https://travis-ci.org/ljchang/nltools/)
[![Coverage Status](https://coveralls.io/repos/github/ljchang/nltools/badge.svg?branch=master)](https://coveralls.io/github/ljchang/nltools?branch=master)
[![Codacy Badge](https://api.codacy.com/project/badge/Grade/625677967a0749299f38c2bf8ee269c3)](https://www.codacy.com/app/ljchang/nltools?utm_source=github.com&utm_medium=referral&utm_content=ljchang/nltools&utm_campaign=Badge_Grade)
[![Documentation Status](https://readthedocs.org/projects/neurolearn/badge/?version=latest)](http://neurolearn.readthedocs.io/en/latest/?badge=latest)

# NLTools
Python toolbox for analyzing neuroimaging data. Compatible with both Python 2.7 and Python 3.6, it is particularly useful for conducting multivariate analyses. It was originally based on Tor Wager's object-oriented Matlab [canlab core tools](http://wagerlab.colorado.edu/tools) and relies heavily on [nilearn](http://nilearn.github.io) and [scikit-learn](http://scikit-learn.org/stable/index.html).

### Installation
1. Method 1

```
pip install nltools
```

2. Method 2 (Recommended)

```
pip install git+https://github.com/ljchang/neurolearn
```
@@ -43,13 +44,13 @@ nltools requires several dependencies. All are available in pypi. Can use `pip
- six
- pynv
- joblib

### Optional Dependencies
- mne
- requests
- networkx
- ipywidgets >=5.2.2

### Documentation
Current documentation can be found at [readthedocs](http://neurolearn.readthedocs.org/en/latest).

2 changes: 0 additions & 2 deletions nltools/__init__.py
@@ -8,7 +8,6 @@
'stats',
'utils',
'file_reader',
'pbs_job',
'mask',
'prefs',
'external',
@@ -21,7 +20,6 @@
Groupby,
Design_Matrix,
Design_Matrix_Series)
from .pbs_job import PBS_Job
from .simulator import Simulator
from .prefs import MNI_Template, resolve_mni_path
from .version import __version__
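Dropping `pbs_job` from `__all__` and removing the import takes `PBS_Job` out of the public API. Downstream code that imported it will now fail; a defensive sketch (pattern is a suggestion, not from this commit):

```python
# After this commit the old import raises ImportError:
try:
    from nltools import PBS_Job
except ImportError:
    PBS_Job = None  # searchlight/PBS functionality was removed from nltools
```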
12 changes: 6 additions & 6 deletions nltools/analysis.py
@@ -14,8 +14,6 @@
import pandas as pd
import numpy as np
from nltools.plotting import roc_plot
from nltools.stats import pearson
from nltools.utils import get_resource_path
from scipy.stats import norm, binom_test
from sklearn.metrics import auc
from copy import deepcopy
@@ -107,10 +105,12 @@ def calculate(self, input_values=None, binary_outcome=None,

if self.forced_choice is not None:
sub_idx = np.unique(self.forced_choice)
assert len(sub_idx) == len(self.binary_outcome)/2, ("Make sure "
"that subject ids are correct for 'forced_choice'.")
assert len(set(sub_idx).union(set(np.array(self.forced_choice)[self.binary_outcome]))) == len(sub_idx), "Issue with forced_choice subject labels."
assert len(set(sub_idx).union(set(np.array(self.forced_choice)[~self.binary_outcome]))) == len(sub_idx), "Issue with forced_choice subject labels."
if len(sub_idx) != len(self.binary_outcome)/2:
raise ValueError("Make sure that subject ids are correct for 'forced_choice'.")
if len(set(sub_idx).union(set(np.array(self.forced_choice)[self.binary_outcome]))) != len(sub_idx):
raise ValueError("Issue with forced_choice subject labels.")
if len(set(sub_idx).union(set(np.array(self.forced_choice)[~self.binary_outcome]))) != len(sub_idx):
raise ValueError("Issue with forced_choice subject labels.")
for sub in sub_idx:
sub_mn = (self.input_values[(self.forced_choice == sub) & (self.binary_outcome == True)]+self.input_values[(self.forced_choice == sub) & (self.binary_outcome == False)])[0]/2
self.input_values[(self.forced_choice == sub) & (self.binary_outcome == True)] = self.input_values[(self.forced_choice == sub) & (self.binary_outcome == True)][0] - sub_mn
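The assert-to-raise conversion here is more than style: Python strips `assert` statements entirely under the `-O` flag, so input validation written as asserts can be silently skipped. A minimal sketch of the difference, with hypothetical inputs:

```python
import numpy as np

binary_outcome = np.array([True, False, True, False])
forced_choice = np.array([0, 0, 1, 1])  # hypothetical subject ids, one pair each

sub_idx = np.unique(forced_choice)

# Compiled away entirely under `python -O`; the check never runs:
assert len(sub_idx) == len(binary_outcome) / 2

# Always enforced, whatever flags the interpreter was started with:
if len(sub_idx) != len(binary_outcome) / 2:
    raise ValueError("Make sure that subject ids are correct for 'forced_choice'.")
```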
3 changes: 1 addition & 2 deletions nltools/cross_validation.py
@@ -17,7 +17,6 @@
from sklearn.model_selection._split import _BaseKFold
from sklearn.utils.validation import check_array
import numpy as np
import random
import pandas as pd

class KFoldStratified(_BaseKFold):
@@ -97,7 +96,7 @@ def set_cv(Y=None, cv_dict=None):
"""

if type(cv_dict) is dict:
if isinstance(cv_dict, dict):
if cv_dict['type'] == 'kfolds':
if 'subject_id' in cv_dict: # Hold out subjects within each fold
from sklearn.model_selection import GroupKFold
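Swapping `type(cv_dict) is dict` for `isinstance(cv_dict, dict)` lets `set_cv` accept dict subclasses as well as plain dicts. For example:

```python
from collections import OrderedDict

cv_dict = OrderedDict([('type', 'kfolds'), ('n_folds', 5)])

print(type(cv_dict) is dict)      # False: an exact-type check rejects subclasses
print(isinstance(cv_dict, dict))  # True: OrderedDict is a dict subclass
```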
29 changes: 17 additions & 12 deletions nltools/data/adjacency.py
@@ -111,9 +111,11 @@ def __init__(self, data=None, Y=None, matrix_type=None, labels=None,
self.Y = pd.DataFrame()

if labels is not None:
assert isinstance(labels, (list, np.ndarray)), "Make sure labels is a list or numpy array."
if not isinstance(labels, (list, np.ndarray)):
raise ValueError( "Make sure labels is a list or numpy array.")
if self.is_single_matrix:
assert len(labels) == self.square_shape()[0], 'Make sure the length of labels matches the shape of data.'
if len(labels) != self.square_shape()[0]:
raise ValueError('Make sure the length of labels matches the shape of data.')
self.labels = deepcopy(labels)
else:
if len(labels) != len(self):
@@ -125,7 +127,8 @@
else:
self.labels = list(labels) * len(self)
else:
assert np.all(np.array([len(x) for x in labels])==self.square_shape()[0]), "All lists of labels must be same length as shape of data."
if not np.all(np.array([len(x) for x in labels]) == self.square_shape()[0]):
raise ValueError("All lists of labels must be same length as shape of data.")
self.labels = deepcopy(labels)
else:
self.labels = None
@@ -600,7 +603,6 @@ def plot_label_distance(self, labels=None, ax=None):
if len(labels) != distance.shape[0]:
raise ValueError('Labels must be same length as distance matrix')

within = []; between = []
out = pd.DataFrame(columns=['Distance', 'Group', 'Type'], index=None)
for i in np.unique(labels):
tmp_w = pd.DataFrame(columns=out.columns, index=None)
@@ -643,7 +645,6 @@ def stats_label_distance(self, labels=None, n_permute=5000, n_jobs=-1):
if len(labels) != distance.shape[0]:
raise ValueError('Labels must be same length as distance matrix')

within = []; between = []
out = pd.DataFrame(columns=['Distance', 'Group', 'Type'], index=None)
for i in np.unique(labels):
tmp_w = pd.DataFrame(columns=out.columns, index=None)
@@ -725,12 +726,17 @@ def plot_mds(self, n_components=2, metric=True, labels_color=None,
fig: returns matplotlib figure
'''

assert self.matrix_type == 'distance', "MDS only works on distance matrices."
assert ~self.is_single_matrix, "MDS only works on single matrices."
assert n_components == 2 or n_components==3, 'Cannot plot {0}-d image'.format(n_components)
if self.matrix_type != 'distance':
raise ValueError("MDS only works on distance matrices.")
if not self.is_single_matrix:
raise ValueError("MDS only works on single matrices.")
if n_components not in [2,3]:
raise ValueError('Cannot plot {0}-d image'.format(n_components))
if labels_color is not None:
assert self.labels is not None, "Make sure that Adjacency object has labels specified."
assert len(self.labels) == len(labels_color), "Length of labels_color must match self.labels."
if self.labels is None:
raise ValueError("Make sure that Adjacency object has labels specified.")
if len(self.labels) != len(labels_color):
raise ValueError("Length of labels_color must match self.labels.")

# Run MDS
mds = MDS(n_components=n_components, metric=metric, n_jobs=n_jobs,
@@ -810,7 +816,6 @@ def within_cluster_mean(self, clusters = None):
if len(clusters) != distance.shape[0]:
raise ValueError('Cluster labels must be same length as distance matrix')

within = []
out = pd.DataFrame(columns=['Mean','Label'],index=None)
out = {}
for i in list(set(clusters)):
@@ -834,7 +839,7 @@ def regress(self, X, mode='ols', **kwargs):
if isinstance(X, Adjacency):
if X.square_shape()[0] != self.square_shape()[0]:
raise ValueError('Adjacency instances must be the same size.')
b,t,p,df,res = regression(X.data.T, self.data, mode=mode, **kwargs)
b,t,p,_,res = regression(X.data.T, self.data, mode=mode, **kwargs)
stats['beta'],stats['t'],stats['p'],stats['residual'] = (b,t,p,res)
elif isinstance(X, Design_Matrix):
if X.shape[0] != len(self):
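A subtlety in the label-length guard above: the negation of `np.all(lengths == n)` is `not np.all(...)` (equivalently `np.any(lengths != n)`), not `np.all(lengths != n)`, which would only fire when every length differs. A small sketch with hypothetical values:

```python
import numpy as np

lengths = np.array([3, 3, 4])  # hypothetical per-matrix label-list lengths
n = 3                          # hypothetical square-matrix dimension

print(np.all(lengths != n))      # False: not *all* lengths differ, so no error fires
print(not np.all(lengths == n))  # True: at least one mismatch, which should raise
print(np.any(lengths != n))      # True: equivalent negation, often clearer
```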
60 changes: 8 additions & 52 deletions nltools/data/brain_data.py
@@ -14,16 +14,13 @@
__author__ = ["Luke Chang"]
__license__ = "MIT"

import pickle # import cPickle
from nilearn.signal import clean
from scipy.stats import ttest_1samp, norm, spearmanr
from scipy.stats import ttest_1samp
from scipy.stats import t as t_dist
from scipy.signal import detrend
from scipy.spatial.distance import squareform
import os
import shutil
import nibabel as nib
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
@@ -35,13 +32,12 @@
from sklearn.utils import check_random_state
from pynv import Client
from joblib import Parallel, delayed
from nltools.mask import expand_mask, collapse_mask
from nltools.mask import expand_mask
from nltools.analysis import Roc
from nilearn.input_data import NiftiMasker
from nilearn.image import resample_img
from nilearn.masking import intersect_masks
from nilearn.regions import connected_regions, connected_label_regions
from nilearn.plotting.img_plotting import plot_epi, plot_roi, plot_stat_map
from nltools.utils import (get_resource_path,
set_algorithm,
get_anatomical,
@@ -50,9 +46,7 @@
_bootstrap_apply_func,
set_decomposition_algorithm)
from nltools.cross_validation import set_cv
from nltools.plotting import (dist_from_hyperplane_plot,
scatterplot,
probability_plot,
from nltools.plotting import (scatterplot,
roc_plot,
plot_stacked_adjacency,
plot_silhouette)
@@ -71,7 +65,6 @@
summarize_bootstrap,
procrustes)
from nltools.stats import regress as regression
from nltools.pbs_job import PBS_Job
from .adjacency import Adjacency
from nltools.prefs import MNI_Template, resolve_mni_path
from nltools.external.srm import DetSRM, SRM
@@ -409,8 +402,7 @@ def regress(self, mode='ols', **kwargs):
raise ValueError("self.X does not match the correct size of "
"self.data")

b,t,p,df,res = regression(self.X,self.data,mode=mode,**kwargs)
sigma = np.std(res,axis=0,ddof=self.X.shape[1])
b,t,p,_,res = regression(self.X,self.data,mode=mode,**kwargs)

# Prevent copy of all data in self multiple times; instead start with an empty instance and copy only needed attributes from self, and use this as a template for other outputs
b_out = self.__class__()
@@ -908,44 +900,6 @@ def apply_mask(self, mask):
masked.data = masked.data.flatten()
return masked

def searchlight(self, ncores, process_mask=None, parallel_out=None,
radius=3, walltime='24:00:00', email=None,
algorithm='svr', cv_dict=None, kwargs={}):

if len(kwargs) is 0:
kwargs['kernel']= 'linear'

# new parallel job
pbs_kwargs = {'algorithm': algorithm,
'cv_dict': cv_dict,
'predict_kwargs': kwargs}
#cv_dict={'type': 'kfolds','n_folds': 5,'stratified':dat.Y}

parallel_job = PBS_Job(self, parallel_out=parallel_out,
process_mask=process_mask, radius=radius,
kwargs=pbs_kwargs)

# make and store data we will need to access on the worker core level
parallel_job.make_searchlight_masks()
pickle.dump(parallel_job, open(
os.path.join(parallel_out, "pbs_searchlight.pkl"), "w"))
# cPickle.dump(parallel_job, open(
# os.path.join(parallel_out, "pbs_searchlight.pkl"), "w"))

#make core startup script (python)
parallel_job.make_startup_script("core_startup.py")

# make email notification script (pbs)
if type(email) is str:
parallel_job.make_pbs_email_alert(email)

# make pbs job submission scripts (pbs)
for core_i in range(ncores):
script_name = "core_pbs_script_" + str(core_i) + ".pbs"
parallel_job.make_pbs_scripts(script_name, core_i, ncores, walltime) # create a script
print("python " + os.path.join(parallel_out, script_name))
os.system("qsub " + os.path.join(parallel_out, script_name)) # run it on a core

def extract_roi(self, mask, method='mean'):
""" Extract activity from mask
@@ -1442,8 +1396,10 @@ def align(self, target, method='procrustes', n_features=None, axis=0,
source = self.copy()
common = target.copy()

assert isinstance(target, Brain_Data), "Target must be Brain_Data instance."
assert method in ['probabilistic_srm', 'deterministic_srm','procrustes'], "Method must be ['probabilistic_srm','deterministic_srm','procrustes']"
if not isinstance(target, Brain_Data):
raise ValueError("Target must be Brain_Data instance.")
if method not in ['probabilistic_srm', 'deterministic_srm','procrustes']:
raise ValueError("Method must be ['probabilistic_srm','deterministic_srm','procrustes']")

data1 = source.data.T
data2 = target.data.T
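In `regress`, the unused degrees-of-freedom output (and the `sigma` derived from it) are now discarded by unpacking into `_`, which documents that only four of the five return values are consumed. A generic sketch of the convention, using a stand-in function:

```python
def fit():
    # Stand-in returning the same five-output shape as nltools.stats.regress:
    return 'beta', 't', 'p', 'df', 'residuals'

b, t, p, _, res = fit()  # `_` marks the degrees-of-freedom slot as intentionally unused
```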
15 changes: 10 additions & 5 deletions nltools/data/design_matrix.py
@@ -438,7 +438,8 @@ def vif(self,exclude_polys=True):
exclude_polys (bool): whether to skip checking of polynomial terms (i.e. intercept, trends, basis functions); default True
"""
assert self.shape[1] > 1, "Can't compute vif with only 1 column!"
if self.shape[1] <= 1:
raise ValueError("Can't compute vif with only 1 column!")
if self.polys and exclude_polys:
out = self.drop(self.polys,axis=1)
else:
@@ -481,16 +482,19 @@ def convolve(self, conv_func='hrf', columns=None):
to all non-polynomial columns
"""
assert self.sampling_freq is not None, "Design_matrix has no sampling_freq set!"
if self.sampling_freq is None:
raise ValueError("Design_matrix has no sampling_freq set!")

if columns is None:
columns = [col for col in self.columns if col not in self.polys]
nonConvolved = [col for col in self.columns if col not in columns]

if isinstance(conv_func, np.ndarray):
assert len(conv_func.shape) <= 2, "2d conv_func must be formatted as samples X kernels!"
if len(conv_func.shape) > 2:
raise ValueError("2d conv_func must be formatted as samples X kernels!")
elif isinstance(conv_func, six.string_types):
assert conv_func == 'hrf',"Did you mean 'hrf'? 'hrf' can generate a kernel for you, otherwise custom kernels should be passed in as 1d or 2d arrays."
if conv_func != 'hrf':
raise ValueError("Did you mean 'hrf'? 'hrf' can generate a kernel for you, otherwise custom kernels should be passed in as 1d or 2d arrays.")
conv_func = glover_hrf(1. / self.sampling_freq, oversampling=1.)

else:
@@ -624,7 +628,8 @@ def add_dct_basis(self,duration=180,drop=0):
drop (int): index of which early/slow bases to drop if any; will always drop constant (i.e. intercept) like SPM. Unlike SPM, retains first basis (i.e. linear/sigmoidal). Will cumulatively drop bases up to and inclusive of index provided (e.g. 2, drops bases 1 and 2); default None
"""
assert self.sampling_freq is not None, "Design_Matrix has no sampling_freq set!"
if self.sampling_freq is None:
raise ValueError("Design_Matrix has no sampling_freq set!")

if self.polys:
if any([elem.count('_') == 2 and 'cosine' in elem for elem in self.polys]):
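With `sampling_freq` now enforced via `ValueError`, `convolve` still builds its default Glover HRF kernel from it. A hedged usage sketch (the shape, column name, and constructor keywords are assumptions, not shown in this diff):

```python
import numpy as np
from nltools.data import Design_Matrix

# Hypothetical 1 Hz design with a single binary stimulus regressor.
dm = Design_Matrix(np.random.randint(0, 2, (60, 1)),
                   columns=['stim'], sampling_freq=1.)

dm_conv = dm.convolve(conv_func='hrf')  # would raise ValueError if sampling_freq were unset
```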
5 changes: 0 additions & 5 deletions nltools/datasets.py
@@ -18,12 +18,7 @@
__license__ = "MIT"

import os
import nibabel as nib
import pandas as pd
import numpy as np
import warnings
import shutil
import tempfile
from nltools.data import Brain_Data
from nilearn.datasets.utils import _get_dataset_dir, _fetch_file
from pynv import Client
3 changes: 0 additions & 3 deletions nltools/mask.py
@@ -12,10 +12,7 @@

import os
import nibabel as nib
from nltools.utils import get_resource_path
from nltools.prefs import MNI_Template, resolve_mni_path
from nilearn.input_data import NiftiMasker
from copy import deepcopy
import pandas as pd
import numpy as np
import six
