Skip to content

Commit

Permalink
make the code compatible with newer versions of pandas
Browse files Browse the repository at this point in the history
  • Loading branch information
tgy committed Mar 29, 2020
1 parent 948ce7e commit f44165a
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 14 deletions.
24 changes: 13 additions & 11 deletions neurosynth/base/dataset.py
Expand Up @@ -621,7 +621,7 @@ def add_features(self, features, merge='outer', duplicates='ignore',
"instance from a newer database file that uses PMIDs rather "
"than doi's as the study identifiers in the first column.")

old_data = self.data.to_dense()
old_data = self.data
# Handle features with duplicate names
common_features = list(set(old_data.columns) & set(features.columns))
if duplicates == 'ignore':
Expand All @@ -631,7 +631,7 @@ def add_features(self, features, merge='outer', duplicates='ignore',

data = old_data.merge(
features, how=merge, left_index=True, right_index=True)
self.data = data.fillna(0.0).to_sparse()
self.data = data.fillna(0.0).astype(pd.SparseDtype(np.float64))

@property
def feature_names(self):
Expand All @@ -657,12 +657,12 @@ def get_feature_data(self, ids=None, features=None, dense=True):
result = self.data

if ids is not None:
result = result.ix[ids]
result = result.loc[ids]

if features is not None:
result = result.ix[:, features]
result = result.loc[:, features]

return result.to_dense() if dense else result
return result.astype(np.float64) if dense else result

def get_ordered_names(self, features):
""" Given a list of features, returns features in order that they
Expand Down Expand Up @@ -713,7 +713,7 @@ def get_ids(self, features, threshold=0.0, func=np.sum, get_weights=False):
if isinstance(features, str):
features = [features]
features = self.search_features(features) # Expand wild cards
feature_weights = self.data.ix[:, features]
feature_weights = self.data.loc[:, features]
weights = feature_weights.apply(func, 1)
above_thresh = weights[weights >= threshold]
# ids_to_keep = self.ids[above_thresh]
def get_features_by_ids(self, ids=None, threshold=0.0001, func=np.mean,
                        get_weights=False):
    """ Returns features for which the aggregated loading across all
    specified studies is >= threshold.

    Args:
        ids: list of study identifiers to aggregate over (rows of
            self.data). Passed to DataFrame.loc, so unknown ids raise
            KeyError on modern pandas.
        threshold: minimum aggregated loading a feature must reach.
        func: aggregation function applied column-wise (axis=0);
            defaults to np.mean. Must accept an `axis` keyword
            (e.g. np.mean, np.sum).
        get_weights: if True, return the pandas Series of aggregated
            weights for the passing features; otherwise return just
            the list of feature names.
    """
    # .loc replaces the long-deprecated .ix; calling func(..., axis=0)
    # instead of DataFrame.apply keeps this working on sparse-dtype
    # DataFrames in newer pandas versions.
    weights = func(self.data.loc[ids], axis=0)
    above_thresh = weights[weights >= threshold]
    return above_thresh if get_weights else list(above_thresh.index)

def _sdf_to_csr(self):
""" Convert FeatureTable to SciPy CSR matrix. """
data = self.data.to_dense()
data = self.data.astype(np.float64)
self.data = {
'columns': list(data.columns),
'index': list(data.index),
Expand All @@ -766,6 +766,8 @@ def _sdf_to_csr(self):

def _csr_to_sdf(self):
""" Inverse of _sdf_to_csr(). """
self.data = pd.DataFrame(self.data['values'].todense(),
index=self.data['index'],
columns=self.data['columns']).to_sparse()
self.data = pd.DataFrame(
self.data["values"].todense(),
index=self.data["index"],
columns=self.data["columns"],
).astype(pd.SparseDtype(np.float64))
9 changes: 6 additions & 3 deletions neurosynth/tests/test_base.py
Expand Up @@ -67,7 +67,7 @@ def test_feature_table_loads(self):
self.assertEqual(len(self.dataset.get_feature_names()), 5)
self.assertEqual(tt.data.shape, (5, 5))
self.assertEqual(tt.data.columns[3], 'f4')
self.assertEqual(tt.data.to_dense().iloc[0, 0], 0.0003)
self.assertEqual(tt.data.astype(np.float64).iloc[0, 0], 0.0003)

def test_feature_addition(self):
""" Add feature data from multiple sources to FeatureTable. """
Expand Down Expand Up @@ -225,8 +225,11 @@ def test_get_feature_counts(self):
self.assertGreater(c, 0, "feature %s has no hits" % f)
# and should be equal to the ones computed directly (we do not do
# any fancy queries atm), assumes default threshold of 0.001
feature_counts_ = dict([(feature, np.sum(self.dataset.feature_table.data.to_dense().ix[:, col] > 0.001))
for col, feature in enumerate(self.dataset.feature_table.feature_names)])
ft = self.dataset.feature_table
feature_counts_ = dict(
(feature, np.sum(ft.data.iloc[:, col].sparse.to_dense() > 0.001))
for col, feature in enumerate(ft.feature_names)
)
self.assertEqual(feature_counts, feature_counts_)

def test_get_feature_data(self):
Expand Down

0 comments on commit f44165a

Please sign in to comment.