[FIX] Replace numpy with pandas in data loaders #2829

Merged (49 commits, Jan 28, 2022)

Changes from 8 commits

Commits (49)
1d04072
Replace numpy with pandas
achamma723 May 7, 2021
4fae766
rebase
achamma723 Dec 16, 2021
58ff41c
[MAINT] Fix new typos found by codespell (#3101)
DimitriPapadopoulos Dec 16, 2021
68e19e5
Merge branch 'main' of https://github.com/nilearn/nilearn into test_5
achamma723 Dec 16, 2021
03e1f58
Fix pep8 + destrieux
achamma723 Dec 16, 2021
1bac472
Fix pep8
achamma723 Dec 16, 2021
382e6a4
Merge branch 'main' of https://github.com/nilearn/nilearn into test_5
achamma723 Jan 12, 2022
aeaf3ef
Merge docs
achamma723 Jan 12, 2022
24024b1
continue work (unfinished)
NicolasGensollen Jan 19, 2022
539ab52
Iter
NicolasGensollen Jan 19, 2022
c74c356
Fix test
NicolasGensollen Jan 19, 2022
0a0b98b
Fix other test
NicolasGensollen Jan 19, 2022
8ec02f9
Iter
NicolasGensollen Jan 19, 2022
93f36dd
Add warnings
NicolasGensollen Jan 20, 2022
1d0eaba
Fix PEP8
NicolasGensollen Jan 20, 2022
4264ab2
Address Jerome's reviews
NicolasGensollen Jan 24, 2022
3be0123
Fix PEP8
NicolasGensollen Jan 24, 2022
8455f92
convert to lower case in fetch_atlas_difumo
NicolasGensollen Jan 24, 2022
e558a25
[circle full] Add whatsnew.
NicolasGensollen Jan 25, 2022
ba553c2
Fix
NicolasGensollen Jan 25, 2022
0c7ff85
[circle full] remove deprecation warning from example
NicolasGensollen Jan 25, 2022
b1a86a3
Fix examples
NicolasGensollen Jan 25, 2022
41c8e7b
[ENH] Add `cbar_tick_format` to plotting functions (#2859)
NicolasGensollen Jan 17, 2022
10e1795
Rename private functions of `permuted_least_squares` to start with '_…
NicolasGensollen Jan 18, 2022
b229c9f
[MAINT] Update git protocol in CircleCI fetch step (#3124)
NicolasGensollen Jan 19, 2022
ca6746d
[MAINT] Scipy deprecation warning in RegionExtractor (#3130)
htwangtw Jan 19, 2022
0f5f07d
[MAINT] Remove deprecated `sessions` and `sample_mask` attributes of …
NicolasGensollen Jan 20, 2022
1745e44
[MAINT] Remove old workaround (#3092)
NicolasGensollen Jan 20, 2022
4668e3e
[ENH] Move FSL- and BIDS-related functions to interfaces module (#3126)
tsalo Jan 21, 2022
d0d1093
[FIX] Fix links in whats_new (#3139)
NicolasGensollen Jan 21, 2022
b42b4c0
[FIX] fixed code + added test (#3137)
bthirion Jan 24, 2022
7b4a476
[ENH] Refactor `plot_matrix` (#3001)
NicolasGensollen Jan 24, 2022
c61e7a4
[FIX] replace interpreter call (#3136)
bnavigator Jan 24, 2022
737c986
[FIX] `FirstLevelModel` signal_scaling (#3135)
NicolasGensollen Jan 24, 2022
3fcc155
[ENH] Include Hierarchical KMeans in regions.Parcellations (#2282)
thomasbazeille Jan 25, 2022
da18fdc
[DOC] Fix wrong whats_new entry (#3142)
NicolasGensollen Jan 25, 2022
d1466c2
[FIX] Fix hommel value computation (#3109)
bthirion Jan 25, 2022
3e900a9
[DOC] Refactor change logs (#3049)
NicolasGensollen Jan 26, 2022
e1657b1
Merge branch 'main' of https://github.com/nilearn/nilearn into test_5
achamma723 Jan 26, 2022
0d2bc7f
Rebase
achamma723 Jan 26, 2022
26e92ff
[circle full] fix whats_new bug
NicolasGensollen Jan 27, 2022
26de3fe
[MAINT] Bump dependencies for release `0.9.0` (#3143)
NicolasGensollen Jan 27, 2022
55db108
Jerome's review
NicolasGensollen Jan 27, 2022
fb5c56b
remove warning
NicolasGensollen Jan 27, 2022
27e00f0
[INFRA] Add tests min requirements with Matplotlib (#3144)
NicolasGensollen Jan 27, 2022
304e782
2021 -> 2022 (#3146)
NicolasGensollen Jan 27, 2022
1ba2487
Revert "[MAINT] Bump dependencies for release `0.9.0` (#3143)"
achamma723 Jan 28, 2022
f1dd5d8
Revert "[INFRA] Add tests min requirements with Matplotlib (#3144)"
achamma723 Jan 28, 2022
65fe185
Revert "2021 -> 2022 (#3146)"
achamma723 Jan 28, 2022
33 changes: 16 additions & 17 deletions nilearn/datasets/atlas.py
@@ -10,6 +10,7 @@

import nibabel as nb
import numpy as np
import pandas as pd
from numpy.lib import recfunctions
import re
from sklearn.utils import Bunch
@@ -112,7 +113,7 @@ def fetch_atlas_difumo(dimension=64, resolution_mm=2, data_dir=None, resume=True

# Download the zip file, first
files_ = _fetch_files(data_dir, files, verbose=verbose)
labels = np.recfromcsv(files_[0])
labels = pd.read_csv(files_[0]).to_records()

# README
readme_files = [('README.md', 'https://osf.io/4k9bf/download',
@@ -259,7 +260,8 @@ def fetch_atlas_destrieux_2009(lateralized=True, data_dir=None, url=None,
files_ = _fetch_files(data_dir, files, resume=resume,
verbose=verbose)

params = dict(maps=files_[1], labels=np.recfromcsv(files_[0]))
params = dict(maps=files_[1],
labels=pd.read_csv(files_[0], index_col=0).to_records())

with open(files_[2], 'r') as rst_file:
params['description'] = rst_file.read()
@@ -712,13 +714,13 @@ def fetch_atlas_msdl(data_dir=None, url=None, resume=True, verbose=1):
data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
verbose=verbose)
files = _fetch_files(data_dir, files, resume=resume, verbose=verbose)
csv_data = np.recfromcsv(files[0])
csv_data = pd.read_csv(files[0])
labels = [name.strip() for name in csv_data['name'].tolist()]
labels = [label.decode("utf-8") for label in labels]

with warnings.catch_warnings():
warnings.filterwarnings('ignore', module='numpy',
category=FutureWarning)
region_coords = csv_data[['x', 'y', 'z']].tolist()
region_coords = csv_data[['x', 'y', 'z']].values.tolist()
net_names = [net_name.strip() for net_name in csv_data['net_name'].tolist()]
fdescr = _get_dataset_descr(dataset_name)

@@ -750,7 +752,7 @@ def fetch_coords_power_2011():
fdescr = _get_dataset_descr(dataset_name)
package_directory = os.path.dirname(os.path.abspath(__file__))
csv = os.path.join(package_directory, "data", "power_2011.csv")
params = dict(rois=np.recfromcsv(csv), description=fdescr)
params = dict(rois=pd.read_csv(csv).to_records(), description=fdescr)

return Bunch(**params)

@@ -1202,10 +1204,10 @@ def fetch_coords_dosenbach_2010(ordered_regions=True):
fdescr = _get_dataset_descr(dataset_name)
package_directory = os.path.dirname(os.path.abspath(__file__))
csv = os.path.join(package_directory, "data", "dosenbach_2010.csv")
out_csv = np.recfromcsv(csv)
out_csv = pd.read_csv(csv)

if ordered_regions:
out_csv = np.sort(out_csv, order=['network', 'name', 'y'])
out_csv = out_csv.sort_values(by=['network', 'name', 'y'])

# We add the ROI number to its name, since names are not unique
names = out_csv['name']
@@ -1267,10 +1269,8 @@ def fetch_coords_seitzman_2018(ordered_regions=True):
anatomical_file = os.path.join(package_directory, "data",
"seitzman_2018_ROIs_anatomicalLabels.txt")

rois = np.recfromcsv(roi_file, delimiter=" ")
rois = recfunctions.rename_fields(rois, {"netname": "network",
"radiusmm": "radius"})
rois.network = rois.network.astype(str)
rois = pd.read_csv(roi_file, delimiter=" ")
rois = rois.rename(columns={"netName": "network", "radius(mm)": "radius"})

# get integer regional labels and convert to text labels with mapping
# from header line
@@ -1284,16 +1284,15 @@
anatomical = np.genfromtxt(anatomical_file, skip_header=1)
anatomical_names = np.array([region_mapping[a] for a in anatomical])

rois = recfunctions.merge_arrays((rois, anatomical_names),
asrecarray=True, flatten=True)
rois.dtype.names = rois.dtype.names[:-1] + ("region",)
rois = pd.concat([rois, pd.DataFrame(anatomical_names)], axis=1)
rois.columns = list(rois.columns[:-1]) + ["region"]

if ordered_regions:
rois = np.sort(rois, order=['network', 'y'])
rois = rois.sort_values(by=['network', 'y'])

params = dict(rois=rois[['x', 'y', 'z']],
radius=rois['radius'],
networks=rois['network'].astype(str),
networks=rois['network'],
regions=rois['region'], description=fdescr)

return Bunch(**params)
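Note: the recurring substitution in this file is `np.recfromcsv(path)` → `pd.read_csv(path).to_records()`. Below is a minimal sketch of the behavioral differences, using a made-up CSV and assuming NumPy < 2.0 (where `np.recfromcsv` still exists); none of these names come from the diff itself:

```python
import io

import numpy as np  # assumes NumPy < 2.0; np.recfromcsv was removed in 2.0
import pandas as pd

# Made-up stand-in for the atlas CSVs fetched above.
text = "Name,X,Y,Z\nroi_a,1,2,3\nroi_b,4,5,6"

# Old reader: field names are lower-cased and text comes back as bytes.
rec_old = np.recfromcsv(io.StringIO(text))
assert rec_old.dtype.names == ("name", "x", "y", "z")
assert rec_old["name"][0] == b"roi_a"

# New reader: header case is preserved and text columns are plain str,
# which is why the .decode("utf-8") calls above disappear.
df = pd.read_csv(io.StringIO(text))
rec_new = df.to_records()  # prepends the index as a first 'index' field
assert rec_new["Name"][0] == "roi_a"

# np.sort(recarray, order=[...]) becomes DataFrame.sort_values(by=[...]),
# as in the dosenbach_2010 and seitzman_2018 hunks above.
df_sorted = df.sort_values(by=["Name", "X"])
```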
19 changes: 8 additions & 11 deletions nilearn/datasets/func.py
@@ -740,20 +740,17 @@ def _is_valid_path(path, index, verbose):
# Load covariates file
from numpy.lib.recfunctions import join_by
participants_file = os.path.join(data_dir, participants_file)
csv_data = np.recfromcsv(participants_file, delimiter='\t')
csv_data = pd.read_csv(participants_file, delimiter='\t')
behavioural_file = os.path.join(data_dir, behavioural_file)
csv_data2 = np.recfromcsv(behavioural_file, delimiter='\t')
csv_data = join_by(
"participant_id", csv_data, csv_data2, usemask=False, asrecarray=True)
csv_data2 = pd.read_csv(behavioural_file, delimiter='\t')
csv_data = csv_data.merge(csv_data2)
subject_names = csv_data["participant_id"].tolist()
subjects_indices = []
for name in subject_ids:
name = name.encode("utf8")
if name not in subject_names:
continue
subjects_indices.append(subject_names.index(name))
csv_data = csv_data[subjects_indices]

csv_data = csv_data.iloc[subjects_indices]
return Bunch(ext_vars=csv_data, description=fdescr, **files)


@@ -982,10 +979,10 @@ def fetch_abide_pcp(data_dir=None, n_subjects=None, pipeline='cpac',
# bytes (encode()) needed for python 2/3 compat with numpy
pheno = '\n'.join(pheno).encode()
pheno = BytesIO(pheno)
pheno = np.recfromcsv(pheno, comments='$', case_sensitive=True)
pheno = pd.read_csv(pheno, comment='$')

# First, filter subjects with no filename
pheno = pheno[pheno['FILE_ID'] != b'no_filename']
pheno = pheno[pheno['FILE_ID'] != 'no_filename']
# Apply user defined filters
user_filter = _filter_columns(pheno, kwargs)
pheno = pheno[user_filter]
@@ -996,7 +993,7 @@ def fetch_abide_pcp(data_dir=None, n_subjects=None, pipeline='cpac',

# Get the files
results = {}
file_ids = [file_id.decode() for file_id in pheno['FILE_ID']]
file_ids = pheno['FILE_ID'].tolist()
if n_subjects is not None:
file_ids = file_ids[:n_subjects]
pheno = pheno[:n_subjects]
@@ -1667,7 +1664,7 @@ def _reduce_confounds(regressors, keep_confounds):
out_file = in_file.replace('desc-confounds',
'desc-reducedConfounds')
if not os.path.isfile(out_file):
confounds = np.recfromcsv(in_file, delimiter='\t')
confounds = pd.read_csv(in_file, delimiter='\t').to_records()
selected_confounds = confounds[keep_confounds]
header = '\t'.join(selected_confounds.dtype.names)
np.savetxt(out_file, np.array(selected_confounds.tolist()),
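Note: in the covariates-loading hunk above, `numpy.lib.recfunctions.join_by` becomes `DataFrame.merge` and integer row indexing becomes `.iloc`. A hedged sketch with made-up participant and behavioural tables (the column names are illustrative only):

```python
import io

import pandas as pd

# Made-up stand-ins for the participants / behavioural TSV files.
participants = pd.read_csv(
    io.StringIO("participant_id\tage\nS02\t23\nS03\t31"), delimiter="\t")
behavioural = pd.read_csv(
    io.StringIO("participant_id\tscore\nS02\t0.9\nS03\t0.4"), delimiter="\t")

# recfunctions.join_by("participant_id", a, b) becomes a plain merge;
# by default pandas inner-joins on the columns the two frames share.
csv_data = participants.merge(behavioural)

# Integer indexing on a recarray (csv_data[indices]) becomes .iloc,
# and participant_id values are str, so no name.encode("utf8") is needed.
subset = csv_data.iloc[[1, 0]]
assert subset["participant_id"].tolist() == ["S03", "S02"]
```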
5 changes: 3 additions & 2 deletions nilearn/datasets/struct.py
@@ -10,6 +10,7 @@
from pathlib import Path

import numpy as np
import pandas as pd
from scipy import ndimage
from sklearn.utils import Bunch

@@ -735,14 +736,14 @@ def fetch_oasis_vbm(n_subjects=None, dartel_version=True, data_dir=None,
data_usage_agreement = files[-1]

# Keep CSV information only for selected subjects
csv_data = np.recfromcsv(ext_vars_file)
csv_data = pd.read_csv(ext_vars_file)
# Comparisons to recfromcsv data must be bytes.
actual_subjects_ids = [("OAS1" +
str.split(os.path.basename(x),
"OAS1")[1][:9]).encode()
for x in gm_maps]
subject_mask = np.asarray([subject_id in actual_subjects_ids
for subject_id in csv_data['id']])
for subject_id in csv_data['ID']])
csv_data = csv_data[subject_mask]

fdescr = _get_dataset_descr(dataset_name)
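Note: the `csv_data['id']` → `csv_data['ID']` change follows from the reader swap: `np.recfromcsv` lower-cases header names by default, while `pd.read_csv` keeps them exactly as written in the file. Since the column now holds str values, the `.encode()` in the surrounding comparison would no longer match as-is. A small sketch with a made-up OASIS-style table:

```python
import io

import pandas as pd

# Made-up excerpt of the OASIS external-variables CSV.
csv_data = pd.read_csv(
    io.StringIO("ID,Age\nOAS1_0001_MR1,74\nOAS1_0002_MR1,55"))

# np.recfromcsv would have exposed this column as 'id'; pandas keeps 'ID'.
assert "ID" in csv_data.columns

# Values are str, so the mask can compare plain strings (no .encode()).
subject_mask = csv_data["ID"].isin(["OAS1_0001_MR1"])
assert csv_data[subject_mask].shape == (1, 2)
```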
23 changes: 12 additions & 11 deletions nilearn/datasets/tests/test_func.py
@@ -170,20 +170,20 @@ def test_fetch_localizer_contrasts(tmp_path, request_mocker, localizer_mocker):
assert not hasattr(dataset, 'tmaps')
assert not hasattr(dataset, 'masks')
assert isinstance(dataset.cmaps[0], str)
assert isinstance(dataset.ext_vars, np.recarray)
assert isinstance(dataset.ext_vars, pd.DataFrame)
assert len(dataset.cmaps) == 2
assert dataset.ext_vars.size == 2
assert len(dataset['ext_vars']) == 2

# Multiple contrasts
dataset = func.fetch_localizer_contrasts(
['checkerboard', 'horizontal checkerboard'],
n_subjects=2,
data_dir=tmp_path,
verbose=1)
assert isinstance(dataset.ext_vars, np.recarray)
assert isinstance(dataset.ext_vars, pd.DataFrame)
assert isinstance(dataset.cmaps[0], str)
assert len(dataset.cmaps) == 2 * 2 # two contrasts are fetched
assert dataset.ext_vars.size == 2
assert len(dataset['ext_vars']) == 2

# all get_*=True
dataset = func.fetch_localizer_contrasts(
@@ -194,12 +194,12 @@ def test_fetch_localizer_contrasts(tmp_path, request_mocker, localizer_mocker):
get_masks=True,
get_tmaps=True,
verbose=1)
assert isinstance(dataset.ext_vars, np.recarray)
assert isinstance(dataset.ext_vars, pd.DataFrame)
assert isinstance(dataset.anats[0], str)
assert isinstance(dataset.cmaps[0], str)
assert isinstance(dataset.masks[0], str)
assert isinstance(dataset.tmaps[0], str)
assert dataset.ext_vars.size == 1
assert len(dataset['ext_vars']) == 1
assert len(dataset.anats) == 1
assert len(dataset.cmaps) == 1
assert len(dataset.masks) == 1
@@ -212,10 +212,11 @@ def test_fetch_localizer_contrasts(tmp_path, request_mocker, localizer_mocker):
n_subjects=[2, 3, 5],
data_dir=tmp_path,
verbose=1)
assert dataset2.ext_vars.size == 3
assert len(dataset2['ext_vars']) == 3
assert len(dataset2.cmaps) == 3
assert ([row[0] for row in dataset2.ext_vars] ==
[b'S02', b'S03', b'S05'])
assert (list(dataset2['ext_vars']['participant_id'].values) == ['S02',
'S03',
'S05'])


def test_fetch_localizer_calculation_task(tmp_path, request_mocker,
@@ -225,9 +226,9 @@ def test_fetch_localizer_calculation_task(tmp_path, request_mocker,
n_subjects=2,
data_dir=tmp_path,
verbose=1)
assert isinstance(dataset.ext_vars, np.recarray)
assert isinstance(dataset.ext_vars, pd.DataFrame)
assert isinstance(dataset.cmaps[0], str)
assert dataset.ext_vars.size == 2
assert len(dataset['ext_vars']) == 2
assert len(dataset.cmaps) == 2
assert dataset.description != ''

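Note: the test assertions move from `ext_vars.size == n` to `len(dataset['ext_vars']) == n` because recarray `.size` counted records, while `DataFrame.size` counts cells (rows * columns); expected string values likewise lose their `b''` prefixes. A quick illustration with a made-up frame:

```python
import pandas as pd

df = pd.DataFrame({"participant_id": ["S02", "S03"], "age": [23, 31]})

# DataFrame.size counts cells (rows * columns), unlike recarray.size,
# which counted records; row counts therefore use len() instead.
assert df.size == 4
assert len(df) == 2
assert df.to_records(index=False).size == 2  # the old record count

# Text stays str, so expected values lose their b'' prefixes.
assert list(df["participant_id"].values) == ["S02", "S03"]
```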
4 changes: 2 additions & 2 deletions nilearn/datasets/tests/test_struct.py
@@ -120,7 +120,7 @@ def test_fetch_oasis_vbm(tmp_path, request_mocker):
assert len(dataset.white_matter_maps) == 403
assert isinstance(dataset.gray_matter_maps[0], str)
assert isinstance(dataset.white_matter_maps[0], str)
assert isinstance(dataset.ext_vars, np.recarray)
assert isinstance(dataset.ext_vars, pd.DataFrame)
assert isinstance(dataset.data_usage_agreement, str)
assert request_mocker.url_count == 1

@@ -130,7 +130,7 @@ def test_fetch_oasis_vbm(tmp_path, request_mocker):
assert len(dataset.white_matter_maps) == 415
assert isinstance(dataset.gray_matter_maps[0], str)
assert isinstance(dataset.white_matter_maps[0], str)
assert isinstance(dataset.ext_vars, np.recarray)
assert isinstance(dataset.ext_vars, pd.DataFrame)
assert isinstance(dataset.data_usage_agreement, str)
assert request_mocker.url_count == 2
assert dataset.description != ''