Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow histology patches to be extracted without ground truth labels #657

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions GANDLF/cli/patch_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,17 @@
def parse_gandlf_csv(fpath):
    """Yield one ``(subject_id, slide_path, label)`` triple per unique CSV row.

    The CSV at ``fpath`` is read through ``parseTrainingCSV`` with
    ``train=False``; duplicate rows are dropped before iteration.

    Args:
        fpath: Path to the data CSV, passed straight to ``parseTrainingCSV``.

    Yields:
        A 3-tuple ``(row["SubjectID"], row["Channel_0"], label)``, where
        ``label`` is ``row["Label"]`` when a "Label" column is present and
        ``None`` otherwise. The tuple always has three items so callers can
        unpack it uniformly whether or not ground truth is available.

    Raises:
        AssertionError: If the parsed dataframe contains any null/nan value.
    """
    df, _ = parseTrainingCSV(fpath, train=False)
    df = df.drop_duplicates()
    # nans can be easily removed using df.dropna(axis=1, how='all')
    # we want to keep them because we want the user to check the CSV instead
    # there might be cases where labels are accidentally removed for some
    # subjects, but not all
    assert (
        not df.isnull().values.any()
    ), "Data CSV contains null/nan values, please check."
    for _, row in df.iterrows():
        if "Label" in row:
            yield row["SubjectID"], row["Channel_0"], row["Label"]
        else:
            # No ground truth column: emit an explicit None placeholder so
            # the result keeps the same 3-item shape.
            yield row["SubjectID"], row["Channel_0"], None


def patch_extraction(input_path, output_path, config=None):
Expand Down Expand Up @@ -63,7 +69,8 @@ def patch_extraction(input_path, output_path, config=None):
for sid, slide, label in parse_gandlf_csv(input_path):
# Create new instance of slide manager
manager = PatchManager(slide, os.path.join(output_path, str(sid)))
manager.set_label_map(label)
if label is not None:
manager.set_label_map(label)
manager.set_subjectID(str(sid))
manager.set_image_header("Channel_0")
manager.set_mask_header("Label")
Expand Down
5 changes: 2 additions & 3 deletions GANDLF/data/patch_miner/opm/patch_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,9 +435,8 @@ def mine_patches(self, config, output_csv=None):
new_df_rows.append(new_row)

new_df = pd.DataFrame(new_df_rows)
output_df = pd.concat(
[output_df, new_df]
) # Concatenate in case there is a pre-existing dataframe
# Concatenate in case there is a pre-existing dataframe
output_df = pd.concat([output_df, new_df])

output_df.to_csv(csv_filename, index=False)

Expand Down
19 changes: 10 additions & 9 deletions GANDLF/utils/write_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,13 +200,14 @@ def convert_relative_paths_in_dataframe(input_dataframe, headers, path_root):
if (loc == headers["labelHeader"]) or (loc in headers["channelHeaders"]):
# These entries can be considered as paths to files
for index, entry in enumerate(input_dataframe[column]):
this_path = pathlib.Path(entry)
start_path = pathlib.Path(path_root)
if start_path.is_file():
start_path = start_path.parent
if not this_path.is_file():
if not this_path.is_absolute():
input_dataframe.loc[index, column] = str(
start_path.joinpath(this_path)
)
if isinstance(entry, str):
this_path = pathlib.Path(entry)
start_path = pathlib.Path(path_root)
if start_path.is_file():
start_path = start_path.parent
if not this_path.is_file():
if not this_path.is_absolute():
input_dataframe.loc[index, column] = str(
start_path.joinpath(this_path)
)
return input_dataframe
5 changes: 3 additions & 2 deletions gandlf_run
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import os
import argparse
import ast
import sys
import traceback

from GANDLF import version
from GANDLF.cli import main_run, copyrightMessage
Expand Down Expand Up @@ -136,7 +137,7 @@ if __name__ == "__main__":
args.reset,
args.outputdir,
)
except Exception as e:
sys.exit("ERROR: " + str(e))
except Exception:
sys.exit("ERROR: " + traceback.format_exc())

print("Finished.")
Loading