Skip to content

Commit

Permalink
Merge pull request #657 from sarthakpati/656-using-patch-miner-without-label-header-results-in-failure
Browse files Browse the repository at this point in the history

Allow histology patches to be extracted without ground truth labels
  • Loading branch information
sarthakpati committed May 22, 2023
2 parents b6dc393 + fb9dc83 commit af1a8bd
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 16 deletions.
11 changes: 9 additions & 2 deletions GANDLF/cli/patch_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,17 @@
def parse_gandlf_csv(fpath):
    """Yield one ``(subject_id, slide, label)`` tuple per row of a data CSV.

    The CSV is loaded through ``parseTrainingCSV(fpath, train=False)`` and
    exact duplicate rows are dropped before iteration.

    Args:
        fpath: Path to the data CSV, passed unchanged to ``parseTrainingCSV``.

    Yields:
        A 3-tuple ``(row["SubjectID"], row["Channel_0"], label)`` where
        ``label`` is ``row["Label"]`` when the CSV has a ``Label`` column and
        ``None`` otherwise. The tuple always has three elements so callers
        can unpack it unconditionally.

    Raises:
        AssertionError: If any cell of the parsed CSV is null/NaN.
    """
    df, _ = parseTrainingCSV(fpath, train=False)
    df = df.drop_duplicates()
    # nans can be easily removed using df.dropna(axis=1, how='all')
    # we want to keep them because we want the user to check the CSV instead
    # there might be cases where labels are accidentally removed for some
    # subjects, but not all
    assert (
        not df.isnull().values.any()
    ), "Data CSV contains null/nan values, please check."

    # The presence of the label column is a property of the whole CSV, so it
    # is checked once rather than per row.
    has_label = "Label" in df.columns
    for _, row in df.iterrows():
        label = row["Label"] if has_label else None
        yield row["SubjectID"], row["Channel_0"], label


def patch_extraction(input_path, output_path, config=None):
Expand Down Expand Up @@ -63,7 +69,8 @@ def patch_extraction(input_path, output_path, config=None):
for sid, slide, label in parse_gandlf_csv(input_path):
# Create new instance of slide manager
manager = PatchManager(slide, os.path.join(output_path, str(sid)))
manager.set_label_map(label)
if label is not None:
manager.set_label_map(label)
manager.set_subjectID(str(sid))
manager.set_image_header("Channel_0")
manager.set_mask_header("Label")
Expand Down
5 changes: 2 additions & 3 deletions GANDLF/data/patch_miner/opm/patch_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,9 +435,8 @@ def mine_patches(self, config, output_csv=None):
new_df_rows.append(new_row)

new_df = pd.DataFrame(new_df_rows)
output_df = pd.concat(
[output_df, new_df]
) # Concatenate in case there is a pre-existing dataframe
# Concatenate in case there is a pre-existing dataframe
output_df = pd.concat([output_df, new_df])

output_df.to_csv(csv_filename, index=False)

Expand Down
19 changes: 10 additions & 9 deletions GANDLF/utils/write_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,13 +200,14 @@ def convert_relative_paths_in_dataframe(input_dataframe, headers, path_root):
if (loc == headers["labelHeader"]) or (loc in headers["channelHeaders"]):
# These entries can be considered as paths to files
for index, entry in enumerate(input_dataframe[column]):
this_path = pathlib.Path(entry)
start_path = pathlib.Path(path_root)
if start_path.is_file():
start_path = start_path.parent
if not this_path.is_file():
if not this_path.is_absolute():
input_dataframe.loc[index, column] = str(
start_path.joinpath(this_path)
)
if isinstance(entry, str):
this_path = pathlib.Path(entry)
start_path = pathlib.Path(path_root)
if start_path.is_file():
start_path = start_path.parent
if not this_path.is_file():
if not this_path.is_absolute():
input_dataframe.loc[index, column] = str(
start_path.joinpath(this_path)
)
return input_dataframe
5 changes: 3 additions & 2 deletions gandlf_run
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import os
import argparse
import ast
import sys
import traceback

from GANDLF import version
from GANDLF.cli import main_run, copyrightMessage
Expand Down Expand Up @@ -136,7 +137,7 @@ if __name__ == "__main__":
args.reset,
args.outputdir,
)
except Exception as e:
sys.exit("ERROR: " + str(e))
except Exception:
sys.exit("ERROR: " + traceback.format_exc())

print("Finished.")

0 comments on commit af1a8bd

Please sign in to comment.