Skip to content

Commit

Permalink
fixed std issue in samples, for real
Browse files (browse the repository at this point in the history)
  • Loading branch information
mpecchi committed Mar 19, 2024
1 parent 48185bd commit 18fe33d
Show file tree
Hide file tree
Showing 8 changed files with 17 additions and 8 deletions.
2 changes: 1 addition & 1 deletion example/example_gcms_data_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
) # Import the Project class from the gcms_data_analysis package

# Define the folder path where your data is located. Change this path to where you've stored your data files.
folder_path = plib.Path(plib.Path(__file__).parent, "example\data")
# folder_path = plib.Path(plib.Path(__file__).parent, "example\data")
folder_path = plib.Path(
r"C:\Users\mp933\OneDrive - Cornell University\Python\gcms_data_analysis\example\data"
)
Expand Down
Binary file removed example/output/files_info.xlsx
Binary file not shown.
Binary file removed example/output/samples/samples_info.xlsx
Binary file not shown.
Binary file removed example/output/samples/samples_info_std.xlsx
Binary file not shown.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "gcms_data_analysis"
version = "1.0.6"
version = "1.0.7"
authors = [{ name = "Matteo Pecchi" }]
description = "Automatic analysis of GC-MS data"
readme = "README.md"
Expand Down
21 changes: 15 additions & 6 deletions src/gcms_data_analysis/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2631,12 +2631,21 @@ def _create_sample_from_files(
non_num_columns = ["iupac_name", "compound_used_for_calibration"]
else:
non_num_columns = ["iupac_name"]
# Step 1: Create a comprehensive index of all unique compounds
all_compounds = pd.Index([])
for df in files_in_sample:
all_compounds = all_compounds.union(df.index)

# Step 2: Align all DataFrames to the comprehensive index
aligned_dfs: list[pd.DataFrame] = [
df.align(files_in_sample[0], join="outer", axis=0)[0]
for df in files_in_sample
] # Align indices
df.reindex(all_compounds) for df in files_in_sample
]
# aligned_dfs = [
# df.align(files_in_sample[0], join="outer", axis=0)[0]
# for df in files_in_sample
# ] # Align indices
# Fill NaN values for numerical columns after alignment and before concatenation
filled_dfs = [df.fillna(0) for df in aligned_dfs]
filled_dfs = [df.fillna(0.0) for df in aligned_dfs]
# Keep non-numerical data separately and ensure no duplicates
non_num_data: pd.DataFrame = pd.concat(
[df[non_num_columns].drop_duplicates() for df in files_in_sample]
Expand Down Expand Up @@ -3038,8 +3047,8 @@ def plot_ave_std(
aggr: bool = False,
min_y_thresh: float | None = None,
only_samples_to_plot: list[str] = None,
rename_samples: list[str] =None,
reorder_samples: list[str] =None,
rename_samples: list[str] = None,
reorder_samples: list[str] = None,
item_to_color_to_hatch: pd.DataFrame | None = None,
paper_col=0.8,
fig_hgt_mlt=1.5,
Expand Down
Binary file modified tests/data_for_testing/compounds_properties.xlsx
Binary file not shown.
Binary file modified tests/data_for_testing/deriv_compounds_properties.xlsx
Binary file not shown.

0 comments on commit 18fe33d

Please sign in to comment.