Skip to content

Commit

Permalink
fixes on save_samples_info
Browse files Browse the repository at this point in the history
  • Loading branch information
mpecchi committed Mar 19, 2024
1 parent 154313e commit 106c05d
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 7 deletions.
Binary file added example/output/samples/samples_info.xlsx
Binary file not shown.
Binary file added example/output/samples/samples_info_std.xlsx
Binary file not shown.
30 changes: 23 additions & 7 deletions src/gcms_data_analysis/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -1624,14 +1624,15 @@ class Project:
}

compounds_to_rename = {}
param_to_axis_label = {
# Maps each parameter name that can be reported to the axis label text
# associated with it (labels may contain mathtext such as $_{sample}$).
param_to_axis_label: dict[str, str] = {
    "area": "Peak Area [-]",
    "area_if_undiluted": "Peak Area [-]",
    "conc_vial_mg_L": "conc. [mg/L] (ppm)",
    "conc_vial_if_undiluted_mg_L": "conc. [mg/L] (ppm)",
    "fraction_of_sample_fr": "mass fraction [g/g$_{sample}$]",
    "fraction_of_feedstock_fr": "mass fraction [g/g$_{feedstock}$]",
}
# Parameter names accepted by the report-creation methods, which raise
# ValueError for any other value. Derived from the mapping above so the
# two can never drift apart.
acceptable_params: list[str] = list(param_to_axis_label)
string_in_deriv_names: list[str] = [
"deriv.",
"derivative",
Expand Down Expand Up @@ -2589,6 +2590,8 @@ def create_samples_info(self):
self.samples_info = _samples_info.loc[:, non_numcol + numcol]
self.samples_info_std = _samples_info_std.loc[:, non_numcol + numcol]
self.samples_info_created = True
if Project.auto_save_to_excel:
self.save_samples_info()
print("Info: create_samples_info: samples_info created")
return self.samples_info, self.samples_info_std

Expand Down Expand Up @@ -2616,7 +2619,9 @@ def create_samples_from_files(self):
self.samples_created = True
return self.samples, self.samples_std

def _create_sample_from_files(self, files_in_sample, samplename):
def _create_sample_from_files(
self, files_in_sample: list[pd.DataFrame], samplename: str
):
"""Creates a sample dataframe and a standard deviation dataframe from files
that are replicates of the same sample. This process includes aligning dataframes,
filling missing values, calculating averages and standard deviations,
Expand All @@ -2626,15 +2631,17 @@ def _create_sample_from_files(self, files_in_sample, samplename):
non_num_columns = ["iupac_name", "compound_used_for_calibration"]
else:
non_num_columns = ["iupac_name"]
aligned_dfs = [
aligned_dfs: list[pd.DataFrame] = [
df.align(files_in_sample[0], join="outer", axis=0)[0]
for df in files_in_sample
] # Align indices
# Keep non-numerical data separately and ensure no duplicates
non_num_data = pd.concat(
non_num_data: pd.DataFrame = pd.concat(
[df[non_num_columns].drop_duplicates() for df in files_in_sample]
).drop_duplicates()
filled_dfs = [f.drop(columns=non_num_columns).fillna(0) for f in aligned_dfs]
filled_dfs: list[pd.DataFrame] = [
f.drop(columns=non_num_columns).fillna(0) for f in aligned_dfs
]
# Calculating the average and std for numerical data
sample = pd.concat(filled_dfs).groupby(level=0).mean().astype(float)
sample_std = pd.concat(filled_dfs).groupby(level=0).std().astype(float)
Expand Down Expand Up @@ -2711,7 +2718,8 @@ def create_files_param_report(self, param="conc_vial_mg_L"):
This report aids in the analysis and comparison of compound
concentrations across FILES."""
print("Info: create_files_param_report: ", param)

if param not in Project.acceptable_params:
raise ValueError(f"{param = } is not an acceptable param")
if not self.calibration_to_files_applied:
self.apply_calibration_to_files()
rep_columns = self.files_info.index.tolist()
Expand Down Expand Up @@ -2746,8 +2754,11 @@ def create_files_param_aggrrep(self, param="conc_vial_mg_L"):
group concentrations. This aggregation facilitates the understanding
of functional group distribution across FILES."""
print("Info: create_param_aggrrep: ", param)
if param not in Project.acceptable_params:
raise ValueError(f"{param = } is not an acceptable param")
if param not in self.list_of_files_param_reports:
self.create_files_param_report(param)

# fg = functional groups, mf = mass fraction
filenames = self.files_info.index.tolist()
_all_comps = self.files_reports[param].index.tolist()
Expand Down Expand Up @@ -2804,6 +2815,8 @@ def create_samples_param_report(self, param="conc_vial_mg_L"):
This report aids in the analysis and comparison of compound
concentrations across SAMPLES."""
print("Info: create_param_report: ", param)
if param not in Project.acceptable_params:
raise ValueError(f"{param = } is not an acceptable param")
if not self.samples_created:
self.create_samples_from_files()
_all_comps = self.compounds_properties["iupac_name"].tolist()
Expand Down Expand Up @@ -2853,6 +2866,8 @@ def create_samples_param_aggrrep(self, param="conc_vial_mg_L"):
group concentrations. This aggregation facilitates the understanding
of functional group distribution across SAMPLES."""
print("Info: create_param_aggrrep: ", param)
if param not in Project.acceptable_params:
raise ValueError(f"{param = } is not an acceptable param")
if param not in self.list_of_samples_param_reports:
self.create_samples_param_report(param)
# fg = functional groups, mf = mass fraction
Expand Down Expand Up @@ -2942,7 +2957,8 @@ def save_samples_info(self):
self.add_stats_to_samples_info()
out_path = plib.Path(Project.out_path, "samples")
out_path.mkdir(parents=True, exist_ok=True)
self.files_info.to_excel(plib.Path(out_path, "samples_info.xlsx"))
self.samples_info.to_excel(plib.Path(out_path, "samples_info.xlsx"))
self.samples_info_std.to_excel(plib.Path(out_path, "samples_info_std.xlsx"))
print("Info: save_samples_info: samples_info saved")

def save_sample(self, sample, sample_std, samplename):
Expand Down

0 comments on commit 106c05d

Please sign in to comment.