Skip to content

Commit

Permalink
added automatic method calling for all methods that depend on previous methods in the Project
Browse files (browse the repository at this point in the history)
  • Loading branch information
mpecchi committed May 20, 2024
1 parent 69c78d6 commit a61e3f9
Show file tree
Hide file tree
Showing 6 changed files with 457 additions and 422 deletions.
4 changes: 2 additions & 2 deletions example/name_to_properties/example_name_to_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
import pubchempy as pcp
from gcms_data_analysis.fragmenter import Fragmenter

from gcms_data_analysis import name_to_properties
from gcms_data_analysis.gcms import name_to_properties


folder_path = plib.Path(
r"C:\Users\mp933\OneDrive - Cornell University\Python\gcms_data_analysis\tests\data_name_to_properties"
r"/Users/matteo/Projects/gcms_data_analysis/example/name_to_properties/data_name_to_properties"
)
# %%
classifications_codes_fractions = pd.read_excel(
Expand Down
4 changes: 2 additions & 2 deletions src/gcms_data_analysis/fragmenter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
from rdkit import Chem
from rdkit.Chem import DataStructs
from rdkit.Chem import rdmolops
from rdkit.Chem.AllChem import (
from rdkit.Chem.AllChem import ( # pylint: disable=no-name-in-module
GetMorganFingerprintAsBitVect,
) # pylint: disable=no-name-in-module
)


class Fragmenter:
Expand Down
40 changes: 33 additions & 7 deletions src/gcms_data_analysis/gcms.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ def load_all_files(self):
for filename in self.files_info.index:
file = self.load_single_file(filename)
self.files[filename] = file
print("Info: load_all_files: files loaded")
print(f"Info: load_all_files: {len(self.files)} files loaded")
return self.files

def load_single_file(self, filename) -> pd.DataFrame:
Expand Down Expand Up @@ -464,7 +464,10 @@ def create_tanimoto_and_molecular_weight_similarity_dfs(
self.load_compounds_properties()
if self.dict_names_to_iupacs is None:
self.create_dict_names_to_iupacs()
if "iupac_name" not in list(self.files.values())[0].columns:
if (
"iupac_name" not in list(self.files.values())[0].columns
or "iupac_name" not in list(self.calibrations.values())[0].columns
):
self.add_iupac_to_files_and_calibrations()
prop_index_iupac = self.compounds_properties.set_index("iupac_name")
prop_index_iupac = prop_index_iupac[
Expand Down Expand Up @@ -542,12 +545,23 @@ def apply_calibration_to_files(self):
in the loaded files, adjusting concentrations based on calibration
data, and updates the 'files' attribute with calibrated data."""
print("Info: apply_calibration_to_files: loop started")
if "iupac_name" not in list(self.files.values())[0].columns:
if not self.files:
self.load_all_files()
if not self.calibrations:
self.load_calibrations()
if self.compounds_properties is None:
self.load_compounds_properties()
if self.dict_names_to_iupacs is None:
self.create_dict_names_to_iupacs()
if (
"iupac_name" not in list(self.files.values())[0].columns
or "iupac_name" not in list(self.calibrations.values())[0].columns
):
self.add_iupac_to_files_and_calibrations()
if self.use_semi_calibration and not self.semi_calibration_dict:
self.create_semi_calibration_dict()

for filename in self.files.keys():
for filename in self.files:
self.files[filename] = self.apply_calib_to_single_file(filename)
return self.files

Expand Down Expand Up @@ -630,7 +644,8 @@ def add_stats_to_files_info(self) -> pd.DataFrame:
DataFrame, such as maximum height, area, and concentrations,
updating the 'files_info' with these statistics."""
print("Info: add_stats_to_files_info: started")

if not self.files:
self.load_all_files()
numeric_columns = [
col
for col in self.acceptable_params
Expand Down Expand Up @@ -658,8 +673,8 @@ def create_samples_info(self):
"""Creates a summary 'samples_info' DataFrame from 'files_info',
aggregating data for each sample, and updates the 'samples_info'
attribute with this summarized data."""
if self.files_info is None:
self.load_files_info()
if not self.files:
self.load_all_files()
numeric_columns = [
col
for col in self.acceptable_params
Expand Down Expand Up @@ -801,6 +816,12 @@ def create_files_param_report(self, param="conc_vial_mg_L"):
self.load_all_files()
if param not in self.acceptable_params:
raise ValueError(f"{param = } is not an acceptable param")
self.load_calibrations()
if self.calibrations:
self.apply_calibration_to_files()
for filename in self.files_info.index:
if param not in self.files[filename].columns:
raise ValueError(f"{param = } not found in {filename = }")
# Create a dictionary of Series, each Series named after the file and containing the 'param' values
series_dict = {
filename: self.files[filename][param].rename(filename)
Expand Down Expand Up @@ -829,6 +850,8 @@ def create_files_param_aggrrep(self, param="conc_vial_mg_L"):
raise ValueError(f"{param = } is not an acceptable param")
if param not in self.files_reports:
self.create_files_param_report(param)
if self.compounds_properties is None:
self.load_compounds_properties()
# create a df with iupac name index and fg_mf columns (underiv and deriv)
comps_df = self.compounds_properties # .set_index("iupac_name")
# comps_df = comps_df[~comps_df.index.duplicated(keep="first")]
Expand Down Expand Up @@ -872,6 +895,9 @@ def create_samples_param_report(self, param: str = "conc_vial_mg_L"):
print(f"Info: create_samples_param_report: {param = }")
if param not in self.acceptable_params:
raise ValueError(f"{param = } is not an acceptable param")
self.load_calibrations()
if self.calibrations:
self.apply_calibration_to_files()
if param not in self.files_reports:
self.create_files_param_report(param)
file_to_sample_rename = dict(
Expand Down
Binary file modified tests/data_minimal_case/compounds_properties.xlsx
Binary file not shown.
Binary file modified tests/data_minimal_case/files_info.xlsx
Binary file not shown.
Loading

0 comments on commit a61e3f9

Please sign in to comment.