Added automatic method calling for all methods that depend on previous methods in the Project.
mpecchi · May 20, 2024 · a61e3f9 · a61e3f9
1 parent 69c78d6
commit a61e3f9
Show file tree

Hide file tree

Showing 6 changed files with 457 additions and 422 deletions.
diff --git a/example/name_to_properties/example_name_to_properties.py b/example/name_to_properties/example_name_to_properties.py
@@ -8,11 +8,11 @@
 import pubchempy as pcp
 from gcms_data_analysis.fragmenter import Fragmenter
 
-from gcms_data_analysis import name_to_properties
+from gcms_data_analysis.gcms import name_to_properties
 
 
 folder_path = plib.Path(
-    r"C:\Users\mp933\OneDrive - Cornell University\Python\gcms_data_analysis\tests\data_name_to_properties"
+    r"/Users/matteo/Projects/gcms_data_analysis/example/name_to_properties/data_name_to_properties"
 )
 # %%
 classifications_codes_fractions = pd.read_excel(

diff --git a/src/gcms_data_analysis/fragmenter.py b/src/gcms_data_analysis/fragmenter.py
@@ -2,9 +2,9 @@
 from rdkit import Chem
 from rdkit.Chem import DataStructs
 from rdkit.Chem import rdmolops
-from rdkit.Chem.AllChem import (
+from rdkit.Chem.AllChem import (  # pylint: disable=no-name-in-module
     GetMorganFingerprintAsBitVect,
-)  # pylint: disable=no-name-in-module
+)
 
 
 class Fragmenter:

diff --git a/src/gcms_data_analysis/gcms.py b/src/gcms_data_analysis/gcms.py
@@ -266,7 +266,7 @@ def load_all_files(self):
         for filename in self.files_info.index:
             file = self.load_single_file(filename)
             self.files[filename] = file
-        print("Info: load_all_files: files loaded")
+        print(f"Info: load_all_files: {len(self.files)} files loaded")
         return self.files
 
     def load_single_file(self, filename) -> pd.DataFrame:
@@ -464,7 +464,10 @@ def create_tanimoto_and_molecular_weight_similarity_dfs(
             self.load_compounds_properties()
         if self.dict_names_to_iupacs is None:
             self.create_dict_names_to_iupacs()
-        if "iupac_name" not in list(self.files.values())[0].columns:
+        if (
+            "iupac_name" not in list(self.files.values())[0].columns
+            or "iupac_name" not in list(self.calibrations.values())[0].columns
+        ):
             self.add_iupac_to_files_and_calibrations()
         prop_index_iupac = self.compounds_properties.set_index("iupac_name")
         prop_index_iupac = prop_index_iupac[
@@ -542,12 +545,23 @@ def apply_calibration_to_files(self):
         in the loaded files, adjusting concentrations based on calibration
         data, and updates the 'files' attribute with calibrated data."""
         print("Info: apply_calibration_to_files: loop started")
-        if "iupac_name" not in list(self.files.values())[0].columns:
+        if not self.files:
+            self.load_all_files()
+        if not self.calibrations:
+            self.load_calibrations()
+        if self.compounds_properties is None:
+            self.load_compounds_properties()
+        if self.dict_names_to_iupacs is None:
+            self.create_dict_names_to_iupacs()
+        if (
+            "iupac_name" not in list(self.files.values())[0].columns
+            or "iupac_name" not in list(self.calibrations.values())[0].columns
+        ):
             self.add_iupac_to_files_and_calibrations()
         if self.use_semi_calibration and not self.semi_calibration_dict:
             self.create_semi_calibration_dict()
 
-        for filename in self.files.keys():
+        for filename in self.files:
             self.files[filename] = self.apply_calib_to_single_file(filename)
         return self.files
 
@@ -630,7 +644,8 @@ def add_stats_to_files_info(self) -> pd.DataFrame:
         DataFrame, such as maximum height, area, and concentrations,
         updating the 'files_info' with these statistics."""
         print("Info: add_stats_to_files_info: started")
-
+        if not self.files:
+            self.load_all_files()
         numeric_columns = [
             col
             for col in self.acceptable_params
@@ -658,8 +673,8 @@ def create_samples_info(self):
         """Creates a summary 'samples_info' DataFrame from 'files_info',
         aggregating data for each sample, and updates the 'samples_info'
         attribute with this summarized data."""
-        if self.files_info is None:
-            self.load_files_info()
+        if not self.files:
+            self.load_all_files()
         numeric_columns = [
             col
             for col in self.acceptable_params
@@ -801,6 +816,12 @@ def create_files_param_report(self, param="conc_vial_mg_L"):
             self.load_all_files()
         if param not in self.acceptable_params:
             raise ValueError(f"{param = } is not an acceptable param")
+        self.load_calibrations()
+        if self.calibrations:
+            self.apply_calibration_to_files()
+        for filename in self.files_info.index:
+            if param not in self.files[filename].columns:
+                raise ValueError(f"{param = } not found in {filename = }")
         # Create a dictionary of Series, each Series named after the file and containing the 'param' values
         series_dict = {
             filename: self.files[filename][param].rename(filename)
@@ -829,6 +850,8 @@ def create_files_param_aggrrep(self, param="conc_vial_mg_L"):
             raise ValueError(f"{param = } is not an acceptable param")
         if param not in self.files_reports:
             self.create_files_param_report(param)
+        if self.compounds_properties is None:
+            self.load_compounds_properties()
         # create a df with iupac name index and fg_mf columns (underiv and deriv)
         comps_df = self.compounds_properties  # .set_index("iupac_name")
         # comps_df = comps_df[~comps_df.index.duplicated(keep="first")]
@@ -872,6 +895,9 @@ def create_samples_param_report(self, param: str = "conc_vial_mg_L"):
         print(f"Info: create_samples_param_report: {param = }")
         if param not in self.acceptable_params:
             raise ValueError(f"{param = } is not an acceptable param")
+        self.load_calibrations()
+        if self.calibrations:
+            self.apply_calibration_to_files()
         if param not in self.files_reports:
             self.create_files_param_report(param)
         file_to_sample_rename = dict(

diff --git a/tests/data_minimal_case/compounds_properties.xlsx b/tests/data_minimal_case/compounds_properties.xlsx
diff --git a/tests/data_minimal_case/files_info.xlsx b/tests/data_minimal_case/files_info.xlsx