class MyFigure:
    """
    A class for creating and customizing figures using matplotlib and seaborn.

    MyFigure provides a structured way to create figures with multiple subplots,
    allowing for detailed customization of each subplot. It supports features like
    adjusting axis limits, adding legends, annotating, and creating inset plots,
    all with an emphasis on easy configurability through keyword arguments.

    Every configuration value may be given once (it is then broadcast to all
    subplots) or as a list/tuple with one entry per subplot.

    :ivar broad_props: A dictionary to store properties that are broadcasted across all axes.
    :type broad_props: dict
    :ivar kwargs: A dictionary to store all the configuration keyword arguments.
    :type kwargs: dict
    :ivar fig: The main figure object from matplotlib.
    :type fig: matplotlib.figure.Figure
    :ivar axs: A list of axes objects corresponding to the subplots in the figure.
    :type axs: list[matplotlib.axes.Axes]
    :ivar axts: A list of twin axes objects if 'twinx' is enabled, otherwise None.
    :type axts: list[matplotlib.axes.Axes] or None
    :ivar n_axs: The number of axes/subplots in the figure.
    :type n_axs: int
    """

    @staticmethod
    def _adjust_lims(
        lims: tuple[float, float] | None, gap: float = 0.05
    ) -> tuple[float, float] | None:
        """
        Widen axis limits by a fraction of their span to add padding around the data.

        :param lims: (lower, upper) limits, or None to leave limits untouched.
        :type lims: tuple[float, float] | None
        :param gap: fraction of the span (upper - lower) added on each side,
            defaults to 0.05
        :type gap: float, optional
        :return: the padded (lower, upper) limits, or None if ``lims`` is None.
        :rtype: tuple[float, float] | None
        """
        if lims is None:
            return None
        # algebraically: lower - gap * span, upper + gap * span
        return (
            lims[0] * (1 + gap) - gap * lims[1],
            lims[1] * (1 + gap) - gap * lims[0],
        )

    def __init__(self, **kwargs: Any) -> None:
        """
        Initializes a MyFigure object with custom or default settings for creating plots.

        The seaborn palette and style are set globally, the figure is created and
        the axis properties given in ``kwargs`` are applied once.

        :param kwargs: Keyword arguments to override default figure settings
            (see :meth:`default_kwargs` for the known keys).
        """
        self.broad_props: dict[str, list] = {}  # broadcasted properties for each axis
        self.kwargs = self.default_kwargs()
        self.kwargs.update(kwargs)  # Override defaults with any kwargs provided
        self.process_kwargs()

        sns.set_palette(self.kwargs["color_palette"])
        sns.set_style(
            self.kwargs["sns_style"], {"font.family": self.kwargs["text_font"]}
        )

        self.create_figure()
        self.update_axes_single_props()
        self.update_axes_list_props()

    def default_kwargs(self) -> Dict[str, Any]:
        """
        Defines the default settings for the figure.

        :return: A dictionary of default settings.
        :rtype: Dict[str, Any]
        """
        return {
            "rows": 1,
            "cols": 1,
            "width": 6.0,
            "height": 6.0,
            "x_lab": None,
            "y_lab": None,
            "x_lim": None,
            "y_lim": None,
            "x_ticks": None,
            "y_ticks": None,
            "x_ticklabels": None,
            "y_ticklabels": None,
            "twinx": False,
            "yt_lab": None,
            "yt_lim": None,
            "yt_ticks": None,
            "yt_ticklabels": None,
            "legend": True,
            "legend_loc": "best",
            "legend_ncols": 1,
            "annotate_lttrs": False,
            "annotate_lttrs_xy": None,
            "grid": False,
            "color_palette": "deep",
            "text_font": "Dejavu Sans",
            "sns_style": "ticks",
        }

    def process_kwargs(self) -> None:
        """
        Validates and processes the provided keyword arguments for figure configuration.

        ``rows``, ``cols`` and ``legend_ncols`` are cast to int, ``width`` and
        ``height`` to float; all five must be strictly positive.

        :raises ValueError: if rows, cols, width, height or legend_ncols is not positive.
        """
        for key, cast in (
            ("rows", int),
            ("cols", int),
            ("width", float),
            ("height", float),
            ("legend_ncols", int),
        ):
            self.kwargs[key] = cast(self.kwargs[key])

        if self.kwargs["rows"] <= 0:
            raise ValueError("Number of rows must be positive.")
        if self.kwargs["cols"] <= 0:
            raise ValueError("Number of cols must be positive.")
        if self.kwargs["width"] <= 0:
            raise ValueError("Width must be positive.")
        if self.kwargs["height"] <= 0:
            raise ValueError("Height must be positive.")
        if self.kwargs["legend_ncols"] <= 0:
            raise ValueError("Number of legend columns must be positive.")

    def create_figure(self) -> "MyFigure":
        """
        Creates the figure and its axes.

        Sets ``fig``, ``axs`` (always a flat list), ``axts`` (twin axes, or None
        when 'twinx' is disabled) and ``n_axs``.

        :return: the instance itself, to allow chaining.
        :rtype: MyFigure
        """
        self.fig, axes = plt.subplots(
            self.kwargs["rows"],
            self.kwargs["cols"],
            figsize=(self.kwargs["width"], self.kwargs["height"]),
            constrained_layout=True,
        )
        # Ensure axs is always a flat list, even if it's just one subplot
        self.axs = np.atleast_1d(axes).flatten().tolist()
        # axts is always defined so later code can rely on the attribute existing
        self.axts = [a.twinx() for a in self.axs] if self.kwargs["twinx"] else None
        self.n_axs = len(self.axs)
        return self

    def save_figure(
        self,
        filename: str = "figure",
        out_path: plib.Path | None = plib.Path("."),
        tight_layout: bool = True,
        save_as_png: bool = True,
        save_as_pdf: bool = False,
        save_as_svg: bool = False,
        save_as_eps: bool = False,
        png_transparency: bool = False,
    ) -> None:
        """
        Finalize the figure (axis properties, legend, letters) and save it to disk.

        :param filename: file name without extension, defaults to "figure"
        :type filename: str, optional
        :param out_path: folder where the files are written; None means the
            current working directory, defaults to plib.Path(".")
        :type out_path: plib.Path | None, optional
        :param tight_layout: if True the figure is saved with
            ``bbox_inches="tight"``, defaults to True
        :type tight_layout: bool, optional
        :param save_as_png: save a .png file, defaults to True
        :type save_as_png: bool, optional
        :param save_as_pdf: save a .pdf file, defaults to False
        :type save_as_pdf: bool, optional
        :param save_as_svg: save a .svg file, defaults to False
        :type save_as_svg: bool, optional
        :param save_as_eps: save a .eps file, defaults to False
        :type save_as_eps: bool, optional
        :param png_transparency: value passed as ``transparent`` to ``savefig``,
            defaults to False
        :type png_transparency: bool, optional
        """
        # re-apply the configured properties: plotting calls made after
        # __init__ may have changed labels, limits or ticks
        self.update_axes_single_props()
        self.update_axes_list_props()

        self.add_legend()
        try:
            self.fig.align_labels()  # align labels of subplots, needed only for multi plot
        except AttributeError:
            print("align_labels not performed")
        self.annotate_letters()

        if out_path is None:  # the signature allows None: fall back to cwd
            out_path = plib.Path(".")

        formats = {
            "png": save_as_png,
            "pdf": save_as_pdf,
            "svg": save_as_svg,
            "eps": save_as_eps,
        }
        for fmt, should_save in formats.items():
            if not should_save:
                continue
            full_path = plib.Path(out_path, f"{filename}.{fmt}")
            # NOTE(review): despite its name, png_transparency is applied to
            # every selected format - confirm this is intended
            self.fig.savefig(
                full_path,
                dpi=300,
                transparent=png_transparency,
                bbox_inches="tight" if tight_layout else None,
            )

    def add_legend(self) -> None:
        """
        Add a legend to each subplot whose 'legend' setting is truthy.

        With twin axes enabled, handles and labels of the twin axis are merged
        into the legend of the main axis.
        """
        for sprop in ["legend", "legend_loc", "legend_ncols"]:
            self.broad_props[sprop] = self._broadcast_value_prop(
                self.kwargs[sprop], sprop
            )

        if not self.kwargs["twinx"]:
            for i, ax in enumerate(self.axs):
                if self.broad_props["legend"][i]:
                    ax.legend(
                        loc=self.broad_props["legend_loc"][i],
                        ncol=self.broad_props["legend_ncols"][i],
                    )
            return

        for i, (ax, axt) in enumerate(zip(self.axs, self.axts)):
            if self.broad_props["legend"][i]:
                hnd_ax, lab_ax = ax.get_legend_handles_labels()
                hnd_axt, lab_axt = axt.get_legend_handles_labels()
                ax.legend(
                    hnd_ax + hnd_axt,
                    lab_ax + lab_axt,
                    loc=self.broad_props["legend_loc"][i],
                    ncol=self.broad_props["legend_ncols"][i],
                )

    def annotate_letters(self) -> None:
        """
        Write a bold "(letter)" annotation on each subplot.

        'annotate_lttrs' may be False/None (nothing is annotated), True (letters
        a, b, c, ... are used), a single string, or a list/tuple of strings.
        'annotate_lttrs_xy' is an optional (x, y) position in axes-fraction
        coordinates; (-0.15, -0.15) is used when it is not a 2-item sequence.

        :raises ValueError: if 'annotate_lttrs' has an unsupported type or
            provides fewer letters than there are subplots.
        """
        xylttrs = self.kwargs["annotate_lttrs_xy"]
        if isinstance(xylttrs, (list, tuple)) and len(xylttrs) >= 2:
            x_lttrs, y_lttrs = xylttrs[0], xylttrs[1]
        else:
            x_lttrs, y_lttrs = -0.15, -0.15

        lttrs = self.kwargs["annotate_lttrs"]
        if lttrs is None or lttrs is False:
            return
        if lttrs is True:
            letters_list = list(string.ascii_lowercase)
        elif isinstance(lttrs, str):
            letters_list = [lttrs]
        elif isinstance(lttrs, (list, tuple)):
            letters_list = list(lttrs)
        else:
            raise ValueError(
                "The property 'annotate_lttrs' must be a bool, a string or a list of strings."
            )
        if len(letters_list) < len(self.axs):
            raise ValueError(
                "The size of the property 'annotate_lttrs' does not match the number of axes."
            )

        for ax, letter in zip(self.axs, letters_list):
            ax.annotate(
                f"({letter})",
                xycoords="axes fraction",
                xy=(0, 0),
                xytext=(x_lttrs, y_lttrs),
                weight="bold",
            )

    def create_inset(
        self,
        ax: "Axes",
        ins_x_loc: list[float],
        ins_y_loc: list[float],
        ins_x_lim: list[float],
        ins_y_lim: list[float],
    ) -> "Axes":
        """
        Create an inset axes inside ``ax``.

        :param ax: the axes that hosts the inset.
        :type ax: Axes
        :param ins_x_loc: [left, right] position of the inset, passed to
            ``ax.inset_axes`` as left and (right - left) width.
        :type ins_x_loc: list[float]
        :param ins_y_loc: [bottom, top] position of the inset, passed to
            ``ax.inset_axes`` as bottom and (top - bottom) height.
        :type ins_y_loc: list[float]
        :param ins_x_lim: [min, max] x limits of the inset (padded by _adjust_lims).
        :type ins_x_lim: list[float]
        :param ins_y_lim: [min, max] y limits of the inset (padded by _adjust_lims).
        :type ins_y_lim: list[float]
        :return: the new inset axes.
        :rtype: Axes
        """
        wdt = ins_x_loc[1] - ins_x_loc[0]
        hgt = ins_y_loc[1] - ins_y_loc[0]
        inset = ax.inset_axes([ins_x_loc[0], ins_y_loc[0], wdt, hgt])

        inset.set_xlim(MyFigure._adjust_lims(ins_x_lim))
        inset.set_ylim(MyFigure._adjust_lims(ins_y_lim))
        return inset

    def update_axes_single_props(self) -> None:
        """Apply the single-value properties (axis labels and grid) to every subplot."""
        for sprop in ["x_lab", "y_lab", "yt_lab", "grid"]:
            self.broad_props[sprop] = self._broadcast_value_prop(
                self.kwargs[sprop], sprop
            )

        for i, ax in enumerate(self.axs):
            # labels are always (re)applied, also when the configured value is None
            ax.set_xlabel(self.broad_props["x_lab"][i])
            ax.set_ylabel(self.broad_props["y_lab"][i])
            if self.broad_props["grid"][i] is not None:
                ax.grid(self.broad_props["grid"][i])

        if self.kwargs["twinx"]:
            for i, axt in enumerate(self.axts):
                axt.set_ylabel(self.broad_props["yt_lab"][i])

    def update_axes_list_props(self) -> None:
        """
        Apply the list-like properties (limits, ticks, tick labels) to every subplot.

        A property whose value is None for a subplot is left untouched there.
        Limits are padded with :meth:`_adjust_lims` before being applied.
        """
        for lprop in [
            "x_lim",
            "y_lim",
            "yt_lim",
            "x_ticks",
            "y_ticks",
            "yt_ticks",
            "x_ticklabels",
            "y_ticklabels",
            "yt_ticklabels",
        ]:
            self.broad_props[lprop] = self._broadcast_list_prop(
                self.kwargs[lprop], lprop
            )

        props = self.broad_props
        for i, ax in enumerate(self.axs):
            if props["x_lim"][i] is not None:
                ax.set_xlim(MyFigure._adjust_lims(props["x_lim"][i]))
            if props["y_lim"][i] is not None:
                ax.set_ylim(MyFigure._adjust_lims(props["y_lim"][i]))
            if props["x_ticks"][i] is not None:
                ax.set_xticks(props["x_ticks"][i])
            if props["y_ticks"][i] is not None:
                ax.set_yticks(props["y_ticks"][i])
            if props["x_ticklabels"][i] is not None:
                ax.set_xticklabels(props["x_ticklabels"][i])
            if props["y_ticklabels"][i] is not None:
                ax.set_yticklabels(props["y_ticklabels"][i])

        if self.kwargs["twinx"]:
            for i, axt in enumerate(self.axts):
                if props["yt_lim"][i] is not None:
                    axt.set_ylim(MyFigure._adjust_lims(props["yt_lim"][i]))
                if props["yt_ticks"][i] is not None:
                    axt.set_yticks(props["yt_ticks"][i])
                if props["yt_ticklabels"][i] is not None:
                    axt.set_yticklabels(props["yt_ticklabels"][i])

    def _broadcast_value_prop(
        self, prop: list | str | float | int | bool | None, prop_name: str
    ) -> list:
        """
        Expand a single-value property to one value per subplot.

        :param prop: None, a scalar (str, float, int, bool) repeated for every
            subplot, or a list/tuple with exactly one entry per subplot.
        :type prop: list | str | float | int | bool | None
        :param prop_name: The name of the property for error messages.
        :type prop_name: str
        :raises ValueError: if a list/tuple has the wrong length or ``prop`` has
            an unsupported type.
        :return: a list with ``n_axs`` entries.
        :rtype: list
        """
        if prop is None:
            return [None] * self.n_axs
        if isinstance(prop, (list, tuple)):
            if len(prop) != self.n_axs:
                raise ValueError(
                    f"The size of the property '{prop_name}' does not match the number of axes."
                )
            return list(prop)
        if isinstance(prop, (str, float, int, bool)):
            return [prop] * self.n_axs
        # fail here instead of returning None and crashing later on indexing
        raise ValueError(
            f"The type of the property '{prop_name}' ({type(prop).__name__}) is not supported."
        )

    def _broadcast_list_prop(self, prop: list | tuple | None, prop_name: str) -> list:
        """
        Expand a list-like property (limits, ticks, tick labels) to one entry per subplot.

        :param prop: None, a flat list/tuple of int/float/str repeated for every
            subplot, or a list/tuple with one entry per subplot where each entry
            is itself a list/tuple (or None to skip that subplot).
        :type prop: list | tuple | None
        :param prop_name: The name of the property for error messages.
        :type prop_name: str
        :raises ValueError: if ``prop`` matches neither accepted structure.
        :return: a list with ``n_axs`` entries.
        :rtype: list
        """
        if prop is None:
            return [None] * self.n_axs

        if isinstance(prop, (list, tuple)):
            # one entry per subplot; the length check comes first so that a
            # flat list is never mistaken for a per-axis list
            if len(prop) == self.n_axs and all(
                item is None or isinstance(item, (list, tuple)) for item in prop
            ):
                return list(prop)
            if all(isinstance(item, (int, float, str)) for item in prop):
                return [prop] * self.n_axs

        raise ValueError(
            f"The structure of '{prop_name}' does not match expected pair-wise input."
        )
# list with markers for plotting (14 markers, repeated twice)
mrkrs: list[str] = [
    "o",
    "v",
    "X",
    "s",
    "p",
    "^",
    "P",
    "<",
    ">",
    "*",
    "d",
    "1",
    "2",
    "3",
] * 2

# list with hatches for bar plots: no hatch first, then 9 patterns repeated 4 times
htchs: list[str | None] = [None] + [
    "//",
    "...",
    "--",
    "O",
    "\\\\",
    "oo",
    "\\\\\\",
    "/////",
    ".....",
] * 4


def _format_outlier_label(ave: float, std: float) -> str:
    """Return the text shown for one out-of-range bar: "ave" or "ave±std"."""
    if np.isinf(ave):
        return "inf" if ave > 0 else "-inf"
    text = "{:.2f}".format(round(ave, 2))
    # the std is only shown when it carries information
    if std != 0 and not np.isnan(std):
        text += r"$\pm$" + "{:.2f}".format(round(std, 2))
    return text


def _annotate_outliers_in_plot(ax, df_ave, df_std, y_lim):
    """
    Annotate bars whose average lies outside the y-axis limits.

    For every bar with an average below y_lim[0] or above y_lim[1], the average
    (and the standard deviation, when it is neither zero nor NaN) is written
    near the bottom or the top of the axes at the bar's x position. Labels that
    are horizontally closer than a threshold are shifted vertically.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        The matplotlib Axes object where the bar plot is drawn.
    df_ave : pandas.DataFrame
        DataFrame containing the average values used in the plot.
    df_std : pandas.DataFrame
        DataFrame containing the standard deviation values corresponding
        to df_ave; an empty DataFrame is treated as all zeros.
    y_lim : list of [float, float]
        Minimum (y_lim[0]) and maximum (y_lim[1]) limits of the y-axis.

    Returns
    -------
    None
        Modifies the provided Axes object (ax) by adding annotations.
    """
    dx = 0.15 * len(df_ave.index)  # minimum x distance between unshifted labels
    # x in data coordinates, y in axes-fraction coordinates
    tform = blended_transform_factory(ax.transData, ax.transAxes)

    averages = df_ave.transpose().to_numpy(dtype=float).flatten()
    if df_std.empty:
        stds = np.zeros(len(averages))
    else:
        stds = df_std.transpose().to_numpy(dtype=float).flatten()
    # when significant and non-significant bars are drawn by two plot calls the
    # axes holds twice the patches; the first len(averages) give the x positions
    x_centers = [
        p.get_x() + p.get_width() / 2 for p in list(ax.patches)[: len(averages)]
    ]
    dfao = pd.DataFrame({"xpos": x_centers, "ave": averages, "std": stds})

    out_of_range = (dfao["ave"] < y_lim[0]) | (dfao["ave"] > y_lim[1])
    dfao = dfao[out_of_range].copy()
    if dfao.empty:
        return
    dfao["H/L"] = np.where(dfao["ave"] > y_lim[1], "H", "L")
    dfao["text"] = [
        _format_outlier_label(ave, std) for ave, std in zip(dfao["ave"], dfao["std"])
    ]

    # low outliers are written near the bottom and shifted up when crowded,
    # high outliers near the top and shifted down
    for hl, ypos, dy in zip(["L", "H"], [0.02, 0.98], [0.04, -0.04]):
        dfao1 = dfao[dfao["H/L"] == hl].copy()
        if dfao1.empty:
            continue
        dfao1["ypos"] = ypos
        dfao1 = dfao1.sort_values("xpos", ascending=True)
        # True when a label is closer than dx to the previous one
        dfao1["diffx"] = (
            np.diff(dfao1["xpos"].values, prepend=dfao1["xpos"].values[0]) < dx
        )
        dfao1 = dfao1.reset_index(drop=True)

        for i in dfao1.index.tolist()[1:]:
            dfao1.loc[i, "ypos"] = ypos
            # walk back through the run of crowded labels and shift each of them
            for e in range(i, 0, -1):
                if dfao1.loc[e, "diffx"]:
                    dfao1.loc[e, "ypos"] += dy
                else:
                    break
        for ao in dfao1.index.tolist():
            ax.annotate(
                dfao1.loc[ao, "text"],
                xy=(dfao1.loc[ao, "xpos"], 0),
                xycoords=tform,
                textcoords=tform,
                xytext=(dfao1.loc[ao, "xpos"], dfao1.loc[ao, "ypos"]),
                fontsize=9,
                ha="center",
                va="center",
                bbox={
                    "boxstyle": "square,pad=0",
                    "edgecolor": None,
                    "facecolor": "white",
                    "alpha": 0.7,
                },
            )


def plot_pave_std(
    proj: "Project",
    filename: str = "plot",
    files_or_samples: Literal["files", "samples"] = "samples",
    param: str = "conc_vial_mg_L",
    aggr: bool = False,
    show_total_in_twinx: bool = False,
    annotate_outliers: bool = True,
    min_y_thresh: float | None = None,
    only_samples_to_plot: list[str] | None = None,
    rename_samples: list[str] | None = None,
    reorder_samples: list[str] | None = None,
    item_to_color_to_hatch: pd.DataFrame | None = None,
    yt_sum_label: str = "total\n(right axis)",
    y_lim: tuple[float, float] | None = None,
    y_lab: str | None = None,
    yt_lab: str | None = None,
    color_palette: str = "deep",
    x_label_rotation: int = 0,
    legend_location: Literal["best", "outside"] = "best",
    legend_columns: int = 1,
    legend_x_anchor: float = 1,
    legend_y_anchor: float = 1.02,
    legend_labelspacing: float = 0.5,
    **kwargs,
) -> "MyFigure":
    """
    Plot a bar chart of average values, with standard deviations when available.

    Data are taken from the reports of ``proj`` (per file or per sample,
    optionally aggregated), filtered/renamed/reordered as requested, plotted on
    a single-axes MyFigure and saved in the "plots" subfolder of
    ``Project.out_path``. When standard deviations are available, bars whose
    absolute average does not exceed the absolute standard deviation are drawn
    semi-transparent with a grey edge and without error bar.

    Parameters
    ----------
    proj : Project
        Project providing the reports (files_reports, samples_reports,
        samples_reports_std and the corresponding aggrreps attributes).
    filename : str
        Name of the output file, without extension. Default is 'plot'.
    files_or_samples : {'files', 'samples'}
        Whether to plot file reports (no standard deviation) or sample
        reports. Default is 'samples'.
    param : str
        The report parameter to plot. Default is 'conc_vial_mg_L'.
    aggr : bool
        If True the aggregated reports are used instead of the compound
        reports. Default is False.
    show_total_in_twinx : bool
        If True the row-wise sum is shown as scatter points on a twin y-axis.
        Default is False.
    annotate_outliers : bool
        If True (and y_lim is given) bars outside y_lim are annotated with
        their value. Default is True.
    min_y_thresh : float, optional
        Only items with at least one value above this threshold are kept.
    only_samples_to_plot : list of str, optional
        Files/samples to include; all are included when None.
    rename_samples : list of str, optional
        New names, assigned positionally to the (already filtered)
        files/samples.
    reorder_samples : list of str, optional
        Order of the files/samples; names not present are ignored.
    item_to_color_to_hatch : pandas.DataFrame, optional
        Indexed by item, with columns 'clr' and 'htch' giving the color and
        hatch of each item. When None, ``color_palette`` and the module
        level ``htchs`` are used.
    yt_sum_label : str
        Legend label of the total shown on the twin axis.
    y_lim : tuple of (float, float), optional
        Limits of the y-axis, forwarded to MyFigure.
    y_lab : str, optional
        Label of the y-axis; when None it is looked up in
        ``Project.param_to_axis_label``.
    yt_lab : str, optional
        Label of the twin y-axis; when None and the twin axis is shown, the
        y-axis label is used.
    color_palette : str
        Seaborn palette used when item_to_color_to_hatch is None.
    x_label_rotation : int
        Rotation angle of the x tick labels. Default is 0.
    legend_location : str or None
        'outside' places the legend outside the axes (anchored at
        legend_x_anchor, legend_y_anchor); any other value is passed as
        ``loc``; None disables the legend.
    legend_columns : int
        Number of columns in the legend.
    legend_x_anchor, legend_y_anchor : float
        Anchor of the legend when it is placed outside the axes.
    legend_labelspacing : float
        Vertical spacing between legend entries.
    **kwargs
        Forwarded to MyFigure (e.g. width, height, y_ticks, yt_lim).

    Returns
    -------
    MyFigure
        The figure, already saved to disk.

    Raises
    ------
    ValueError
        If files_or_samples is neither 'files' nor 'samples'.
    """
    if files_or_samples not in ("files", "samples"):
        raise ValueError(
            f"files_or_samples must be 'files' or 'samples', got {files_or_samples!r}."
        )
    use_samples = files_or_samples == "samples"

    # create folder where Plots are stored
    out_path = plib.Path(Project.out_path, "plots")
    out_path.mkdir(parents=True, exist_ok=True)

    # file reports have no standard deviation: an empty frame marks that case
    if not aggr:  # then use compounds reports
        if use_samples:
            df_ave = proj.samples_reports[param].T
            df_std = proj.samples_reports_std[param].T
        else:
            df_ave = proj.files_reports[param].T
            df_std = pd.DataFrame()
    else:  # use aggregated reports
        if use_samples:
            df_ave = proj.samples_aggrreps[param].T
            df_std = proj.samples_aggrreps_std[param].T
        else:
            df_ave = proj.files_aggrreps[param].T
            df_std = pd.DataFrame()

    if only_samples_to_plot is not None:
        df_ave = df_ave.loc[only_samples_to_plot, :].copy()
        if use_samples:
            df_std = df_std.loc[only_samples_to_plot, :].copy()

    if rename_samples is not None:
        df_ave.index = rename_samples
        if use_samples:
            df_std.index = rename_samples

    if reorder_samples is not None:
        filtered_reorder_samples = [
            idx for idx in reorder_samples if idx in df_ave.index
        ]
        df_ave = df_ave.reindex(filtered_reorder_samples)
        if use_samples:
            df_std = df_std.reindex(filtered_reorder_samples)

    if min_y_thresh is not None:
        df_ave = df_ave.loc[:, (df_ave > min_y_thresh).any(axis=0)].copy()
        if use_samples:
            df_std = df_std.loc[:, df_ave.columns].copy()

    if item_to_color_to_hatch is not None:  # specific color and hatches to each fg
        colors = [item_to_color_to_hatch.loc[item, "clr"] for item in df_ave.columns]
        hatches = [item_to_color_to_hatch.loc[item, "htch"] for item in df_ave.columns]
    else:  # no specific colors and hatches specified
        colors = sns.color_palette(color_palette, df_ave.shape[1])
        hatches = htchs

    if y_lab is None:
        y_lab = Project.param_to_axis_label[param]
    if show_total_in_twinx:
        legend_x_anchor += 0.14  # leave room for the twin axis
        if yt_lab is None:  # an explicitly given twin label is kept
            yt_lab = y_lab

    myfig = MyFigure(
        rows=1,
        cols=1,
        twinx=bool(show_total_in_twinx),
        text_font=Project.plot_font,
        y_lab=y_lab,
        yt_lab=yt_lab,
        y_lim=y_lim,
        legend=False,
        grid=Project.plot_grid,
        **kwargs,
    )
    main_ax = myfig.axs[0]

    # single flag used for plotting, error bars and legend, so that the three
    # steps always agree on how many bar sets were drawn
    has_std = not (df_std.empty or df_std.isna().all().all())
    if not has_std:  # means that no std is provided
        df_ave.plot(
            ax=main_ax,
            kind="bar",
            rot=x_label_rotation,
            width=0.9,
            edgecolor="k",
            legend=False,
            capsize=3,
            color=colors,
        )
        bars = main_ax.patches  # needed to add patches to the bars
        n_different_hatches = int(len(bars) / df_ave.shape[0])
    else:  # no legend is represented but non-significant values are shaded
        mask = (df_ave.abs() > df_std.abs()) | df_std.isna()

        df_ave[mask].plot(
            ax=main_ax,
            kind="bar",
            rot=x_label_rotation,
            width=0.9,
            edgecolor="k",
            legend=False,
            yerr=df_std[mask],
            capsize=3,
            color=colors,
            label="_nolegend",
        )
        df_ave[~mask].plot(
            ax=main_ax,
            kind="bar",
            rot=x_label_rotation,
            width=0.9,
            legend=False,
            edgecolor="grey",
            color=colors,
            alpha=0.5,
            label="_nolegend",
        )
        bars = main_ax.patches  # needed to add patches to the bars
        n_different_hatches = int(len(bars) / df_ave.shape[0] / 2)

    if show_total_in_twinx:
        twin_ax = myfig.axts[0]
        twin_ax.scatter(
            df_ave.index,
            df_ave.sum(axis=1).values,
            color="k",
            linestyle="None",
            edgecolor="k",
            facecolor="grey",
            s=100,
            label=yt_sum_label,
            alpha=0.5,
        )
        if has_std:
            twin_ax.errorbar(
                df_ave.index,
                df_ave.sum(axis=1).values,
                df_std.sum(axis=1).values,
                capsize=3,
                linestyle="None",
                color="grey",
                ecolor="k",
            )

    # one hatch per item, repeated for every sample; the sequence is doubled to
    # cover the second set of bars drawn when std values are available
    item_hatches = hatches[:n_different_hatches] + hatches[:n_different_hatches]
    bar_hatches = [h for h in item_hatches for _ in range(df_ave.shape[0])]
    for bar, hatch in zip(bars, bar_hatches):  # assign hatches to each bar
        bar.set_hatch(hatch)

    main_ax.set(xlabel=None)
    if x_label_rotation != 0:
        main_ax.set_xticklabels(
            df_ave.index, rotation=x_label_rotation, ha="right", rotation_mode="anchor"
        )

    if legend_location is not None:
        hnd_ax, lab_ax = main_ax.get_legend_handles_labels()
        if has_std:  # bars were drawn twice: keep one legend entry per item
            hnd_ax = hnd_ax[: len(hnd_ax) // 2]
            lab_ax = lab_ax[: len(lab_ax) // 2]
        if legend_labelspacing > 0.5:  # large legend spacing for molecules
            # invisible entry appended as a blank legend line
            main_ax.plot(np.nan, np.nan, "-", color="None", label=" ")
            all_handles, all_labels = main_ax.get_legend_handles_labels()
            hnd_ax.append(all_handles[0])
            lab_ax.append(all_labels[0])
        if show_total_in_twinx:
            hnd_axt, lab_axt = myfig.axts[0].get_legend_handles_labels()
        else:
            hnd_axt, lab_axt = [], []
        if legend_location == "outside":  # legend goes outside of plot area
            main_ax.legend(
                hnd_ax + hnd_axt,
                lab_ax + lab_axt,
                loc="upper left",
                ncol=legend_columns,
                bbox_to_anchor=(legend_x_anchor, legend_y_anchor),
                labelspacing=legend_labelspacing,
            )
        else:  # legend is inside of plot area
            main_ax.legend(
                hnd_ax + hnd_axt,
                lab_ax + lab_axt,
                loc=legend_location,
                ncol=legend_columns,
                labelspacing=legend_labelspacing,
            )

    # annotate ave+-std at the top of outliers bar (exceeding y_lim)
    if annotate_outliers and (y_lim is not None):
        _annotate_outliers_in_plot(main_ax, df_ave, df_std, y_lim)
    myfig.save_figure(filename, out_path)
    return myfig
f.axts[0].plot([0, 2], [0, 5], label="ccc") +# f.axs[1].plot([0, 1], [0, 3], label="aaa") +# ins = f.create_insex(f.axs[0], [0.6, 0.8], [0.4, 0.6], [0, 0.2], [0, 0.2]) +# ins.plot([0, 1], [0, 3], label="a") +# f.save_figure( +# filename="my_plot", out_path=plib.Path(r"C:\Users\mp933\Desktop\New folder") +# ) From 24f70f9f4c87870caf5ae58736300813edb5735c Mon Sep 17 00:00:00 2001 From: mpecchi Date: Sun, 24 Mar 2024 13:42:57 -0400 Subject: [PATCH 2/7] added plotting with plotting funcitons and classes --- example/example_gcms_data_analysis.py | 42 +++ src/gcms_data_analysis/my_figure.py | 421 -------------------------- src/gcms_data_analysis/plotting.py | 419 ++++++++++++++++++++++++- 3 files changed, 459 insertions(+), 423 deletions(-) delete mode 100644 src/gcms_data_analysis/my_figure.py diff --git a/example/example_gcms_data_analysis.py b/example/example_gcms_data_analysis.py index de0124a..d1c418f 100644 --- a/example/example_gcms_data_analysis.py +++ b/example/example_gcms_data_analysis.py @@ -121,3 +121,45 @@ y_lim=[0, 0.5], color_palette="Set2", ) + +# %% +import pickle + +folder_path: plib.Path = plib.Path(r"C:\Users\mp933\Desktop\New folder") +pickle_path: plib.Path = plib.Path(folder_path, "pickle_object.pkl") +with open(pickle_path, "wb") as output_file: + pickle.dump(gcms, output_file) +# %% +import pickle +import pathlib as plib # Used for handling file and directory paths +from gcms_data_analysis import ( + Project, +) # Import the Project class from the gcms_data_analysis package + +folder_path: plib.Path = plib.Path(r"C:\Users\mp933\Desktop\New folder") +pickle_path: plib.Path = plib.Path(folder_path, "pickle_object.pkl") +with open(pickle_path, "rb") as input_file: + gcms: Project = pickle.load(input_file) +from gcms_data_analysis.plotting import plot_pave_std + +# %% +myfig = plot_pave_std( + gcms, + files_or_samples="files", + width=12, + height=5, + legend_location="outside", + y_lim=[0, 100], +) +# %% +myfig = plot_pave_std( + gcms, + 
files_or_samples="samples", + width=6, + height=6, + legend_location="best", + y_lim=[0, 100], + min_y_thresh=10, +) + +# %% diff --git a/src/gcms_data_analysis/my_figure.py b/src/gcms_data_analysis/my_figure.py deleted file mode 100644 index 8c3a2cd..0000000 --- a/src/gcms_data_analysis/my_figure.py +++ /dev/null @@ -1,421 +0,0 @@ -from __future__ import annotations -import string -import pathlib as plib -from typing import Any, Dict -import numpy as np -import matplotlib.pyplot as plt -from matplotlib.figure import Figure -from matplotlib.axes import Axes -import seaborn as sns - - -class MyFigure: - """ - A class for creating and customizing figures using matplotlib and seaborn. - - MyFigure provides a structured way to create figures with multiple subplots, - allowing for detailed customization of each subplot. It supports features like - adjusting axis limits, adding legends, annotating, and creating inset plots, - all with an emphasis on easy configurability through keyword arguments. - - :ivar broad_props: A dictionary to store properties that are broadcasted across all axes. - :type broad_props: dict - :ivar kwargs: A dictionary to store all the configuration keyword arguments. - :type kwargs: dict - :ivar fig: The main figure object from matplotlib. - :type fig: matplotlib.figure.Figure - :ivar axs: A list of axes objects corresponding to the subplots in the figure. - :type axs: list[matplotlib.axes.Axes] - :ivar axts: A list of twin axes objects if 'twinx' is enabled, otherwise None. - :type axts: list[matplotlib.axes.Axes] or None - :ivar n_axs: The number of axes/subplots in the figure. - :type n_axs: int - - The class is designed to work seamlessly with seaborn's styling features, - making it suitable for creating publication-quality figures with minimal code. 
- """ - - @staticmethod - def _adjust_lims(lims: tuple[float] | None, gap=0.05) -> tuple[float] | None: - """ - Adjusts the provided axis limits by a specified gap percentage to add padding - around the data. - - :param lims: _description_ - :type lims: tuple[float] | None - :param gap: _description_, defaults to 0.05 - :type gap: float, optional - :return: _description_ - :rtype: tuple[float] | None - """ - if lims is None: - return None - else: - new_lims = ( - lims[0] * (1 + gap) - gap * lims[1], - lims[1] * (1 + gap) - gap * lims[0], - ) - return new_lims - - def __init__(self, **kwargs: Any) -> None: - """ - Initializes a MyFigure object with custom or default settings for creating plots. - - :param kwargs: Keyword arguments to override default figure settings. - """ - self.broad_props: dict[str, list] = {} # broadcasted properties for each axis - self.kwargs = self.default_kwargs() - self.kwargs.update(kwargs) # Override defaults with any kwargs provided - self.process_kwargs() - - sns.set_palette(self.kwargs["color_palette"]) - sns.set_style( - self.kwargs["sns_style"], {"font.family": self.kwargs["text_font"]} - ) - - self.create_figure() - - self.update_axes_single_props() - - self.update_axes_list_props() - - def default_kwargs(self) -> Dict[str, Any]: - """ - Defines the default settings for the figure. - - :return: A dictionary of default settings. 
- """ - defaults = { - "rows": 1, - "cols": 1, - "width": 6.0, - "height": 6.0, - "x_lab": None, - "y_lab": None, - "x_lim": None, - "y_lim": None, - "x_ticks": None, - "y_ticks": None, - "x_ticklabels": None, - "y_ticklabels": None, - "twinx": False, - "yt_lab": None, - "yt_lim": None, - "yt_ticks": None, - "yt_ticklabels": None, - "legend": True, - "legend_loc": "best", - "legend_ncols": 1, - "annotate_lttrs": False, - "annotate_lttrs_xy": None, - "grid": False, - "color_palette": "deep", - "text_font": "Dejavu Sans", - "sns_style": "ticks", - } - return defaults - - def process_kwargs(self) -> None: - """ - Validates and processes the provided keyword arguments for figure configuration. - - - :raises ValueError: _description_ - :raises ValueError: _description_ - :raises ValueError: _description_ - :raises ValueError: _description_ - :raises ValueError: _description_ - """ - self.kwargs["rows"] = int(self.kwargs["rows"]) - self.kwargs["cols"] = int(self.kwargs["cols"]) - self.kwargs["width"] = float(self.kwargs["width"]) - self.kwargs["height"] = float(self.kwargs["height"]) - self.kwargs["legend_ncols"] = int(self.kwargs["legend_ncols"]) - - if self.kwargs["rows"] <= 0: - raise ValueError("Number of rows must be positive.") - if self.kwargs["cols"] <= 0: - raise ValueError("Number of cols must be positive.") - if self.kwargs["width"] <= 0: - raise ValueError("Width must be positive.") - if self.kwargs["height"] <= 0: - raise ValueError("Height must be positive.") - if self.kwargs["legend_ncols"] <= 0: - raise ValueError("Number of legend columns must be positive.") - - def create_figure(self) -> MyFigure: - """ - Creates the figure and its axes. 
- - :return: _description_ - :rtype: MyFigure - """ - self.fig: Figure - self.axs: Axes - self.axts: Axes | None - self.fig, axes = plt.subplots( - self.kwargs["rows"], - self.kwargs["cols"], - figsize=(self.kwargs["width"], self.kwargs["height"]), - constrained_layout=True, - ) - # Ensure ax is always an array, even if it's just one subplot - self.axs: list[Axes] = np.atleast_1d(axes).flatten().tolist() - if self.kwargs["twinx"]: - self.axts: list[Axes] = [a.twinx() for a in self.axs] - - self.n_axs = len(self.axs) - return self - - def save_figure( - self, - filename: str = "figure", - out_path: plib.Path | None = plib.Path("."), - tight_layout: bool = True, - save_as_png: bool = True, - save_as_pdf: bool = False, - save_as_svg: bool = False, - save_as_eps: bool = False, - png_transparency: bool = False, - ) -> None: - """_summary_ - - :param filename: _description_, defaults to "figure" - :type filename: str, optional - :param out_path: _description_, defaults to plib.Path(".") - :type out_path: plib.Path | None, optional - :param tight_layout: _description_, defaults to True - :type tight_layout: bool, optional - :param save_as_png: _description_, defaults to True - :type save_as_png: bool, optional - :param save_as_pdf: _description_, defaults to False - :type save_as_pdf: bool, optional - :param save_as_svg: _description_, defaults to False - :type save_as_svg: bool, optional - :param save_as_eps: _description_, defaults to False - :type save_as_eps: bool, optional - :param png_transparency: _description_, defaults to False - :type png_transparency: bool, optional - """ - self.update_axes_single_props() - - self.update_axes_list_props() - - self.add_legend() - try: - self.fig.align_labels() # align labels of subplots, needed only for multi plot - except AttributeError: - print("align_labels not performed") - self.annotate_letters() - # Saving the figure - formats = { - "png": save_as_png, - "pdf": save_as_pdf, - "svg": save_as_svg, - "eps": save_as_eps, - } - 
- for fmt, should_save in formats.items(): - if should_save: - full_path = plib.Path(out_path, f"{filename}.{fmt}") - self.fig.savefig( - full_path, - dpi=300, - transparent=png_transparency, - bbox_inches="tight" if tight_layout else None, - ) - - def add_legend(self) -> None: - """_summary_""" - for sprop in ["legend", "legend_loc", "legend_ncols"]: - self.broad_props[sprop] = self._broadcast_value_prop( - self.kwargs[sprop], sprop - ) - - if self.kwargs["twinx"] is False: - for i, ax in enumerate(self.axs): - if self.broad_props["legend"][i]: - ax.legend( - loc=self.broad_props["legend_loc"][i], - ncol=self.broad_props["legend_ncols"][i], - ) - else: - for i, (ax, axt) in enumerate(zip(self.axs, self.axts)): - if self.broad_props["legend"][i]: - hnd_ax, lab_ax = ax.get_legend_handles_labels() - hnd_axt, lab_axt = axt.get_legend_handles_labels() - ax.legend( - hnd_ax + hnd_axt, - lab_ax + lab_axt, - loc=self.broad_props["legend_loc"][i], - ncol=self.broad_props["legend_ncols"][i], - ) - - def annotate_letters(self) -> None: - """_summary_""" - if self.kwargs["annotate_lttrs_xy"] is not None and isinstance( - self.kwargs["annotate_lttrs_xy"], - (list, tuple) and len(self.kwargs["annotate_lttrs_xy"]) >= 2, - ): - xylttrs: list | tuple = self.kwargs["annotate_lttrs_xy"] - x_lttrs = xylttrs[0] # pylint: disable=unsubscriptable-object - y_lttrs = xylttrs[1] # pylint: disable=unsubscriptable-object - else: - x_lttrs = -0.15 - y_lttrs = -0.15 - if self.kwargs["annotate_lttrs"] is not False: - if isinstance(self.kwargs["annotate_lttrs"], str): - letters_list = [self.kwargs["annotate_lttrs"]] - elif isinstance(self.kwargs["annotate_lttrs"], list, tuple): - letters_list = self.kwargs["annotate_lttrs"] - for i, ax in enumerate(self.axs): - ax.annotate( - f"({letters_list[i]})", - xycoords="axes fraction", - xy=(0, 0), - xytext=(x_lttrs, y_lttrs), - weight="bold", - ) - - def create_inset( - self, - ax: Axes, - ins_x_loc: list[float, float], - ins_y_loc: list[float, float], 
- ins_x_lim: list[float, float], - ins_y_lim: list[float, float], - ) -> Axes: - """_summary_ - - :param ax: _description_ - :type ax: Axes - :param ins_x_loc: _description_ - :type ins_x_loc: list[float, float] - :param ins_y_loc: _description_ - :type ins_y_loc: list[float, float] - :param ins_x_lim: _description_ - :type ins_x_lim: list[float, float] - :param ins_y_lim: _description_ - :type ins_y_lim: list[float, float] - :return: _description_ - :rtype: Axes - """ - wdt = ins_x_loc[1] - ins_x_loc[0] - hgt = ins_y_loc[1] - ins_y_loc[0] - inset = ax.inset_axes([ins_x_loc[0], ins_y_loc[0], wdt, hgt]) - - inset.set_xlim(MyFigure._adjust_lims(ins_x_lim)) - inset.set_ylim(MyFigure._adjust_lims(ins_y_lim)) - return inset - - def update_axes_single_props(self): - """_summary_""" - for sprop in ["x_lab", "y_lab", "yt_lab", "grid"]: - self.broad_props[sprop] = self._broadcast_value_prop( - self.kwargs[sprop], sprop - ) - - # Update each axis with the respective properties - for i, ax in enumerate(self.axs): - ax.set_xlabel(self.broad_props["x_lab"][i]) - ax.set_ylabel(self.broad_props["y_lab"][i]) - if self.broad_props["grid"][i] is not None: - ax.grid(self.broad_props["grid"][i]) - - if self.kwargs["twinx"]: - for i, axt in enumerate(self.axts): - axt.set_ylabel(self.broad_props["yt_lab"][i]) - - def update_axes_list_props(self): - """_summary_""" - for lprop in [ - "x_lim", - "y_lim", - "yt_lim", - "x_ticks", - "y_ticks", - "yt_ticks", - "x_ticklabels", - "y_ticklabels", - "yt_ticklabels", - ]: - self.broad_props[lprop] = self._broadcast_list_prop( - self.kwargs[lprop], lprop - ) - - # Update each axis with the respective properties - for i, ax in enumerate(self.axs): - if self.broad_props["x_lim"][i] is not None: - ax.set_xlim(MyFigure._adjust_lims(self.broad_props["x_lim"][i])) - if self.broad_props["y_lim"][i] is not None: - ax.set_ylim(MyFigure._adjust_lims(self.broad_props["y_lim"][i])) - if self.broad_props["x_ticks"][i] is not None: - 
ax.set_xticks(self.broad_props["x_ticks"][i]) - if self.broad_props["y_ticks"][i] is not None: - ax.set_yticks(self.broad_props["y_ticks"][i]) - if self.broad_props["x_ticklabels"][i] is not None: - ax.set_xticklabels(self.broad_props["x_ticklabels"][i]) - if self.broad_props["y_ticklabels"][i] is not None: - ax.set_yticklabels(self.broad_props["y_ticklabels"][i]) - - if self.kwargs["twinx"]: - for i, axt in enumerate(self.axts): - if self.broad_props["yt_lim"][i] is not None: - axt.set_ylim(MyFigure._adjust_lims(self.broad_props["yt_lim"][i])) - if self.broad_props["yt_ticks"][i] is not None: - axt.set_yticks(self.broad_props["yt_ticks"][i]) - if self.broad_props["yt_ticklabels"][i] is not None: - axt.set_yticklabels(self.broad_props["yt_ticklabels"][i]) - - def _broadcast_value_prop( - self, prop: list | str | float | int | bool, prop_name: str - ) -> list: - """_summary_ - - :param prop: _description_ - :type prop: list | str | float | int | bool - :param prop_name: The name of the property for error messages. - :type prop_name: str - :raises ValueError: _description_ - :return: _description_ - :rtype: list - """ - if prop is None: - return [None] * self.n_axs - if isinstance(prop, (list, tuple)): - if len(prop) == self.n_axs: - return prop - else: - raise ValueError( - f"The size of the property '{prop_name}' does not match the number of axes." - ) - if isinstance(prop, (str, float, int, bool)): - return [prop] * self.n_axs - - def _broadcast_list_prop(self, prop: list | None, prop_name: str): - """_summary_ - - :param prop: _description_ - :type prop: list | None - :param prop_name: The name of the property for error messages. 
- :type prop_name: str - :raises ValueError: _description_ - :return: _description_ - :rtype: _type_ - """ - if prop is None: - return [None] * self.n_axs - - if ( - all(isinstance(item, (list, tuple)) for item in prop) - and len(prop) == self.n_axs - ): - return prop - elif isinstance(prop, (list, tuple)) and all( - isinstance(item, (int, float, str)) for item in prop - ): - return [prop] * self.n_axs - else: - raise ValueError( - f"The structure of '{prop_name = }' does not match expected pair-wise input." - ) diff --git a/src/gcms_data_analysis/plotting.py b/src/gcms_data_analysis/plotting.py index 1c75484..2c3f143 100644 --- a/src/gcms_data_analysis/plotting.py +++ b/src/gcms_data_analysis/plotting.py @@ -1,14 +1,17 @@ from __future__ import annotations -from typing import Literal +from typing import Literal, Any, Dict import string import pathlib as plib import pandas as pd import seaborn as sns import numpy as np from matplotlib.transforms import blended_transform_factory -from gcms_data_analysis.my_figure import MyFigure +from matplotlib.figure import Figure +from matplotlib.axes import Axes +import matplotlib.pyplot as plt from gcms_data_analysis.main import Project + lttrs: list[str] = list(string.ascii_lowercase) # list with colors @@ -224,6 +227,418 @@ def _annotate_outliers_in_plot(ax, df_ave, df_std, y_lim): ) +class MyFigure: + """ + A class for creating and customizing figures using matplotlib and seaborn. + + MyFigure provides a structured way to create figures with multiple subplots, + allowing for detailed customization of each subplot. It supports features like + adjusting axis limits, adding legends, annotating, and creating inset plots, + all with an emphasis on easy configurability through keyword arguments. + + :ivar broad_props: A dictionary to store properties that are broadcasted across all axes. + :type broad_props: dict + :ivar kwargs: A dictionary to store all the configuration keyword arguments. 
+ :type kwargs: dict + :ivar fig: The main figure object from matplotlib. + :type fig: matplotlib.figure.Figure + :ivar axs: A list of axes objects corresponding to the subplots in the figure. + :type axs: list[matplotlib.axes.Axes] + :ivar axts: A list of twin axes objects if 'twinx' is enabled, otherwise None. + :type axts: list[matplotlib.axes.Axes] or None + :ivar n_axs: The number of axes/subplots in the figure. + :type n_axs: int + + The class is designed to work seamlessly with seaborn's styling features, + making it suitable for creating publication-quality figures with minimal code. + """ + + @staticmethod + def _adjust_lims(lims: tuple[float] | None, gap=0.05) -> tuple[float] | None: + """ + Adjusts the provided axis limits by a specified gap percentage to add padding + around the data. + + :param lims: _description_ + :type lims: tuple[float] | None + :param gap: _description_, defaults to 0.05 + :type gap: float, optional + :return: _description_ + :rtype: tuple[float] | None + """ + if lims is None: + return None + else: + new_lims = ( + lims[0] * (1 + gap) - gap * lims[1], + lims[1] * (1 + gap) - gap * lims[0], + ) + return new_lims + + def __init__(self, **kwargs: Any) -> None: + """ + Initializes a MyFigure object with custom or default settings for creating plots. + + :param kwargs: Keyword arguments to override default figure settings. + """ + self.broad_props: dict[str, list] = {} # broadcasted properties for each axis + self.kwargs = self.default_kwargs() + self.kwargs.update(kwargs) # Override defaults with any kwargs provided + self.process_kwargs() + + sns.set_palette(self.kwargs["color_palette"]) + sns.set_style( + self.kwargs["sns_style"], {"font.family": self.kwargs["text_font"]} + ) + + self.create_figure() + + self.update_axes_single_props() + + self.update_axes_list_props() + + def default_kwargs(self) -> Dict[str, Any]: + """ + Defines the default settings for the figure. + + :return: A dictionary of default settings. 
+ """ + defaults = { + "rows": 1, + "cols": 1, + "width": 6.0, + "height": 6.0, + "x_lab": None, + "y_lab": None, + "x_lim": None, + "y_lim": None, + "x_ticks": None, + "y_ticks": None, + "x_ticklabels": None, + "y_ticklabels": None, + "twinx": False, + "yt_lab": None, + "yt_lim": None, + "yt_ticks": None, + "yt_ticklabels": None, + "legend": True, + "legend_loc": "best", + "legend_ncols": 1, + "annotate_lttrs": False, + "annotate_lttrs_xy": None, + "grid": False, + "color_palette": "deep", + "text_font": "Dejavu Sans", + "sns_style": "ticks", + } + return defaults + + def process_kwargs(self) -> None: + """ + Validates and processes the provided keyword arguments for figure configuration. + + + :raises ValueError: _description_ + :raises ValueError: _description_ + :raises ValueError: _description_ + :raises ValueError: _description_ + :raises ValueError: _description_ + """ + self.kwargs["rows"] = int(self.kwargs["rows"]) + self.kwargs["cols"] = int(self.kwargs["cols"]) + self.kwargs["width"] = float(self.kwargs["width"]) + self.kwargs["height"] = float(self.kwargs["height"]) + self.kwargs["legend_ncols"] = int(self.kwargs["legend_ncols"]) + + if self.kwargs["rows"] <= 0: + raise ValueError("Number of rows must be positive.") + if self.kwargs["cols"] <= 0: + raise ValueError("Number of cols must be positive.") + if self.kwargs["width"] <= 0: + raise ValueError("Width must be positive.") + if self.kwargs["height"] <= 0: + raise ValueError("Height must be positive.") + if self.kwargs["legend_ncols"] <= 0: + raise ValueError("Number of legend columns must be positive.") + + def create_figure(self) -> MyFigure: + """ + Creates the figure and its axes. 
+ + :return: _description_ + :rtype: MyFigure + """ + self.fig: Figure + self.axs: Axes + self.axts: Axes | None + self.fig, axes = plt.subplots( + self.kwargs["rows"], + self.kwargs["cols"], + figsize=(self.kwargs["width"], self.kwargs["height"]), + constrained_layout=True, + ) + # Ensure ax is always an array, even if it's just one subplot + self.axs: list[Axes] = np.atleast_1d(axes).flatten().tolist() + if self.kwargs["twinx"]: + self.axts: list[Axes] = [a.twinx() for a in self.axs] + + self.n_axs = len(self.axs) + return self + + def save_figure( + self, + filename: str = "figure", + out_path: plib.Path | None = plib.Path("."), + tight_layout: bool = True, + save_as_png: bool = True, + save_as_pdf: bool = False, + save_as_svg: bool = False, + save_as_eps: bool = False, + png_transparency: bool = False, + ) -> None: + """_summary_ + + :param filename: _description_, defaults to "figure" + :type filename: str, optional + :param out_path: _description_, defaults to plib.Path(".") + :type out_path: plib.Path | None, optional + :param tight_layout: _description_, defaults to True + :type tight_layout: bool, optional + :param save_as_png: _description_, defaults to True + :type save_as_png: bool, optional + :param save_as_pdf: _description_, defaults to False + :type save_as_pdf: bool, optional + :param save_as_svg: _description_, defaults to False + :type save_as_svg: bool, optional + :param save_as_eps: _description_, defaults to False + :type save_as_eps: bool, optional + :param png_transparency: _description_, defaults to False + :type png_transparency: bool, optional + """ + self.update_axes_single_props() + + self.update_axes_list_props() + + self.add_legend() + try: + self.fig.align_labels() # align labels of subplots, needed only for multi plot + except AttributeError: + print("align_labels not performed") + self.annotate_letters() + # Saving the figure + formats = { + "png": save_as_png, + "pdf": save_as_pdf, + "svg": save_as_svg, + "eps": save_as_eps, + } + 
+ for fmt, should_save in formats.items(): + if should_save: + full_path = plib.Path(out_path, f"{filename}.{fmt}") + self.fig.savefig( + full_path, + dpi=300, + transparent=png_transparency, + bbox_inches="tight" if tight_layout else None, + ) + + def add_legend(self) -> None: + """_summary_""" + for sprop in ["legend", "legend_loc", "legend_ncols"]: + self.broad_props[sprop] = self._broadcast_value_prop( + self.kwargs[sprop], sprop + ) + + if self.kwargs["twinx"] is False: + for i, ax in enumerate(self.axs): + if self.broad_props["legend"][i]: + ax.legend( + loc=self.broad_props["legend_loc"][i], + ncol=self.broad_props["legend_ncols"][i], + ) + else: + for i, (ax, axt) in enumerate(zip(self.axs, self.axts)): + if self.broad_props["legend"][i]: + hnd_ax, lab_ax = ax.get_legend_handles_labels() + hnd_axt, lab_axt = axt.get_legend_handles_labels() + ax.legend( + hnd_ax + hnd_axt, + lab_ax + lab_axt, + loc=self.broad_props["legend_loc"][i], + ncol=self.broad_props["legend_ncols"][i], + ) + + def annotate_letters(self) -> None: + """_summary_""" + if self.kwargs["annotate_lttrs_xy"] is not None and isinstance( + self.kwargs["annotate_lttrs_xy"], + (list, tuple) and len(self.kwargs["annotate_lttrs_xy"]) >= 2, + ): + xylttrs: list | tuple = self.kwargs["annotate_lttrs_xy"] + x_lttrs = xylttrs[0] # pylint: disable=unsubscriptable-object + y_lttrs = xylttrs[1] # pylint: disable=unsubscriptable-object + else: + x_lttrs = -0.15 + y_lttrs = -0.15 + if self.kwargs["annotate_lttrs"] is not False: + if isinstance(self.kwargs["annotate_lttrs"], str): + letters_list = [self.kwargs["annotate_lttrs"]] + elif isinstance(self.kwargs["annotate_lttrs"], list, tuple): + letters_list = self.kwargs["annotate_lttrs"] + for i, ax in enumerate(self.axs): + ax.annotate( + f"({letters_list[i]})", + xycoords="axes fraction", + xy=(0, 0), + xytext=(x_lttrs, y_lttrs), + weight="bold", + ) + + def create_inset( + self, + ax: Axes, + ins_x_loc: list[float, float], + ins_y_loc: list[float, float], 
+ ins_x_lim: list[float, float], + ins_y_lim: list[float, float], + ) -> Axes: + """_summary_ + + :param ax: _description_ + :type ax: Axes + :param ins_x_loc: _description_ + :type ins_x_loc: list[float, float] + :param ins_y_loc: _description_ + :type ins_y_loc: list[float, float] + :param ins_x_lim: _description_ + :type ins_x_lim: list[float, float] + :param ins_y_lim: _description_ + :type ins_y_lim: list[float, float] + :return: _description_ + :rtype: Axes + """ + wdt = ins_x_loc[1] - ins_x_loc[0] + hgt = ins_y_loc[1] - ins_y_loc[0] + inset = ax.inset_axes([ins_x_loc[0], ins_y_loc[0], wdt, hgt]) + + inset.set_xlim(MyFigure._adjust_lims(ins_x_lim)) + inset.set_ylim(MyFigure._adjust_lims(ins_y_lim)) + return inset + + def update_axes_single_props(self): + """_summary_""" + for sprop in ["x_lab", "y_lab", "yt_lab", "grid"]: + self.broad_props[sprop] = self._broadcast_value_prop( + self.kwargs[sprop], sprop + ) + + # Update each axis with the respective properties + for i, ax in enumerate(self.axs): + ax.set_xlabel(self.broad_props["x_lab"][i]) + ax.set_ylabel(self.broad_props["y_lab"][i]) + if self.broad_props["grid"][i] is not None: + ax.grid(self.broad_props["grid"][i]) + + if self.kwargs["twinx"]: + for i, axt in enumerate(self.axts): + axt.set_ylabel(self.broad_props["yt_lab"][i]) + + def update_axes_list_props(self): + """_summary_""" + for lprop in [ + "x_lim", + "y_lim", + "yt_lim", + "x_ticks", + "y_ticks", + "yt_ticks", + "x_ticklabels", + "y_ticklabels", + "yt_ticklabels", + ]: + self.broad_props[lprop] = self._broadcast_list_prop( + self.kwargs[lprop], lprop + ) + + # Update each axis with the respective properties + for i, ax in enumerate(self.axs): + if self.broad_props["x_lim"][i] is not None: + ax.set_xlim(MyFigure._adjust_lims(self.broad_props["x_lim"][i])) + if self.broad_props["y_lim"][i] is not None: + ax.set_ylim(MyFigure._adjust_lims(self.broad_props["y_lim"][i])) + if self.broad_props["x_ticks"][i] is not None: + 
ax.set_xticks(self.broad_props["x_ticks"][i]) + if self.broad_props["y_ticks"][i] is not None: + ax.set_yticks(self.broad_props["y_ticks"][i]) + if self.broad_props["x_ticklabels"][i] is not None: + ax.set_xticklabels(self.broad_props["x_ticklabels"][i]) + if self.broad_props["y_ticklabels"][i] is not None: + ax.set_yticklabels(self.broad_props["y_ticklabels"][i]) + + if self.kwargs["twinx"]: + for i, axt in enumerate(self.axts): + if self.broad_props["yt_lim"][i] is not None: + axt.set_ylim(MyFigure._adjust_lims(self.broad_props["yt_lim"][i])) + if self.broad_props["yt_ticks"][i] is not None: + axt.set_yticks(self.broad_props["yt_ticks"][i]) + if self.broad_props["yt_ticklabels"][i] is not None: + axt.set_yticklabels(self.broad_props["yt_ticklabels"][i]) + + def _broadcast_value_prop( + self, prop: list | str | float | int | bool, prop_name: str + ) -> list: + """_summary_ + + :param prop: _description_ + :type prop: list | str | float | int | bool + :param prop_name: The name of the property for error messages. + :type prop_name: str + :raises ValueError: _description_ + :return: _description_ + :rtype: list + """ + if prop is None: + return [None] * self.n_axs + if isinstance(prop, (list, tuple)): + if len(prop) == self.n_axs: + return prop + else: + raise ValueError( + f"The size of the property '{prop_name}' does not match the number of axes." + ) + if isinstance(prop, (str, float, int, bool)): + return [prop] * self.n_axs + + def _broadcast_list_prop(self, prop: list | None, prop_name: str): + """_summary_ + + :param prop: _description_ + :type prop: list | None + :param prop_name: The name of the property for error messages. 
+ :type prop_name: str + :raises ValueError: _description_ + :return: _description_ + :rtype: _type_ + """ + if prop is None: + return [None] * self.n_axs + + if ( + all(isinstance(item, (list, tuple)) for item in prop) + and len(prop) == self.n_axs + ): + return prop + elif isinstance(prop, (list, tuple)) and all( + isinstance(item, (int, float, str)) for item in prop + ): + return [prop] * self.n_axs + else: + raise ValueError( + f"The structure of '{prop_name = }' does not match expected pair-wise input." + ) + + def plot_pave_std( proj: Project, filename: str = "plot", From 037c0ca3e70595bc3a56ed2a208eeca321a222ef Mon Sep 17 00:00:00 2001 From: mpecchi Date: Sun, 24 Mar 2024 20:56:15 -0400 Subject: [PATCH 3/7] fixed name_to_properties and its testing --- .../data/classifications_codes_fractions.xlsx | Bin 11719 -> 11703 bytes .../checked_compounds_properties.xlsx | Bin 0 -> 10552 bytes .../classifications_codes_fractions.xlsx | Bin 0 -> 11780 bytes .../example_name_to_properties.py | 302 +++ pytest.ini | 2 +- src/gcms_data_analysis/__init__.py | 2 +- src/gcms_data_analysis/fragmenter.py | 776 +++++++ src/gcms_data_analysis/main.py | 2062 ++--------------- tests/conftest.py | 119 +- .../compounds_properties.xlsx | Bin 8247 -> 12194 bytes .../checked_compounds_properties_correct.xlsx | Bin 0 -> 10552 bytes .../classifications_codes_fractions.xlsx | Bin 0 -> 11780 bytes tests/test_fragmenter.py | 50 + tests/test_minimal_case.py | 0 tests/test_name_to_properties.py | 98 + 15 files changed, 1450 insertions(+), 1961 deletions(-) create mode 100644 example/name_to_properties/data_name_to_properties/checked_compounds_properties.xlsx create mode 100644 example/name_to_properties/data_name_to_properties/classifications_codes_fractions.xlsx create mode 100644 example/name_to_properties/example_name_to_properties.py create mode 100644 src/gcms_data_analysis/fragmenter.py create mode 100644 tests/data_name_to_properties/checked_compounds_properties_correct.xlsx create mode 
100644 tests/data_name_to_properties/classifications_codes_fractions.xlsx create mode 100644 tests/test_fragmenter.py create mode 100644 tests/test_minimal_case.py create mode 100644 tests/test_name_to_properties.py diff --git a/example/data/classifications_codes_fractions.xlsx b/example/data/classifications_codes_fractions.xlsx index 5f69cee2d5ac047d8db884a6ee183bf013179d74..ab959e71f7fbb60210317a0712c305732474d98c 100644 GIT binary patch delta 4016 zcmY+HWmweR*2aex=^8pz5Cn$qaOfD~p+Q0#1gRNXkiUSG0s}J$N-D#Egrt;o_sBy? z3?(59A)taFbvWmX@4G+jYhUYL>&v=-``T*{*frW!&yZ8n8eW7slYl_~pjXIQ0V`jB zi~{{-)WiivZ>YCnw}Htd%k#J8QOr_fM=K<)tv6~%?=^@HCK&d{|Vy>X_@_Y?( zXMLoOUK|VxkgXT_fM>g9Y4PzEu&wbiBi2p8O;bj6gQL{S%&hE%+2Yh-;37!&IXprw zaYK>KrQU^XOzU6Sj2$AaiR?!bj+Pac6d#KfC@OFLz&_5?kb$VF)MXPK=)CQW7#ypb z)7G2G8yjaE8yN2sMb=QohBi@Z30limq-i9*Y6m@%VSBm?4Poft*Qi!a0lLEVT=@wi z(uyj!06(x@a!@4ptf|Tg6UM={|JBl;6o?Wdx_46@9Tl6cP*rXe{tKmn|Q+Ydl|p z80|uT6{PCltS4b64t3(!1`PA3W44C3-Bd~S)h$^x5VkWPn6Z<)Viwx`zGQ189kz+l zD@8g+%Iqnb5gP&!<)HV>abRT!%G58~m`s6x-Sk&_jV0r@GG%;?(&x0H)!Dh3#>N*P!AvtXktV%QDdY;yaaNM6s10<8P)ENzH`Nl93 z!P3jY176u-4E>kMQuC$?u*UZdx>gi&Fa2$b2p_h-AjH#+QVqi|NAA}2{*c;q^NQckaqO#E$=RY zQ+#&%GkE{y#*&OHHTTl!(;#1i{%^G54y*U06I{tkU&ehLof0vw-Z4rhU)<)zp0VqV zbhb_*%|c}kbK$9hiC=nyCuG=^qcV}_^qzIi1pt(BjIZ*((=6TXse)pyP!ql4_&>ou z{gE9J3-&ae5i`rCitQuGB4?9O^VS63{_OnHTxfo&!dc{5Kt%EPv2*9sjDqsqh2jT@ z_9#SXR^-#yfTDf$rTp7*DZZ~ByV6M7E3(Q1h+A}-f+U*DWUy>lczelmuc0kul)l2r(08-C?9X>@5`C)|AC2U1^SDYcLp#{P zbb^*>5;l=w^rX;@d(k>u8glsDDBvN|Bg1AOCuZhTbZ+}IEZqo_7p&_W(m)(QN|`d| zwPBwl>7h;b0v!5punIh@|3eW#A``@?&ek7;|PP*0`CUb@QKgw#4I^ zO@2JxYu{e;Ov4Yq^I_!Q0;U>aR;?;AEVCFP?jpk6170TaJ3%RiTuG=)d{5?DNqfkH zrkkE?d@w;cENRxRWcssnxZd~4>Ju@VwVGuln$NI@Ao@r5E~3RSS@WBUULhe;Dra?q zyu!}Ff7|GE2-KyZc0fN7mYaM(`}!kMZn(m^!up@f zjn9*Woq>TWB;D`NSLZF^m({`ev5~BS&*Hx(&#D(ALk|xL7ra+bRoYbY?gJOqdm;CN zw;dPmFF@g2`&_`;AI_~{@n0RwJU=cL8xzi2N_eXjrmD7%L{g7L;t4FFzt~IYwrO}X zTuP9$UAh^zhFQxw?XRaD8T*X)xm8=68NZGwlmCH1ecP`tW+I;aESXH$%i5eAA)Mm9 
z&Z^;2Gm5`){!#doP_ez^Gu4Z&5C{2IjQnYDYW5=!-`+}(t+`7GOW*4gWui_sYsh>zMtSapD&lPtLCqkuLLN<8L z3F#sjfqS(8J)0DE`z|RD!!4~KUE-h>Mz(Pas>dl4ff6Seo3C_tXVuHo+6La4Mt!Hb z6tX&rT?KCUQEOi`b?o6Bg!5eCNl6}#;+btI9`EC3Ty3OTz+r+&3A5`1CdbWh=}W&o zj;LGtR_4{vPPUxn%@vLhPIHs!gu(p&|op4D+oLx#D_$&?@Odey#GqBL(z&V$C9K)KZHoIW@C=JQHN7%)zu( zpyWDwYKklsEhP(YcTZD+8Bpq zC`(g-qjjLuh^cgk%NX*qX{T^~!i*wFdazsOp_Ea-Z<|?yu)&Pfm`eh<_z>lFJgs6O z?3X7LvdCtf@7s1a-0eC%!C1QS1T&sQJ65v1C39 zHa5K>jVPu}PBZ+tmqebdel48aPx2%*O90E!XSJ8@q|QkT#w7Y+)}OZsnc*v5c6G~f ziHhD{5DIsEU{59gQlyN`FGk6JJIybnwF;*_Gch)Sh1zVz$Bqao6>$obc#BnHbFcWGE|unC8~coHu46|1 zhn|mjHMO^$`2l5V44H>Q6SaNtp++ff4jXeTZncRhwY4t`n2D+U=v8>FhSkoxURad< zY^vcqr9n>yx#|!bB9*(}AN0Fg-fA{Q*n;lDo)OB+3J?0N-QB;9y(tS2Gd8B`)Cw-P z7=xC2EEt&hVAMYz7-XDJti_v@;;g}*4-0DO3ga9Lw{a`YbLt~J>?!N=qLN^rq%r}f z@`+M6q%v^hg&#V9NJEUv9^9E2Y%#)fh+dZyajFfVrl3OF=+*BYZso5B^9kkplrL5~TdlTgp0ZP&gXOC@ZZ}{`e4_i{r^G|1Yjl80O zP`GiTX2Q85q-!vzTfMl;VIGACv+=Qgk-bLMnIyF|Ln(;qZbkiQ!{B{`5hBHeb-Ctj zNN!J;J6Cy*BMjH1a*dT`9a-QtR$8N`h<;Q4dYTU&R2%;VF9Wkt_5g;+nU=H@0uo~5 zbc?tIJV_B|c;;N6Xf9eXxLnX9Bl&k1l6i3uLx!j=e7&7rn0csklD@8^J^gD19h19V z=wh|UxnP|>|Ngx#o8Zof>_JeHS^0FqRDnXTF09`-9wR8GbUHf9QZaOm{|e$7+9<5j za}G0q+J&7rlRM@auFB~=l|?{_kT}%tHRM-!S0Ai?+p8hEgz8>M^eedL{+3(!=h^Y! zV5hGuE>zY^SS!4m8S88|BhvWJ0^c8I>wLcNkhJSEu$Uh#c-oYGh$=GW?D>pmWDEWx zYf0lSmwlvjxk+i|lhM@mMng@ilwT;FYWWZo?|O5=Bgo{q24CAW*obstWhKBdW(;oF2 zKCLSHLIjpxW8tHMEL795tHa5HoJ!>SRtYZ{-(zu3)w<+l-Gn}cw`9rLe2JnJ<7L9P z!}_KK;a>(?MSwROKwTnukmhFiIf;!v5eRffhPLI`A|7``zvVxY|N2Q4IH&Z}Wi#$M zd*SL-8eLjrEp!ZcuOH1XJ-yDpx*`FY7?6-MgUCTQuAlx}pbyza5vFxG0e&JN66I*n zBf;FaOxB?}6p?c|7a!k1@=8^a&Bip6YG@JcGm)m%5*0BS@`bi42b!0lSu-xs&&o*IoTJ}KCeX~ArlJlhmoD~T(52p66MK962{MHRP-No z2SumiykctHb=C6qrU4)hP*SlKh`PZoG&wsrB{bjrP6CA!=ekL<|A_HR11A4g#@V z+wyniP@}7;InWBC!o;Uww3{fDI*l6y3iA;U3<~#gM<0r^peb(AQ~fR2LNAL7a{i?Q t1fu^dUY{ho|L6KO4<5uI5X1k|-=iHN_lYFX`H(y0Yr_AP{sTjYigEw| delta 3986 zcmY*cWn9zy8yz4yK)j3%Hc|$NlF}haNq3i&)CdXb_+3g$as#A=!6@klC6rDPkkO^! 
z2o;r({Of&l@BexA{haeXZ_YU{o~PAv(6VlhjPkKfGo3jJ2&9N!BZC3P9*?n-beG}p zFUb1>P`My=y=jK%I#`9dibw78nNj;1q{_B&Ks4Uuc=a(g$MQp+GIto4cmb1qL6-B_ zr!s3}i&`x)tAuQAfjST2wBfwO?d*$f>m4okswtz7O_<2>y?btZx~AP%{_Pn%pVUWS zV34v%Be;5(u88}NnF^2#HWwkvNit%XtuvY|uVBCi84BU5{%U6=FE6&j-%~7~=uOR~ znKZlE*kZ4E%$|^86m_QEyX$nns=;^dcICr`h2clWBz^RP?JEHg5;>P1! z*?!)Zv?qORd+q8joCxJ-MadDwru-wNtNm#9)6=ZIuCf}H-BWP2R;k9~ZC5NzYL2gP z-ob2yKZHlmgQfM=wQ7W|MBUVE+JqY^xID}Vg}XNEE)YXj$zpY7Y~hFhp-3EGjX0|Q zl%Vhx6vNKU=n4himpN(s*)>=0p=a)!IvP-#WHzU_8}19{MBX;h6bKHVO(o%ViLVTn zhTmg-#%8`!Tx1AuTG>^kdM-C<+Uml)V}fP7rKNlJiAPOK1g&%}XxCwP^(tS9LO+H? zf6l`ETbEW%ky-0c1`X9nVLbtP(c`~Hl!hGWNT-o;)5DU$KzvF)Y%ooRqpuK$1`XnK zm0jVaUfK%2PqRy^1!zlzhC~)g$nwffo=CV*&{`IB2OY4KrAgQ*lODgGVl?Y!`#?+iK!Bnp|1x5`14N}$ zAAq^&Jpp`)bm#&5=y$vW?b#PBY^>y4ebjjMb;47HIjNZNZHy`7#d%5!Y3TbEp3 z6}j!B^&S=P@`^I02qTj2qb}Li8IHIKFO51GF{pc4En<1R$B_0*|0Qw&Td^C+tIm?5 zRZl+{v3;U0{e2LbEf_j9|L%&+`$z8`+sKaPXJ$Ll;K3&CR@(9BtR<@JI2f}cp!)e6 zw{Q`&|5f2Se<)bfALktm;MUJacRc-Y`-dA}4mj@3Nm}%)Kj{j4;!A(w*Lot&9C0a9 zM1i8UzGuWrQ+QPcfEx9w%!p~nB5>mhR3MOtCJ4j`LeuiWfjsnL@`4cE)30|9S@>4z zc{tYTh^qWe%kLZ(?;I>h($~dp&WOhv&jzg4)7x^D@LNMO zw?yOZHD284z>Ej$0HHRt+!(XL-1NDQ2&ph)&9Bg}tK+l_?)zjtH;|(KIuaAjqz)$T z_q`DWC_K~vaK*O0wQO{15$hRcHxSd@4D5qs(d<*iw8s=xX;1~evoCw$&C+*&oMr2= zL_;%A&R?x-5Q#-=89IyRIJ04kjxtcGa{Pw(vdddju@7NdcLj!~)-pDLzy``)QymQ| zY;}!Q^rH`-Lai?`+DtI#_kI@=cInTU@S4EiFr(8J0F!<`rXg6jjm<--J20h7{r`BwRU}dIHI~lkBz0F`q~?V4*Bd4Ft7Wc$>2|=f*TqMqrZ3~99_N( zKE(if2iE&tJ8yYXcaz>~HQai;5q1puG)-a_;gPtdUr?B^Z@=XcU6U5issA2G#>bp9 zZYX4WMm5@DK4L}7l=ogv@0fNkCCV_dTBq9WIl4mogJcVkS9&)kZV4ZaxMH|A2`+aB zN*T;rFOY@ZSwB&jZE8#pI@gaxIaYS$7W^Y08QeVmV{gdbMw6#a0a2@(J}&8 z8S9MY7?`jX>1&ti9@_Um_hp)}pT?MYE%xza#;I8Eqd2ASxg2u=lK?C2s@%QMkhboS z)9uyS>A_~p+JbJm&>&p*^3%he*u?88t-$#);oEuO$@l%uV0?*aoV<4!aBQXo)Yc2i zN5~y(U7cRYtuBXlcXtMz|BBDQR0h%t@w>Xurg=O+GnJ7aJ|!>36K;?PD%bR1)7>7P z-JVV9zUn9yBpiJ6zBnX&3k^ICH(RaETcw@|`&PATav7zWyYH_F_~qI$%13wRP9C1A zEsoQX@D9q4cL}Mko^R;CzzXOWrximmSG7@DykYH~ 
znJhQO8hcs$b&m3)hMUp1AAOMv(3r6QrdmJ7CJ=5Av$UM0Ly&@>>+}5}r+4Qu9$LA; z^UAl90g%!2-ihZB*w(VZCQEm)pJJzA#*u7zuu5TpHK9;A z66)ohr$elBzbHB3rBl!7Fh@zY>^_~;-B{>@)$2{Az%6i2&j)<)jM~ zH^y8*#OTcN9KIwT)TdhnMT#28I99~i=k+jxGV=8jN8U3r{}4Av5it##b5#g`MCof1 zsMDng-*)(#Up(7vN}pl$5i4(gLN@wpoYBH|!26-P*)2*xBL~SSrxhn^e#uSA2BdMK zub+_{fx7s&ZsFpcB@l-r({*!wbD>vG+K8EPbR#sM6}7T!(|FQ)%R_eJxHu}pj+7pw z2E1_UY95EGx|gbaIcnBhUMO>A)R32e$L`;I;^xl>d~w5OAHaS_<{cQ<8h;l%_+++I z)mD*S{gYz6ql2WF6LAuUApgV@fmboED6c#GGqRA%oLJ3lU~7S8)$%!xOd=M=O2v>X z0$FrrwRk>|NIUjIy4z}{veb!p#ws5NEk7ux6k%_r4C>p7oN>fT9@15ivIf$G;=;c} zD=T`d6NTB^D1(d=#|>V#`%BS4Usm*v-E3)(k=?LDHctk_`K%11vQ?HKF|zi8X)|*1 za6a2V0N@?~mRk2wLa|BH;2ODg#65fd!d~vRl$Pu7&DKytn<{$UA!#1bd36aln)5xMNNS!5-@k=gdo1|9AGyVs!|6}=u1miI}i=$u4N!q!5y zyxHg9K0{}x#9>kJw@mJax-K(B1K!m40G4H$@bi!_pI6?9M>Hla9Q9K|piB_y-O#f= zO6?MowO6l0|GvTh$S}$>eS?llj1!;QF}7aMWqf_!=WInef{kCuFp9^N2?i^Yj(x41 zo;5FhRCM!DD7`zh0%>83BWsN?gb7CP)Ws|Gj`FdB*UU6S!@qvk%H{q;>Oui*K&gjpcbKd)d-0Bbs^lU*ls6OEQ#gfGcZc&~}k4F(8 z4TDsX#b+hX4XK(mo(Nl@&(COt^#J`*PR6#uON+*d)te)uVGtXVo)si(5?SAJ{l_yz zCNx!@*!X5miNO(t8#3U#3r99`;jX?llA%G>0{hvKwQSV=31NP+9GOn)h7uYz-m6iU zVy{VCa+S-eAp!fu;~#1&(hhkd14fL&c;r-eJl+QFb3c%?-4Lj;c*h<8f%;+1(r?LP z>Z`HMdI@^+oRnnaBWKjwBfDJq*cKPjI7n$ zNTNGB&pQO!tXt^>-kPy>x*@DO#H9MWXH7Z$m_LmvVdkxJ;vEk2A9OFugOADH{6rze znTlQjoO_3%L%+LP9PIF5D3C2wcPd}jf}IDPpg#vGeJ1hd7#Uz;q%9~%IiwF`XmnY% zwN^ousm#8SG=EqV(W)6;=?@qH>vT4}QkhK2D{YJ?G9u1(+?I8}ljr1lc^b1rhf;ej z=_Tl_v%#Vt?1xz?mc8(_YkBwEeUq6Q9$=nqMW6(K=Or^ZSe1u9{DFcEUynglF8>lC zx^m>cL2aEF7i<25wzNauiC4u|e5wC%OVK+s`b@*7C(<7%1s@At&pH$ASL7CJ`}M}8 zNH0HlJo)3XmJ&D#qQi+tEK(X9@04^o!!0OE)QEcn?BPxioHIQ=YRU7Yhr@GkKkM1qeA1E8rK>SU3tn=G7)gM8aQ+`4u^+0ZSq44 z%6mQX^=kUSYGzpdJuXxERo9m?GhSQVs`XwYrDxEOdc4HZ?MbJkyyrOesvdADbYTO3`&mssf+yA=Cpc)UM2 ztTfqNe3x$wfDX}6`EDEdYZ8G#)MRLNq%v`;6FLQX$UfCFe7#!fCI5%4TsU-PXHBxaTLN@+MGw<+Htx`ia; zo)n{*ekgc&8R|Zrs@!=QGVNc*a=Pa5Me3K&1lRnjk!dfC@eU#bD|z!4s2{;G%b_ax ziQB5EM;dpn3Qj7u(^Kr{9fJ1O4sx({hSLnF*G2H~YPL^IO-X4Bx13XmHKqmJ68wTN 
z1Vpw|w*D5|WZMHtay*K6^U@L;^tw)csI2T0ujUc@*lfOAJADOc8QO;b7X|W|~;Yr;3kHHg8hd~Lz>a-;92HB;^5AY`PoBDn1q2Fo7xaA+fAt;;V*bRV6}&q?;< H_8;nh;z(>B diff --git a/example/name_to_properties/data_name_to_properties/checked_compounds_properties.xlsx b/example/name_to_properties/data_name_to_properties/checked_compounds_properties.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..f7d745ee69ef5595d3b006a4b755fd13c77c983d GIT binary patch literal 10552 zcmeHt1y@|z(smQv8+Uhi3GVLh?k*i9!3h#1kPZ?wXmEE67F-((?jexi?tDFS=e{$O zx!*6i_w?$sSFb)*tIpo_JXO1PX(+?O;Q|lODeE{Oyek-2%iT?XKTXH%~hugr#|QaP~$9==$}R@UNx9@bia z3tFjB@$ z<s#5+CB@AtMU@pmc*Qw8uceSvvdZS1ib8Id>ebNaofs| z=U~oeoGlSb6*e^w&~T2e$FJaan+W_Oj+d`8a@<3?a!0p;L;7Wn#HKYnA<;&SklK}X zy>K8~M*X<}b68N(;83$*m->Mbr(ISr&?xO~;W>*q{Qp&!P3T;$6K_QCEm<{pyv?ME?45UCE;&52`;EtgC z;iVOk_#cB*mz%uhvDmo6G>yI$k*T+yFA$mOy;9^n%QwDY`OIF-UZ%+_`7`=-BrumX z7w0MutzP# zX%AgI`nk&fR2tr(x0U@uWx${d)x{05uC~K-$7-u=FCl7wa~qegGntG|oQD85wah^+ zYF_MX?g{w;x}1w(qk7(xku3i&0?55(gU3T*u@uFt&oF85gyJSSF($FknVy zPO@QIz>+`wuZYvFnV{%FHS0&-7@xSC{bRM=%l9 zC3k)PF58@Ek_Ri5XC9tA=bT-q@=dj`Ks3aGz8L=Y({{rir1YN818??9^1*+??{fuqGFr{Ir>Vj{`kw7{MHB`!TyegY1G_> zfKfvl_8`{Gd|8;WUIAld?qV$gPHRaKIsx!vtBZnKL~Oyo#1hc*5ox5pAZ0A6-lBo# z;%c$9H#-?cWk0~alem&-2EzcU5teNX=cRqhOp025pCm1;ZZZp78n8hx!uWtgMUk%z zFBLI(HYXIUvtCLL-~HqII0g^>kfm?^+iHr^rm9(*t|XhOrk}NYbGM6pnGq+eWM{H2 zivc{4%^txQ^U?Y-?P||I_aW-eaWy7E@;IC^>aMd7(G@f$9N^dhdlkEnYIBn=CNVC( zjXOYN38P47BWoAV`BvUcFuucO^Zkto;SCJc24nkhq$jRi9cu<0?0ocFunD9;MV_Uh znu$2XeO$_XuQCD1;plfFvjLgNE87d;^hG>#*~Iu{v;8(t5EOM{lY%9ltyZ`1+qQ5y z*}mc8oUQT|BWr~%HDd8uAP16#{9VvKnDIE*uhH~Ag`iPIlQRyXsAVF~U5Qe)$5I&i zViv3n$@gtH*{y3lS$=L}Iz1qybxl-u_!_~-^r(J89rEIp6V5emeP9un7ig+7mjHgx zYrZo@`yEHaw9i4f8Nq%h{+2k@-Tw)QP_d-cGw469P*4y9kYJ#I_*dZgD=hvUL|~vf z9TZmo-B-Dms`3|3U74<6P6h=Y1x_a`ROP(9NUJrz&T`5FmxtEs`9G6$@C zr2o;_fX@{l#wIMzMK?=v3^v?L9G4?uc=xfhF<2z4g9`F6d3YS`{U1Ne4p1@Dy>VK^ zz=N#WxPm*^jC4d%`Jbp|&ReD;d7Z{NUvN8lPm&EoNpz%sJLLAY_I^cJeNz)9qacPSt>m4y9Qn9f4a<)fwg`ObSq#^3jh#8 
zpZJ4Ay&Y_ALEap{4_v>IY36&?ghftZeP4w0v zKWg!h;3GjQjW^N@@z}366YN({;!+djR}%HB)zTVsL}Hji_0qK(mckydKWC3Jd79N1 zI;fz+i-zVWlqFE4AX@uYA{@gZSFku71g7%r*q|FV4}vB)Rcy=g$%7A4TE@c|?^IhY z6-g&P;FlJ{QZ-$1s^~HyPskne)dz(bIJ}mBsMO0MaWq;DpYA!lT#+GOp|Z*+{-!RP zgw1xA>grrR&r5jqu1E@;-zU)RF4Vd=;4b1wSK;~T6brnNbHjfRZw#f-f~D)Dp5r7# zKV7O&eVtP=MUl2@FpJBr+Wh`A)B)SG4rbI_VKH%>eSe2dXP)tg+%nq|&R!~Jt6b(? zoQaQOCm(8OYpZfTqF@|SWcbzGAfPK??nY>1qkybO*;xa+bAwpg7TOL`&tvK6bgb5{ zt9FiwW*RS%6!UQ-qDH@e+at>S0+!ZDvdy2&RS#!&B%TRpgD{)Wl~a0>CKXS8g;%X} z7=9lIMi=bV>xQB8I%M&jL%5+MymBlT~zQ@_!Ato|@YvD($GbE$>RTba3U`AE+f z`!1|sAx+khTeawkp!atA2K6_8_}wWA3?0?C<$w%ZFZ_l|O#Oriy2OG)GLRN=PLTf4sjNNpQD6-5(h- zPgpbGt*Tqh=|!9tqm=Ur?M68Jx^sD8h>gAd+)c2<^%44al#iw zRQAg+h`8*R!=r0*-oix4zIevSTCOc&>6rzCg1S$tVPHcL(j(Odhl{_2ms7o734P=$ zS6P#_l|;f4vhoZi_7FqE0CW}7lV_}VYK@}J+D@mDYjwgFiZRR8^}SxRzAZw?wy8Mt z3|+D&VTMMj&6x4h|2FNNKgKEv-<^~6i}~C<7TPsZuq>`i`A_1BZ@$yj9dE{2*uuZ+ z5f3FeZ`tyNN=2U%h;ioAD3Gf*ug5xeUkxB9s&1&Nx@LwhnyO#{=3loKF_H*K`k{R~Dk8}a9LiD$& z!=#~hcK(0^7Xh;*K^PEOQ4{huy6sBJRyR8mMr>V;JWQc<%I27yD}+4b=tIg@_xyv3 z23o5{l6YFnZEa@dUDK(su~{&Ricu*=^k61+-f-HRXtot{%^>!?(LvcHqL%2=+nWRI z7{;_#Y9&De&G;Z@>RnN%c5!^jyNlfDb>P-(9~rnI?dWkzp{U(F^0#l0y!3&l0(B26 zGLx@hA#5_fz?CM*qm0aCTLC<(?3WEzoCR$eo##AQC)SjjZ>g<}LE=2z6Owi|Z!8_O z741=m>V1=0Yb_Gm)C9k3#N#Isw@>%$>ZT=#)!v>$=K|ZvR@y!pJSSsYVurc`t|W?z zfu|}RhAc6@p1psKI6x!~(n`p#x?Bum%PJ)E;9!fBV0~kx4dNe; zyLUNXb+}#_B-^3Q|DL?0Te};kcqFu8qQGb5U=Ls6iXHk+XX%7y)wQ=y$geCgF1@sK zmv01|f7^jlW}8H>W1CHYLYjijg{ws<#Gi;F%^8Wi3|SWqIN_3EK(@V`t;bNTOeG5F zY0m~s<8QN5wTA5y=V7{Di$>g=+AJ8&-&(O?cIpws^pWX%m{X`DB5(MT1EU1RH3~j{Z-+uwOPhAKrhp?^2{`V z|2KuCeCYiHkzpjUja(xvhBS80*NF8(Ywi*?Oqz{6PBnzvqAZV@*4XwMuj$i*Iypx$ zvS%@=CH%PSm^A_1*sdnnqgYyDNze&{-iS5nBocwEK#8pcb4r(%)D8`zDAfX>@V%t97>a|?&rg^N7o99? 
zG6V&O1#{cg#Y4CMkCJSq3|9>$4_njRfRT$nAnw8uWIP&i9dU^xbeSgLiX;dVQtQ$w zRghZhCSIOj?+kl~LdRw_f3oh8(o`TQtB81HqLk9tYVP^gNJuYak8eDo>GBo| zr11m8C|Pdy+>$SBr62Wg^5|=@iu^87&zXe5$N;>fJpWH}fTF zhC0zHXS4qP9-B~pZCtKXq?55tkcO|}3&wv;bM`*bHQPbAiP|K;l{tS;b3hKZuC^S% z-+#|=_6^6}NCbeN33i2nuhdx=z*a=>V3kug`8}=Xm10OsOr27(d|N1)gj?~3Y9vDQ z93je~t1lTB5fnnqriwf>@>E0HrJ}Z#jEr<#7!Ym=$XwX2w(ci9B3oY`NDfa@VS!J} z@pL%!pUKoF%Jaq=cV`71O}z{$=rMdvG@-!q0Z3%B6!6qh<|j=@ONVr6?*-cwCQ}k% zZ7UmD9rw*P`#st!dW%9$m^@aM%rgp@m86PtJ?8ji+-X(`BDK){gN?*UxC_1^3vZ?N zk##O@EsobJqz+SG7A&hF#idwY)WRw`(xXN&$8WARKZsg!jXoef{ceTLy|i2mu*mgV+BZUW#z81_jog?r4% zy<;(ZiZyiqtL$QyQiqoJ3EizTD!0JWflr6wpw%1V#0Mket}1=vh6-IocL{uR^RBOp z##w5;-6V;90{F)rEMszGeIyz^4*W(E&QX^y;V+161{dBEe8xDwP=fiGPf2VCDKtd$ z|CG{a6m#Safek_6E_aBWp=FRJmBRJ)5CY;lBlv}{Pm#3Y4oA?9p$=(GnlyxAdZQ6> z6SF>kdukuL#Dj=57$}Q-2-k8?1O_IxNf)h1UM*yHwH$y zAHPzF-F*8#@gOmn(;~z1IpB1uj)CLz?fKOa`Kxu~XPCpsVybUsng+2JzccOHp=~Lt zA#4T+-y(k+$H)Y@V(wKck*-%D+liTEH21f$yzVGsMK^(wphMkpl~LnT{J~ zpjN@SIhfI*UJV%T+q3uEE{pX?iLH`lKCe7@>s_2aoda@NQ;eqBA@5D=;TnpTx+SB) z+JqdE7rr(1b!2SbmX0{ooavu&28)pihVrh}OKTsGd@15-UaT2AG@aQQExO=;4tkde z{NVrk2xsTRi4ZwU=B>h|=kPE>=NsKkoKp+x9_~sOJU=1&xO_L6g7bqGHiO>rh&&pKv5XIA++G<8Lda^~RinJh!}FNiNK&}@qLl&1riXUXK?vgOE+uV8 zTAR&x=TaU~{cjb{01)^7yy8W*2UW+*R;y7)oh{!!zn4O;-H>hNH(5Azenf}n27XN^ zf=e{OpRtG&ZLN2ehX_wMt!=ES6GpDz;qlYA{morkw?U5YMGE4`ijipq3isaAk>YD& z;;X{Ce`;QsOky~0U_8WN$DiMq3@b}0R>Hu}luj_^qcLYiovM*}p31qDX3G-*#;G3@ zRLmXYe{X3zf!0W_Q10VzllpbIZX+%(4cD3{SdFN{@tM7CqtCa454sgP%40jFdVG0l zTHlNLwTS|ni-}ps%aKiy!dBtSmqV~jM=yD!(CX98rfik z&XNH=KksN6HJpK6qzBED6WQoXO_X_sU|6QPIV!?Oex|OsmCi#R3R1d0K8}6MnuHRX zN;d{ro{2K@4qg3L6ybL-io?XPl0Aeiq2=n3GG15LvthPz*{NiXYP9i6>lB7&#wH=z zqBNHc`UHFK4-707SeT%*VYejVPG|A)~)~#@?cG5aJN})4&g168>svr;{@3wsY*ca&G~jN}$unegzG|1RZLmh45ON zydzwz+0Ff0T0_Y}868EFG~*h??ZoQOm7(Z?jfjk)8UnKgjIap4+>guOslyR!+`Jx3 zvJzBx&KeP0g{#?8R)YJ@3iOz73-fdKMDJA8`B)IEp3hJC&n(6?3p# zg`OqEBrZ@eoQE!cztb?rx}URHVu8~P9O5TeAW$$SN1PN~kR?$`PjWSgIlSyyVr+iL zF0?{qJ~$R~8z!S72^UvLPNJbA5IV=EhjtBst|0^tc 
zgZy1=z5i9eTyk6DL<^*-4hEhd4*P7eBD)Z%YQw^}T1p68RK>6{NWI{)DfAZFUz5K9 zH#;<;EhtlK&OoRz6$kX=ad*?w(w&pn*Lwz{OH2q%T(R#DxX$bdXH!bUP$~?mondq`ETp_=;Iwh_`q2{V=LC)v zvPn#TPMHDNA+|%_mnnytqsU_k3LydHT0kuE0K8Y$<1*>M-t3n5m_?o?$rYNFzQgo_ zaCz+Tu?&U5#iL}QJstYQUpaQuP^#fR6nIuf@DGN85?ki-ifyt6(D4FXo4r{|wyoCE zr0pGM4`p22L_a*A^;1zfyJO|r(W*Qh2h$WnTrhr_^G&z&wCN+dO2 zrq@mYzKAI8m(`PFzcyK;y^bX*wvzGg*y{1RP*Z}bb9hUAbEx_=(wb#$18=g_f^G%* z&QDO~*z+L`FM#1YVpY|>;SxS|_rOKR&sd8es}}JcRkY zS>n}bAi3JZPsZbp{VB}$p``Klg>&xHT1F)O>7i5p;XJIX-BHQI_msP0$ZrGct4+AO zXLDwACg)M8yDvE;6D5(o?oEn1|Km{riMMr%4-{esP$1#{0VD@YFIyXJke8#I{qJ~s zt*e@l0j>4v{|J`Il;}~WL|)OMVyP&MA&FYa*3;Y;RI7G!iOF)iz9Lg-t*BPKNn96w zWw+|rZzgW)wDDk7v7@vTbjo=qN?#)6;XnLvRJAslf8;2=9g0L}c<8Hgr=BtKan6VY zb4MxbE}3iWtKeL{6(0n%)DrtczLT@ndXs2ht0U3#-54(ISi!8Z+Gcu_TXJ1{M=$Y= zyH&%}kM@yOUEF5^&SL`3b-6OkLrhr0rrzD>Li`yomc8|}KaxoKf@fStXOuW!;uox3 z)N~D;hxqoZClj3!nA^)3@Y1ut`8TY?)=Ql2tUy`2}YO> zqxjYBI2a|5Pu$Cg(4JD3k4KlmL0)!^1EDZlF^N*XkyFhJO&@Hm8&m9^_jl#6wEqcY-1 zr&IRn06{PWw>!fE%Zx(5a>rnlEui}4Uf0MAMP7n0?1uN3Jxwqe8%*v>FR8Zn0}PPe z?0S*C%cu>Le4>tWcaL`AQp0r=EF9dWm|6uySyCn{3WFkW`2t(Iz$?Rar(nkTo#N0q zX?h7QhW1=UY7PsmWh&yIkBmjRT2Y<3r52hV$f(+p!E*OxfHxo3}sa)?AsN`bl`xjpxpDMo_#kb}6IbCJ;_adk9;3UhD&I(k~U5TUb^Wb(1 zg3|gw3BUZt+C&lR!FEtz#e!DoZQQLjyxcv!Ijr5iZ2xGs_+Mcj3g9@fsd6tTVZ;&q zlk`wvMv%(8NuQir7DQNv7+)&?Cio0Yu*272_eHk`X^!^MS8CarXJ9Y?B-L~HIphlF zBs!)T39hk-Mg3jdwwNQ0Z4uo}ej~{UArA*bbMxd;;3qbha(;^r00BJM>vb9mS-Sw7 z^#BHCiE@}Uyk}8Gff(rlImxitF1bhcrzV52qRf{uuPj{XsfDPXXC&@S^TUSlprI>h ztZuLl9VdJZPd=PL4d?b&+7QMO%Gw-X$c*3I78BwkT542Xw2H<$wZC?KI2X~0kNDx+ z2biU^e{Gm!w$ys$-fFGAxlnCxA)RF&WqJg6d{=3veIzMQVzE!DbiV}5k&b$YCW2Bj zIKNTej*pqJgjagTYyQO%>wBzg_WO)D*3+xUhuFKoVNXhRrbqR58d-jtY?zD0yHop_ z|6J~8!o27Yg!7wkN>$J)bIYdGQof8brF4mf|NVVB_;CZnM@(R0g3&T zL9@>`QC>=wJvbT+rM$D|RBVV^ZyF;Tx{<43-Ag~rMK-Il5S0xZHov_LL=WBe z%>Jl24}6V1oNs5(h%jKN{KUtWe{g;5-{MMWP&V@EWSn@@_y?lB_7?+9pSqg$WKs^i zX|}G<4D!ZVEYIC$dx$ahUCLRuu#)b{i-kE&9lW=&oo1)X;gz{;cf|x7t>1MJzs4v6 
zRw$0cS(U5t^9CM}6cm_Sz3e3p(NM%k>-|jXJkTIKJ8jn0_qm#fP{OY!p&7IdLs8bE zc#e1iQd#CfYlj`r+aeh!J-g7~c^~Ar)^;4JKE`cE5>~Z;8Revz0Y^8`G2$bj;Y(_K znl+^B+JiF8F?J+-H8s)eJ2zdY7z93}d26Q>6m0T<{BO#HdBz3}A&>W{3`fshryqs?a*SX#Z!#KMw4E)$(fv>`yJx~(l+igxI=IV!CeBuo#4UU8Jyq}WFQdS-7UB~1b26bpursy^qu6~`<-)g z?)MAs?X|jl_3ZuB>e*dSRqd)$l!b=DhJb@WfPjD?hsbyZw6lhUfB?WkKwv>2KxvEG z**crrI_s;t+nYM+GP&6RU*y0*(PcqEf#?74_%B|8(zs5AZWffFQ>j+bcm}H)hc^gi z%Z42oG_0gAs7?qO<#Tw5mY288j75q{MGo_oi0+@iG`i5#=o(iu!pw{%0CmDN;MDo@ zkBd!ChxZ;bOz}Z#T|CIt%h1(}S9ey`E1n%;)r<+H5tBmp5!FEw244j>! zZt4xxX+VA<2|sCc=JF4B2XZbrUm(OUBqhB*{&wj{^+7mc6y2}v-C*FYge*o#oL$-7 zP7E0d(vTdhhIJ%&V5eK5Xv<|oC+lgv3|6f_kX*zI1J&>>{c9#usZg3W-kvxgovxZC z;?SVHC=T5OJOIi1!$J~8nK)(b@{DZu7z)jM51;MuI^FXS(awB7oMPD2IK34C z!0r^Z5g1s;(DQxF-{T-qXEmNXV+%MPwnN=%=NRRPAC-Vq{VXX-8Gx}Z*IV+-aTozQ> zz@2pvJD%!0pHQ%mD)f<8?r16*XTZtGe6ieX07!oQgHTPyg4eRrD8rGD!d>44*l{VA z`UUIJi%}tMK$(IA^E=zPWIuJ*wXaq!$N6x&dk;5acgeunp#LYbqBXF)`X@=$u4oor zp&=kV!F~-LyfSV;W>-5$Ya=^5>z__6Pu<2Ymk03VGxZ4H<7$=KfH~s~Oa1L-T}jb# zoo6NmUYY?8i+L)|q}B6%gtSyc;_S=$a7?1{FtSMV3^MZKTJRUN02Hl0i#-PlC9OY5phn?#oQWs&jmJ0mCl8~Y0fvIxdW1qCY-Yy?Zs|| z614?9u;tko`KM_j93*c9E>jMVA*{6JG4w06$oHL4=hvb-mZZK1ol)Y$xe6evN=kCJ zMu~*v3qo7eI^BJHf}s>y0~t6G2TVtKQ7tlX^9s08ccXjYkXCkav-tZGg+tWAmG=Nz z(Q{zo*LChMj0UZ58gt9VTTa+*m#)nZ>%9xWFu@w4)r_f1rXzEPw8&@)GJ->?@Wd<+5F;A5GhC>Xa=cyD@L zy#^~StcD{}Us}#jFH^Xf@i|mx4sq?|`>ZgEn5^}TGuF8FQ8-@d(WC}F(r807CZiTk zxK^C%G%ScjyBEiZXvvvVl(EMyyBA6UQfxF?B;f1YZ6W(iQoN;S10hDaGYl9nytvg% z-vIQg=<|WIrUYC8eRJ-p9nU)?URpz#9gUNgF3#=vDDkAg*jI9w-voL!KU8HFgb-n1 z%3aX)(BI@zjmHA+h*aO}AS@kPjnf1O)Olx(MIR*CN}zT+j1O}&x8K=fj&%)=kT)Ij z`y@~+5P~wCNPm!`g=B&RpUh6I?{+E8t*`s|YihaPwlek-uJmW%EzG&-@Kass6lGp# z`EKTy7u4fHuF>6pI#+{Lh<(8(LdUwbGV>IAO9sC?g0e99l^rG5)*k6`V|F61v&FC; z1+w6#&2j~14bF_z2&m1*Wj=5igJMbCfK$t~JH<9_z(c-}j*nbF$t{D<4oC~|v;^cY zq&7xm#;%+X!th%X-YG=w=-De&muN=$@=uJuU=mv3r{Fj;Q@vGmE?5hC5A5?|evOdG z-}!WKia@e%y8H5e^$S~TaH7Si0>>EHTaMN{uX}sXoa50W!}GXItF)B@O~+<$N7uxM zOO|^!Qa>L;MGNLAdv>&B$*t$0;ZraJ|H;u9__!k&Fnfi-JSBucfCO{&4`23IrvBTn 
zL4spW@ZA6IqcmpNs)q$d{1)^SFxl?F=B*gyU^c0oWRvRi!9I>h&In%Hd;8Q{nQyQo z%C?$Ss_T|Cq-ELHwd^|;8KF6ji8PXbsf($xHSB5c@F?ZS4hvaeIyEdb?84ssyOwuW z(OZQEDS{u1i~^v#SM2gpmK1}@9bPZ;d`>%!GTygd%oQ|}cZ@ASpBm?UAvBF0tn4Qg z*YF}VJ96yVx~15P<2C=|Wb_U)F5B5eAFP;PJB|98Hf=PkIuO<29^y&z%D` z3wuKUQHSD8JPF?+K|tWkflu&^f7y8_3sX~PC+1%_)}IDHEp|B~l?4!V%5Y8cY8@Wh zk18ul4Q%Zhu5%75GN=;{kBo-F2Vb|iD&kNI3^52}B zMxFVdHzDB=++a2gH2$!cE~WPB>XAT2XRekuZ7soH=hK3Z{X<4T4Qh5}W30B3f$Dq&nW1 zDBGb^JEM>x;?|}}bJ;VK9Do%Xe$l6c;iZ?hjIfs>=-t9qc;zDOoNc1?E!+4vW?vo} z`~a7;itg`6+`5cTA^MGFq4;!ccjQuJc;u!frt@#ssfmyEoi@Mk!oCEcO;Ba^?C)Y> zpI{Ut^(u)-FIN$jxt3iK=wD3sxWvo_BGaO?v_?quJRNy^s<=4lGFDu&2E3o&$wVE@ z-nbv~p6ri&(U9M*O2OQ9uzVwZd5NXr=ly9{k2rGnhSka%t6795LTvEHw})%Hkcrgv zL?%tX?L`KNmDM|fqJ)$$9eXkDP8XSLy3NAQfu0(EfF^Wq%S^^ zk32BEt_xf5C*zLMtv2zA3-zc&e!X3kL2E26I%kz6wF$|QV~o^6%7=aX?qp}Y`|JH) z|7j$;>-h6Si$}YD%k{y|^42`w-Ev{2-lP5b`TjK7v$tTY07<*g7k$^%IKFe*aQ znQy1mH^;OSdwSpW^~o+)j}N<7kEh4q9$nFT{0M1#>S6KOOvrHXQnYhZ3?(6}3Xt5^ zk~?$G(DBxM{C;2Y_KNy7$;teB20C-_-hB6+;m$*W(@9^2kM<9uM6TuIgr@^8UG9~@ zmSyIhd#Po4JZSntq1kp8$@Pa#rL2WXovRInk1Txml@lb7w34pf1;`;ypZ0>82v?MV z?Ajm;Qb;H`YdT4LBW}_H2J2#9nw$u7%Z#mUL>eUv={)2iHMN`))c4M z2KQ=1o%!46)7e?}xMMU|!4#OSH5Pl@q8fcd4oC#pNVvlbFC9S=JjxJ$iO^WZ46i$>Ay0b;!mc;?xOm;z{qA2?PJ>m>Oo-!I}v zxJlHonC_lIb?_i}#%(kYQu-j(Ns3U`DJ5vp^&y5qDlQ_{z0>6LGhU;WWqh&4Q$ocV zq;ov0%;#q^#+R0!j~nNZrXE8mS`Js2pkR=EOCX9o!ZXHzh)kW0W`#mKF>)W44Qoy; znUH7YP>yEBnEr;toMdbjy$xg`bJmzZvw{vwgf1Ks)7aL<%-&4U1Z^==4-IK@}lCF(szzf)Hi_yHgu@!Mm-2$>8SW8i?3jV1Pqv? 
zXShNLUm5)T0qBa%`_)m@iii)jpoT_)tGpXC_u;5HzeouV z71R5>{I}8N6sE%o&{$rp2P$oZ_p5=nB?c`alLjCQ-j#%Q5LG0^hj5$Ebn)`@GD2YW zCdZ`s>Wpg0g1?n;?V0TM!<4Zwo$6kEZ^*p)QGmtQM7m~nH{1HyYDo#oAJB>5SWs*- zCd6tVySg9AKW915ygEu2v7E@D=e_C>>3Qrn7r#NZy&R$$sRMa@b$_mBOrpDhntxR- z`eh(d0&bKvlo~~Op7ITXH5K=T1$i>n5}T4P6;8FJR_-@b(*9wf^Zw%7BB~fzYZ;bd zwWD%r-WTQ3D;9z&<toR7M-%`i0)3f@C{bON@hdL>shmjEY?{>m0-#0BdQEhq1(f8=2SJ$#4O!7qrNsO7qzHVccD}RNPvs^2~yW14?)} zH5Tgzy5d&3;@SUmJb*?fTzD^h`SrQ;48PfzKDG3wXYO5nka62Tz5SNcI<19wu(;bY>O2n}`P zU6*>cV!Fp@#CD{dD028!nnWdcJyvn?jP&CGM=d~#84+ffFRF_4#ShBM34fBd4m2RY z6wcS^8U2ME@ruKDTP@F9MpAo+M=K&}g15Mhbe%Yu=Qk0GYR`c)`wD&66|JR1Z;hFO zt!*G$8GBvYIEHg5%sC5e&(Ya&SHfYmS>49LN$tjLn0aBPX^9oJ&LH$&pN%XHKn@;4 zwhm&T0XJjBDM00nKWxar5QM1{i!@B1O)(O!;SG(G9X~;As~-Vw z5&fqP&3*xOqZu3ogbH{R>yKd6*}~Mul=;{7R~&kzIr=l|Xvh1;5Ab~ZYB9!$AR1aW zVUx?jXh9~F__MB6B8F?z3wr)$oWZI$emRztvVLnGbW3njAM~aQ9a3}T{aQtWw`H`n zRDrZ`cX%W~#@nrjar>ZVk7MDX337~>i&7j_=66tH)lrfhpR_yEeNLx5KIC<21`>=* zF(d&9be6rI+e+L-si~+DZpPT$bpF z7UWsTccn}LMMxu!mu{0tFL$v@fqJ@~M7=Zx2}g{_hskfNDi#3Y>Zorq(9PsZ+O#f* zZo=9jS?WF>y0oO*zLyw>u*df=P)}eR)v{|_%9wiPcQh})lrGbz>~v0jXN}Cpy?pG_ zra52~LKyX^rQK2SjQUrD#R;7{e{?4l0@lLYgCDmQ8A9UVlh^0_(f6-!`y)w7gdQfR z9Jx+5UNPBu{Wv!F^?78y+uG-5CiDH;Kg{+tPbTU<@q{!Ulc6T!wg~4AD$bC$+J! 
zg(1uB`-l1dTM|ulOjh-o?jI5{d>y)DnyJVYkhT_j)X2B}nn&hLeWojdeZhijBx$_m z#}Q6NDbrcbz;)@5l)I$e$z802A4Tp+$S^i1Pe}74bX_g!8n;D3CrUGYGuAPJBs_i` z>$RdPXTv>(?2SuRqbIsEyCa3yT)fWFQGg`(z*DT&oGL~* zWQP{ew-H3Jb6=Nxo#~#Kee{77+5moTIaS_lyuT8$5AKVQx`ddt>&q!xQh1cNylFNX zp;O&*?RE3uv+11NmJLbAqILswR>Q5~oM)L5aQF#+yrucR`~>d#M{~=o#BnW~Jy_h7 zZFhZO^R}~PtUz8ENf9C?cm94XH8HLdI<7plT|wjGL>%o|9o-2U6Yj#1uzyKpkqjDU znrNghC#C)?YO>r%6mmzDg<7QMTD=$N)dGt{MTShS3Y3q4x`l| zno&);G&o((Fs59Cnx$xXC_s_5Yo7#*32EhxO$jAP$@Y0qvB}~aIXM>_He7IN;}>TW zDpNg9ZCW`p^=d5j7VV%ia|w{5MK6KQu5LqQoREI{J*r0`A`z`3qvi+w&W~^!NEt z!eL``Im9?7J{8Ygi85AhoIu-1*T5%Um<;?%6KT$tM9WZyf$n@cWE;o-f4YgfTfVnqU5$mR z_f6B+NrW*sCZPv67JLKv{YmEy8}&~{>X$6+b**JTJgRK0a_X2u!rkVrG<$l z-+ZX0i^BdEpFo$2Kn5jV=otFsndq^=7QZ zZl`&}Dt5!}HkrG-s9qIE)uI=()ts|kO33G01q*Z4)RI7d0f7$yJD0&2s znPuiB48zUN$Yb?oJSB)p-gHlHZO=5l2SI6IP_p4{9<~WF8Sb zn4=v(ifJ7YA~=wv?IY^abVT(jN+uA*cfwq6-Fw5jD=J>-3Y4ugtPzRc3gd@!McurS z^5cT7U3d}3-^89@WsncTM_mW5&L1H8%r9xtb#pNBtXhbJhW9ItTd}mTnprwskaame zO1Y}v5@N!y=YBa@cC?^xJwW1sPpG(a>1-vICBv4AWhXz;VObv*&Cn_1qE_ZlmiAPk z!e^p=r`Zh<+Yi=NXbu^k#V_k7?QFh!7J6kh1pSoh0^e~;t!zN%LaC)@beb|LJu{kM z<4(Mn=x-Nik}gXqdi!3bb~Z?;Q+>Xkx0JnccH^X8z!eveU)EM|#16q4iRl?LC+Z({ zSPtXs&`^s%sdL&`snZsv-0>Bsh_N~n+j+}|}cVj(Vdi=0#Zypn|+hsOi+fEZ-6VNJewQ#J{ zDXVTGyLR^E(*`MT%J{@WJXbroH8vYP78J4?QaL%wTX50%Qrm$f!!b3T*r z_w0nTE=1Imo9)J`=SF0w)in!!ZSWeJo(6Ai+ z=K|XI;eb-TjkNGffMK4y3I}!L=#u{EGXI7*)VJO)WGPAo+$8NZ1egYx#82!) z_bd|GA@!dN`i;@@y$-wKB-WwH5pLUHsWM}3t>T*A1uGPYc{VhWzDUze6Vq5tgKEIm zP%)564W3y(nWukEiC=OkU2M}M_*APPbO@|`m$rzvn-WaFHqJO%W0uBB=a~{pzmgiI zOoFvILn&YDnij@!F4(sd#j@{}x9IUI*+YQsJC>Yv`6ruP79sN2QL4VWjDF7J*YSvy zNW4(BRue46`?~JZutu4{DsJo_I{w)~_?5zPqf;chys}Evv$#Fj1d>datF$(T!26L7-$F{ta>m>RSVnc^8a91Cq5^1e@y@O{*|S?pvLk?a-;U zeWJ!ui!qg1k_39VlDAGY?AlmCv)%(8rF_H^vtLJU!VMHx-OU(t-!xs07)iK0?g^(8 zk8OWi+I}iBxDYpW+)MU?jb$E+x$NGS{fPPW+C=E+@pU*h%Be*g+E$-_ZgDaOWdY?mPfku z-6Uh}qhOWd@DRWZh8ZOt%^1}H^}i%m|H|a)sM}Iv_ZrO%z9wL?^T@!3g+GaL1lJ6? 
z2DI1(G^}UQ{`Cl5<2Uif;spnQ5CJt1P6RqTNYVHm=Ln%0YE6)4cd6kVi**vt2$C6W zjlX7Rsew8R!>>n(uUq5q@1M^BD-jzQ8?CJ!JaCPK&4j7JM$d&3dZ*{Bc;IFIry)C6 zh~`iPFT5Eze82z~;!Ny}6&>yDotTa798G_&Jh-pqfAu%;(s{+`%67BhgHBsD?{Mqh_8>1a{T%HijK0CEchzFj*trl? zYGK2oA$kY$ZoyE8A5HRvU)wie6h*Q2red1d@QHSDmLIK zg!`ycaBzQkGK1~Lb%eJ7IIQ=X_e48tFRAb0RqRBgdwl`*%;olN!jK9Y*{j&=$X-dW zCb-`tpqN9UhD9&%yjr1JK;aU2z4pcT{g=1AXg$2W&tj3oUCv5{mhD#)J7I2+s-h={ znM&UOk^l7Q*L~h#HGBfAA>glSXlQT$KP?2S-@lHum|<||9YFk6>Opk-^?M|Q>>*YW zfgUKad|VA8^C^u+mu-TagbMSRk1O?zozUnCu8pM_cQk<8+?tD;ezuq!cFZ1cypB= z4JDVFs*QMJW}Iopj&@o}?Q{lS+u1Hc^mo8ghAoV^2ht*b7F`RcEljJ~sZv;3R?}~S zJoU!WZG?eng@7{YF&Lv#1uhQnW1_q~eIt)=C&uq`ey_j(DX9wO|GmUNs`0-|`8{#^rxZc3#{$32?`hQE1^m9n{Zqg! z$u9waZg_u({vO5u31uYz1NwVR|GR|WHh#D1G#qq literal 0 HcmV?d00001 diff --git a/example/name_to_properties/example_name_to_properties.py b/example/name_to_properties/example_name_to_properties.py new file mode 100644 index 0000000..48f6ee6 --- /dev/null +++ b/example/name_to_properties/example_name_to_properties.py @@ -0,0 +1,302 @@ +import pathlib as plib +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from matplotlib.transforms import blended_transform_factory +import seaborn as sns +import ele +import pubchempy as pcp +from gcms_data_analysis.fragmenter import Fragmenter + +from gcms_data_analysis import name_to_properties + + +# def get_compound_from_pubchempy(comp_name: str) -> pcp.Compound: +# if not isinstance(comp_name, str): +# return None +# if comp_name == " " or comp_name == "": +# return None +# cond = True +# while cond: # to deal with HTML issues on server sides (timeouts) +# try: +# # comp contains all info about the chemical from pubchem +# try: +# comp_inside_list = pcp.get_compounds(comp_name, "name") +# except ValueError: +# print(f"{comp_name = }") +# return None +# if comp_inside_list: +# comp = comp_inside_list[0] +# else: +# print( +# f"WARNING: name_to_properties {comp_name=} does not find an entry in pcp", +# ) +# return None +# 
cond = False +# except pcp.PubChemHTTPError: # timeout error, simply try again +# print("Caught: pcp.PubChemHTTPError (keep trying)") +# return comp + + +# def _order_columns_in_compounds_properties( +# unsorted_df: pd.DataFrame | None, +# ) -> pd.DataFrame | None: +# if unsorted_df is None: +# return None + +# # Define a custom sort key function +# def sort_key(col): +# if col.startswith("el_mf"): +# return (2, col) +# elif col.startswith("el_"): +# return (1, col) +# elif col.startswith("fg_mf_unclassified"): +# return (5, col) +# elif col.startswith("fg_mf"): +# return (4, col) +# elif col.startswith("fg_"): +# return (3, col) +# else: +# return (0, col) + +# # Sort columns using the custom key +# sorted_columns = sorted(unsorted_df.columns, key=sort_key) +# sorted_df = unsorted_df.reindex(sorted_columns, axis=1) +# sorted_df.index.name = "comp_name" +# # Reindex the DataFrame with the sorted columns +# return sorted_df + + +# def name_to_properties2( +# comp_name: str, +# dict_classes_to_codes: dict[str:str], +# dict_classes_to_mass_fractions: dict[str:float], +# df: pd.DataFrame | None = None, +# precision_sum_elements: float = 0.05, +# precision_sum_functional_group: float = 0.05, +# ) -> pd.DataFrame | None: +# """ +# used to retrieve chemical properties of the compound indicated by the +# comp_name and to store those properties in the df + +# Parameters +# ---------- +# GCname : str +# name from GC, used as a unique key. +# search_name : str +# name to be used to search on pubchem. +# df : pd.DataFrame +# that contains all searched compounds. +# df_class_code_frac : pd.DataFrame +# contains the list of functional group names, codes to be searched +# and the weight fraction of each one to automatically calculate the +# mass fraction of each compounds for each functional group. +# Classes are given as smarts and are looked into the smiles of the comp. + +# Returns +# ------- +# df : pd.DataFrame +# updated dataframe with the searched compound. 
+# CompNotFound : str +# if GCname did not yield anything CompNotFound=GCname. + +# """ +# # classes used to split compounds into functional groups +# comp = get_compound_from_pubchempy(comp_name) + +# if comp is None: +# if not isinstance(comp_name, str): +# return df +# else: +# if not comp_name or comp_name.isspace(): +# return df +# else: +# if df is not None: +# df.loc[comp_name, "iupac_name"] = "unidentified" +# return df +# if df is None: +# df = pd.DataFrame(dtype=float) +# try: +# df.loc[comp_name, "iupac_name"] = comp.iupac_name.lower() +# except AttributeError: # iupac_name not give +# df.loc[comp_name, "iupac_name"] = comp_name.lower() +# df.loc[comp_name, "molecular_formula"] = comp.molecular_formula +# df.loc[comp_name, "canonical_smiles"] = comp.canonical_smiles +# df.loc[comp_name, "molecular_weight"] = float(comp.molecular_weight) + +# try: +# df.loc[comp_name, "xlogp"] = float(comp.xlogp) +# except ( +# TypeError +# ): # float() argument must be a string or a real number, not 'NoneType' +# df.loc[comp_name, "xlogp"] = np.nan +# elements = set(comp.to_dict()["elements"]) +# el_dict = {} +# el_mf_dict = {} + +# for el in elements: +# el_count = comp.to_dict()["elements"].count(el) +# el_mass = ele.element_from_symbol(el).mass + +# # Using similar logic as in the fg_dict example +# if el not in el_dict: +# el_dict[el] = 0 +# el_mf_dict[el] = 0.0 + +# el_dict[el] += int(el_count) +# el_mf_dict[el] += ( +# float(el_count) * float(el_mass) / float(comp.molecular_weight) +# ) +# # Now, update the DataFrame in a similar way to the fg_dict example +# for key, value in el_dict.items(): +# df.at[comp_name, f"el_{key}"] = int(value) + +# for key, value in el_mf_dict.items(): +# df.at[comp_name, f"el_{key}"] = float(value) +# cols_el_mf = [col for col in df.columns if col.startswith("el_mf")] +# residual_els = df.loc[comp_name, cols_el_mf].sum() - 1 +# # check element sum +# try: +# assert residual_els <= precision_sum_elements +# except AssertionError: +# 
raise AssertionError( +# f"the total mass fraction of elements in {comp_name =} is > 0.001" +# ) +# # apply fragmentation using the Fragmenter class (thanks simonmb) +# frg = Fragmenter( +# dict_classes_to_codes, +# fragmentation_scheme_order=dict_classes_to_codes.keys(), +# algorithm="simple", +# ) +# fragmentation, _, _ = frg.fragment(comp.canonical_smiles) +# fg_dict = {} +# fg_mf_dict = {} +# # Iterate over each item in the dictionary +# for key, value in fragmentation.items(): +# # Determine the root key (the part before an underscore, if present) +# root_key = key.split("_")[0] +# # if root_key in hetero_atoms: +# # pass +# # Check if the root key is in the sum_dict; if not, initialize it +# if root_key not in fg_dict: +# fg_dict[root_key] = 0 +# fg_mf_dict[root_key] = 0 +# # Add the value to the corresponding root key in the sum_dict +# fg_dict[root_key] += int(fragmentation[key]) +# fg_mf_dict[root_key] += ( +# float(fragmentation[key]) +# * float(dict_classes_to_mass_fractions[key]) +# / df.loc[comp_name, "molecular_weight"].astype(float) +# ) # mass fraction of total + +# # Update df with fg_dict +# for key, value in fg_dict.items(): +# df.at[comp_name, f"fg_{key}"] = int(value) # Update the cell +# # Update df with fg_mf_dict +# for key, value in fg_mf_dict.items(): +# df.at[comp_name, f"fg_mf_{key}"] = float(value) # Update the cell +# cols_fg_mf = [col for col in df.columns if col.startswith("fg_mf")] +# residual_fgs = df.loc[comp_name, cols_fg_mf].sum() - 1 +# try: +# assert residual_fgs <= precision_sum_functional_group +# except AssertionError: +# print(f"{df.loc[comp_name, cols_fg_mf].sum()=}") +# raise AssertionError( +# f"the total mass fraction of functional groups in {comp_name =} is > 0.05" +# ) +# if residual_fgs < -precision_sum_functional_group: +# df.at[comp_name, f"fg_mf_unclassified"] = abs(residual_fgs) +# df.loc[df["iupac_name"] != "unidentified"] = df.loc[ +# df["iupac_name"] != "unidentified" +# ].fillna(0) +# df = 
_order_columns_in_compounds_properties(df) + +# return df + + +folder_path = plib.Path( + r"C:\Users\mp933\OneDrive - Cornell University\Python\gcms_data_analysis\tests\data_name_to_properties" +) +# %% +classifications_codes_fractions = pd.read_excel( + plib.Path( + folder_path, + "classifications_codes_fractions.xlsx", + ) +) +checked_compounds_properties = pd.read_excel( + plib.Path( + folder_path, + "checked_compounds_properties.xlsx", + ), + index_col="comp_name", +) +dict_cl_to_codes: dict[str, str] = dict( + zip( + classifications_codes_fractions.classes.tolist(), + classifications_codes_fractions.codes.tolist(), + ) +) +dict_cl_to_mass_fractions: dict[str, float] = dict( + zip( + classifications_codes_fractions.classes.tolist(), + classifications_codes_fractions.mfs.tolist(), + ) +) +# %% + +compounds = [ + "2-methylcyclopent-2-en-1-one", # small ketone + "hexadecanoic acid", + "n-hexadecanoic acid", # different names same compounds + "phenol", # ring + "phenol", # repeated compound + "2,4,5-trichlorophenol", # clorine (udentified) + "phenoxytrimethylsilane", # silane (not listed in fg) + "bromophenol", # Br not listed + "9-octadecenoic acid, 1,2,3-propanetriyl ester, (e,e,e)-", # large compound + "wrong_name", # test for legit string that gives no pcp result + " ", # wrong entry or datatype + None, + False, + np.nan, +] + +list_of_compound_properties: list[pd.DataFrame] = [] +for compound in compounds: + print(compound) + n2p = name_to_properties( + compound, dict_cl_to_codes, dict_cl_to_mass_fractions, None + ) + list_of_compound_properties.append(n2p) + if n2p is not None: + to_check = n2p.loc[[compound], :] + to_check = to_check.loc[:, (to_check != 0).any(axis=0)] + checked = checked_compounds_properties.loc[[compound], :] + checked = checked.loc[:, (checked != 0).any(axis=0)] + pd.testing.assert_frame_equal( + to_check, + checked, + check_exact=False, + atol=1e-5, + rtol=1e-5, + ) +# %% +to_check = pd.DataFrame() +for compound in compounds: + 
print(compound) + to_check = name_to_properties( + compound, + dict_cl_to_codes, + dict_cl_to_mass_fractions, + to_check, + ) +pd.testing.assert_frame_equal( + to_check, + checked_compounds_properties, + check_exact=False, + atol=1e-5, + rtol=1e-5, +) + +# %% diff --git a/pytest.ini b/pytest.ini index 4530dfc..e4fa8c1 100644 --- a/pytest.ini +++ b/pytest.ini @@ -6,4 +6,4 @@ filterwarnings = markers = slow: marks tests as slow (deselect with '-m "not slow"') -addopts = --cov=gcms_data_analysis --cov-report=html:docs/_coverage_html \ No newline at end of file +addopts = -s \ No newline at end of file diff --git a/src/gcms_data_analysis/__init__.py b/src/gcms_data_analysis/__init__.py index 4e8e883..aea19f1 100644 --- a/src/gcms_data_analysis/__init__.py +++ b/src/gcms_data_analysis/__init__.py @@ -1 +1 @@ -from .main import Fragmenter, Project, figure_create, figure_save, name_to_properties +from .main import Project, name_to_properties diff --git a/src/gcms_data_analysis/fragmenter.py b/src/gcms_data_analysis/fragmenter.py new file mode 100644 index 0000000..c89e20c --- /dev/null +++ b/src/gcms_data_analysis/fragmenter.py @@ -0,0 +1,776 @@ +import marshal +from rdkit import Chem +from rdkit.Chem import DataStructs +from rdkit.Chem import rdmolops +from rdkit.Chem.AllChem import ( + GetMorganFingerprintAsBitVect, +) # pylint: disable=no-name-in-module + + +class Fragmenter: + """ + Class taken from https://github.com/simonmb/fragmentation_algorithm. + The original version of this algorithm was published in: + "Flexible Heuristic Algorithm for Automatic Molecule Fragmentation: + Application to the UNIFAC Group Contribution Model + DOI: 10.1186/s13321-019-0382-39." + MIT License + + ... + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + """ + + # tested with Python 3.8.8 and RDKit version 2021.09.4 + + # does a substructure match and then checks whether the match + # is adjacent to previous matches + @classmethod + def get_substruct_matches( + cls, + mol_searched_for, + mol_searched_in, + atomIdxs_to_which_new_matches_have_to_be_adjacent, + ): + + valid_matches = [] + + if mol_searched_in.GetNumAtoms() >= mol_searched_for.GetNumAtoms(): + matches = mol_searched_in.GetSubstructMatches(mol_searched_for) + + if matches: + for match in matches: + add_this_match = True + if len(atomIdxs_to_which_new_matches_have_to_be_adjacent) > 0: + add_this_match = False + + for i in match: + for neighbor in mol_searched_in.GetAtomWithIdx( + i + ).GetNeighbors(): + if ( + neighbor.GetIdx() + in atomIdxs_to_which_new_matches_have_to_be_adjacent + ): + add_this_match = True + break + + if add_this_match: + valid_matches.append(match) + + return valid_matches + + # count heavier isotopes of hydrogen correctly + @classmethod + def get_heavy_atom_count(cls, mol): + heavy_atom_count = 0 + for atom in mol.GetAtoms(): + if atom.GetAtomicNum() != 1: + heavy_atom_count += 1 + + return heavy_atom_count + + def __init__( + self, + fragmentation_scheme={}, + fragmentation_scheme_order=None, + match_hydrogens=False, + algorithm="", + n_atoms_cuttoff=-1, + function_to_choose_fragmentation=False, + n_max_fragmentations_to_find=-1, + ): + + if not type(fragmentation_scheme) is dict: + raise TypeError( + "fragmentation_scheme must be a dctionary with integers as keys and either strings or list of strings as values." 
+ ) + + if len(fragmentation_scheme) == 0: + raise ValueError("fragmentation_scheme must be provided.") + + if not algorithm in ["simple", "complete", "combined"]: + raise ValueError("Algorithm must be either simple ,complete or combined.") + + if algorithm == "simple": + if n_max_fragmentations_to_find != -1: + raise ValueError( + "Setting n_max_fragmentations_to_find only makes sense with complete or combined algorithm." + ) + + self.algorithm = algorithm + + if algorithm in ["combined", "complete"]: + if n_atoms_cuttoff == -1: + raise ValueError( + "n_atoms_cuttoff needs to be specified for complete or combined algorithms." + ) + + if function_to_choose_fragmentation == False: + raise ValueError( + "function_to_choose_fragmentation needs to be specified for complete or combined algorithms." + ) + + if not callable(function_to_choose_fragmentation): + raise TypeError( + "function_to_choose_fragmentation needs to be a function." + ) + else: + if type(function_to_choose_fragmentation([{}, {}])) != dict: + raise TypeError( + "function_to_choose_fragmentation needs to take a list of fragmentations and choose one of it" + ) + + if n_max_fragmentations_to_find != -1: + if n_max_fragmentations_to_find < 1: + raise ValueError( + "n_max_fragmentations_to_find has to be 1 or higher." 
+ ) + + if fragmentation_scheme_order is None: + fragmentation_scheme_order = [] + + if algorithm in ["simple", "combined"]: + assert len(fragmentation_scheme) == len(fragmentation_scheme_order) + else: + fragmentation_scheme_order = [key for key in fragmentation_scheme.keys()] + + self.n_max_fragmentations_to_find = n_max_fragmentations_to_find + + self.n_atoms_cuttoff = n_atoms_cuttoff + + self.match_hydrogens = match_hydrogens + + self.fragmentation_scheme = fragmentation_scheme + + self.function_to_choose_fragmentation = function_to_choose_fragmentation + + # create a lookup dictionaries to faster finding a group number + self._fragmentation_scheme_group_number_lookup = {} + self._fragmentation_scheme_pattern_lookup = {} + self.fragmentation_scheme_order = fragmentation_scheme_order + + for group_number, list_SMARTS in fragmentation_scheme.items(): + + if type(list_SMARTS) is not list: + list_SMARTS = [list_SMARTS] + + for SMARTS in list_SMARTS: + if SMARTS != "": + self._fragmentation_scheme_group_number_lookup[SMARTS] = ( + group_number + ) + + mol_SMARTS = Chem.MolFromSmarts(SMARTS) + self._fragmentation_scheme_pattern_lookup[SMARTS] = mol_SMARTS + + def fragment(self, SMILES_or_molecule): + + if type(SMILES_or_molecule) is str: + mol_SMILES = Chem.MolFromSmiles(SMILES_or_molecule) + mol_SMILES = Chem.AddHs(mol_SMILES) if self.match_hydrogens else mol_SMILES + is_valid_SMILES = mol_SMILES is not None + + if not is_valid_SMILES: + raise ValueError("Following SMILES is not valid: " + SMILES_or_molecule) + + else: + mol_SMILES = SMILES_or_molecule + + # iterate over all separated molecules + success = [] + fragmentation = {} + fragmentation_matches = {} + for mol in rdmolops.GetMolFrags(mol_SMILES, asMols=True): + + this_mol_fragmentation, this_mol_success = self.__get_fragmentation(mol) + + for SMARTS, matches in this_mol_fragmentation.items(): + group_number = self._fragmentation_scheme_group_number_lookup[SMARTS] + + if not group_number in fragmentation: + 
fragmentation[group_number] = 0 + fragmentation_matches[group_number] = [] + + fragmentation[group_number] += len(matches) + fragmentation_matches[group_number].extend(matches) + + success.append(this_mol_success) + + return fragmentation, all(success), fragmentation_matches + + def fragment_complete(self, SMILES_or_molecule): + + if type(SMILES_or_molecule) is str: + mol_SMILES = Chem.MolFromSmiles(SMILES_or_molecule) + mol_SMILES = Chem.AddHs(mol_SMILES) if self.match_hydrogens else mol_SMILES + is_valid_SMILES = mol_SMILES is not None + + if not is_valid_SMILES: + raise ValueError("Following SMILES is not valid: " + SMILES_or_molecule) + + else: + mol_SMILES = SMILES_or_molecule + + if len(rdmolops.GetMolFrags(mol_SMILES)) != 1: + raise ValueError( + "fragment_complete does not accept multifragment molecules." + ) + + temp_fragmentations, success = self.__complete_fragmentation(mol_SMILES) + + fragmentations = [] + fragmentations_matches = [] + for temp_fragmentation in temp_fragmentations: + fragmentation = {} + fragmentation_matches = {} + for SMARTS, matches in temp_fragmentation.items(): + group_number = self._fragmentation_scheme_group_number_lookup[SMARTS] + + fragmentation[group_number] = len(matches) + fragmentation_matches[group_number] = matches + + fragmentations.append(fragmentation) + fragmentations_matches.append(fragmentation_matches) + + return fragmentations, success, fragmentations_matches + + def __get_fragmentation(self, mol_SMILES): + + success = False + fragmentation = {} + if self.algorithm in ["simple", "combined"]: + fragmentation, success = self.__simple_fragmentation(mol_SMILES) + + if success: + return fragmentation, success + + if self.algorithm in ["combined", "complete"]: + fragmentations, success = self.__complete_fragmentation(mol_SMILES) + + if success: + fragmentation = self.function_to_choose_fragmentation(fragmentations) + + return fragmentation, success + + def __simple_fragmentation(self, mol_SMILES): + + if 
self.match_hydrogens: + target_atom_count = len(mol_SMILES.GetAtoms()) + else: + target_atom_count = Fragmenter.get_heavy_atom_count(mol_SMILES) + + success = False + fragmentation = {} + + fragmentation, atomIdxs_included_in_fragmentation = ( + self.__search_non_overlapping_solution(mol_SMILES, {}, set(), set()) + ) + success = len(atomIdxs_included_in_fragmentation) == target_atom_count + + # if not successful, clean up molecule and search again + level = 1 + while not success: + fragmentation_so_far, atomIdxs_included_in_fragmentation_so_far = ( + Fragmenter.__clean_molecule_surrounding_unmatched_atoms( + mol_SMILES, fragmentation, atomIdxs_included_in_fragmentation, level + ) + ) + level += 1 + + if len(atomIdxs_included_in_fragmentation_so_far) == 0: + break + + fragmentation_so_far, atomIdxs_included_in_fragmentation_so_far = ( + self.__search_non_overlapping_solution( + mol_SMILES, + fragmentation_so_far, + atomIdxs_included_in_fragmentation_so_far, + atomIdxs_included_in_fragmentation_so_far, + ) + ) + + success = ( + len(atomIdxs_included_in_fragmentation_so_far) == target_atom_count + ) + + if success: + fragmentation = fragmentation_so_far + + return fragmentation, success + + def __search_non_overlapping_solution( + self, + mol_searched_in, + fragmentation, + atomIdxs_included_in_fragmentation, + atomIdxs_to_which_new_matches_have_to_be_adjacent, + ): + + n_atomIdxs_included_in_fragmentation = ( + len(atomIdxs_included_in_fragmentation) - 1 + ) + + while n_atomIdxs_included_in_fragmentation != len( + atomIdxs_included_in_fragmentation + ): + n_atomIdxs_included_in_fragmentation = len( + atomIdxs_included_in_fragmentation + ) + + for group_number in self.fragmentation_scheme_order: + list_SMARTS = self.fragmentation_scheme[group_number] + if type(list_SMARTS) is not list: + list_SMARTS = [list_SMARTS] + + for SMARTS in list_SMARTS: + if SMARTS != "": + fragmentation, atomIdxs_included_in_fragmentation = ( + self.__get_next_non_overlapping_match( + 
mol_searched_in, + SMARTS, + fragmentation, + atomIdxs_included_in_fragmentation, + atomIdxs_to_which_new_matches_have_to_be_adjacent, + ) + ) + + return fragmentation, atomIdxs_included_in_fragmentation + + def __get_next_non_overlapping_match( + self, + mol_searched_in, + SMARTS, + fragmentation, + atomIdxs_included_in_fragmentation, + atomIdxs_to_which_new_matches_have_to_be_adjacent, + ): + + mol_searched_for = self._fragmentation_scheme_pattern_lookup[SMARTS] + + if atomIdxs_to_which_new_matches_have_to_be_adjacent: + matches = Fragmenter.get_substruct_matches( + mol_searched_for, + mol_searched_in, + atomIdxs_to_which_new_matches_have_to_be_adjacent, + ) + else: + matches = Fragmenter.get_substruct_matches( + mol_searched_for, mol_searched_in, set() + ) + + if matches: + for match in matches: + all_atoms_of_new_match_are_unassigned = ( + atomIdxs_included_in_fragmentation.isdisjoint(match) + ) + + if all_atoms_of_new_match_are_unassigned: + if not SMARTS in fragmentation: + fragmentation[SMARTS] = [] + + fragmentation[SMARTS].append(match) + atomIdxs_included_in_fragmentation.update(match) + + return fragmentation, atomIdxs_included_in_fragmentation + + @classmethod + def __clean_molecule_surrounding_unmatched_atoms( + cls, mol_searched_in, fragmentation, atomIdxs_included_in_fragmentation, level + ): + + for i in range(0, level): + + atoms_missing = set( + range(0, Fragmenter.get_heavy_atom_count(mol_searched_in)) + ).difference(atomIdxs_included_in_fragmentation) + + new_fragmentation = marshal.loads(marshal.dumps(fragmentation)) + + for atomIdx in atoms_missing: + for neighbor in mol_searched_in.GetAtomWithIdx(atomIdx).GetNeighbors(): + for smart, atoms_found in fragmentation.items(): + for atoms in atoms_found: + if neighbor.GetIdx() in atoms: + if smart in new_fragmentation: + if new_fragmentation[smart].count(atoms) > 0: + new_fragmentation[smart].remove(atoms) + + if smart in new_fragmentation: + if len(new_fragmentation[smart]) == 0: + 
new_fragmentation.pop(smart) + + new_atomIdxs_included_in_fragmentation = set() + for i in new_fragmentation.values(): + for j in i: + new_atomIdxs_included_in_fragmentation.update(j) + + atomIdxs_included_in_fragmentation = new_atomIdxs_included_in_fragmentation + fragmentation = new_fragmentation + + return fragmentation, atomIdxs_included_in_fragmentation + + def __complete_fragmentation(self, mol_SMILES): + + heavy_atom_count = Fragmenter.get_heavy_atom_count(mol_SMILES) + + if heavy_atom_count > self.n_atoms_cuttoff: + return {}, False + + completed_fragmentations = [] + groups_leading_to_incomplete_fragmentations = [] + ( + completed_fragmentations, + groups_leading_to_incomplete_fragmentations, + incomplete_fragmentation_found, + ) = self.__get_next_non_overlapping_adjacent_match_recursively( + mol_SMILES, + heavy_atom_count, + completed_fragmentations, + groups_leading_to_incomplete_fragmentations, + {}, + set(), + set(), + self.n_max_fragmentations_to_find, + ) + success = len(completed_fragmentations) > 0 + + return completed_fragmentations, success + + def __get_next_non_overlapping_adjacent_match_recursively( + self, + mol_searched_in, + heavy_atom_count, + completed_fragmentations, + groups_leading_to_incomplete_fragmentations, + fragmentation_so_far, + atomIdxs_included_in_fragmentation_so_far, + atomIdxs_to_which_new_matches_have_to_be_adjacent, + n_max_fragmentations_to_find=-1, + ): + + n_completed_fragmentations = len(completed_fragmentations) + incomplete_fragmentation_found = False + complete_fragmentation_found = False + + if len(completed_fragmentations) == n_max_fragmentations_to_find: + return ( + completed_fragmentations, + groups_leading_to_incomplete_fragmentations, + incomplete_fragmentation_found, + ) + + for group_number in self.fragmentation_scheme_order: + list_SMARTS = self.fragmentation_scheme[group_number] + + if complete_fragmentation_found: + break + + if type(list_SMARTS) is not list: + list_SMARTS = [list_SMARTS] + + for 
SMARTS in list_SMARTS: + if complete_fragmentation_found: + break + + if SMARTS != "": + matches = Fragmenter.get_substruct_matches( + self._fragmentation_scheme_pattern_lookup[SMARTS], + mol_searched_in, + atomIdxs_included_in_fragmentation_so_far, + ) + + for match in matches: + + # only allow non-overlapping matches + all_atoms_are_unassigned = ( + atomIdxs_included_in_fragmentation_so_far.isdisjoint(match) + ) + if not all_atoms_are_unassigned: + continue + + # only allow matches that do not contain groups leading to incomplete matches + for ( + groups_leading_to_incomplete_fragmentation + ) in groups_leading_to_incomplete_fragmentations: + if Fragmenter.__is_fragmentation_subset_of_other_fragmentation( + groups_leading_to_incomplete_fragmentation, + fragmentation_so_far, + ): + return ( + completed_fragmentations, + groups_leading_to_incomplete_fragmentations, + incomplete_fragmentation_found, + ) + + # only allow matches that will lead to new fragmentations + use_this_match = True + n_found_groups = len(fragmentation_so_far) + + for completed_fragmentation in completed_fragmentations: + + if not SMARTS in completed_fragmentation: + continue + + if n_found_groups == 0: + use_this_match = not Fragmenter.__is_match_contained_in_fragmentation( + match, SMARTS, completed_fragmentation + ) + else: + if Fragmenter.__is_fragmentation_subset_of_other_fragmentation( + fragmentation_so_far, completed_fragmentation + ): + use_this_match = not Fragmenter.__is_match_contained_in_fragmentation( + match, SMARTS, completed_fragmentation + ) + + if not use_this_match: + break + + if not use_this_match: + continue + + # make a deepcopy here, otherwise the variables are modified down the road + # marshal is used here because it works faster than copy.deepcopy + this_SMARTS_fragmentation_so_far = marshal.loads( + marshal.dumps(fragmentation_so_far) + ) + this_SMARTS_atomIdxs_included_in_fragmentation_so_far = ( + atomIdxs_included_in_fragmentation_so_far.copy() + ) + + if not 
SMARTS in this_SMARTS_fragmentation_so_far: + this_SMARTS_fragmentation_so_far[SMARTS] = [] + + this_SMARTS_fragmentation_so_far[SMARTS].append(match) + this_SMARTS_atomIdxs_included_in_fragmentation_so_far.update( + match + ) + + # only allow matches that do not contain groups leading to incomplete matches + for ( + groups_leading_to_incomplete_match + ) in groups_leading_to_incomplete_fragmentations: + if Fragmenter.__is_fragmentation_subset_of_other_fragmentation( + groups_leading_to_incomplete_match, + this_SMARTS_fragmentation_so_far, + ): + use_this_match = False + break + + if not use_this_match: + continue + + # if the complete molecule has not been fragmented, continue to do so + if ( + len(this_SMARTS_atomIdxs_included_in_fragmentation_so_far) + < heavy_atom_count + ): + ( + completed_fragmentations, + groups_leading_to_incomplete_fragmentations, + incomplete_fragmentation_found, + ) = self.__get_next_non_overlapping_adjacent_match_recursively( + mol_searched_in, + heavy_atom_count, + completed_fragmentations, + groups_leading_to_incomplete_fragmentations, + this_SMARTS_fragmentation_so_far, + this_SMARTS_atomIdxs_included_in_fragmentation_so_far, + this_SMARTS_atomIdxs_included_in_fragmentation_so_far, + n_max_fragmentations_to_find, + ) + break + + # if the complete molecule has been fragmented, save and return + if ( + len(this_SMARTS_atomIdxs_included_in_fragmentation_so_far) + == heavy_atom_count + ): + completed_fragmentations.append( + this_SMARTS_fragmentation_so_far + ) + complete_fragmentation_found = True + break + + # if until here no new fragmentation was found check whether an incomplete fragmentation was found + if n_completed_fragmentations == len(completed_fragmentations): + + if not incomplete_fragmentation_found: + + incomplete_matched_groups = {} + + if len(atomIdxs_included_in_fragmentation_so_far) > 0: + unassignes_atom_idx = set(range(0, heavy_atom_count)).difference( + atomIdxs_included_in_fragmentation_so_far + ) + for atom_idx in 
unassignes_atom_idx: + neighbor_atoms_idx = [ + i.GetIdx() + for i in mol_searched_in.GetAtomWithIdx( + atom_idx + ).GetNeighbors() + ] + + for neighbor_atom_idx in neighbor_atoms_idx: + for ( + found_smarts, + found_matches, + ) in fragmentation_so_far.items(): + for found_match in found_matches: + if neighbor_atom_idx in found_match: + if ( + not found_smarts + in incomplete_matched_groups + ): + incomplete_matched_groups[found_smarts] = [] + + if ( + found_match + not in incomplete_matched_groups[ + found_smarts + ] + ): + incomplete_matched_groups[ + found_smarts + ].append(found_match) + + is_subset_of_groups_already_found = False + indexes_to_remove = [] + + for idx, groups_leading_to_incomplete_match in enumerate( + groups_leading_to_incomplete_fragmentations + ): + is_subset_of_groups_already_found = ( + Fragmenter.__is_fragmentation_subset_of_other_fragmentation( + incomplete_matched_groups, + groups_leading_to_incomplete_match, + ) + ) + if is_subset_of_groups_already_found: + indexes_to_remove.append(idx) + + for index in sorted(indexes_to_remove, reverse=True): + del groups_leading_to_incomplete_fragmentations[index] + + groups_leading_to_incomplete_fragmentations.append( + incomplete_matched_groups + ) + groups_leading_to_incomplete_fragmentations = sorted( + groups_leading_to_incomplete_fragmentations, key=len + ) + + incomplete_fragmentation_found = True + + return ( + completed_fragmentations, + groups_leading_to_incomplete_fragmentations, + incomplete_fragmentation_found, + ) + + @classmethod + def __is_fragmentation_subset_of_other_fragmentation( + cls, fragmentation, other_fragmentation + ): + n_found_groups = len(fragmentation) + n_found_other_groups = len(other_fragmentation) + + if n_found_groups == 0: + return False + + if n_found_other_groups < n_found_groups: + return False + + n_found_SMARTS_that_are_subset = 0 + for found_SMARTS, _ in fragmentation.items(): + if found_SMARTS in other_fragmentation: + found_matches_set = set( + 
frozenset(i) for i in fragmentation[found_SMARTS] + ) + found_other_matches_set = set( + frozenset(i) for i in other_fragmentation[found_SMARTS] + ) + + if found_matches_set.issubset(found_other_matches_set): + n_found_SMARTS_that_are_subset += 1 + else: + return False + + return n_found_SMARTS_that_are_subset == n_found_groups + + @classmethod + def __is_match_contained_in_fragmentation(cls, match, SMARTS, fragmentation): + if not SMARTS in fragmentation: + return False + + found_matches_set = set(frozenset(i) for i in fragmentation[SMARTS]) + match_set = set(match) + + return match_set in found_matches_set + + +if __name__ == "__main__": + + smiles = ["CCCCO", "CCCO", "CCO", "CO"] + fragmentation_scheme = { + "CH2": "[CH2]", + "OH": "[OH]", + "CH3": "[CH3]", + "CH2-CH2": "[CH2][CH2]", + } + fragmentation_scheme_order1 = ["CH2-CH2", "CH3", "CH2", "OH"] + + print("simple algorithm 1") + frg = Fragmenter( + fragmentation_scheme, + fragmentation_scheme_order=fragmentation_scheme_order1, + algorithm="simple", + ) + for smi in smiles: + fragmentation, success, fragmentation_matches = frg.fragment(smi) + print(smi, fragmentation) + + print() + print("simple algorithm 2") + fragmentation_scheme_order2 = ["CH3", "CH2", "CH2-CH2", "OH"] + frg = Fragmenter( + fragmentation_scheme, + fragmentation_scheme_order=fragmentation_scheme_order2, + algorithm="simple", + ) + for smi in smiles: + fragmentation, success, fragmentation_matches = frg.fragment(smi) + print(smi, fragmentation) + + print() + print("complete algorithm 1") + frg = Fragmenter( + fragmentation_scheme, + algorithm="complete", + n_atoms_cuttoff=30, + function_to_choose_fragmentation=lambda x: x[0], + ) + for smi in smiles: + fragmentation, success, fragmentation_matches = frg.fragment(smi) + print(smi, fragmentation) + + print() + print("complete algorithm 2") + frg = Fragmenter( + fragmentation_scheme, + algorithm="complete", + n_atoms_cuttoff=30, + function_to_choose_fragmentation=lambda x: x[0], + ) + for smi 
in smiles: + fragmentations, success, fragmentations_matches = frg.fragment_complete(smi) + print(smi, fragmentations) + print( + fragmentations_matches + ) # some of the fragmentations are the same, but the found fragmentation_matches are different. diff --git a/src/gcms_data_analysis/main.py b/src/gcms_data_analysis/main.py index da8095b..d5ee66f 100644 --- a/src/gcms_data_analysis/main.py +++ b/src/gcms_data_analysis/main.py @@ -5,9 +5,6 @@ @author: mp933 """ - -# %% -import marshal import pathlib as plib import numpy as np import pandas as pd @@ -16,510 +13,73 @@ import seaborn as sns import ele import pubchempy as pcp -from rdkit import Chem -from rdkit.Chem import DataStructs -from rdkit.Chem import rdmolops -from rdkit.Chem.AllChem import ( - GetMorganFingerprintAsBitVect, -) # pylint: disable=no-name-in-module - - -def figure_create( - rows=1, - cols=1, - plot_type=0, - paper_col=1, - hgt_mltp=1, - font="Dejavu Sans", - sns_style="ticks", -): - """ - This function creates all the necessary objects to produce plots with - replicable characteristics. +from gcms_data_analysis.fragmenter import Fragmenter - Parameters - ---------- - rows : int, optional - Number of plot rows in the grid. The default is 1. - cols : int, optional - Number of plot columns in the grid. The default is 1. - plot_type : int, optional - One of the different plot types available. The default is 0. - Plot types and their labels: - 0. Std: standard plot (single or grid rows x cols) - 1. Twin-x: secondary axis plot (single or grid rows x cols) - 5. Subplots with different heights - 6. Multiplot without internal x and y tick labels - 7. Multiplot without internal x tick labels - 8. Plot with specific distances between subplots and different heights - paper_col : int, optional - Single or double column size for the plot, meaning the actual space - it will fit in a paper. The default is 1. - hgt_mltp: float, optional - Multiplies the figure height. Default is 1. 
Best using values between - 0.65 and 2. May not work with multiplot and paper_col=1 or out of the - specified range. - font: str, optional - If the string 'Times' is given, it sets Times New Roman as the default - font for the plot, otherwise the default Dejavu Sans is maintained. - Default is 'Dejavu Sans'. - sns_style: str, optional - The style of the seaborn plot. The default is 'ticks'. - Returns - ------- - fig : object - The figure object to be passed to figure_save. - lst_ax : list of axis - List of axis (it is a list even with 1 axis) on which to plot. - lst_axt : list of axis - List of secondary axis (it is a list even with 1 axis). - fig_par : list of float - List of parameters to reserve space around the plot canvas. - - Raises - ------ - ValueError - If cols > 2, which is not supported. - - """ - sns.set_palette("deep") - # set Times New Roman as the plot font fot text - if font == "Times" or font == "Times New Roman": - # this may require the installation of the font package - sns.set_style(sns_style, {"font.family": "Times New Roman"}) - else: # leave Dejavu Sans (default) as the plot font fot text - sns.set_style(sns_style) - # single or double column in paperthat the figure will occupy - if cols > 2: # numer of columns (thus of plots in the figure) - raise ValueError("\n figure_create: cols>2 not supported") - - # width of the figure in inches, it's fixed to keep the same text size - # is 6, 9, 12 for 1, 1.5, and 3 paper_col (columns in paper) - fig_wdt = 6 * paper_col # width of the plot in inches - fig_hgt = 4 * paper_col * rows / cols * hgt_mltp # heigth of the figure in inches - px = 0.06 * (6 / fig_wdt) * cols # set px so that (A) fits the square - py = px * fig_wdt / fig_hgt / cols * rows / hgt_mltp # set py so that (A) fits - # if more rows are added, it increases, but if cols areadded it decreases - # to maintain the plot ratio - # set plot margins - sp_lab_wdt = 0.156 / paper_col # hor. 
space for labels - sp_nar_wdt = 0.02294 / paper_col # space narrow no labels (horiz) - sp_lab_hgt = 0.147 / paper_col / rows * cols / hgt_mltp # space for labels (vert) - sp_nar_hgt = 0.02 / paper_col / rows * cols / hgt_mltp # space narrow no labels - # (vert) - # ========================================================================= - # # 0. Std: standard plot (single or grid rows x cols) - # ========================================================================= - if plot_type == 0: - fig, ax = plt.subplots(rows, cols, figsize=(fig_wdt, fig_hgt)) - if rows * cols == 1: # only 1 plot - lst_ax = [ax] # create ax list for uniform iterations over 1 obj. - elif rows * cols > 1: # more than one plot - lst_ax = [axs for axs in ax.flatten()] # create list of axis - lst_axt = None # no secondary axis in this plot_type - # horizontal space between plot in percentage - sp_btp_wdt = 0.26 * paper_col**2 - 1.09 * paper_col + 1.35 - # vertical space between plot in percentage !!! needs DEBUG - sp_btp_hgt = 0.2 / paper_col * cols / hgt_mltp - # left, bottom, right, top, widthspace, heightspace - fig_par = [ - sp_lab_wdt, - sp_lab_hgt, - 1 - sp_nar_wdt, - 1 - sp_nar_hgt, - sp_btp_wdt, - sp_btp_hgt, - px, - py, - ] - # ========================================================================= - # # 1. Twin-x: secondary axis plot (single or grid rows x cols) - # ========================================================================= - elif plot_type == 1: - fig, ax = plt.subplots(rows, cols, figsize=(fig_wdt, fig_hgt)) - if rows * cols == 1: # only 1 plot - lst_ax = [ax] # create ax list for uniform iterations over 1 obj. - lst_axt = [ax.twinx()] # create a list with secondary axis object - elif rows * cols > 1: # more than one plot - lst_ax = [axs for axs in ax.flatten()] # create list of axis - # create list of secondary twin axis - lst_axt = [axs.twinx() for axs in ax.flatten()] - # horizontal space between plot in percentage !!! 
needs DEBUG - sp_btp_wdt = 1.36 * paper_col**2 - 5.28 * paper_col + 5.57 - # vertical space between plot in percentage !!! needs DEBUG - sp_btp_hgt = 0.2 / paper_col * cols / hgt_mltp - # left, bottom, right(DIFFERENT FROM STD), top, widthspace, heightspace - fig_par = [ - sp_lab_wdt, - sp_lab_hgt, - 1 - sp_lab_wdt, - 1 - sp_nar_hgt, - sp_btp_wdt, - sp_btp_hgt, - px, - py, - ] - - return fig, lst_ax, lst_axt, fig_par - - -def figure_save( - filename, - out_path, - fig, - lst_ax, - lst_axt, - fig_par, - x_lab=None, - y_lab=None, - yt_lab=None, - x_lim=None, - y_lim=None, - yt_lim=None, - x_ticks=None, - y_ticks=None, - yt_ticks=None, - x_tick_labels=None, - y_tick_labels=None, - yt_tick_labels=None, - legend=None, - ncol_leg=1, - annotate_lttrs=False, - annotate_lttrs_loc="down", - pdf=False, - svg=False, - eps=False, - transparency=False, - subfolder=None, - tight_layout=False, - grid=False, - title=False, - set_size_inches=None, -): - """ - This function takes the objects created in figure_create and allows modifying - their appearance and saving the results. - - Parameters - ---------- - filename : str - Name of the figure. It is the name of the PNG or PDF file to be saved. - out_path : pathlib.Path object - Path to the output folder. - fig : figure object - Created in figure_save. - lst_ax : list of axis - Created in figure_create. - lst_axt : list of twin (secondary) axis - Created in figure_create. - fig_par : list - Figure parameters for space settings: left, bottom, right, top, widthspace, heightspace, px, py. Created in figure_create. - tight_layout : bool, optional - If True, ignores fig_par[0:6] and fits the figure to the tightest layout possible. Avoids losing part of the figure but loses control of margins. The default is False. - x_lab : str or list, optional - Label of the x-axis. The default is None. Can be given as: - - None: No axis gets an xlabel. - - 'label': A single string; all axes get the same xlabel. 
- - ['label1', None, 'Label2', ...]: A list matching the size of lst_ax containing labels and/or None values. Each axis is assigned its label; where None is given, no label is set. - y_lab : str, optional - Label of the y-axis. The default is None. Same options as x_lab. - yt_lab : str, optional - Label of the secondary y-axis. The default is None. Same options as x_lab. - x_lim : list, optional - Limits of the x-axis. The default is None. Can be given as: - - None: No axis gets an xlim. - - [a,b]: All axes get the same xlim. - - [[a,b], None, [c,d], ...]: A list matching the size of lst_ax containing [a,b] ranges and/or None values. Each axis is assigned its limit; where None is given, no limit is set. - y_lim : list, optional - Limits of the y-axis. The default is None. Same options as x_lim. - yt_lim : list, optional - Limits of the secondary y-axis. The default is None. Same options as x_lim. - x_ticks : list, optional - Ticks values to be shown on the x-axis. The default is None. - y_ticks : list, optional - Ticks values to be shown on the y-axis. The default is None. - yt_ticks : list, optional - Ticks values to be shown on the secondary y-axis. The default is None. - legend : str, optional - Contains info on the legend location. To avoid printing the legend (also in case it is empty), set it to None. The default is 'best'. - ncol_leg : int, optional - Number of columns in the legend. The default is 1. - annotate_lttrs : bool, optional - If True, each plot is assigned a letter in the lower left corner. The default is False. If a string is given, the string is used as the letter in the plot even for single plots. - annotate_lttrs_loc : str - Placement of annotation letters. 'down' for bottom-left, 'up' for top-left. The default is 'down'. - pdf : bool, optional - If True, saves the figure also in PDF format in the output folder. 
The default is False, so only a PNG file with - """ - - fig_adj_par = fig_par[0:6] - if not any(fig_par[0:6]): # True if all element in fig_par[0:6] are False - tight_layout = True - px = fig_par[6] - py = fig_par[7] - n_ax = len(lst_ax) # number of ax objects - # for x_lab, y_lab, yt_lab creates a list with same length as n_ax. - # only one value is given all axis are given the same label - # if a list is given, each axis is given a different value, where False - # is specified, no value is given to that particular axis - vrbls = [x_lab, y_lab, yt_lab, legend] # collect variables for iteration - lst_x_lab, lst_y_lab, lst_yt_lab, lst_legend = ( - [], - [], - [], - [], - ) # create lists for iteration - lst_vrbls = [lst_x_lab, lst_y_lab, lst_yt_lab, lst_legend] # collect lists - for vrbl, lst_vrbl in zip(vrbls, lst_vrbls): - if vrbl is None: # label is not given for any axis - lst_vrbl[:] = [None] * n_ax - else: # label is given - if np.size(vrbl) == 1: # only one value is given - if isinstance(vrbl, str): # create a list before replicating it - lst_vrbl[:] = [vrbl] * n_ax # each axis gets same label - elif isinstance(vrbl, list): # replicate the list - lst_vrbl[:] = vrbl * n_ax # each axis gets same label - elif np.size(vrbl) == n_ax: # each axis has been assigned its lab - lst_vrbl[:] = vrbl # copy the label inside the list +def get_compound_from_pubchempy(comp_name: str) -> pcp.Compound: + if not isinstance(comp_name, str): + return None + if comp_name == " " or comp_name == "": + return None + cond = True + while cond: # to deal with HTML issues on server sides (timeouts) + try: + # comp contains all info about the chemical from pubchem + try: + comp_inside_list = pcp.get_compounds(comp_name, "name") + except ValueError: + print(f"{comp_name = }") + return None + if comp_inside_list: + comp = comp_inside_list[0] else: - print(vrbl) - print("Labels/legend size does not match axes number") - # for x_lim, y_lim, yt_lim creates a list with same length as n_ax. 
- # If one list like [a,b] is given, all axis have the same limits, if a list - # of the same length of the axis is given, each axis has its lim. Where - # None is given, no lim is set on that axis - vrbls = [ - x_lim, - y_lim, - yt_lim, - x_ticks, - y_ticks, - yt_ticks, - x_tick_labels, - y_tick_labels, - yt_tick_labels, - ] # collect variables for iteration - ( - lst_x_lim, - lst_y_lim, - lst_yt_lim, - lst_x_ticks, - lst_y_ticks, - lst_yt_ticks, - lst_x_tick_labels, - lst_y_tick_labels, - lst_yt_tick_labels, - ) = ( - [], - [], - [], - [], - [], - [], - [], - [], - [], - ) # create lists for iteration - lst_vrbls = [ - lst_x_lim, - lst_y_lim, - lst_yt_lim, - lst_x_ticks, - lst_y_ticks, - lst_yt_ticks, - lst_x_tick_labels, - lst_y_tick_labels, - lst_yt_tick_labels, - ] # collect lists - for vrbl, lst_vrbl in zip(vrbls, lst_vrbls): - if vrbl is None: # limit is not given for any axis - lst_vrbl[:] = [None] * n_ax - else: - # if only list and None are in vrbl, it is [[], None, [], ..] - # each axis has been assigned its limits - if any([isinstance(v, (int, float, np.int32, str)) for v in vrbl]): - temporary = [] # necessary to allow append on [:] - for i in range(n_ax): - temporary.append(vrbl) # give it to all axis - lst_vrbl[:] = temporary - else: # x_lim=[[a,b], None, ...] 
= [list, bool] # no float - lst_vrbl[:] = vrbl # a lim for each axis is already given - # loops over each axs in the ax array and set the different properties - for i, axs in enumerate(lst_ax): - # for each property, if the variable is not false, it is set - if lst_x_lab[i] is not None: - axs.set_xlabel(lst_x_lab[i]) - if lst_y_lab[i] is not None: - axs.set_ylabel(lst_y_lab[i]) - if lst_x_lim[i] is not None: - axs.set_xlim( - [ - lst_x_lim[i][0] * (1 + px) - px * lst_x_lim[i][1], - lst_x_lim[i][1] * (1 + px) - px * lst_x_lim[i][0], - ] - ) - if lst_y_lim[i] is not None: - axs.set_ylim( - [ - lst_y_lim[i][0] * (1 + py) - py * lst_y_lim[i][1], - lst_y_lim[i][1] * (1 + py) - py * lst_y_lim[i][0], - ] - ) - if lst_x_ticks[i] is not None: - axs.set_xticks(lst_x_ticks[i]) - if lst_y_ticks[i] is not None: - axs.set_yticks(lst_y_ticks[i]) - if lst_x_tick_labels[i] is not None: - axs.set_xticklabels(lst_x_tick_labels[i]) - if lst_y_tick_labels[i] is not None: - axs.set_yticklabels(lst_y_tick_labels[i]) - if grid: - axs.grid(True) - if annotate_lttrs is not False: - if annotate_lttrs_loc == "down": - y_lttrs = py / px * 0.02 - elif annotate_lttrs_loc == "up": - y_lttrs = 1 - py - if n_ax == 1: # if only one plot is given, do not put the letters - axs.annotate( - "(" + annotate_lttrs + ")", - xycoords="axes fraction", - xy=(0, 0), - rotation=0, - size="large", - xytext=(0, y_lttrs), - weight="bold", + print( + f"WARNING: name_to_properties {comp_name=} does not find an entry in pcp", ) - elif n_ax > 1: # if only one plot is given, do not put the letters - try: # if specific letters are provided - axs.annotate( - "(" + annotate_lttrs[i] + ")", - xycoords="axes fraction", - xy=(0, 0), - rotation=0, - size="large", - xytext=(0, y_lttrs), - weight="bold", - ) - except TypeError: # if no specific letters, use lttrs - lttrs = [ - "a", - "b", - "c", - "d", - "e", - "f", - "g", - "h", - "i", - "j", - "k", - "l", - "m", - "n", - "o", - "p", - "q", - "r", - ] - axs.annotate( - "(" + 
lttrs[i] + ")", - xycoords="axes fraction", - xy=(0, 0), - rotation=0, - size="large", - xytext=(0, y_lttrs), - weight="bold", - ) + return None + cond = False + except pcp.PubChemHTTPError: # timeout error, simply try again + print("Caught: pcp.PubChemHTTPError (keep trying)") + return comp + + +def _order_columns_in_compounds_properties( + unsorted_df: pd.DataFrame | None, +) -> pd.DataFrame | None: + if unsorted_df is None: + return None + + # Define a custom sort key function + def sort_key(col): + if col.startswith("el_mf"): + return (2, col) + elif col.startswith("el_"): + return (1, col) + elif col.startswith("fg_mf_unclassified"): + return (5, col) + elif col.startswith("fg_mf"): + return (4, col) + elif col.startswith("fg_"): + return (3, col) + else: + return (0, col) - # if secondary (twin) axis are given, set thier properties - if lst_axt is not None: - for i, axst in enumerate(lst_axt): - axst.grid(False) # grid is always false on secondaty axis - # for each property, if the variable is not false, it is set - if lst_yt_lab[i] is not None: - axst.set_ylabel(lst_yt_lab[i]) - if lst_yt_lim[i] is not None: - axst.set_ylim( - [ - lst_yt_lim[i][0] * (1 + py) - py * lst_yt_lim[i][1], - lst_yt_lim[i][1] * (1 + py) - py * lst_yt_lim[i][0], - ] - ) - if lst_yt_ticks[i] is not None: - axst.set_yticks(lst_yt_ticks[i]) - if lst_yt_tick_labels[i] is not None: - axst.set_yticklabels(lst_yt_tick_labels[i]) - # create a legend merging the entries for each couple of ax and axt - if any(lst_legend): - if lst_axt is None: # with no axt, only axs in ax needs a legend - for i, axs in enumerate(lst_ax): - axs.legend(loc=lst_legend[i], ncol=ncol_leg) - else: # merge the legend for each couple of ax and axt - i = 0 - for axs, axst in zip(lst_ax, lst_axt): - hnd_ax, lab_ax = axs.get_legend_handles_labels() - hnd_axt, lab_axt = axst.get_legend_handles_labels() - axs.legend( - hnd_ax + hnd_axt, lab_ax + lab_axt, loc=lst_legend[i], ncol=ncol_leg - ) - i += 1 - try: - 
fig.align_labels() # align labels of subplots, needed only for multi plot - except AttributeError: - print("align_labels not performed") - # if a subfolder is specified, create the subfolder inside the output - # folder if not already there and save the figure in it - if subfolder is not None: - out_path = plib.Path(out_path, subfolder) # update out_path - plib.Path(out_path).mkdir(parents=True, exist_ok=True) # check if - # folder is there, if not create it - # set figure margins and save the figure in the output folder - if set_size_inches: - fig.set_size_inches(set_size_inches) - if tight_layout is False: # if margins are given sets margins and save - fig.subplots_adjust(*fig_adj_par[0:6]) # set margins - plt.savefig( - plib.Path(out_path, filename + ".png"), dpi=300, transparent=transparency - ) - if pdf is not False: # save also as pdf - plt.savefig(plib.Path(out_path, filename + ".pdf")) - if svg is not False: # save also as pdf - plt.savefig(plib.Path(out_path, filename + ".svg")) - if eps is not False: # save also as pdf - plt.savefig(plib.Path(out_path, filename + ".eps")) - else: # margins are not given, use a tight layout option and save - plt.savefig( - plib.Path(out_path, filename + ".png"), - bbox_inches="tight", - dpi=300, - transparent=transparency, - ) - if pdf is not False: # save also as pdf - plt.savefig(plib.Path(out_path, filename + ".pdf"), bbox_inches="tight") - if svg is not False: # save also as pdf - plt.savefig(plib.Path(out_path, filename + ".svg"), bbox_inches="tight") - if eps is not False: # save also as pdf - plt.savefig(plib.Path(out_path, filename + ".eps"), bbox_inches="tight") - # add the title after saving, so it's only visible in the console - if title is True: - lst_ax[0].annotate( - filename, - xycoords="axes fraction", - size="small", - xy=(0, 0), - xytext=(0.05, 0.95), - clip_on=True, - ) + # Sort columns using the custom key + sorted_columns = sorted(unsorted_df.columns, key=sort_key) + sorted_df = 
unsorted_df.reindex(sorted_columns, axis=1) + sorted_df.index.name = "comp_name" + # Reindex the DataFrame with the sorted columns + return sorted_df def name_to_properties( comp_name: str, - df: pd.DataFrame, dict_classes_to_codes: dict[str:str], dict_classes_to_mass_fractions: dict[str:float], -): + df: pd.DataFrame | None = None, + precision_sum_elements: float = 0.05, + precision_sum_functional_group: float = 0.05, +) -> pd.DataFrame | None: """ used to retrieve chemical properties of the compound indicated by the comp_name and to store those properties in the df @@ -547,29 +107,18 @@ def name_to_properties( """ # classes used to split compounds into functional groups - cond = True - while cond: # to deal with HTML issues on server sides (timeouts) - try: - # comp contains all info about the chemical from pubchem - try: - comp_inside_list = pcp.get_compounds(comp_name, "name") - except ValueError: - print(f"{comp_name = }") - if comp_inside_list: - comp = comp_inside_list[0] + comp = get_compound_from_pubchempy(comp_name) + + if comp is None: + if not isinstance(comp_name, str): + return df + else: + if not comp_name or comp_name.isspace(): + return df else: - print( - "WARNING: name_to_properties ", - comp_name, - " does not find an entry in pcp", - ) - df.loc[comp_name, "iupac_name"] = "unidentified" + if df is not None: + df.loc[comp_name, "iupac_name"] = "unidentified" return df - cond = False - except pcp.PubChemHTTPError: # timeout error, simply try again - print("Caught: pcp.PubChemHTTPError") - - # fill the df with the data if df is None: df = pd.DataFrame(dtype=float) try: @@ -586,19 +135,38 @@ def name_to_properties( TypeError ): # float() argument must be a string or a real number, not 'NoneType' df.loc[comp_name, "xlogp"] = np.nan - # count all atoms presence and compoute mass percentage elements = set(comp.to_dict()["elements"]) + el_dict = {} + el_mf_dict = {} + for el in elements: el_count = comp.to_dict()["elements"].count(el) el_mass = 
ele.element_from_symbol(el).mass - if not "el_" + el in df: - df["el_" + el] = 0 - df["el_mf_" + el] = 0.0 - df.loc[comp_name, "el_" + el] = int(el_count) - df.loc[comp_name, "el_mf_" + el] = ( + + # Using similar logic as in the fg_dict example + if el not in el_dict: + el_dict[el] = 0 + el_mf_dict[el] = 0.0 + + el_dict[el] += int(el_count) + el_mf_dict[el] += ( float(el_count) * float(el_mass) / float(comp.molecular_weight) ) - + # Now, update the DataFrame in a similar way to the fg_dict example + for key, value in el_dict.items(): + df.at[comp_name, f"el_{key}"] = int(value) + + for key, value in el_mf_dict.items(): + df.at[comp_name, f"el_{key}"] = float(value) + cols_el_mf = [col for col in df.columns if col.startswith("el_mf")] + residual_els = df.loc[comp_name, cols_el_mf].sum() - 1 + # check element sum + try: + assert residual_els <= precision_sum_elements + except AssertionError: + raise AssertionError( + f"the total mass fraction of elements in {comp_name =} is > 0.001" + ) # apply fragmentation using the Fragmenter class (thanks simonmb) frg = Fragmenter( dict_classes_to_codes, @@ -606,108 +174,60 @@ def name_to_properties( algorithm="simple", ) fragmentation, _, _ = frg.fragment(comp.canonical_smiles) - classes = list(fragmentation.keys()) - classes_mf = ["mf_" + cl for cl in classes] - # df is the intermediate df for classes that helps with sums of - # similar classes (ex. 
there are 27 different configs for ketones that - # go in the same final class) - - newdf = pd.DataFrame( - 0, columns=classes + classes_mf, index=[comp_name], dtype=float - ) - - # print(f"{df.loc[comp_name, :]}") - # mol_weight = df.loc[comp_name, "molecular_weight"] - - for cl in classes: # get counts and mf of each class in compound - newdf.loc[comp_name, cl] = fragmentation[cl] # counts in + fg_dict = {} + fg_mf_dict = {} + # Iterate over each item in the dictionary + for key, value in fragmentation.items(): + # Determine the root key (the part before an underscore, if present) + root_key = key.split("_")[0] + # if root_key in hetero_atoms: + # pass + # Check if the root key is in the sum_dict; if not, initialize it + if root_key not in fg_dict: + fg_dict[root_key] = 0 + fg_mf_dict[root_key] = 0 + # Add the value to the corresponding root key in the sum_dict + fg_dict[root_key] += int(fragmentation[key]) + fg_mf_dict[root_key] += ( + float(fragmentation[key]) + * float(dict_classes_to_mass_fractions[key]) + / df.loc[comp_name, "molecular_weight"].astype(float) + ) # mass fraction of total + + # Update df with fg_dict + for key, value in fg_dict.items(): + df.at[comp_name, f"fg_{key}"] = int(value) # Update the cell + # Update df with fg_mf_dict + for key, value in fg_mf_dict.items(): + df.at[comp_name, f"fg_mf_{key}"] = float(value) # Update the cell + cols_fg_mf = [col for col in df.columns if col.startswith("fg_mf")] + residual_fgs = df.loc[comp_name, cols_fg_mf].sum() - 1 try: - for cl in classes: # get counts and mf of each class in compound - newdf.loc[comp_name, "mf_" + cl] = ( - float(fragmentation[cl]) - * float(dict_classes_to_mass_fractions[cl]) - / df.loc[comp_name, "molecular_weight"].astype(float) - ) # mass fraction of total - except ValueError: - print(f"{comp_name = }") - # classes that must be summed and considered a single one are identified - # by the same name followed by _#. 
if _ is in a class, its not unique - unique_classes = [c if "_" not in c else c.split("_")[0] for c in classes] - for unique_cl in unique_classes: # sum classes that must be merged - sum_cls = [k for k in classes if unique_cl in k] # classes to be summed - occurr = 0 # counts, or occurrencies - cl_mf = 0.0 # class mass fracations - for cl in sum_cls: # for each class that must be summed - occurr += newdf.loc[comp_name, cl].astype(int) # sum counts - cl_mf += newdf.loc[comp_name, "mf_" + cl].astype( - float - ) # sum mass fractions - if not "fg_" + unique_cl in df: # create columns if missing - df["fg_" + unique_cl] = 0 - df["fg_mf_" + unique_cl] = 0.0 - df.loc[comp_name, "fg_" + unique_cl] = occurr # put values in DF - df.loc[comp_name, "fg_mf_" + unique_cl] = float(cl_mf) - # heteroatoms and Si are considered functional groups as they usually - # enter the discussion in a similar way. The atom count is used here - hetero_atoms = [e for e in elements if e not in ["H", "C", "O", "N", "Si"]] - - if hetero_atoms is not None: - for ha in hetero_atoms: - ha_col = "el_" + ha - ha_mf_col = "el_mf_" + ha - fg_col = "fg_" + ha - fg_mf_col = "fg_mf_" + ha - - # Initialize columns if they don't exist - if fg_col not in df.columns: - df[fg_col] = 0 - if fg_mf_col not in df.columns: - df[fg_mf_col] = 0.0 - - # Aggregate counts and mass fractions for hetero atoms - if ha in elements: # Ensure the element is present before processing - df.loc[comp_name, fg_col] = df.loc[comp_name, ha_col].astype(int) - df.loc[comp_name, fg_mf_col] = df.loc[comp_name, ha_mf_col] - # Handle hetero atoms sum separately if needed - if hetero_atoms: - fg_columns = ["fg_" + e for e in hetero_atoms] - fg_mf_columns = ["fg_mf_" + e for e in hetero_atoms] - - # Handle case when selection returns a Series or a single value - if isinstance(df.loc[comp_name, fg_columns], pd.Series): - fg_sum = df.loc[comp_name, fg_columns].astype(int).sum() - else: # If it's not a Series, it could be a single value (if only 
one column is selected) - fg_sum = df.loc[comp_name, fg_columns].astype(int) - - df.loc[comp_name, "fg_hetero_atoms"] = fg_sum - - # For 'fg_mf_hetero_atoms', assuming you want to assign the value directly - # Here, you might need to handle single/multiple selections differently based on your needs - if isinstance(df.loc[comp_name, fg_mf_columns], pd.Series): - # This assumes you want to somehow aggregate or select from the Series - # Example: selecting the first element if there are multiple. Adjust as needed. - df.loc[comp_name, "fg_mf_hetero_atoms"] = df.loc[ - comp_name, fg_mf_columns - ].iloc[0] - else: - # Direct assignment if it's a single value - df.loc[comp_name, "fg_mf_hetero_atoms"] = df.loc[ - comp_name, fg_mf_columns - ] - df["fg_hetero_atoms"] = df["fg_hetero_atoms"].fillna(0).astype("int64") - df["fg_mf_hetero_atoms"] = df["fg_mf_hetero_atoms"].fillna(0).astype(float) - # Ensure Si is handled correctly if present - if "Si" in elements: - df.loc[comp_name, "fg_Si"] = df.loc[comp_name, "el_Si"].astype(int) - df.loc[comp_name, "fg_mf_Si"] = df.loc[comp_name, "el_mf_Si"] - - fg_mf_cols = [c for c in list(df) if "fg_mf" in c and c != "fg_mf_total"] - df["fg_mf_total"] = df.loc[comp_name, fg_mf_cols].sum() - print("\tInfo: name_to_properties ", comp_name) + assert residual_fgs <= precision_sum_functional_group + except AssertionError: + print(f"{df.loc[comp_name, cols_fg_mf].sum()=}") + raise AssertionError( + f"the total mass fraction of functional groups in {comp_name =} is > 0.05" + ) + if residual_fgs < -precision_sum_functional_group: + df.at[comp_name, f"fg_mf_unclassified"] = abs(residual_fgs) + df.loc[df["iupac_name"] != "unidentified"] = df.loc[ + df["iupac_name"] != "unidentified" + ].fillna(0) + df = _order_columns_in_compounds_properties(df) + return df +# %% def get_iupac_from_pcp(comp_name: str) -> str: + """get iupac name for compound using pubchempy, needs internet connection + + :param comp_name: _description_ + :type comp_name: str + 
:return: lowercase iupac name for the compound + :rtype: str + """ cond = True while cond: # to deal with HTML issues on server sides (timeouts) try: @@ -786,821 +306,6 @@ def report_difference(rep1, rep2, diff_type="absolute"): return dif_ave, dif_std, dif_stdp -def _annotate_outliers_in_plot(ax, df_ave, df_std, y_lim): - """ - Annotates the bars in a bar plot with their average value and standard - deviation if these values exceed the specified y-axis limits. - The function iterates over the bars in the plot and checks if their average - values, considering their standard deviations, are outside the provided - y-axis limits. For such bars, it annotates the average and standard - deviation on the - plot, using a specific format for better visualization and understanding. - - Parameters - ---------- - ax : matplotlib.axes.Axes - The matplotlib Axes object where the plot is drawn. - df_ave : pandas.DataFrame - DataFrame containing the average values used in the plot. - df_std : pandas.DataFrame - DataFrame containing the standard deviation values corresponding - to df_ave. - y_lim : list of [float, float] - A list of two floats representing the minimum (y_lim[0]) and - maximum (y_lim[1]) limits of the y-axis. - - Returns - ------- - None - Modifies the provided Axes object (ax) by adding annotations. 
- - """ - dx = 0.15 * len(df_ave.index) - dy = 0.04 - tform = blended_transform_factory(ax.transData, ax.transAxes) - dfao = pd.DataFrame(columns=["H/L", "xpos", "ypos", "ave", "std", "text"]) - dfao["ave"] = df_ave.transpose().to_numpy().flatten().tolist() - if df_std.empty: - df_std = np.zeros(len(dfao["ave"])) - else: - dfao["std"] = df_std.transpose().to_numpy().flatten().tolist() - try: - dfao["xpos"] = [p.get_x() + p.get_width() / 2 for p in ax.patches] - except ValueError: # otherwise the masking adds twice the columns - dfao["xpos"] = [ - p.get_x() + p.get_width() / 2 for p in ax.patches[: len(ax.patches) // 2] - ] - cond = (dfao["ave"] < y_lim[0]) | (dfao["ave"] > y_lim[1]) - dfao = dfao.drop(dfao[~cond].index) - for ao in dfao.index.tolist(): # loop through bars - if dfao.loc[ao, "ave"] == float("inf"): - dfao.loc[ao, "text"] = "inf" - dfao.loc[ao, "H/L"] = "H" - elif dfao.loc[ao, "ave"] == float("-inf"): - dfao.loc[ao, "text"] = "-inf" - dfao.loc[ao, "H/L"] = "L" - elif dfao.loc[ao, "ave"] > y_lim[1]: - dfao.loc[ao, "H/L"] = "H" - dfao.loc[ao, "text"] = "{:.2f}".format( - round(dfao.loc[ao, "ave"], 2) - ).strip() - if (dfao.loc[ao, "std"] != 0) & (~np.isnan(dfao.loc[ao, "std"])): - dfao.loc[ao, "text"] += r"$\pm$" + "{:.2f}".format( - round(dfao.loc[ao, "std"], 2) - ) - elif dfao.loc[ao, "ave"] < y_lim[0]: - dfao.loc[ao, "H/L"] = "L" - dfao.loc[ao, "text"] = str(round(dfao.loc[ao, "ave"], 2)).strip() - if dfao.loc[ao, "std"] != 0: - dfao.loc[ao, "text"] += r"$\pm$" + "{:.2f}".format( - round(dfao.loc[ao, "std"], 2) - ) - else: - print("Something is wrong", dfao.loc[ao, "ave"]) - for hl, ypos, dy in zip(["L", "H"], [0.02, 0.98], [0.04, -0.04]): - dfao1 = dfao[dfao["H/L"] == hl] - dfao1["ypos"] = ypos - if not dfao1.empty: - dfao1 = dfao1.sort_values("xpos", ascending=True) - dfao1["diffx"] = ( - np.diff(dfao1["xpos"].values, prepend=dfao1["xpos"].values[0]) < dx - ) - dfao1.reset_index(inplace=True) - - for i in dfao1.index.tolist()[1:]: - dfao1.loc[i, 
"ypos"] = ypos - for e in range(i, 0, -1): - if dfao1.loc[e, "diffx"]: - dfao1.loc[e, "ypos"] += dy - else: - break - for ao in dfao1.index.tolist(): - ax.annotate( - dfao1.loc[ao, "text"], - xy=(dfao1.loc[ao, "xpos"], 0), - xycoords=tform, - textcoords=tform, - xytext=(dfao1.loc[ao, "xpos"], dfao1.loc[ao, "ypos"]), - fontsize=9, - ha="center", - va="center", - bbox={ - "boxstyle": "square,pad=0", - "edgecolor": None, - "facecolor": "white", - "alpha": 0.7, - }, - ) - - -class Fragmenter: - """ - Class taken from https://github.com/simonmb/fragmentation_algorithm. - The original version of this algorithm was published in: - "Flexible Heuristic Algorithm for Automatic Molecule Fragmentation: - Application to the UNIFAC Group Contribution Model - DOI: 10.1186/s13321-019-0382-39." - MIT License - - ... - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. 
- """ - - # tested with Python 3.8.8 and RDKit version 2021.09.4 - - # does a substructure match and then checks whether the match - # is adjacent to previous matches - @classmethod - def get_substruct_matches( - cls, - mol_searched_for, - mol_searched_in, - atomIdxs_to_which_new_matches_have_to_be_adjacent, - ): - - valid_matches = [] - - if mol_searched_in.GetNumAtoms() >= mol_searched_for.GetNumAtoms(): - matches = mol_searched_in.GetSubstructMatches(mol_searched_for) - - if matches: - for match in matches: - add_this_match = True - if len(atomIdxs_to_which_new_matches_have_to_be_adjacent) > 0: - add_this_match = False - - for i in match: - for neighbor in mol_searched_in.GetAtomWithIdx( - i - ).GetNeighbors(): - if ( - neighbor.GetIdx() - in atomIdxs_to_which_new_matches_have_to_be_adjacent - ): - add_this_match = True - break - - if add_this_match: - valid_matches.append(match) - - return valid_matches - - # count heavier isotopes of hydrogen correctly - @classmethod - def get_heavy_atom_count(cls, mol): - heavy_atom_count = 0 - for atom in mol.GetAtoms(): - if atom.GetAtomicNum() != 1: - heavy_atom_count += 1 - - return heavy_atom_count - - def __init__( - self, - fragmentation_scheme={}, - fragmentation_scheme_order=None, - match_hydrogens=False, - algorithm="", - n_atoms_cuttoff=-1, - function_to_choose_fragmentation=False, - n_max_fragmentations_to_find=-1, - ): - - if not type(fragmentation_scheme) is dict: - raise TypeError( - "fragmentation_scheme must be a dctionary with integers as keys and either strings or list of strings as values." 
- ) - - if len(fragmentation_scheme) == 0: - raise ValueError("fragmentation_scheme must be provided.") - - if not algorithm in ["simple", "complete", "combined"]: - raise ValueError("Algorithm must be either simple ,complete or combined.") - - if algorithm == "simple": - if n_max_fragmentations_to_find != -1: - raise ValueError( - "Setting n_max_fragmentations_to_find only makes sense with complete or combined algorithm." - ) - - self.algorithm = algorithm - - if algorithm in ["combined", "complete"]: - if n_atoms_cuttoff == -1: - raise ValueError( - "n_atoms_cuttoff needs to be specified for complete or combined algorithms." - ) - - if function_to_choose_fragmentation == False: - raise ValueError( - "function_to_choose_fragmentation needs to be specified for complete or combined algorithms." - ) - - if not callable(function_to_choose_fragmentation): - raise TypeError( - "function_to_choose_fragmentation needs to be a function." - ) - else: - if type(function_to_choose_fragmentation([{}, {}])) != dict: - raise TypeError( - "function_to_choose_fragmentation needs to take a list of fragmentations and choose one of it" - ) - - if n_max_fragmentations_to_find != -1: - if n_max_fragmentations_to_find < 1: - raise ValueError( - "n_max_fragmentations_to_find has to be 1 or higher." 
- ) - - if fragmentation_scheme_order is None: - fragmentation_scheme_order = [] - - if algorithm in ["simple", "combined"]: - assert len(fragmentation_scheme) == len(fragmentation_scheme_order) - else: - fragmentation_scheme_order = [key for key in fragmentation_scheme.keys()] - - self.n_max_fragmentations_to_find = n_max_fragmentations_to_find - - self.n_atoms_cuttoff = n_atoms_cuttoff - - self.match_hydrogens = match_hydrogens - - self.fragmentation_scheme = fragmentation_scheme - - self.function_to_choose_fragmentation = function_to_choose_fragmentation - - # create a lookup dictionaries to faster finding a group number - self._fragmentation_scheme_group_number_lookup = {} - self._fragmentation_scheme_pattern_lookup = {} - self.fragmentation_scheme_order = fragmentation_scheme_order - - for group_number, list_SMARTS in fragmentation_scheme.items(): - - if type(list_SMARTS) is not list: - list_SMARTS = [list_SMARTS] - - for SMARTS in list_SMARTS: - if SMARTS != "": - self._fragmentation_scheme_group_number_lookup[SMARTS] = ( - group_number - ) - - mol_SMARTS = Chem.MolFromSmarts(SMARTS) - self._fragmentation_scheme_pattern_lookup[SMARTS] = mol_SMARTS - - def fragment(self, SMILES_or_molecule): - - if type(SMILES_or_molecule) is str: - mol_SMILES = Chem.MolFromSmiles(SMILES_or_molecule) - mol_SMILES = Chem.AddHs(mol_SMILES) if self.match_hydrogens else mol_SMILES - is_valid_SMILES = mol_SMILES is not None - - if not is_valid_SMILES: - raise ValueError("Following SMILES is not valid: " + SMILES_or_molecule) - - else: - mol_SMILES = SMILES_or_molecule - - # iterate over all separated molecules - success = [] - fragmentation = {} - fragmentation_matches = {} - for mol in rdmolops.GetMolFrags(mol_SMILES, asMols=True): - - this_mol_fragmentation, this_mol_success = self.__get_fragmentation(mol) - - for SMARTS, matches in this_mol_fragmentation.items(): - group_number = self._fragmentation_scheme_group_number_lookup[SMARTS] - - if not group_number in fragmentation: - 
fragmentation[group_number] = 0 - fragmentation_matches[group_number] = [] - - fragmentation[group_number] += len(matches) - fragmentation_matches[group_number].extend(matches) - - success.append(this_mol_success) - - return fragmentation, all(success), fragmentation_matches - - def fragment_complete(self, SMILES_or_molecule): - - if type(SMILES_or_molecule) is str: - mol_SMILES = Chem.MolFromSmiles(SMILES_or_molecule) - mol_SMILES = Chem.AddHs(mol_SMILES) if self.match_hydrogens else mol_SMILES - is_valid_SMILES = mol_SMILES is not None - - if not is_valid_SMILES: - raise ValueError("Following SMILES is not valid: " + SMILES_or_molecule) - - else: - mol_SMILES = SMILES_or_molecule - - if len(rdmolops.GetMolFrags(mol_SMILES)) != 1: - raise ValueError( - "fragment_complete does not accept multifragment molecules." - ) - - temp_fragmentations, success = self.__complete_fragmentation(mol_SMILES) - - fragmentations = [] - fragmentations_matches = [] - for temp_fragmentation in temp_fragmentations: - fragmentation = {} - fragmentation_matches = {} - for SMARTS, matches in temp_fragmentation.items(): - group_number = self._fragmentation_scheme_group_number_lookup[SMARTS] - - fragmentation[group_number] = len(matches) - fragmentation_matches[group_number] = matches - - fragmentations.append(fragmentation) - fragmentations_matches.append(fragmentation_matches) - - return fragmentations, success, fragmentations_matches - - def __get_fragmentation(self, mol_SMILES): - - success = False - fragmentation = {} - if self.algorithm in ["simple", "combined"]: - fragmentation, success = self.__simple_fragmentation(mol_SMILES) - - if success: - return fragmentation, success - - if self.algorithm in ["combined", "complete"]: - fragmentations, success = self.__complete_fragmentation(mol_SMILES) - - if success: - fragmentation = self.function_to_choose_fragmentation(fragmentations) - - return fragmentation, success - - def __simple_fragmentation(self, mol_SMILES): - - if 
self.match_hydrogens: - target_atom_count = len(mol_SMILES.GetAtoms()) - else: - target_atom_count = Fragmenter.get_heavy_atom_count(mol_SMILES) - - success = False - fragmentation = {} - - fragmentation, atomIdxs_included_in_fragmentation = ( - self.__search_non_overlapping_solution(mol_SMILES, {}, set(), set()) - ) - success = len(atomIdxs_included_in_fragmentation) == target_atom_count - - # if not successful, clean up molecule and search again - level = 1 - while not success: - fragmentation_so_far, atomIdxs_included_in_fragmentation_so_far = ( - Fragmenter.__clean_molecule_surrounding_unmatched_atoms( - mol_SMILES, fragmentation, atomIdxs_included_in_fragmentation, level - ) - ) - level += 1 - - if len(atomIdxs_included_in_fragmentation_so_far) == 0: - break - - fragmentation_so_far, atomIdxs_included_in_fragmentation_so_far = ( - self.__search_non_overlapping_solution( - mol_SMILES, - fragmentation_so_far, - atomIdxs_included_in_fragmentation_so_far, - atomIdxs_included_in_fragmentation_so_far, - ) - ) - - success = ( - len(atomIdxs_included_in_fragmentation_so_far) == target_atom_count - ) - - if success: - fragmentation = fragmentation_so_far - - return fragmentation, success - - def __search_non_overlapping_solution( - self, - mol_searched_in, - fragmentation, - atomIdxs_included_in_fragmentation, - atomIdxs_to_which_new_matches_have_to_be_adjacent, - ): - - n_atomIdxs_included_in_fragmentation = ( - len(atomIdxs_included_in_fragmentation) - 1 - ) - - while n_atomIdxs_included_in_fragmentation != len( - atomIdxs_included_in_fragmentation - ): - n_atomIdxs_included_in_fragmentation = len( - atomIdxs_included_in_fragmentation - ) - - for group_number in self.fragmentation_scheme_order: - list_SMARTS = self.fragmentation_scheme[group_number] - if type(list_SMARTS) is not list: - list_SMARTS = [list_SMARTS] - - for SMARTS in list_SMARTS: - if SMARTS != "": - fragmentation, atomIdxs_included_in_fragmentation = ( - self.__get_next_non_overlapping_match( - 
mol_searched_in, - SMARTS, - fragmentation, - atomIdxs_included_in_fragmentation, - atomIdxs_to_which_new_matches_have_to_be_adjacent, - ) - ) - - return fragmentation, atomIdxs_included_in_fragmentation - - def __get_next_non_overlapping_match( - self, - mol_searched_in, - SMARTS, - fragmentation, - atomIdxs_included_in_fragmentation, - atomIdxs_to_which_new_matches_have_to_be_adjacent, - ): - - mol_searched_for = self._fragmentation_scheme_pattern_lookup[SMARTS] - - if atomIdxs_to_which_new_matches_have_to_be_adjacent: - matches = Fragmenter.get_substruct_matches( - mol_searched_for, - mol_searched_in, - atomIdxs_to_which_new_matches_have_to_be_adjacent, - ) - else: - matches = Fragmenter.get_substruct_matches( - mol_searched_for, mol_searched_in, set() - ) - - if matches: - for match in matches: - all_atoms_of_new_match_are_unassigned = ( - atomIdxs_included_in_fragmentation.isdisjoint(match) - ) - - if all_atoms_of_new_match_are_unassigned: - if not SMARTS in fragmentation: - fragmentation[SMARTS] = [] - - fragmentation[SMARTS].append(match) - atomIdxs_included_in_fragmentation.update(match) - - return fragmentation, atomIdxs_included_in_fragmentation - - @classmethod - def __clean_molecule_surrounding_unmatched_atoms( - cls, mol_searched_in, fragmentation, atomIdxs_included_in_fragmentation, level - ): - - for i in range(0, level): - - atoms_missing = set( - range(0, Fragmenter.get_heavy_atom_count(mol_searched_in)) - ).difference(atomIdxs_included_in_fragmentation) - - new_fragmentation = marshal.loads(marshal.dumps(fragmentation)) - - for atomIdx in atoms_missing: - for neighbor in mol_searched_in.GetAtomWithIdx(atomIdx).GetNeighbors(): - for smart, atoms_found in fragmentation.items(): - for atoms in atoms_found: - if neighbor.GetIdx() in atoms: - if smart in new_fragmentation: - if new_fragmentation[smart].count(atoms) > 0: - new_fragmentation[smart].remove(atoms) - - if smart in new_fragmentation: - if len(new_fragmentation[smart]) == 0: - 
new_fragmentation.pop(smart) - - new_atomIdxs_included_in_fragmentation = set() - for i in new_fragmentation.values(): - for j in i: - new_atomIdxs_included_in_fragmentation.update(j) - - atomIdxs_included_in_fragmentation = new_atomIdxs_included_in_fragmentation - fragmentation = new_fragmentation - - return fragmentation, atomIdxs_included_in_fragmentation - - def __complete_fragmentation(self, mol_SMILES): - - heavy_atom_count = Fragmenter.get_heavy_atom_count(mol_SMILES) - - if heavy_atom_count > self.n_atoms_cuttoff: - return {}, False - - completed_fragmentations = [] - groups_leading_to_incomplete_fragmentations = [] - ( - completed_fragmentations, - groups_leading_to_incomplete_fragmentations, - incomplete_fragmentation_found, - ) = self.__get_next_non_overlapping_adjacent_match_recursively( - mol_SMILES, - heavy_atom_count, - completed_fragmentations, - groups_leading_to_incomplete_fragmentations, - {}, - set(), - set(), - self.n_max_fragmentations_to_find, - ) - success = len(completed_fragmentations) > 0 - - return completed_fragmentations, success - - def __get_next_non_overlapping_adjacent_match_recursively( - self, - mol_searched_in, - heavy_atom_count, - completed_fragmentations, - groups_leading_to_incomplete_fragmentations, - fragmentation_so_far, - atomIdxs_included_in_fragmentation_so_far, - atomIdxs_to_which_new_matches_have_to_be_adjacent, - n_max_fragmentations_to_find=-1, - ): - - n_completed_fragmentations = len(completed_fragmentations) - incomplete_fragmentation_found = False - complete_fragmentation_found = False - - if len(completed_fragmentations) == n_max_fragmentations_to_find: - return ( - completed_fragmentations, - groups_leading_to_incomplete_fragmentations, - incomplete_fragmentation_found, - ) - - for group_number in self.fragmentation_scheme_order: - list_SMARTS = self.fragmentation_scheme[group_number] - - if complete_fragmentation_found: - break - - if type(list_SMARTS) is not list: - list_SMARTS = [list_SMARTS] - - for 
SMARTS in list_SMARTS: - if complete_fragmentation_found: - break - - if SMARTS != "": - matches = Fragmenter.get_substruct_matches( - self._fragmentation_scheme_pattern_lookup[SMARTS], - mol_searched_in, - atomIdxs_included_in_fragmentation_so_far, - ) - - for match in matches: - - # only allow non-overlapping matches - all_atoms_are_unassigned = ( - atomIdxs_included_in_fragmentation_so_far.isdisjoint(match) - ) - if not all_atoms_are_unassigned: - continue - - # only allow matches that do not contain groups leading to incomplete matches - for ( - groups_leading_to_incomplete_fragmentation - ) in groups_leading_to_incomplete_fragmentations: - if Fragmenter.__is_fragmentation_subset_of_other_fragmentation( - groups_leading_to_incomplete_fragmentation, - fragmentation_so_far, - ): - return ( - completed_fragmentations, - groups_leading_to_incomplete_fragmentations, - incomplete_fragmentation_found, - ) - - # only allow matches that will lead to new fragmentations - use_this_match = True - n_found_groups = len(fragmentation_so_far) - - for completed_fragmentation in completed_fragmentations: - - if not SMARTS in completed_fragmentation: - continue - - if n_found_groups == 0: - use_this_match = not Fragmenter.__is_match_contained_in_fragmentation( - match, SMARTS, completed_fragmentation - ) - else: - if Fragmenter.__is_fragmentation_subset_of_other_fragmentation( - fragmentation_so_far, completed_fragmentation - ): - use_this_match = not Fragmenter.__is_match_contained_in_fragmentation( - match, SMARTS, completed_fragmentation - ) - - if not use_this_match: - break - - if not use_this_match: - continue - - # make a deepcopy here, otherwise the variables are modified down the road - # marshal is used here because it works faster than copy.deepcopy - this_SMARTS_fragmentation_so_far = marshal.loads( - marshal.dumps(fragmentation_so_far) - ) - this_SMARTS_atomIdxs_included_in_fragmentation_so_far = ( - atomIdxs_included_in_fragmentation_so_far.copy() - ) - - if not 
SMARTS in this_SMARTS_fragmentation_so_far: - this_SMARTS_fragmentation_so_far[SMARTS] = [] - - this_SMARTS_fragmentation_so_far[SMARTS].append(match) - this_SMARTS_atomIdxs_included_in_fragmentation_so_far.update( - match - ) - - # only allow matches that do not contain groups leading to incomplete matches - for ( - groups_leading_to_incomplete_match - ) in groups_leading_to_incomplete_fragmentations: - if Fragmenter.__is_fragmentation_subset_of_other_fragmentation( - groups_leading_to_incomplete_match, - this_SMARTS_fragmentation_so_far, - ): - use_this_match = False - break - - if not use_this_match: - continue - - # if the complete molecule has not been fragmented, continue to do so - if ( - len(this_SMARTS_atomIdxs_included_in_fragmentation_so_far) - < heavy_atom_count - ): - ( - completed_fragmentations, - groups_leading_to_incomplete_fragmentations, - incomplete_fragmentation_found, - ) = self.__get_next_non_overlapping_adjacent_match_recursively( - mol_searched_in, - heavy_atom_count, - completed_fragmentations, - groups_leading_to_incomplete_fragmentations, - this_SMARTS_fragmentation_so_far, - this_SMARTS_atomIdxs_included_in_fragmentation_so_far, - this_SMARTS_atomIdxs_included_in_fragmentation_so_far, - n_max_fragmentations_to_find, - ) - break - - # if the complete molecule has been fragmented, save and return - if ( - len(this_SMARTS_atomIdxs_included_in_fragmentation_so_far) - == heavy_atom_count - ): - completed_fragmentations.append( - this_SMARTS_fragmentation_so_far - ) - complete_fragmentation_found = True - break - - # if until here no new fragmentation was found check whether an incomplete fragmentation was found - if n_completed_fragmentations == len(completed_fragmentations): - - if not incomplete_fragmentation_found: - - incomplete_matched_groups = {} - - if len(atomIdxs_included_in_fragmentation_so_far) > 0: - unassignes_atom_idx = set(range(0, heavy_atom_count)).difference( - atomIdxs_included_in_fragmentation_so_far - ) - for atom_idx in 
unassignes_atom_idx: - neighbor_atoms_idx = [ - i.GetIdx() - for i in mol_searched_in.GetAtomWithIdx( - atom_idx - ).GetNeighbors() - ] - - for neighbor_atom_idx in neighbor_atoms_idx: - for ( - found_smarts, - found_matches, - ) in fragmentation_so_far.items(): - for found_match in found_matches: - if neighbor_atom_idx in found_match: - if ( - not found_smarts - in incomplete_matched_groups - ): - incomplete_matched_groups[found_smarts] = [] - - if ( - found_match - not in incomplete_matched_groups[ - found_smarts - ] - ): - incomplete_matched_groups[ - found_smarts - ].append(found_match) - - is_subset_of_groups_already_found = False - indexes_to_remove = [] - - for idx, groups_leading_to_incomplete_match in enumerate( - groups_leading_to_incomplete_fragmentations - ): - is_subset_of_groups_already_found = ( - Fragmenter.__is_fragmentation_subset_of_other_fragmentation( - incomplete_matched_groups, - groups_leading_to_incomplete_match, - ) - ) - if is_subset_of_groups_already_found: - indexes_to_remove.append(idx) - - for index in sorted(indexes_to_remove, reverse=True): - del groups_leading_to_incomplete_fragmentations[index] - - groups_leading_to_incomplete_fragmentations.append( - incomplete_matched_groups - ) - groups_leading_to_incomplete_fragmentations = sorted( - groups_leading_to_incomplete_fragmentations, key=len - ) - - incomplete_fragmentation_found = True - - return ( - completed_fragmentations, - groups_leading_to_incomplete_fragmentations, - incomplete_fragmentation_found, - ) - - @classmethod - def __is_fragmentation_subset_of_other_fragmentation( - cls, fragmentation, other_fragmentation - ): - n_found_groups = len(fragmentation) - n_found_other_groups = len(other_fragmentation) - - if n_found_groups == 0: - return False - - if n_found_other_groups < n_found_groups: - return False - - n_found_SMARTS_that_are_subset = 0 - for found_SMARTS, _ in fragmentation.items(): - if found_SMARTS in other_fragmentation: - found_matches_set = set( - 
frozenset(i) for i in fragmentation[found_SMARTS] - ) - found_other_matches_set = set( - frozenset(i) for i in other_fragmentation[found_SMARTS] - ) - - if found_matches_set.issubset(found_other_matches_set): - n_found_SMARTS_that_are_subset += 1 - else: - return False - - return n_found_SMARTS_that_are_subset == n_found_groups - - @classmethod - def __is_match_contained_in_fragmentation(cls, match, SMARTS, fragmentation): - if not SMARTS in fragmentation: - return False - - found_matches_set = set(frozenset(i) for i in fragmentation[SMARTS]) - match_set = set(match) - - return match_set in found_matches_set - - class Project: """the class that contains all method and info to analyze the project (intended as a collection of GCMS files, calibrations, etc) @@ -2862,128 +1567,6 @@ def create_samples_param_aggrrep(self, param: str = "conc_vial_mg_L"): self.save_samples_param_aggrrep(param=param) return self.samples_aggrreps[param], self.samples_aggrreps_std[param] - # def create_samples_param_report(self, param="conc_vial_mg_L"): - # """Creates a detailed report for each parameter across all SAMPLES, - # displaying the concentration of each compound in each sample. 
- # This report aids in the analysis and comparison of compound - # concentrations across SAMPLES.""" - # print("Info: create_param_report: ", param) - # if param not in Project.acceptable_params: - # raise ValueError(f"{param = } is not an acceptable param") - # if not self.samples_created: - # self.create_samples_from_files() - # _all_comps = self.compounds_properties["iupac_name"].tolist() - # if self.deriv_files_present: - # _all_comps += self.deriv_compounds_properties["iupac_name"].tolist() - # rep = pd.DataFrame( - # index=list(set(_all_comps)), - # columns=list(self.samples_info.index), - # dtype="float", - # ) - # rep_std = pd.DataFrame( - # index=list(set(_all_comps)), - # columns=list(self.samples_info.index), - # dtype="float", - # ) - # rep.index.name, rep_std.index.name = param, param - - # for comp in rep.index.tolist(): # add conc values - # for samplename in rep.columns.tolist(): - # smp = self.samples[samplename].set_index("iupac_name") - # try: - # ave = smp.loc[comp, param] - # except KeyError: - # ave = 0 - # smp_std = self.samples_std[samplename].set_index("iupac_name") - # try: - # std = smp_std.loc[comp, param] - # except KeyError: - # std = np.nan - # rep.loc[comp, samplename] = ave - # rep_std.loc[comp, samplename] = std - - # rep = rep.sort_index(key=rep.max(1).get, ascending=False) - # rep = rep.loc[:, rep.any(axis=0)] # drop columns with only 0s - # rep = rep.loc[rep.any(axis=1), :] # drop rows with only 0s - # rep_std = rep_std.reindex(rep.index) - # self.samples_reports[param] = rep - # self.samples_reports_std[param] = rep_std - # self.list_of_samples_param_reports.append(param) - # if Project.auto_save_to_excel: - # self.save_samples_param_report(param=param) - # return rep, rep_std - - # def create_samples_param_aggrrep(self, param="conc_vial_mg_L"): - # """Aggregates compound concentration data by functional group for each - # parameter across all SAMPLES, providing a summarized view of functional - # group concentrations. 
This aggregation facilitates the understanding - # of functional group distribution across SAMPLES.""" - # print("Info: create_param_aggrrep: ", param) - # if param not in Project.acceptable_params: - # raise ValueError(f"{param = } is not an acceptable param") - # if param not in self.list_of_samples_param_reports: - # self.create_samples_param_report(param) - # # fg = functional groups, mf = mass fraction - # samplenames = self.samples_info.index.tolist() - # _all_comps = self.samples_reports[param].index.tolist() - # cols_with_fg_mf_labs = list(self.compounds_properties) - # if self.deriv_files_present: - # for c in list(self.deriv_compounds_properties): - # if c not in cols_with_fg_mf_labs: - # cols_with_fg_mf_labs.append(c) - # fg_mf_labs = [ - # c - # for c in cols_with_fg_mf_labs - # if c.startswith("fg_mf_") - # if c != "fg_mf_total" - # ] - # fg_labs = [c[6:] for c in fg_mf_labs] - # # create a df with iupac name index and fg_mf columns (underiv and deriv) - # comps_df = self.compounds_properties.set_index("iupac_name") - # if self.deriv_files_present: - # deriv_comps_df = self.deriv_compounds_properties.set_index("iupac_name") - # all_comps_df = pd.concat([comps_df, deriv_comps_df]) - # else: - # all_comps_df = comps_df - # all_comps_df = all_comps_df[~all_comps_df.index.duplicated(keep="first")] - # fg_mf_all = pd.DataFrame(index=_all_comps, columns=fg_mf_labs) - # for idx in fg_mf_all.index.tolist(): - # fg_mf_all.loc[idx, fg_mf_labs] = all_comps_df.loc[idx, fg_mf_labs] - # # create the aggregated dataframes and compute aggregated results - # aggrrep = pd.DataFrame(columns=samplenames, index=fg_labs, dtype="float") - # aggrrep.index.name = param # is the parameter - # aggrrep.fillna(0, inplace=True) - # aggrrep_std = pd.DataFrame(columns=samplenames, index=fg_labs, dtype="float") - # aggrrep_std.index.name = param # is the parameter - # aggrrep_std.fillna(0, inplace=True) - # for col in samplenames: - # list_iupac = self.samples_reports[param].index - # 
signal = self.samples_reports[param].loc[:, col].values - # signal_std = self.samples_reports_std[param].loc[:, col].values - # for fg, fg_mf in zip(fg_labs, fg_mf_labs): - # # each compound contributes to the cumulative sum of each - # # functional group for the based on the mass fraction it has - # # of that functional group (fg_mf act as weights) - # # if fg_mf in subrep: multiply signal for weight and sum - # # to get aggregated - # weights = fg_mf_all.loc[list_iupac, fg_mf].astype(signal.dtype) - - # aggrrep.loc[fg, col] = (signal * weights).sum() - # aggrrep_std.loc[fg, col] = (signal_std * weights).sum() - # aggrrep = aggrrep.loc[(aggrrep != 0).any(axis=1), :] # drop rows with only 0 - # aggrrep_std = aggrrep_std.reindex(aggrrep.index) - # aggrrep = aggrrep.sort_index( - # key=aggrrep[samplenames].max(1).get, ascending=False - # ) - # aggrrep_std = aggrrep_std.reindex(aggrrep.index) - - # self.samples_aggrreps[param] = aggrrep - # self.samples_aggrreps_std[param] = aggrrep_std - # self.list_of_samples_param_aggrreps.append(param) - # if Project.auto_save_to_excel: - # self.save_samples_param_aggrrep(param=param) - # return aggrrep, aggrrep_std - def save_files_info(self): """Saves the 'files_info' DataFrame as an Excel file in a 'files' subfolder within the project's output path, @@ -3081,366 +1664,3 @@ def save_samples_param_aggrrep(self, param="conc_inj_mg_L"): plib.Path(out_path, name + "_std.xlsx") ) print("Info: save_samples_param_aggrrep: ", name, " saved") - - def plot_ave_std( - self, - filename: str = "plot", - files_or_samples: str = "samples", - param: str = "conc_vial_mg_L", - aggr: bool = False, - min_y_thresh: float | None = None, - only_samples_to_plot: list[str] = None, - rename_samples: list[str] = None, - reorder_samples: list[str] = None, - item_to_color_to_hatch: pd.DataFrame | None = None, - paper_col=0.8, - fig_hgt_mlt=1.5, - xlab_rot=0, - annotate_outliers=True, - color_palette="deep", - y_lab=None, - y_lim=None, - y_ticks=None, - 
yt_sum=False, - yt_lim=None, - yt_lab=None, - yt_ticks=None, - yt_sum_label="total\n(right axis)", - legend_location="best", - legend_columns=1, - legend_x_anchor=1, - legend_y_anchor=1.02, - legend_labelspacing=0.5, - annotate_lttrs=False, - note_plt=None, - ): - """ - Generates a bar plot displaying average values with optional standard deviation - bars for a specified parameter from either files or samples. This function allows - for detailed customization of the plot, including aggregation by functional groups, - filtering based on minimum thresholds, renaming and reordering samples, and applying - specific color schemes and hatching patterns to items. - Additionally, it supports adjusting plot aesthetics such as size, figure height multiplier, - x-label rotation, and outlier annotation. The plot can include a secondary y-axis - to display the sum of values, with customizable limits, labels, ticks, and sum label. - The legend can be placed inside or outside the plot area, with adjustable location, - columns, anchor points, and label spacing. An optional note can be added to the plot - for additional context. - - Parameters: - - filename (str): Name for the output plot file. Default is 'plot'. - - files_or_samples (str): Specifies whether to plot data from 'files' - or 'samples'. Default is 'samples'. - - param (str): The parameter to plot, such as 'conc_vial_mg_L'. - Default is 'conc_vial_mg_L'. - - aggr (bool): Boolean indicating whether to aggregate data by functional groups. - Default is False, meaning no aggregation. - - min_y_thresh (float, optional): Minimum y-value threshold for including data in the plot. - Default is None, including all data. - - only_samples_to_plot (list, optional): List of samples to include in the plot. - Default is None, including all samples. - - rename_samples (dict, optional): Dictionary to rename samples in the plot. - Default is None, using original names. 
- - reorder_samples (list, optional): List specifying the order of samples in the plot. - Default is None, using original order. - - item_to_color_to_hatch (DataFrame, optional): DataFrame mapping items to specific colors and hatching patterns. - Default is None, using default colors and no hatching. - - paper_col (float): Background color of the plot area. Default is .8, a light grey. - - fig_hgt_mlt (float): Multiplier for the figure height to adjust plot size. Default is 1.5. - - xlab_rot (int): Rotation angle for x-axis labels. Default is 0, meaning no rotation. - - annotate_outliers (bool): Boolean indicating whether to annotate outliers exceeding y_lim. - Default is True. - - color_palette (str): Color palette for the plot. Default is 'deep'. - - y_lab (str, optional): Label for the y-axis. Default is None, using parameter name as label. - - y_lim (tuple[float, float], optional): Limits for the y-axis. Default is None, automatically determined. - - y_ticks (list[float], optional): Custom tick marks for the y-axis. Default is None, automatically determined. - - yt_sum (bool): Boolean indicating whether to display a sum on a secondary y-axis. Default is False. - - yt_lim (tuple[float, float], optional): Limits for the secondary y-axis. Default is None, automatically determined. - - yt_lab (str, optional): Label for the secondary y-axis. Default is None, using parameter name as label. - - yt_ticks (list[float], optional): Custom tick marks for the secondary y-axis. Default is None, automatically determined. - - yt_sum_label (str): Label for the sum on the secondary y-axis. Default is 'total (right axis)'. - - legend_location (str): Location of the legend within or outside the plot area. Default is 'best'. - - legend_columns (int): Number of columns in the legend. Default is 1. - - legend_x_anchor (float): X-anchor for the legend when placed outside the plot area. Default is 1. 
- - legend_y_anchor (float): Y-anchor for the legend when placed outside the plot area. Default is 1.02. - - legend_labelspacing (float): Spacing between labels in the legend. Default is 0.5. - - annotate_lttrs (bool): Boolean indicating whether to annotate letters for statistical significance. Default is False. - - note_plt (str, optional): Optional note to add to the plot for additional context. Default is None. - - - """ - - # create folder where Plots are stored - out_path = plib.Path(Project.out_path, "plots") - out_path.mkdir(parents=True, exist_ok=True) - if not aggr: # then use compounds reports - if files_or_samples == "files": - df_ave = self.files_reports[param].T - df_std = pd.DataFrame() - elif files_or_samples == "samples": - df_ave = self.samples_reports[param].T - df_std = self.samples_reports_std[param].T - else: # use aggregated reports - if files_or_samples == "files": - df_ave = self.files_aggrreps[param].T - df_std = pd.DataFrame() - elif files_or_samples == "samples": - df_ave = self.samples_aggrreps[param].T - df_std = self.samples_aggrreps_std[param].T - - if only_samples_to_plot is not None: - df_ave = df_ave.loc[only_samples_to_plot, :].copy() - if files_or_samples == "samples": - df_std = df_std.loc[only_samples_to_plot, :].copy() - - if rename_samples is not None: - df_ave.index = rename_samples - if files_or_samples == "samples": - df_std.index = rename_samples - - if reorder_samples is not None: - filtered_reorder_samples = [ - idx for idx in reorder_samples if idx in df_ave.index - ] - df_ave = df_ave.reindex(filtered_reorder_samples) - if files_or_samples == "samples": - df_std = df_std.reindex(filtered_reorder_samples) - - if min_y_thresh is not None: - df_ave = df_ave.loc[:, (df_ave > min_y_thresh).any(axis=0)].copy() - if files_or_samples == "samples": - df_std = df_std.loc[:, df_ave.columns].copy() - - if item_to_color_to_hatch is not None: # specific color and hatches to each fg - colors = [ - item_to_color_to_hatch.loc[item, 
"clr"] for item in df_ave.columns - ] - htchs = [ - item_to_color_to_hatch.loc[item, "htch"] for item in df_ave.columns - ] - else: # no specific colors and hatches specified - colors = sns.color_palette(color_palette, df_ave.shape[1]) - htchs = ( - None, - "//", - "...", - "--", - "O", - "\\\\", - "oo", - "\\\\\\", - "/////", - ".....", - "//", - "...", - "--", - "O", - "\\\\", - "oo", - "\\\\\\", - "/////", - ".....", - "//", - "...", - "--", - "O", - "\\\\", - "oo", - "\\\\\\", - "/////", - ".....", - "//", - "...", - "--", - "O", - "\\\\", - "oo", - "\\\\\\", - "/////", - ".....", - ) - if yt_sum: - plot_type = 1 - else: - plot_type = 0 - - fig, ax, axt, fig_par = figure_create( - rows=1, - cols=1, - plot_type=plot_type, - paper_col=paper_col, - hgt_mltp=fig_hgt_mlt, - font=Project.plot_font, - ) - if df_std.isna().all().all() or df_std.empty: # means that no std is provided - df_ave.plot( - ax=ax[0], - kind="bar", - rot=xlab_rot, - width=0.9, - edgecolor="k", - legend=False, - capsize=3, - color=colors, - ) - bars = ax[0].patches # needed to add patches to the bars - n_different_hatches = int(len(bars) / df_ave.shape[0]) - else: # no legend is represented but non-significant values are shaded - mask = (df_ave.abs() > df_std.abs()) | df_std.isna() - - df_ave[mask].plot( - ax=ax[0], - kind="bar", - rot=xlab_rot, - width=0.9, - edgecolor="k", - legend=False, - yerr=df_std[mask], - capsize=3, - color=colors, - label="_nolegend", - ) - df_ave[~mask].plot( - ax=ax[0], - kind="bar", - rot=xlab_rot, - width=0.9, - legend=False, - edgecolor="grey", - color=colors, - alpha=0.5, - label="_nolegend", - ) - bars = ax[0].patches # needed to add patches to the bars - n_different_hatches = int(len(bars) / df_ave.shape[0] / 2) - if yt_sum: - axt[0].scatter( - df_ave.index, - df_ave.sum(axis=1).values, - color="k", - linestyle="None", - edgecolor="k", - facecolor="grey", - s=100, - label=yt_sum_label, - alpha=0.5, - ) - if not df_std.empty: - axt[0].errorbar( - df_ave.index, - 
df_ave.sum(axis=1).values, - df_std.sum(axis=1).values, - capsize=3, - linestyle="None", - color="grey", - ecolor="k", - ) - bar_htchs = [] - # get a list with the htchs - for h in htchs[:n_different_hatches] + htchs[:n_different_hatches]: - for n in range(df_ave.shape[0]): # htcs repeated for samples - bar_htchs.append(h) # append based on samples number - for bar, hatch in zip(bars, bar_htchs): # assign htchs to each bar - bar.set_hatch(hatch) - ax[0].set(xlabel=None) - if y_lab is None: - y_lab = Project.param_to_axis_label[param] - if yt_sum: - legend_x_anchor += 0.14 - yt_lab = y_lab - if xlab_rot != 0: - ax[0].set_xticklabels( - df_ave.index, rotation=xlab_rot, ha="right", rotation_mode="anchor" - ) - if legend_location is not None: - hnd_ax, lab_ax = ax[0].get_legend_handles_labels() - if not df_std.empty: - hnd_ax = hnd_ax[: len(hnd_ax) // 2] - lab_ax = lab_ax[: len(lab_ax) // 2] - if legend_labelspacing > 0.5: # large legend spacing for molecules - ax[0].plot(np.nan, np.nan, "-", color="None", label=" ") - hhhh, aaaa = ax[0].get_legend_handles_labels() - hnd_ax.append(hhhh[0]) - lab_ax.append(aaaa[0]) - if yt_sum: - hnd_axt, lab_axt = axt[0].get_legend_handles_labels() - else: - hnd_axt, lab_axt = [], [] - if legend_location == "outside": # legend goes outside of plot area - ax[0].legend( - hnd_ax + hnd_axt, - lab_ax + lab_axt, - loc="upper left", - ncol=legend_columns, - bbox_to_anchor=(legend_x_anchor, legend_y_anchor), - labelspacing=legend_labelspacing, - ) - else: # legend is inside of plot area - ax[0].legend( - hnd_ax + hnd_axt, - lab_ax + lab_axt, - loc=legend_location, - ncol=legend_columns, - labelspacing=legend_labelspacing, - ) - # annotate ave+-std at the top of outliers bar (exceeding y_lim) - if annotate_outliers and (y_lim is not None): # and (not df_std.empty): - _annotate_outliers_in_plot(ax[0], df_ave, df_std, y_lim) - if note_plt: - ax[0].annotate( - note_plt, - ha="left", - va="bottom", - xycoords="axes fraction", - xy=(0.005, 0.945 + 
fig_hgt_mlt / 100), - ) - figure_save( - filename, - out_path, - fig, - ax, - axt, - fig_par, - y_lab=y_lab, - yt_lab=yt_lab, - y_lim=y_lim, - yt_lim=yt_lim, - legend=False, - y_ticks=y_ticks, - yt_ticks=yt_ticks, - tight_layout=True, - annotate_lttrs=annotate_lttrs, - grid=Project.plot_grid, - ) - - -# %% diff --git a/tests/conftest.py b/tests/conftest.py index b9bcce0..6a45435 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,7 +5,49 @@ import rdkit from gcms_data_analysis.main import Project +test_dir: plib.Path = plib.Path(__file__).parent + + +# testing name_to_properties +name_to_properties_dir = test_dir / "data_name_to_properties" + + +@pytest.fixture +def dicts_classifications_codes_fractions(): + ccf = pd.read_excel( + plib.Path( + name_to_properties_dir, + "classifications_codes_fractions.xlsx", + ) + ) + dict_class_to_code: dict[str, str] = dict( + zip( + ccf.classes.tolist(), + ccf.codes.tolist(), + ) + ) + dict_class_to_mass_fraction: dict[str, float] = dict( + zip( + ccf.classes.tolist(), + ccf.mfs.tolist(), + ) + ) + return dict_class_to_code, dict_class_to_mass_fraction + +@pytest.fixture +def checked_compounds_properties(): + properties = pd.read_excel( + plib.Path( + name_to_properties_dir, + "checked_compounds_properties_correct.xlsx", + ), + index_col="comp_name", + ) + return properties + + +# Project class testing @pytest.fixture def gcms() -> Project: @@ -13,6 +55,7 @@ def gcms() -> Project: plib.Path(__file__).parent.parent, "tests/data_for_testing/" ) Project.set_folder_path(folder_path) + Project.auto_save_to_excel(False) return Project() @@ -202,44 +245,44 @@ def checked_list_of_all_deriv_compounds(): ] return list_of_all_deriv_compounds -@pytest.fixture -def checked_compounds_properties(): - compounds_properties = pd.DataFrame( - index=pd.Index(['tetradecanoic acid', 'oxacycloheptadecan-2-one', 'n-hexadecanoic acid', '9,12-octadecadienoic acid (z,z)-', 'oleic acid', 'n-decanoic acid', '2-butanone', '2-cyclopenten-1-one, 
2-methyl-', 'trans-2-pentenoic acid', '2,5-hexanedione', '1-hexene, 4,5-dimethyl-', 'phenol', '2-methylcyclopent-2-en-1-one', '2,4,5-trichlorophenol', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], name='comp_name'), - data={ - 'iupac_name': ['tetradecanoic acid', 'oxacycloheptadecan-2-one', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid', 'decanoic acid', 'butan-2-one', '2-methylcyclopent-2-en-1-one', '(e)-pent-2-enoic acid', 'hexane-2,5-dione', '4,5-dimethylhex-1-ene', 'phenol', '2-methylcyclopent-2-en-1-one', '2,4,5-trichlorophenol', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], - 'molecular_formula': ['C14H28O2', 'C16H30O2', 'C16H32O2', 'C18H32O2', 'C18H34O2', 'C10H20O2', 'C4H8O', 'C6H8O', 'C5H8O2', 'C6H10O2', 'C8H16', 'C6H6O', 'C6H8O', 'C6H3Cl3O', 'C16H32O2', 'C18H32O2', 'C18H34O2'], - 'canonical_smiles': ['CCCCCCCCCCCCCC(=O)O', 'C1CCCCCCCC(=O)OCCCCCCC1', 'CCCCCCCCCCCCCCCC(=O)O', 'CCCCCC=CCC=CCCCCCCCC(=O)O', 'CCCCCCCCC=CCCCCCCCC(=O)O', 'CCCCCCCCCC(=O)O', 'CCC(=O)C', 'CC1=CCCC1=O', 'CCC=CC(=O)O', 'CC(=O)CCC(=O)C', 'CC(C)C(C)CC=C', 'C1=CC=C(C=C1)O', 'CC1=CCCC1=O', 'C1=C(C(=CC(=C1Cl)Cl)Cl)O', 'CCCCCCCCCCCCCCCC(=O)O', 'CCCCCC=CCC=CCCCCCCCC(=O)O', 'CCCCCCCCC=CCCCCCCCC(=O)O'], - 'molecular_weight': [228.37, 254.41, 256.42, 280.4, 282.5, 172.26, 72.11, 96.13, 100.12, 114.14, 112.21, 94.11, 96.13, 197.4, 256.42, 280.4, 282.5], - 'xlogp': [5.3, 6.3, 6.4, 6.8, 6.5, 4.1, 0.3, 0.9, 1.0, -0.3, 3.5, 1.5, 0.9, 3.7, 6.4, 6.8, 6.5], - 'el_C': [14, 16, 16, 18, 18, 10, 4, 6, 5, 6, 8, 6, 6, 6, 16, 18, 18], - 'el_Cl': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0], - 'el_H': [28, 30, 32, 32, 34, 20, 8, 8, 8, 10, 16, 6, 8, 3, 32, 32, 34], - 'el_O': [2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 0, 1, 1, 1, 2, 2, 2], - 'el_mf_C': [0.7363226343214958, 0.7553791124562713, 0.7494579205990172, 0.7710342368045648, 0.7653026548672566, 0.6972599558806455, 0.6662598807377618, 
0.7496723187350464, 0.5998302037554933, 0.6313825127036973, 0.8563229658675697, 0.765763468281798, 0.7496723187350464, 0.3650759878419453, 0.7494579205990172, 0.7710342368045648, 0.7653026548672566], - 'el_mf_Cl': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5387537993920973, 0.0, 0.0, 0.0], - 'el_mf_H': [0.12358891272934273, 0.11886325223065132, 0.12579361984244597, 0.11503566333808846, 0.12131681415929203, 0.11703239289446186, 0.11182914990985994, 0.08388640382814938, 0.08054334798242109, 0.08831259856316805, 0.1437305053025577, 0.06426522154925088, 0.08388640382814938, 0.015319148936170212, 0.12579361984244597, 0.11503566333808846, 0.12131681415929203], - 'el_mf_O': [0.1401147261023777, 0.12577335796548877, 0.12478745807659308, 0.11411554921540658, 0.11326725663716815, 0.18575409265064438, 0.22186936624601306, 0.16643087485696453, 0.31959648421893727, 0.28033993341510427, 0.0, 0.17000318775900541, 0.16643087485696453, 0.08104863221884498, 0.12478745807659308, 0.11411554921540658, 0.11326725663716815], - 'fg_C-aliph': [13, 14, 15, 17, 17, 9, 1, 3, 4, 0, 8, 0, 3, 0, 15, 17, 17], - 'fg_C-arom': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 6, 0, 0, 0], - 'fg_Cl': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0], - 'fg_alcohol': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0], - 'fg_carboxyl': [1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1], - 'fg_ester': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - 'fg_hetero_atoms': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0], - 'fg_ketone': [0, 0, 0, 0, 0, 0, 1, 1, 0, 2, 0, 0, 1, 0, 0, 0, 0], - 'fg_mf_C-aliph': [0.8029031834303979, 0.771895758814512, 0.8244793697839481, 0.8396398002853066, 0.8405345132743363, 0.7387147335423198, 0.20850090140063793, 0.4377509622386352, 0.5503395924890131, 0.0, 1.0000534711701274, 0.0, 0.4377509622386352, 0.0, 0.8244793697839481, 0.8396398002853066, 0.8405345132743363], - 'fg_mf_C-arom': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.8193178195728402, 0.0, 0.3752887537993921, 0.0, 0.0, 0.0], - 'fg_mf_Cl': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5387537993920973, 0.0, 0.0, 0.0], - 'fg_mf_alcohol': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1807140580172139, 0.0, 0.0861550151975684, 0.0, 0.0, 0.0], - 'fg_mf_carboxyl': [0.19712308972281825, 0.0, 0.1755596287341081, 0.16054564907275323, 0.15935221238938055, 0.2613317078834321, 0.0, 0.0, 0.4496304434678386, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1755596287341081, 0.16054564907275323, 0.15935221238938055], - 'fg_mf_ester': [0.0, 0.22811996383789943, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - 'fg_mf_hetero_atoms': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5387537993920973, 0.0, 0.0, 0.0], - 'fg_mf_ketone': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7914574954929968, 0.5936960366170811, 0.0, 1.0000350446819695, 0.0, 0.0, 0.5936960366170811, 0.0, 0.0, 0.0, 0.0], - 'fg_mf_total': [0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168], - } - ) - return compounds_properties +# @pytest.fixture +# def checked_compounds_properties(): +# compounds_properties = pd.DataFrame( +# index=pd.Index(['tetradecanoic acid', 'oxacycloheptadecan-2-one', 'n-hexadecanoic acid', '9,12-octadecadienoic acid (z,z)-', 'oleic acid', 'n-decanoic acid', '2-butanone', '2-cyclopenten-1-one, 2-methyl-', 'trans-2-pentenoic acid', '2,5-hexanedione', '1-hexene, 4,5-dimethyl-', 'phenol', '2-methylcyclopent-2-en-1-one', '2,4,5-trichlorophenol', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], name='comp_name'), +# data={ +# 'iupac_name': ['tetradecanoic acid', 'oxacycloheptadecan-2-one', 
'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid', 'decanoic acid', 'butan-2-one', '2-methylcyclopent-2-en-1-one', '(e)-pent-2-enoic acid', 'hexane-2,5-dione', '4,5-dimethylhex-1-ene', 'phenol', '2-methylcyclopent-2-en-1-one', '2,4,5-trichlorophenol', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], +# 'molecular_formula': ['C14H28O2', 'C16H30O2', 'C16H32O2', 'C18H32O2', 'C18H34O2', 'C10H20O2', 'C4H8O', 'C6H8O', 'C5H8O2', 'C6H10O2', 'C8H16', 'C6H6O', 'C6H8O', 'C6H3Cl3O', 'C16H32O2', 'C18H32O2', 'C18H34O2'], +# 'canonical_smiles': ['CCCCCCCCCCCCCC(=O)O', 'C1CCCCCCCC(=O)OCCCCCCC1', 'CCCCCCCCCCCCCCCC(=O)O', 'CCCCCC=CCC=CCCCCCCCC(=O)O', 'CCCCCCCCC=CCCCCCCCC(=O)O', 'CCCCCCCCCC(=O)O', 'CCC(=O)C', 'CC1=CCCC1=O', 'CCC=CC(=O)O', 'CC(=O)CCC(=O)C', 'CC(C)C(C)CC=C', 'C1=CC=C(C=C1)O', 'CC1=CCCC1=O', 'C1=C(C(=CC(=C1Cl)Cl)Cl)O', 'CCCCCCCCCCCCCCCC(=O)O', 'CCCCCC=CCC=CCCCCCCCC(=O)O', 'CCCCCCCCC=CCCCCCCCC(=O)O'], +# 'molecular_weight': [228.37, 254.41, 256.42, 280.4, 282.5, 172.26, 72.11, 96.13, 100.12, 114.14, 112.21, 94.11, 96.13, 197.4, 256.42, 280.4, 282.5], +# 'xlogp': [5.3, 6.3, 6.4, 6.8, 6.5, 4.1, 0.3, 0.9, 1.0, -0.3, 3.5, 1.5, 0.9, 3.7, 6.4, 6.8, 6.5], +# 'el_C': [14, 16, 16, 18, 18, 10, 4, 6, 5, 6, 8, 6, 6, 6, 16, 18, 18], +# 'el_Cl': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0], +# 'el_H': [28, 30, 32, 32, 34, 20, 8, 8, 8, 10, 16, 6, 8, 3, 32, 32, 34], +# 'el_O': [2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 0, 1, 1, 1, 2, 2, 2], +# 'el_mf_C': [0.7363226343214958, 0.7553791124562713, 0.7494579205990172, 0.7710342368045648, 0.7653026548672566, 0.6972599558806455, 0.6662598807377618, 0.7496723187350464, 0.5998302037554933, 0.6313825127036973, 0.8563229658675697, 0.765763468281798, 0.7496723187350464, 0.3650759878419453, 0.7494579205990172, 0.7710342368045648, 0.7653026548672566], +# 'el_mf_Cl': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5387537993920973, 0.0, 0.0, 0.0], +# 
'el_mf_H': [0.12358891272934273, 0.11886325223065132, 0.12579361984244597, 0.11503566333808846, 0.12131681415929203, 0.11703239289446186, 0.11182914990985994, 0.08388640382814938, 0.08054334798242109, 0.08831259856316805, 0.1437305053025577, 0.06426522154925088, 0.08388640382814938, 0.015319148936170212, 0.12579361984244597, 0.11503566333808846, 0.12131681415929203], +# 'el_mf_O': [0.1401147261023777, 0.12577335796548877, 0.12478745807659308, 0.11411554921540658, 0.11326725663716815, 0.18575409265064438, 0.22186936624601306, 0.16643087485696453, 0.31959648421893727, 0.28033993341510427, 0.0, 0.17000318775900541, 0.16643087485696453, 0.08104863221884498, 0.12478745807659308, 0.11411554921540658, 0.11326725663716815], +# 'fg_C-aliph': [13, 14, 15, 17, 17, 9, 1, 3, 4, 0, 8, 0, 3, 0, 15, 17, 17], +# 'fg_C-arom': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 6, 0, 0, 0], +# 'fg_Cl': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0], +# 'fg_alcohol': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0], +# 'fg_carboxyl': [1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1], +# 'fg_ester': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], +# 'fg_hetero_atoms': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0], +# 'fg_ketone': [0, 0, 0, 0, 0, 0, 1, 1, 0, 2, 0, 0, 1, 0, 0, 0, 0], +# 'fg_mf_C-aliph': [0.8029031834303979, 0.771895758814512, 0.8244793697839481, 0.8396398002853066, 0.8405345132743363, 0.7387147335423198, 0.20850090140063793, 0.4377509622386352, 0.5503395924890131, 0.0, 1.0000534711701274, 0.0, 0.4377509622386352, 0.0, 0.8244793697839481, 0.8396398002853066, 0.8405345132743363], +# 'fg_mf_C-arom': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8193178195728402, 0.0, 0.3752887537993921, 0.0, 0.0, 0.0], +# 'fg_mf_Cl': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5387537993920973, 0.0, 0.0, 0.0], +# 'fg_mf_alcohol': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1807140580172139, 0.0, 0.0861550151975684, 0.0, 0.0, 0.0], 
+# 'fg_mf_carboxyl': [0.19712308972281825, 0.0, 0.1755596287341081, 0.16054564907275323, 0.15935221238938055, 0.2613317078834321, 0.0, 0.0, 0.4496304434678386, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1755596287341081, 0.16054564907275323, 0.15935221238938055], +# 'fg_mf_ester': [0.0, 0.22811996383789943, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], +# 'fg_mf_hetero_atoms': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5387537993920973, 0.0, 0.0, 0.0], +# 'fg_mf_ketone': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7914574954929968, 0.5936960366170811, 0.0, 1.0000350446819695, 0.0, 0.0, 0.5936960366170811, 0.0, 0.0, 0.0, 0.0], +# 'fg_mf_total': [0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168, 0.9998867256637168], +# } +# ) +# return compounds_properties @pytest.fixture def checked_deriv_compounds_properties(): diff --git a/tests/data_for_testing/compounds_properties.xlsx b/tests/data_for_testing/compounds_properties.xlsx index b330fe34c29455d45a21cc337182746eb9dfb3e3..8c18045728deffc7ddb6314e4d9ac8ae3a5a99ed 100644 GIT binary patch literal 12194 zcmeHtgr; zIB||znrB()c!JD0`udR{(*R!`>bsTLL^eN2mExLXU<&vEGnV?*P}chz;fu8jxd)az zH{{gTP`HMQrjdb@Xfu`;)SvY(f%KB#;Uu3|FkSTW6o@sHsg0Bx9UqdV+nlHxSZ584 zLD!gqVpJ0i!mf(7`eLkT+dXwWJ=&?F!mkt_nm91b_35=U1$~MA zrWODSR^c_6Wh}PiUOn$Ia%6_jyWXunl5e8n_pIZYG-XD|n8;z1*wAg{^=FF6b92)8 zeJL0iY~bpUKQYGlw9TJ{+6EWBdquxNIcU1<5JDd8ASUw)KG_&J;Qf3&v7_yOdXaSC z&GG^X0KC3}0Tlj?mbI!(q*rfPlX+tu+#4U$0RX_(#;+em)Yytn-p;=cPf|GXqu9S!tc_9>!f1S z7a~>2;|%J!Flc%BL74n0zFK{{$|q$(mGzuihwM-| zv#!0hYS=D@Gu(SPVY*8O&IkR%i3-==BGf-gQc($9n0_OO!W)H<-Yny0&FE_9Xk}z) zXZ2g$%2idgOJhR*@~!68=SeO?(g!?IhL}w&*CuSP%qs%l-cPkM#U@o`_T>=~+M@aR zyOpGolfd26>ZGVVuB$Hlnw>qW78J{8-4fU`|nZ1aV>W*^YT|=%; 
z79z1qMQ6-1y}TyLbdyP!^SdyN8Gu*DrPQst=>u*vmX;((-fD% zRfwjE`rR?uMXA4E1=9Pij*S~$-y@@vxB@B5Ms_ftK7xbryHI}^_gKGfQ$)RBwyrS# z^ZXz)wIOd(+SiDk_r555tLEUjvzFum+pQ?0yiQJ&SC+$YF3Xbh*{au(NwRscCKxJd zM-g~QqErCo)5UNIRdt6^xbvpY2?A0+{$cmL4xgoQ*zSa^pwT?0ceQBOw?oCfnR}=r z)?Q}qmgU%2h!W6UzOnhltNR%2jjAQt(s4oMQhBJQR5awQ&3uT?au_9KSQ%|J1HR}r z9j5F!i1P)(^K>N+>(savQ54CbXH!ZQtd{$!U3lW7>>M%pk<0An^Ph}`Zh>YG`N^YK zM6d-G@dogcC!lAiuX0R166N>SXo3AG zns+}8@s~d^k=>Z%gS)iYdo8z9bc=4$5AbC^+4|Wa_GypZJVjYa?6N2!vmbPp49E8I&klWUp_2Fju{ zBgwJq5fb!Z4fQewG3%GBbv&9TU5zoZ;m zyV1*0FleKbGD1Ib2sG5=!=sWDIK)&Zv_^rr0lG|duH8E-a-86td=inX#;G7S%Q2=; zES63a1Ve8=bGT;P=OIA(xXiDnzW!Y*^~aspM&acf$Zvk`M3a{|Fp_)-?tsbt8DiSI zPSG3apDj-xW!=ZDI&&W2ckoN335(hJJ%gOz1rLZ5c~i$Z?PRAktDbE-3q1}|w=e&s z8@@XGxINfEyZ$!Q|AfrmF4NrtA^<=j4gg@k{o+rT>11wd>g>e$`-S;8-vPDrp`toj@qxSnW&(`| zdv<&lcAE~Z?Xiwm4dR>d%TPCVb z>XJV1Vmiciwi;<$zJpc%;F_?OzIui5Fq$ot^rY~3wtJ^qth)n(8VeoZnm8VI8SV_K zg(yfCMfu%7lBb4=ecuagm11CpBR+#<59TnLZV$Fi-X-2Fcc%n?I_JY} zWq4$%FR1E1wLvr~Ir)KSir6I-Rm@8!s7r2TKqVK<85U^hK8gV)F-$0?;ZJ0Ur)%`e z_?WLF@erTMCvy6dot)b7IGt#pTrKN2n;@IaBLs z@yuAZI57{_7RVdw=8(wa({($CRBHsJXfdIL;ND%8*jc)ZN{!ysmdA_nvvu#WwwI@e zm(_2t595s9FE3*4_dEUV?JpO?FY9({FFWl`FApd6-cNQn&;9jh-cL`@k;SQErmr)0 zYweGieK&^Zm+0BI!V~RlC9Za_9>5DQPxV!4yHxZUO=zSC-LmanJ3> zL!c2)VK5Nf8YoR_#If2Fjm`KzkFcA`W2b-P$NZ8XS{MO^23wGV9Gxx`QV{7baKNK( z5wil1s_H8*0MC_4sfafg1JdhcENJNaExxtxRL83Hrp!j=gifO=Ckb;>$+XqAwtCG? 
z*LCjbzQRp~jCZMViDrGS%^{-%?=JK415(RR!7@hbrtJOH>}d2=1!w8!dg-HLn`Mq- zr#_7{>zvKo&)H6=)E-&p>rNVVRH3P@)^d>JQ@0!$7_YQP>9&dUxZ?rtI<^>FE(@r& zmrYLnrlxE)^Ak+)khopM94LvT2p_mc11h65DFS5q>&FFHB1$GaI;1F2 z*jpV1Ahk@lHf(T;ymE>#Xh-=c5tdDI2Ws8apT%YAr_60kNW)nDuss@8XP z_La;H$_b+OiTW`T74DFWs-x@nhhA2A(O2?k>bloi>L5JHw?B@W1Z z8}qsep`&m68lD}^f*q%S5Alm(2fM~SSI0mnNNopj*&KrlyPz-tLk(wsM$gX(h`arc zy;dTpc2aHFLGvh|RmfxkPhX7RX7uuDtnLGm1@=5lA5r1OpCk4SQgIcYq!)FyFl!g~agS;tK}q=A@r7-POv}NIBgMKtm@4r0$LM zV0Mb0Y?hzMfzvY3c$P^p#5AL6*ziZiK@Ob$1FH7)k4zh)O!x+5TRv?K1Mjf_wmF*b zjMB5?gy-rq=JIXRElh=XJX~jHNFwL7PV?plUMhzMT|`KtJW$!Rt;iiAhdy&@oB~;x z?egl9(RCG_WheX18T1c=4&UsI*E(;x&Ta$Tp|fP$O?E!c%!UfP;SZ)9Y>V|2bj|p& zR@Zja;oEAh<{`dgzmkOoDeQ^>({1pM*!nrNFkKFwm6#$Z1sP4)m4z9f%+!c$!;L6% zDFz#niXsRsEYz^tm4P|;4aHLsZ=anQnDMWem~-d;9EEt8Emc$A%W~eQ3~Bg-^gpH; zKZ#z7lqlHj;PW^|kV9i~Z`TbjZcwM7unfHD4mRB8%NrCsV&0~ecCN+CMc0a^C4Tjd zOaWaW!Q+qEwZ3nE%Z9IzYgoOP&MLb{ZWu4*zkTdRnwNxWQIjNL&Qmn%Tzna)E~NE! zW}9#)BUgTGMozM!2afu=2g=ezauP#y9^ETycW4+az+}2s$#K4~G)Atm zVBa^}YkU(X*wlEcAz473flmpfusJ{J>bnymi3CyoUSRBmzHlcKHWl=v3ki)lte;f@ zV<5r`v{?%B&C<&IM+sWh&E6e~uq+nVEPF^#e1(>9L@E}0!OrDxK={OoK53=G(iO?P zfl!8CI4@%&Z6YX>t;Ac9x5k7yy zfvIf@3LMYz4f=`>nGqX|(5AetL}tVwO7S*>a!Sxe@M>n+BbcA2s5*Eo0%9Wu!N0cc z9DYO785GWB7o(w6qD|y%#Y}hX?`Q10&|)Dg!7ak3nU)}c3}^Fv;@4jRo7Z!6>)m7z zCgN_{JWuLw-rn8gr&XO&8wN}7tG}WY@EG3&3*IsGUf?)a9yS)}FQ7bMgz?Oz-#w=N zj@SzA-L71CY|_#6M5e)oio8zcNG+hWt>l@9d~hy9T%>=yMf7!t;;<1GoG3fXtv1B1 zv#(PR1#j9_&|c<;N*9r-m2t*QSDJlffFCtyJ$C=FbS2*{k6D?@bZfBk;V@jQmWpst zcAC~8EsKuwwByBv-?IL18`_& zc>kPLIu1xC979y9TxNmyv=~^Wh;mvzjdphoE+Ajt%w(cBtDiFriKp&-+j<<|smq4I z8sX_8s;*B#&+}je<*(v!4ss~Ai(p+&dmT3%XC*9$bIs;?-5-LLOOW6DrE{Mj9$P?b_X&8V<1IP;N< z6#om-%1c}~En-DHV{A9wZ%1Vy=X)c>G zuJ-G-xjCxikhJu##MaiS)|nl9gYH;UH^pD@bvd@VN}C}wrbhvdHPAJS3nUYWWyqyO zg82w?w0NM|SrKJ5Wn0QBj2@sQdJ}B5p6N%hA;3))4KfW_&5BzHe2j56$lPnp&ct%p z!FH?P&FenY?0iS2jd<3Fk$Hr?0ldW*u-94Y(oHKKo$5WyW-nBBpXL_c65Cg|HRHl1P2rk-t+{Q*Sa`V;^y ztxZ|mJ`u6xqU6wv$8>U`(PJS@n+1nJzG=BEkyFelU3>7Emis=BTgl_&a=vn0N0+9{ 
zIMz*!%nniA_7ZK|8vFd0?en1Sw=r@(v6XA{d>}s@eB~%Zp}^x}zV&?C+~;)fi>t@B zct2B?9_pr#vh`CSy)xq&#U5mB3+l~4Y3sPjMxIIjNK^`?a@BWMN8|FeJyM5_E-!5X={k5XJiObuE>Xb z!n=h0!Odl87<>F3uR z?hf{^)%c#fzHMAT7cR7%UU%uCEA4NzfVfc=3%{Yb`9}eXi zy{~%BGAYOP2hK{GLY)`oW-u*IhRffTGRSws_a8dO&V8IBDI-Ybjd-vAp72WInX9e= zy@IU-_h-}#l752E@?*2nMUA5AYKLFV4*LhAdVups{SJ+yVbUjz@y}2A z8V+s*hFq6~jW@qTMod1vglIbO_`Vbh5zbWHJF|r7o4F_t^Ti+DA*M>5o#>$Yv5+2h zC0SNP<1{S@C+Y(CW{NHuv}%`xAXA6DJJ4y1V(1I~BOoU?WUwZDTe%v9^Kdx?i%yoF zVa)R0SlZ$ssmYFt(3sNzUtc9V=%Ou-R(qc!AzAyr8SUOlJ`PBs>}VvpF1PO*xA8qU z6o$xSe~9o7i;lZx>pZnP8vi%{=G6#XlE+{B&82-Ox5G_M@8}lTx!r1jlCQS8i>8YB zd<{>#PWTE$c!m0{Z2i3wsiu~Yk-wIulNukPRU43$rL9tBnPBBqRiYY_8TWCL`Uz*9 z2%m@^_fuNxr&H%h#7s}Z3`vY-)>h5a6Dqd&`j}WdUprMZ7b$yf&s)j+AFAPleDa2m zZ;OY@c)x9a{ay__o15C0GXDPldm((RIckf?iQI;@&x`z7o^CPD2qzXyI%$i;!DvA$ z48KU%G8x6S@f|g96UJbbpl`Otq_ppv2h|d!gpb}-fkRrhtZ$1@@V1nemWnkcZe1&;&6`+|N=B6-ISXk?I(6ws7svj4x+X9zMBUngKZD60~2DadehF zUt3GugvrUsp>NC_!Rq})iad=?MT06Mc1$zl;nxrvC93?yQ7S}0i9l8p%1YH}A`{W4 z=>&1)e2-5yV}ii1*#`}rq*{hIn3a_n17z^cr+O9*D<=z!Q5;ElWwL}vv_Q^7?-alB zDS#Mh(6~*)((Iy@TI=a{;`LJGCmqwD9Hq!tRV*M!sv*duAbpf6Y1O(Mx`}85VX6%| za%s-6{UkOHu*dexS4(0U)v{|{%ABI}J)Rd`%8+VRa=IXYu!3XZTt0DW)f_Mi#f^E^ z((b5G$E__>g|ZXG)YtErU((Ky>+Zyh>E*;cZ>1d-8|}qY=rZTf60{1w@qoC-F&S8l z!fHc2zm@{|mh%qx<7A#DynB99uU6QZ(5S%`=`k%M~>4CItDw> z`x7(oFVD;mzynT3BJa)qVV0M9B7yszz42$kfviRm#x~EN%ReX?+a9iN&j>$nXoDgR zoeRi@iqsFFEbUS6e}o50$w6*;@;<=ik0GW5Y>*DhrSLb(U_J`yq&4(4(`LEd`ONp< z5@;f!GONvW-;2fZbm)$0romN!*qZB+!`=349-A@rnXU-*1q-YZq;r>_L^&0vPG>n= zuS`^*YzqgTD$exHSj7ntaQU*W*9a@05BC(X zHY`<*p6br*juc#Ta68AwAb)iaI78e0dci|ToBkki<1jP?*&eF8h4#~cq>H7T7Q>B) zA|l6DB=_p1kzQ~;ZraYEP+hP7tXlGrBruzlXf*BXC5vNPG!KlNQ^g3I^w0v*Hnb3W z4u84lx$f!5kS~z$H>``A&y=Cg-`ayOh)eGRwz3$A5S9sy`=(~PB7;v?444o*kft`J^e!~TmGI4P>sWQ#u%%+8n zz;;h>%KbA5fu@?AT+;U(wTaS-IWY9C77Td9(!dG}M7`vui*CB0=I;tHcsB(UKVaq? 
zIn!P$OS@zPYaji(T%@Je2a8P$d zm0JzkO9-jDxLEYAC}ImLO5JOqIK+sEn|JgX5&1uUD)bXTNwnuReA~qIDPgm*xg4S& z6P-$Atb`vcH%_8#psME)El9E6q=+_S`ASJ!hJxgLIb@r_+fGBV!zhvHi2XXQGPwt% zgH2Z-Cj-S&6E5ACDe(%rf2 zGG3P7`Xm&%qE$$s7yYYEvd_?sX19*3$uvIFq-a+`ZO2r$mHQ%i)j?7DDsax` zQh`Cbvb?P9k@!Pa**d-yWkk#FUe-Z1@mA6&t$O$A<*Ly<x-Tzg;cIZ9dMW>hj6^G76bV}TftG8H)@JCp3a(1^ib^2phYuR?03EqpS(i{2ebjSrr2V;#Ts|*IwWGKjM zP!UE?DfEfiB+rTGcwPKHuED$>eo>l4ab}VjNwQx(5`8}4&C*IS6SoB-O6B&!l`;42J`$FhE{Vxjnu^2oI`++WRZMaBTedkh->pDY z(hP*0X26xvvisnpYvwr5Br@@IkEZkh43nmVPB%%X=_4?s(GotMgi6RL?>#X-8;^;k zB6nvt#v>It6eX4^mb(s7aDittK!j7~c^8fl_;$6bQRSuxN47t61~EYHXi=nYB-bV-Cw@ z&1IIbO;N;2&;IhJCY&h=7T|rEXb<;$fkjmai4d)yT42CDf?Q7Y667#R%%k=0^F?LNqn&deh&iNlC*OP>$H*;wx z@HH~8PW;^8Ckar+P?!nvH5jxd2lF*nTT11+u*Bb#OE8U~sDpQ-iX0^C+BQWF&tjK# z6Ltofb!O-DZM=r60G`%JA#OG~vQ^1%%I(rcR&5uP`W?LK@5ChM8grT1r0PYrZSLT7 zJ`p5!bNhpU8_)e*(G*OmyGgAc2lVhs9hcM)q6X-!lih{m6&i^+wF0{wujQmbewas* z+@H!G1sT(>Z(>Xo8<4NUJi2kooI5dX)a@uboO7j9)%kmt{FtOR($`u zNpu1)e?GpvQ*eLC0oR(+p6(tmjp=@A-uN?naMZDOX!pzh`V(&C{wwyx2^K^#^H(}c z5aX4r&KqI@d>a@-8=i+U8o|#aUW9VbM^xvn$5Tixg9&3Ti&rc^tEr&Xrw4y>3}vI- z?vH#pjeoZ<02Ad|Q>nw!Ih!?`HL(Co(s{%9A?5>&Q-GRm^wQgwkouu_h$ASBorD3kN&vq`al=U-KWRk@xgr5? 
zq%7Ry5wBd5F1kFxS7dkb@?v+C>Zm|DFS~tWVQQaGjws&&Tr1V92+95FXb-6_g7he0 z4bGHp`vS-MspkH{)TS)_yu8j`hfCdC9+eiZFu;q6-CRUZRv9KZdX%{86PS@596-H@ zEKLqel3a>Fd|l$2JxETKHhOHZRgY5PO2r3!4wn4Tr_mc{p6YWP5j$(FCvDSjP1<{{7!S*ynZ ztf~{35!ZP5hcOL%)HtODUE=CIc8gD*Gon`9}6N#>Pd}ov|l76KmdGzv3 zd@`S26^*HL_e~`EwDSCF7{!$~R?LcMz+Ubq`mx3D8HoFx{^24Oo5B?wcq3Ah+KI?# zq*FXNz$e-G(KRprfi%kyb(AvNTHl2R(X9#c0`w5~4BS!Mc3QNs6NSj>73eNFW`4~f z`9mHAwyw)e1{ehoReQ?rCLWx+nx4n&&$0S3f#ZEpL{gid?_e2RDVqjG!#m7Pzw<|8 zaSEr*Amw8~pbO>gJx9*9lbxsu+Pe3ALbXCZv%lncLu#yhw8A&N9KIdP6)%4$o4s|c z*akI0HjlI{5x>dagq9ZevbHj{}J{rDe19t zZ+AYvK_BI9U)aRXSi#ZG-iguJ&e8PGj=ukG2fz7)h&V-ge|xR z?2qycnj~UC?ddfe5v|m^p}Skmg&0vH&tv`q{qaVNI-Vh(J5b{iu^p;VBXpmw$if)` zamF2<7Gd8{GB9eKu#Pjnq9t1oG{DrKeI0XbxDh2CkeRu3J3Qi0du+W6KfxqElAbB( zt+0!+>_vZ!W(N{kBYo|;pikQPY7n(ah z`+V7zHIiQKm~oRK=dfqpDemA%WW$cLTo<|I9I|;yd1rHZ#ixQmJ9`!eM#Iuc&Do)2 z9M;x;8ucm2tow^E`wv^7Yio(%ifeBWHoHLyX}zq zO>zM_0CgzmqZt)szozsnJAKZ{-MM?C4Yo$faQ?*@?w0l;l$mmmhN8=ls*OZ^MvQ6t zjy6hh?F?FO+u1H$Bz5ajS|Cco6Ja4QldicF5Y=*asuV(+*>qoktIjyK6*mB}0J%(Z z4BV(xj)TqX1TQyN-^imI*GEAT6TZ6*->ScscXnE@qqnV+656`dS_$Js=F%6u&zjfG!Q^6Hq`)9_W zs!bQ{5aZ~X=ax9i;W56Lfam!wf2rM`IUJijFbe?S%ll>RL>_|M(Y|C*wIUH?OG zw1V`%EBN=m&3_60c1?au=>OKm`K#csy=;Gpw!AG+{nFX?tMI=!0{tZl0JOY4=>NYh z=vO_zc8C0>=?w1w4dNfYBEM?+bye*zEzh|BagFU)1;5rme96Ad?)LsJu1WJZ@jsoQf;7b2>;eF=Z!e!W LM+r~=+qeG%tYG{l literal 8247 zcmZ{J1yoz>(sr=o4#lmw6_?^t+>1NKAy}~jg(6LHcc&1bMT5J$Lx2LsU5XShL4Qtr z&pP+?zhAQU&e|(^Ci6b?%)Bz{N(hJq00005kPz>wCu`7&5%l;}`FPaNZ% zZXB;&T%LP5I;cdepm%ZNynImHTXNDqrskra2CojjlNaERwUqY!ci_chJ7H1G_eVd6;Q#>K z|Mu6y+12VVhYR9V)Vet_f**zw-|h3u$iFmxt}Gk~>Y>A_XF911-n~~!4Gi7p1qt5&fJ}JVTsorn@dwt`;Jev*F4Uxf4ppf>;_5{? 
zcw5GS{M+UDafXvI593vuGDHyB_HM>x}|w#fc?-(`GBB7zeJ=ag-h@hAqqF9uuzuXn5XY$&tXpF(7ZdZ zBkAhKiI-sA@MMGwIpV5Db9PtOxkG!tBx()VZo+cza=tvSusBLnZR)4r%6^7zCR&uC z+7$5Z(o}R(Fx>mTUc?!@Ns?qfzzdtgwaVD-&Kg*M#^{m@-nV~o_qWU3~gUk#D+>Ixx#pSd^0=Jm9}~5 zQqS*FP0opSwy2}}*=(XLolMREd($SaV66fsH@-MZR&c7lIyrnkDpzuD@KsG;Defx1 zUws?Oz)Do;&l2Z!QQcfUg?Z2wjx6bVrrWj~)ZqrXKo)wb-LeQy(IbRQAztAv=40!f z@k;7ADa#b0z{eX+M1LX~oI=NdSM=h&iah>3Yr*_|=9I*7A_v{IVqA<}HSK42hS6Of z(oR_jnMa82q=?Tw5-gJsjXaKw0^WwKT9yW6*L+L~p^Hv0>%wJ~5;G^^Cv~lT8d&$H z2xMk}*9Y}dovgvV)i32s5HlM%7>)B7n~B51jW-c^{OwpoEN(kxu%Kf4Vcr`(#Jg;h zZiUX>pkeE~hWEi=c^ufHf67f0DQ;pjd=}s1=0ygl>!Y@UbnJ7ALwLo$P;#RooitL4 zZTWbi2_*NmsFw%5gL{C^3v`uS1$Dgef9^M4@T2iuG>~B!PMEMNY@Tovm2*$>H@kIuImXpPlw;tWsdLBwfR8#LzKI59+R^ zAwBA_)|8b%I9|3CnbV%;g<{g|7Nn7>LBw!#50dLH#x@QUG2N*dMdy*OcUAgcm)KUz zAgFj=IpU&9IgBA}BN9(h_x2M7-_515=O%3dr)G+D1F2LSf@4$GqL5&AYFs1Q)T>zM zZrEP07wP(oZ-|Y4sk)9kb5Q8bPNzl%_=iX`MStjN9?em1aO!xsbcpvm(^Q+JK6PFK zM~6~6HraZ!hS?J~f>ld1_D{$kAR6$eue8m$4EqpRWAl8KBV~47c2bN?H=yww3x!s- zP7vm|B#zHsz38$_#kVOLTDGd})TS8MHax9pvlp!)R2{$9FB{0gz*T*Ujwf%CNK~kp zcSfSww!^B>qb=Aj!&8TsLfo>k-Ty{avjNYu4jZ1kvH3-jj}@0q;pqxH1}d9js6O7h zit*vsos7(KpJ$%eDQ`Q?PVZa><*>JecTCTXrOV%6E^S;pB?!waXL|SI!nw=9(i&aa(>R8j=p{6yT|GdF$*7bhEXxa(CnS_3>*KDKi3UE)3w^ zu@dCO7ZY!nrLlif9H3!C_3d736CMs&7kyS6gcejp|` zat2+Rv=#bB7@4-TXDhNGHfMN$Wd6QF*VkCNulm-!?c&Nqh+@>Ad^=%%;hulx{w`wd zYWU6^7P0i9H3XW{lELAFz1-2hWDIL0Q#<>%(U9smdNzKu!2Ce#H+(p0{i0BOtX)*c z#5w8nSrOAc%gU{Hef#Lpms5rJ%}iO5xBmV+`eJE`ZN6*|H(Q=Ju!;h!iwozUa>E!` z0P!fwv(i+IzPAxKcaBse#`oUm9#y*pE751BHU&A-4lz%`A6mUn9Dyd;N1e-#Fmvzo zt-YJI4gIvzBl9C@rTFstYF(Z271QS@YiY=Vi(s4L^myAMsM31CN6+yKiBe zs_a?JmhTM_-}8CQxESkR2VdcIU*F7VMwmrZNPWhW6UVeMQf#@o?w<-LTe*D)6#T=C zRMn&<8`dQXQQvZ+to!+N7^8G{_;h}dl@;ay#}B(@gFQaKZ3?qmBFT0%*LxekGk1KJ z5~B~~B@thooilMT7v?DBD@++J^nmU0rbjW<_xfqrEI8J*QjCfduJ})rnDUYC#6dfySOI9M8|9IP=UpM&4P_hyIVd%`Em(2Z9wCmEt9-n3V}EebNVh#43523ZPB?xO8T4 zS0|MO;BzjmbwRBxm)*y$AqBiieZh1xFkN+6)x03sUGS6CN=@FVJ_KRd<=B!OR83$tUqJK(m%P)psSL$`C; zIHuV4ABs>X8P4)Gd4u#*>U 
zmdC%j^YtFKZ%`bU6p8BSNzkYlU5p^2+)xqKQt3sHtnaU`s4sxWkA}hdWO!bUD z0b$$&o(W=7yjfe4@6OWhGUWsTclDCI3ZcpLeWr1nEF##hQlY!>FMp4RTNOjMqa3+H ztmDaR{?c*XG@%r|y+nFABLq1C02w@+%AhZu0dU;&J`OAh#?22I&<^h4hyP$`BVDDz zTBQ-|U~=cX#NtFmZ&Q=Wp@6gV#s3Fw1ZH2c*bH3VePSjZP{5pq z{eU~}H^?C;#msALeixmDgT*TC8xtCr`0`TcurQ!{<-x&rV|{Cq!#3KULS^aP z!>-_7x)h`4URfm*+><7c*nZ!Q#JIG2^{fDrq&QyW^5dq5; zp@ODkk{~byWdvsqORdz-t-mGHBc5=d4zu)ZZ$Hn$d*U1T3IZB2DD{e3*YJJL@$L>E$_ z^|oN#v9nN%F?%_|GdA)Apa<}_z#7CI2{~L2IOdRe`7i%=IH?ECba=Ljbt7R&^n;6h zf&-Wx!2l&qM9OEJ(q_UUT&AR~hg7--Fv41%`=7%~b z2ZwzI|0M%$Hro}3ZR)9Pb9*9Hjfk!HD(>JuovGd-55O7;S^MU)HrMHaSY3b>n^_u* zERkZM+JoL39q!J2Q!bd|hv$jE>c_V3PesKfLDt08$Vp{E1J}=fW#)JC`<7Bz?@TC^ zi4>{xl>$5fGe~D{NC!TEeFO)IZ3!M`KQU01Q8gWRsd~#*=Ra(z% zcJL{dYZ6UGRL61Kw@B;7aBK4XG<_gAIWZ#Dc{)Cv1e~ljZ9SEoh|X_qZ}93X+&+5| zOH&=!24G36vNkyp!53f9(Ow}CMO9j^fnKJ+D_JWRe?A@__m}v}5!?}eJAz{k&ZS2p zUjjIwRPfTDK_}v+%DEBiaETX%$R;suSldoGaMzCGT~1xaWm}yf%uBG^SM(Kp}s|sY2s}0>#L43~eiW0kh673f->M zIZnEP1xM^0U3}TPeWym|#nt;@=&wJ$QpHTk{C-`wB>C!zV9nhRjhyA~2TwgfK1_LyIEUb2nLSH$dZw6!#{8phl81FELQ6_6sBkWD2r#Wam)? 
z4oS#IC>te>dobtY&%juNr^={==D+u7#^B4|Ah61iFipJGgqL!){EOr*2~ppVY`%j{|S`;<)BI}4V>t3 z!iyP3q6G3cYRXKvcu8i{sPU+MQ6``8Hv^wF$TAH`r=8BjvrpKU6TX!HLLLA#myFII z%Ct?YG{TX2GgJgmg8F37tdtnTty%}0rjr|980EkS1U(=9a;M#wYfb$k}{rne7lAh)KJ zYw5)`lSLQFLfEw_KqLInWah8b`rK6_YZA6M*$~tOeA+u@v|;?~Pj><4g7n$Guyqop zL{?@-UT{!55BH+D?wxjzW4vv@Z?0O=6LJ`Q)*XG-!KZ2WxUK(aGoS+)&+T6ibM1L3 z!CzxmP%o6^wo3OK_6_W(vz(n-*fOX~%me6RuLau4-3|_6%HGqX2t@&X{Eu z9qz3_w`2%0}C-K;>%WxWtco0oPts>OtFcSgu5J_>1enC zkTsP)7LJ$7XAWqW*f{EEk&E~}vhvZ8HG><4bKN@ry29unSKCl)j&L?dX`4;4#n_>2 zJ}7xaUW#$mifFZcy~F<3aRG7DS%G^C(ry2FP=tM+20j)_k?91eMDZms!{sX+pGt#- zc~jjEX!9#v!!igR9?UCeS%Ik{m|wZAWf_&>;?C>Tg*2$Y;4UQv{GC`tet##{^1v*o|?FowmfZfVeyEnzI7spu_4Exe#$v8FlSZ|_syfV zKVW@M3GD7AoE^sY2pIP~mJl8Kie=nIfkbPtpYsFdxK;|?4$^G_@mPz!6*_OWpi9xQ zZMU^3c;;L-DFyGKN;s!;dP++>pSZB%UVk2m@iU*jz`sUuD`!ht1C?Ls6bYUKcTUob zC{P#WfTkj5+aiDsg6s96g9d(zATI=GlzM(U!>-sLe@uyBt~m*E&DK70?#}zboh|C5 zayq^&2TR*OEQ3wG{UFw=Pk^n&f33y#wAUSWD)w`0w1s^2WL&0Fua&ty!~H_E4EX{1 z-}}*@moccH;Q;_gbN~SJ_kPsP-P^&+?XRQEC7p%n3{IRo!XD&A6?Kg-Qu+Loib?bK z1xq1q>1ILnreC;9EZf_7>hR+au$C?>KY>oja~g#$MH|4IF%&z`UoaJgupDOYi(7rG z*l#fJMxJuRln0iYVvc%17&)Q{O2oia6HwpBVCCv~F4c6W3DhW=hc#WGvY+q9V;-3Ll zuQK?~#<_%D^#g=KiL)nx=G8}OZj0aF=^@6?w)N%Rx*yQAwOaM}o?7^H9%H*`k}bHr z+hY2{4;(ZM!C<}2BFfx5(IDH2lq$k?aOOpi%e2*XUm0NWpx`yH@+gt%u5a4i((&`e zN0NWtU*Oy0D8ArEi9WC~!H|1qO19$8cJR2z0TYk(APF__5=ENM8Q*6tnTU}6v;sxe zm_Cq3ZD1$YudmLAc%lVxw^NUN$i8S5d7o?!A;kXHzOZ3Es@jszqtg-~kU2=fBp7+X zQ?GiUj-Y~uBvezWxC0M!HU4^Es&Xu$y@xIG<@6cUhhP$wLO08nwA{2Fa3HMHt=5@@ zF!%L1IcicgtCJ;b^G6FVm}R_~vf}BRSvvo24I_dnyC*hM0=FF*jz;r-hrNi1E@TkS z0fC9uc9c*{q}Tu)I={Hrv85ffes?k6WxFl#^0Ey+WCqYu!cnvqf!wlo1yjTlP=tIV z+gS_Kz^PNcb4VpUP>1|uH#S;+av<@>r*9t2Jve*aX=s^Mx2=|T~nJj)@TB)QNp zG_FJO#DGa6*MY)V{Ew12Z7$O&I@uOEgD8U*VhBm}sB5u!Q-%auxo9n6@8d$4#)sn{ z9qJlz!|Os2Mu>ZDMopKvvnJznCN`}4qtW5|Ln{pP--@o6+Yj$}RPxhDRX>UvVCrh+ z;KuR%uX|R!k`p`^h6Du9%2c_i2cD~Ru0lE#y&-G|oYoU((Q|$oHP=CHi>vgIoy`ux z=KbVJ?zT|i(8t+h^i7Aa3{VARHp_mKr^mvlB*C7`8#{3^hr_mgMnAyb?KD%b>~^bg 
zjuyAy*VO!qAt7MA><1Jku_%JS3HB|ooQaZ z&hX;ppafq7g6hmpTS?cp-m#LvORD4_#O{ZNfJ$o#{U}v-B5<|t*y(UIV)2!x7Pv}A z5VrDh&RXugLHfg~e(^c~C+-L=_Uzo}+PT~-lY4Q*OM^KbwMRXC)3yLPcE)X$HCfYr`(puG! zNjr}LJGXqlGX`-&Mkw2Y__*m_Hae4()LmU1IX31)Y%v;R6n$LbtU$uTYMa3Jb6bw1 zF!wtPhnDZB=>Jv`&;CYJ*(0epj|7wcCil0H{+r@oqNyWGyAWzZ6sXw0F{?pQigE#!0u*R$~-kB}KkX8QyB_LmzoKR+F zjesHQ0=!P`hpxIOS&L#vzS2oWQZ*2*tig`;Kr>&s$0t_yK1T2ao71Wj1`Gs~9Y zrcsx}C4Z+uJX34|)4tJlm1=&#Fdi<)6T6t|y##-s0`U;z#Y5djzHzyRGMd%%{q;Wv znGq}nB6}R*EW+<@mZ7w>le?9ZyQ!A9iMamQglt50yrl`KK1zHINYD#L$)@#? zSH*iM%nE%~>x#VozR?$l#S2~y>4FsIQyXY-#BMcC;S5G?8^YHcss7AWX`|g32_>Zu z+gM+{z*anwVQO?8=`Nnu8Bg# z9}jln;0XZ#ca!%~HNRc~j|~6+E#E)Uf3{qIV*!8wxQhQq|J{K76aHso@*nu%} zHszlN{ydoe+dy3)8p{vSMV^rQd) diff --git a/tests/data_name_to_properties/checked_compounds_properties_correct.xlsx b/tests/data_name_to_properties/checked_compounds_properties_correct.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..f7d745ee69ef5595d3b006a4b755fd13c77c983d GIT binary patch literal 10552 zcmeHt1y@|z(smQv8+Uhi3GVLh?k*i9!3h#1kPZ?wXmEE67F-((?jexi?tDFS=e{$O zx!*6i_w?$sSFb)*tIpo_JXO1PX(+?O;Q|lODeE{Oyek-2%iT?XKTXH%~hugr#|QaP~$9==$}R@UNx9@bia z3tFjB@$ z<s#5+CB@AtMU@pmc*Qw8uceSvvdZS1ib8Id>ebNaofs| z=U~oeoGlSb6*e^w&~T2e$FJaan+W_Oj+d`8a@<3?a!0p;L;7Wn#HKYnA<;&SklK}X zy>K8~M*X<}b68N(;83$*m->Mbr(ISr&?xO~;W>*q{Qp&!P3T;$6K_QCEm<{pyv?ME?45UCE;&52`;EtgC z;iVOk_#cB*mz%uhvDmo6G>yI$k*T+yFA$mOy;9^n%QwDY`OIF-UZ%+_`7`=-BrumX z7w0MutzP# zX%AgI`nk&fR2tr(x0U@uWx${d)x{05uC~K-$7-u=FCl7wa~qegGntG|oQD85wah^+ zYF_MX?g{w;x}1w(qk7(xku3i&0?55(gU3T*u@uFt&oF85gyJSSF($FknVy zPO@QIz>+`wuZYvFnV{%FHS0&-7@xSC{bRM=%l9 zC3k)PF58@Ek_Ri5XC9tA=bT-q@=dj`Ks3aGz8L=Y({{rir1YN818??9^1*+??{fuqGFr{Ir>Vj{`kw7{MHB`!TyegY1G_> zfKfvl_8`{Gd|8;WUIAld?qV$gPHRaKIsx!vtBZnKL~Oyo#1hc*5ox5pAZ0A6-lBo# z;%c$9H#-?cWk0~alem&-2EzcU5teNX=cRqhOp025pCm1;ZZZp78n8hx!uWtgMUk%z zFBLI(HYXIUvtCLL-~HqII0g^>kfm?^+iHr^rm9(*t|XhOrk}NYbGM6pnGq+eWM{H2 zivc{4%^txQ^U?Y-?P||I_aW-eaWy7E@;IC^>aMd7(G@f$9N^dhdlkEnYIBn=CNVC( 
zjXOYN38P47BWoAV`BvUcFuucO^Zkto;SCJc24nkhq$jRi9cu<0?0ocFunD9;MV_Uh znu$2XeO$_XuQCD1;plfFvjLgNE87d;^hG>#*~Iu{v;8(t5EOM{lY%9ltyZ`1+qQ5y z*}mc8oUQT|BWr~%HDd8uAP16#{9VvKnDIE*uhH~Ag`iPIlQRyXsAVF~U5Qe)$5I&i zViv3n$@gtH*{y3lS$=L}Iz1qybxl-u_!_~-^r(J89rEIp6V5emeP9un7ig+7mjHgx zYrZo@`yEHaw9i4f8Nq%h{+2k@-Tw)QP_d-cGw469P*4y9kYJ#I_*dZgD=hvUL|~vf z9TZmo-B-Dms`3|3U74<6P6h=Y1x_a`ROP(9NUJrz&T`5FmxtEs`9G6$@C zr2o;_fX@{l#wIMzMK?=v3^v?L9G4?uc=xfhF<2z4g9`F6d3YS`{U1Ne4p1@Dy>VK^ zz=N#WxPm*^jC4d%`Jbp|&ReD;d7Z{NUvN8lPm&EoNpz%sJLLAY_I^cJeNz)9qacPSt>m4y9Qn9f4a<)fwg`ObSq#^3jh#8 zpZJ4Ay&Y_ALEap{4_v>IY36&?ghftZeP4w0v zKWg!h;3GjQjW^N@@z}366YN({;!+djR}%HB)zTVsL}Hji_0qK(mckydKWC3Jd79N1 zI;fz+i-zVWlqFE4AX@uYA{@gZSFku71g7%r*q|FV4}vB)Rcy=g$%7A4TE@c|?^IhY z6-g&P;FlJ{QZ-$1s^~HyPskne)dz(bIJ}mBsMO0MaWq;DpYA!lT#+GOp|Z*+{-!RP zgw1xA>grrR&r5jqu1E@;-zU)RF4Vd=;4b1wSK;~T6brnNbHjfRZw#f-f~D)Dp5r7# zKV7O&eVtP=MUl2@FpJBr+Wh`A)B)SG4rbI_VKH%>eSe2dXP)tg+%nq|&R!~Jt6b(? zoQaQOCm(8OYpZfTqF@|SWcbzGAfPK??nY>1qkybO*;xa+bAwpg7TOL`&tvK6bgb5{ zt9FiwW*RS%6!UQ-qDH@e+at>S0+!ZDvdy2&RS#!&B%TRpgD{)Wl~a0>CKXS8g;%X} z7=9lIMi=bV>xQB8I%M&jL%5+MymBlT~zQ@_!Ato|@YvD($GbE$>RTba3U`AE+f z`!1|sAx+khTeawkp!atA2K6_8_}wWA3?0?C<$w%ZFZ_l|O#Oriy2OG)GLRN=PLTf4sjNNpQD6-5(h- zPgpbGt*Tqh=|!9tqm=Ur?M68Jx^sD8h>gAd+)c2<^%44al#iw zRQAg+h`8*R!=r0*-oix4zIevSTCOc&>6rzCg1S$tVPHcL(j(Odhl{_2ms7o734P=$ zS6P#_l|;f4vhoZi_7FqE0CW}7lV_}VYK@}J+D@mDYjwgFiZRR8^}SxRzAZw?wy8Mt z3|+D&VTMMj&6x4h|2FNNKgKEv-<^~6i}~C<7TPsZuq>`i`A_1BZ@$yj9dE{2*uuZ+ z5f3FeZ`tyNN=2U%h;ioAD3Gf*ug5xeUkxB9s&1&Nx@LwhnyO#{=3loKF_H*K`k{R~Dk8}a9LiD$& z!=#~hcK(0^7Xh;*K^PEOQ4{huy6sBJRyR8mMr>V;JWQc<%I27yD}+4b=tIg@_xyv3 z23o5{l6YFnZEa@dUDK(su~{&Ricu*=^k61+-f-HRXtot{%^>!?(LvcHqL%2=+nWRI z7{;_#Y9&De&G;Z@>RnN%c5!^jyNlfDb>P-(9~rnI?dWkzp{U(F^0#l0y!3&l0(B26 zGLx@hA#5_fz?CM*qm0aCTLC<(?3WEzoCR$eo##AQC)SjjZ>g<}LE=2z6Owi|Z!8_O z741=m>V1=0Yb_Gm)C9k3#N#Isw@>%$>ZT=#)!v>$=K|ZvR@y!pJSSsYVurc`t|W?z zfu|}RhAc6@p1psKI6x!~(n`p#x?Bum%PJ)E;9!fBV0~kx4dNe; zyLUNXb+}#_B-^3Q|DL?0Te};kcqFu8qQGb5U=Ls6iXHk+XX%7y)wQ=y$geCgF1@sK 
zmv01|f7^jlW}8H>W1CHYLYjijg{ws<#Gi;F%^8Wi3|SWqIN_3EK(@V`t;bNTOeG5F zY0m~s<8QN5wTA5y=V7{Di$>g=+AJ8&-&(O?cIpws^pWX%m{X`DB5(MT1EU1RH3~j{Z-+uwOPhAKrhp?^2{`V z|2KuCeCYiHkzpjUja(xvhBS80*NF8(Ywi*?Oqz{6PBnzvqAZV@*4XwMuj$i*Iypx$ zvS%@=CH%PSm^A_1*sdnnqgYyDNze&{-iS5nBocwEK#8pcb4r(%)D8`zDAfX>@V%t97>a|?&rg^N7o99? zG6V&O1#{cg#Y4CMkCJSq3|9>$4_njRfRT$nAnw8uWIP&i9dU^xbeSgLiX;dVQtQ$w zRghZhCSIOj?+kl~LdRw_f3oh8(o`TQtB81HqLk9tYVP^gNJuYak8eDo>GBo| zr11m8C|Pdy+>$SBr62Wg^5|=@iu^87&zXe5$N;>fJpWH}fTF zhC0zHXS4qP9-B~pZCtKXq?55tkcO|}3&wv;bM`*bHQPbAiP|K;l{tS;b3hKZuC^S% z-+#|=_6^6}NCbeN33i2nuhdx=z*a=>V3kug`8}=Xm10OsOr27(d|N1)gj?~3Y9vDQ z93je~t1lTB5fnnqriwf>@>E0HrJ}Z#jEr<#7!Ym=$XwX2w(ci9B3oY`NDfa@VS!J} z@pL%!pUKoF%Jaq=cV`71O}z{$=rMdvG@-!q0Z3%B6!6qh<|j=@ONVr6?*-cwCQ}k% zZ7UmD9rw*P`#st!dW%9$m^@aM%rgp@m86PtJ?8ji+-X(`BDK){gN?*UxC_1^3vZ?N zk##O@EsobJqz+SG7A&hF#idwY)WRw`(xXN&$8WARKZsg!jXoef{ceTLy|i2mu*mgV+BZUW#z81_jog?r4% zy<;(ZiZyiqtL$QyQiqoJ3EizTD!0JWflr6wpw%1V#0Mket}1=vh6-IocL{uR^RBOp z##w5;-6V;90{F)rEMszGeIyz^4*W(E&QX^y;V+161{dBEe8xDwP=fiGPf2VCDKtd$ z|CG{a6m#Safek_6E_aBWp=FRJmBRJ)5CY;lBlv}{Pm#3Y4oA?9p$=(GnlyxAdZQ6> z6SF>kdukuL#Dj=57$}Q-2-k8?1O_IxNf)h1UM*yHwH$y zAHPzF-F*8#@gOmn(;~z1IpB1uj)CLz?fKOa`Kxu~XPCpsVybUsng+2JzccOHp=~Lt zA#4T+-y(k+$H)Y@V(wKck*-%D+liTEH21f$yzVGsMK^(wphMkpl~LnT{J~ zpjN@SIhfI*UJV%T+q3uEE{pX?iLH`lKCe7@>s_2aoda@NQ;eqBA@5D=;TnpTx+SB) z+JqdE7rr(1b!2SbmX0{ooavu&28)pihVrh}OKTsGd@15-UaT2AG@aQQExO=;4tkde z{NVrk2xsTRi4ZwU=B>h|=kPE>=NsKkoKp+x9_~sOJU=1&xO_L6g7bqGHiO>rh&&pKv5XIA++G<8Lda^~RinJh!}FNiNK&}@qLl&1riXUXK?vgOE+uV8 zTAR&x=TaU~{cjb{01)^7yy8W*2UW+*R;y7)oh{!!zn4O;-H>hNH(5Azenf}n27XN^ zf=e{OpRtG&ZLN2ehX_wMt!=ES6GpDz;qlYA{morkw?U5YMGE4`ijipq3isaAk>YD& z;;X{Ce`;QsOky~0U_8WN$DiMq3@b}0R>Hu}luj_^qcLYiovM*}p31qDX3G-*#;G3@ zRLmXYe{X3zf!0W_Q10VzllpbIZX+%(4cD3{SdFN{@tM7CqtCa454sgP%40jFdVG0l zTHlNLwTS|ni-}ps%aKiy!dBtSmqV~jM=yD!(CX98rfik z&XNH=KksN6HJpK6qzBED6WQoXO_X_sU|6QPIV!?Oex|OsmCi#R3R1d0K8}6MnuHRX zN;d{ro{2K@4qg3L6ybL-io?XPl0Aeiq2=n3GG15LvthPz*{NiXYP9i6>lB7&#wH=z 
zqBNHc`UHFK4-707SeT%*VYejVPG|A)~)~#@?cG5aJN})4&g168>svr;{@3wsY*ca&G~jN}$unegzG|1RZLmh45ON zydzwz+0Ff0T0_Y}868EFG~*h??ZoQOm7(Z?jfjk)8UnKgjIap4+>guOslyR!+`Jx3 zvJzBx&KeP0g{#?8R)YJ@3iOz73-fdKMDJA8`B)IEp3hJC&n(6?3p# zg`OqEBrZ@eoQE!cztb?rx}URHVu8~P9O5TeAW$$SN1PN~kR?$`PjWSgIlSyyVr+iL zF0?{qJ~$R~8z!S72^UvLPNJbA5IV=EhjtBst|0^tc zgZy1=z5i9eTyk6DL<^*-4hEhd4*P7eBD)Z%YQw^}T1p68RK>6{NWI{)DfAZFUz5K9 zH#;<;EhtlK&OoRz6$kX=ad*?w(w&pn*Lwz{OH2q%T(R#DxX$bdXH!bUP$~?mondq`ETp_=;Iwh_`q2{V=LC)v zvPn#TPMHDNA+|%_mnnytqsU_k3LydHT0kuE0K8Y$<1*>M-t3n5m_?o?$rYNFzQgo_ zaCz+Tu?&U5#iL}QJstYQUpaQuP^#fR6nIuf@DGN85?ki-ifyt6(D4FXo4r{|wyoCE zr0pGM4`p22L_a*A^;1zfyJO|r(W*Qh2h$WnTrhr_^G&z&wCN+dO2 zrq@mYzKAI8m(`PFzcyK;y^bX*wvzGg*y{1RP*Z}bb9hUAbEx_=(wb#$18=g_f^G%* z&QDO~*z+L`FM#1YVpY|>;SxS|_rOKR&sd8es}}JcRkY zS>n}bAi3JZPsZbp{VB}$p``Klg>&xHT1F)O>7i5p;XJIX-BHQI_msP0$ZrGct4+AO zXLDwACg)M8yDvE;6D5(o?oEn1|Km{riMMr%4-{esP$1#{0VD@YFIyXJke8#I{qJ~s zt*e@l0j>4v{|J`Il;}~WL|)OMVyP&MA&FYa*3;Y;RI7G!iOF)iz9Lg-t*BPKNn96w zWw+|rZzgW)wDDk7v7@vTbjo=qN?#)6;XnLvRJAslf8;2=9g0L}c<8Hgr=BtKan6VY zb4MxbE}3iWtKeL{6(0n%)DrtczLT@ndXs2ht0U3#-54(ISi!8Z+Gcu_TXJ1{M=$Y= zyH&%}kM@yOUEF5^&SL`3b-6OkLrhr0rrzD>Li`yomc8|}KaxoKf@fStXOuW!;uox3 z)N~D;hxqoZClj3!nA^)3@Y1ut`8TY?)=Ql2tUy`2}YO> zqxjYBI2a|5Pu$Cg(4JD3k4KlmL0)!^1EDZlF^N*XkyFhJO&@Hm8&m9^_jl#6wEqcY-1 zr&IRn06{PWw>!fE%Zx(5a>rnlEui}4Uf0MAMP7n0?1uN3Jxwqe8%*v>FR8Zn0}PPe z?0S*C%cu>Le4>tWcaL`AQp0r=EF9dWm|6uySyCn{3WFkW`2t(Iz$?Rar(nkTo#N0q zX?h7QhW1=UY7PsmWh&yIkBmjRT2Y<3r52hV$f(+p!E*OxfHxo3}sa)?AsN`bl`xjpxpDMo_#kb}6IbCJ;_adk9;3UhD&I(k~U5TUb^Wb(1 zg3|gw3BUZt+C&lR!FEtz#e!DoZQQLjyxcv!Ijr5iZ2xGs_+Mcj3g9@fsd6tTVZ;&q zlk`wvMv%(8NuQir7DQNv7+)&?Cio0Yu*272_eHk`X^!^MS8CarXJ9Y?B-L~HIphlF zBs!)T39hk-Mg3jdwwNQ0Z4uo}ej~{UArA*bbMxd;;3qbha(;^r00BJM>vb9mS-Sw7 z^#BHCiE@}Uyk}8Gff(rlImxitF1bhcrzV52qRf{uuPj{XsfDPXXC&@S^TUSlprI>h ztZuLl9VdJZPd=PL4d?b&+7QMO%Gw-X$c*3I78BwkT542Xw2H<$wZC?KI2X~0kNDx+ z2biU^e{Gm!w$ys$-fFGAxlnCxA)RF&WqJg6d{=3veIzMQVzE!DbiV}5k&b$YCW2Bj 
zIKNTej*pqJgjagTYyQO%>wBzg_WO)D*3+xUhuFKoVNXhRrbqR58d-jtY?zD0yHop_ z|6J~8!o27Yg!7wkN>$J)bIYdGQof8brF4mf|NVVB_;CZnM@(R0g3&T zL9@>`QC>=wJvbT+rM$D|RBVV^ZyF;Tx{<43-Ag~rMK-Il5S0xZHov_LL=WBe z%>Jl24}6V1oNs5(h%jKN{KUtWe{g;5-{MMWP&V@EWSn@@_y?lB_7?+9pSqg$WKs^i zX|}G<4D!ZVEYIC$dx$ahUCLRuu#)b{i-kE&9lW=&oo1)X;gz{;cf|x7t>1MJzs4v6 zRw$0cS(U5t^9CM}6cm_Sz3e3p(NM%k>-|jXJkTIKJ8jn0_qm#fP{OY!p&7IdLs8bE zc#e1iQd#CfYlj`r+aeh!J-g7~c^~Ar)^;4JKE`cE5>~Z;8Revz0Y^8`G2$bj;Y(_K znl+^B+JiF8F?J+-H8s)eJ2zdY7z93}d26Q>6m0T<{BO#HdBz3}A&>W{3`fshryqs?a*SX#Z!#KMw4E)$(fv>`yJx~(l+igxI=IV!CeBuo#4UU8Jyq}WFQdS-7UB~1b26bpursy^qu6~`<-)g z?)MAs?X|jl_3ZuB>e*dSRqd)$l!b=DhJb@WfPjD?hsbyZw6lhUfB?WkKwv>2KxvEG z**crrI_s;t+nYM+GP&6RU*y0*(PcqEf#?74_%B|8(zs5AZWffFQ>j+bcm}H)hc^gi z%Z42oG_0gAs7?qO<#Tw5mY288j75q{MGo_oi0+@iG`i5#=o(iu!pw{%0CmDN;MDo@ zkBd!ChxZ;bOz}Z#T|CIt%h1(}S9ey`E1n%;)r<+H5tBmp5!FEw244j>! zZt4xxX+VA<2|sCc=JF4B2XZbrUm(OUBqhB*{&wj{^+7mc6y2}v-C*FYge*o#oL$-7 zP7E0d(vTdhhIJ%&V5eK5Xv<|oC+lgv3|6f_kX*zI1J&>>{c9#usZg3W-kvxgovxZC z;?SVHC=T5OJOIi1!$J~8nK)(b@{DZu7z)jM51;MuI^FXS(awB7oMPD2IK34C z!0r^Z5g1s;(DQxF-{T-qXEmNXV+%MPwnN=%=NRRPAC-Vq{VXX-8Gx}Z*IV+-aTozQ> zz@2pvJD%!0pHQ%mD)f<8?r16*XTZtGe6ieX07!oQgHTPyg4eRrD8rGD!d>44*l{VA z`UUIJi%}tMK$(IA^E=zPWIuJ*wXaq!$N6x&dk;5acgeunp#LYbqBXF)`X@=$u4oor zp&=kV!F~-LyfSV;W>-5$Ya=^5>z__6Pu<2Ymk03VGxZ4H<7$=KfH~s~Oa1L-T}jb# zoo6NmUYY?8i+L)|q}B6%gtSyc;_S=$a7?1{FtSMV3^MZKTJRUN02Hl0i#-PlC9OY5phn?#oQWs&jmJ0mCl8~Y0fvIxdW1qCY-Yy?Zs|| z614?9u;tko`KM_j93*c9E>jMVA*{6JG4w06$oHL4=hvb-mZZK1ol)Y$xe6evN=kCJ zMu~*v3qo7eI^BJHf}s>y0~t6G2TVtKQ7tlX^9s08ccXjYkXCkav-tZGg+tWAmG=Nz z(Q{zo*LChMj0UZ58gt9VTTa+*m#)nZ>%9xWFu@w4)r_f1rXzEPw8&@)GJ->?@Wd<+5F;A5GhC>Xa=cyD@L zy#^~StcD{}Us}#jFH^Xf@i|mx4sq?|`>ZgEn5^}TGuF8FQ8-@d(WC}F(r807CZiTk zxK^C%G%ScjyBEiZXvvvVl(EMyyBA6UQfxF?B;f1YZ6W(iQoN;S10hDaGYl9nytvg% z-vIQg=<|WIrUYC8eRJ-p9nU)?URpz#9gUNgF3#=vDDkAg*jI9w-voL!KU8HFgb-n1 z%3aX)(BI@zjmHA+h*aO}AS@kPjnf1O)Olx(MIR*CN}zT+j1O}&x8K=fj&%)=kT)Ij z`y@~+5P~wCNPm!`g=B&RpUh6I?{+E8t*`s|YihaPwlek-uJmW%EzG&-@Kass6lGp# 
z`EKTy7u4fHuF>6pI#+{Lh<(8(LdUwbGV>IAO9sC?g0e99l^rG5)*k6`V|F61v&FC; z1+w6#&2j~14bF_z2&m1*Wj=5igJMbCfK$t~JH<9_z(c-}j*nbF$t{D<4oC~|v;^cY zq&7xm#;%+X!th%X-YG=w=-De&muN=$@=uJuU=mv3r{Fj;Q@vGmE?5hC5A5?|evOdG z-}!WKia@e%y8H5e^$S~TaH7Si0>>EHTaMN{uX}sXoa50W!}GXItF)B@O~+<$N7uxM zOO|^!Qa>L;MGNLAdv>&B$*t$0;ZraJ|H;u9__!k&Fnfi-JSBucfCO{&4`23IrvBTn zL4spW@ZA6IqcmpNs)q$d{1)^SFxl?F=B*gyU^c0oWRvRi!9I>h&In%Hd;8Q{nQyQo z%C?$Ss_T|Cq-ELHwd^|;8KF6ji8PXbsf($xHSB5c@F?ZS4hvaeIyEdb?84ssyOwuW z(OZQEDS{u1i~^v#SM2gpmK1}@9bPZ;d`>%!GTygd%oQ|}cZ@ASpBm?UAvBF0tn4Qg z*YF}VJ96yVx~15P<2C=|Wb_U)F5B5eAFP;PJB|98Hf=PkIuO<29^y&z%D` z3wuKUQHSD8JPF?+K|tWkflu&^f7y8_3sX~PC+1%_)}IDHEp|B~l?4!V%5Y8cY8@Wh zk18ul4Q%Zhu5%75GN=;{kBo-F2Vb|iD&kNI3^52}B zMxFVdHzDB=++a2gH2$!cE~WPB>XAT2XRekuZ7soH=hK3Z{X<4T4Qh5}W30B3f$Dq&nW1 zDBGb^JEM>x;?|}}bJ;VK9Do%Xe$l6c;iZ?hjIfs>=-t9qc;zDOoNc1?E!+4vW?vo} z`~a7;itg`6+`5cTA^MGFq4;!ccjQuJc;u!frt@#ssfmyEoi@Mk!oCEcO;Ba^?C)Y> zpI{Ut^(u)-FIN$jxt3iK=wD3sxWvo_BGaO?v_?quJRNy^s<=4lGFDu&2E3o&$wVE@ z-nbv~p6ri&(U9M*O2OQ9uzVwZd5NXr=ly9{k2rGnhSka%t6795LTvEHw})%Hkcrgv zL?%tX?L`KNmDM|fqJ)$$9eXkDP8XSLy3NAQfu0(EfF^Wq%S^^ zk32BEt_xf5C*zLMtv2zA3-zc&e!X3kL2E26I%kz6wF$|QV~o^6%7=aX?qp}Y`|JH) z|7j$;>-h6Si$}YD%k{y|^42`w-Ev{2-lP5b`TjK7v$tTY07<*g7k$^%IKFe*aQ znQy1mH^;OSdwSpW^~o+)j}N<7kEh4q9$nFT{0M1#>S6KOOvrHXQnYhZ3?(6}3Xt5^ zk~?$G(DBxM{C;2Y_KNy7$;teB20C-_-hB6+;m$*W(@9^2kM<9uM6TuIgr@^8UG9~@ zmSyIhd#Po4JZSntq1kp8$@Pa#rL2WXovRInk1Txml@lb7w34pf1;`;ypZ0>82v?MV z?Ajm;Qb;H`YdT4LBW}_H2J2#9nw$u7%Z#mUL>eUv={)2iHMN`))c4M z2KQ=1o%!46)7e?}xMMU|!4#OSH5Pl@q8fcd4oC#pNVvlbFC9S=JjxJ$iO^WZ46i$>Ay0b;!mc;?xOm;z{qA2?PJ>m>Oo-!I}v zxJlHonC_lIb?_i}#%(kYQu-j(Ns3U`DJ5vp^&y5qDlQ_{z0>6LGhU;WWqh&4Q$ocV zq;ov0%;#q^#+R0!j~nNZrXE8mS`Js2pkR=EOCX9o!ZXHzh)kW0W`#mKF>)W44Qoy; znUH7YP>yEBnEr;toMdbjy$xg`bJmzZvw{vwgf1Ks)7aL<%-&4U1Z^==4-IK@}lCF(szzf)Hi_yHgu@!Mm-2$>8SW8i?3jV1Pqv? 
zXShNLUm5)T0qBa%`_)m@iii)jpoT_)tGpXC_u;5HzeouV z71R5>{I}8N6sE%o&{$rp2P$oZ_p5=nB?c`alLjCQ-j#%Q5LG0^hj5$Ebn)`@GD2YW zCdZ`s>Wpg0g1?n;?V0TM!<4Zwo$6kEZ^*p)QGmtQM7m~nH{1HyYDo#oAJB>5SWs*- zCd6tVySg9AKW915ygEu2v7E@D=e_C>>3Qrn7r#NZy&R$$sRMa@b$_mBOrpDhntxR- z`eh(d0&bKvlo~~Op7ITXH5K=T1$i>n5}T4P6;8FJR_-@b(*9wf^Zw%7BB~fzYZ;bd zwWD%r-WTQ3D;9z&<toR7M-%`i0)3f@C{bON@hdL>shmjEY?{>m0-#0BdQEhq1(f8=2SJ$#4O!7qrNsO7qzHVccD}RNPvs^2~yW14?)} zH5Tgzy5d&3;@SUmJb*?fTzD^h`SrQ;48PfzKDG3wXYO5nka62Tz5SNcI<19wu(;bY>O2n}`P zU6*>cV!Fp@#CD{dD028!nnWdcJyvn?jP&CGM=d~#84+ffFRF_4#ShBM34fBd4m2RY z6wcS^8U2ME@ruKDTP@F9MpAo+M=K&}g15Mhbe%Yu=Qk0GYR`c)`wD&66|JR1Z;hFO zt!*G$8GBvYIEHg5%sC5e&(Ya&SHfYmS>49LN$tjLn0aBPX^9oJ&LH$&pN%XHKn@;4 zwhm&T0XJjBDM00nKWxar5QM1{i!@B1O)(O!;SG(G9X~;As~-Vw z5&fqP&3*xOqZu3ogbH{R>yKd6*}~Mul=;{7R~&kzIr=l|Xvh1;5Ab~ZYB9!$AR1aW zVUx?jXh9~F__MB6B8F?z3wr)$oWZI$emRztvVLnGbW3njAM~aQ9a3}T{aQtWw`H`n zRDrZ`cX%W~#@nrjar>ZVk7MDX337~>i&7j_=66tH)lrfhpR_yEeNLx5KIC<21`>=* zF(d&9be6rI+e+L-si~+DZpPT$bpF z7UWsTccn}LMMxu!mu{0tFL$v@fqJ@~M7=Zx2}g{_hskfNDi#3Y>Zorq(9PsZ+O#f* zZo=9jS?WF>y0oO*zLyw>u*df=P)}eR)v{|_%9wiPcQh})lrGbz>~v0jXN}Cpy?pG_ zra52~LKyX^rQK2SjQUrD#R;7{e{?4l0@lLYgCDmQ8A9UVlh^0_(f6-!`y)w7gdQfR z9Jx+5UNPBu{Wv!F^?78y+uG-5CiDH;Kg{+tPbTU<@q{!Ulc6T!wg~4AD$bC$+J! 
zg(1uB`-l1dTM|ulOjh-o?jI5{d>y)DnyJVYkhT_j)X2B}nn&hLeWojdeZhijBx$_m z#}Q6NDbrcbz;)@5l)I$e$z802A4Tp+$S^i1Pe}74bX_g!8n;D3CrUGYGuAPJBs_i` z>$RdPXTv>(?2SuRqbIsEyCa3yT)fWFQGg`(z*DT&oGL~* zWQP{ew-H3Jb6=Nxo#~#Kee{77+5moTIaS_lyuT8$5AKVQx`ddt>&q!xQh1cNylFNX zp;O&*?RE3uv+11NmJLbAqILswR>Q5~oM)L5aQF#+yrucR`~>d#M{~=o#BnW~Jy_h7 zZFhZO^R}~PtUz8ENf9C?cm94XH8HLdI<7plT|wjGL>%o|9o-2U6Yj#1uzyKpkqjDU znrNghC#C)?YO>r%6mmzDg<7QMTD=$N)dGt{MTShS3Y3q4x`l| zno&);G&o((Fs59Cnx$xXC_s_5Yo7#*32EhxO$jAP$@Y0qvB}~aIXM>_He7IN;}>TW zDpNg9ZCW`p^=d5j7VV%ia|w{5MK6KQu5LqQoREI{J*r0`A`z`3qvi+w&W~^!NEt z!eL``Im9?7J{8Ygi85AhoIu-1*T5%Um<;?%6KT$tM9WZyf$n@cWE;o-f4YgfTfVnqU5$mR z_f6B+NrW*sCZPv67JLKv{YmEy8}&~{>X$6+b**JTJgRK0a_X2u!rkVrG<$l z-+ZX0i^BdEpFo$2Kn5jV=otFsndq^=7QZ zZl`&}Dt5!}HkrG-s9qIE)uI=()ts|kO33G01q*Z4)RI7d0f7$yJD0&2s znPuiB48zUN$Yb?oJSB)p-gHlHZO=5l2SI6IP_p4{9<~WF8Sb zn4=v(ifJ7YA~=wv?IY^abVT(jN+uA*cfwq6-Fw5jD=J>-3Y4ugtPzRc3gd@!McurS z^5cT7U3d}3-^89@WsncTM_mW5&L1H8%r9xtb#pNBtXhbJhW9ItTd}mTnprwskaame zO1Y}v5@N!y=YBa@cC?^xJwW1sPpG(a>1-vICBv4AWhXz;VObv*&Cn_1qE_ZlmiAPk z!e^p=r`Zh<+Yi=NXbu^k#V_k7?QFh!7J6kh1pSoh0^e~;t!zN%LaC)@beb|LJu{kM z<4(Mn=x-Nik}gXqdi!3bb~Z?;Q+>Xkx0JnccH^X8z!eveU)EM|#16q4iRl?LC+Z({ zSPtXs&`^s%sdL&`snZsv-0>Bsh_N~n+j+}|}cVj(Vdi=0#Zypn|+hsOi+fEZ-6VNJewQ#J{ zDXVTGyLR^E(*`MT%J{@WJXbroH8vYP78J4?QaL%wTX50%Qrm$f!!b3T*r z_w0nTE=1Imo9)J`=SF0w)in!!ZSWeJo(6Ai+ z=K|XI;eb-TjkNGffMK4y3I}!L=#u{EGXI7*)VJO)WGPAo+$8NZ1egYx#82!) z_bd|GA@!dN`i;@@y$-wKB-WwH5pLUHsWM}3t>T*A1uGPYc{VhWzDUze6Vq5tgKEIm zP%)564W3y(nWukEiC=OkU2M}M_*APPbO@|`m$rzvn-WaFHqJO%W0uBB=a~{pzmgiI zOoFvILn&YDnij@!F4(sd#j@{}x9IUI*+YQsJC>Yv`6ruP79sN2QL4VWjDF7J*YSvy zNW4(BRue46`?~JZutu4{DsJo_I{w)~_?5zPqf;chys}Evv$#Fj1d>datF$(T!26L7-$F{ta>m>RSVnc^8a91Cq5^1e@y@O{*|S?pvLk?a-;U zeWJ!ui!qg1k_39VlDAGY?AlmCv)%(8rF_H^vtLJU!VMHx-OU(t-!xs07)iK0?g^(8 zk8OWi+I}iBxDYpW+)MU?jb$E+x$NGS{fPPW+C=E+@pU*h%Be*g+E$-_ZgDaOWdY?mPfku z-6Uh}qhOWd@DRWZh8ZOt%^1}H^}i%m|H|a)sM}Iv_ZrO%z9wL?^T@!3g+GaL1lJ6? 
z2DI1(G^}UQ{`Cl5<2Uif;spnQ5CJt1P6RqTNYVHm=Ln%0YE6)4cd6kVi**vt2$C6W zjlX7Rsew8R!>>n(uUq5q@1M^BD-jzQ8?CJ!JaCPK&4j7JM$d&3dZ*{Bc;IFIry)C6 zh~`iPFT5Eze82z~;!Ny}6&>yDotTa798G_&Jh-pqfAu%;(s{+`%67BhgHBsD?{Mqh_8>1a{T%HijK0CEchzFj*trl? zYGK2oA$kY$ZoyE8A5HRvU)wie6h*Q2red1d@QHSDmLIK zg!`ycaBzQkGK1~Lb%eJ7IIQ=X_e48tFRAb0RqRBgdwl`*%;olN!jK9Y*{j&=$X-dW zCb-`tpqN9UhD9&%yjr1JK;aU2z4pcT{g=1AXg$2W&tj3oUCv5{mhD#)J7I2+s-h={ znM&UOk^l7Q*L~h#HGBfAA>glSXlQT$KP?2S-@lHum|<||9YFk6>Opk-^?M|Q>>*YW zfgUKad|VA8^C^u+mu-TagbMSRk1O?zozUnCu8pM_cQk<8+?tD;ezuq!cFZ1cypB= z4JDVFs*QMJW}Iopj&@o}?Q{lS+u1Hc^mo8ghAoV^2ht*b7F`RcEljJ~sZv;3R?}~S zJoU!WZG?eng@7{YF&Lv#1uhQnW1_q~eIt)=C&uq`ey_j(DX9wO|GmUNs`0-|`8{#^rxZc3#{$32?`hQE1^m9n{Zqg! z$u9waZg_u({vO5u31uYz1NwVR|GR|WHh#D1G#qq literal 0 HcmV?d00001 diff --git a/tests/test_fragmenter.py b/tests/test_fragmenter.py new file mode 100644 index 0000000..76653df --- /dev/null +++ b/tests/test_fragmenter.py @@ -0,0 +1,50 @@ +import pytest +from gcms_data_analysis.fragmenter import Fragmenter + + +def test_fragmenter_simple(): + """test simple algorithm for fragmenter, + gcms_data analysi only uses the simple fragmentation + """ + algorithm = "simple" + smiles = ["CCCCO", "CCCO", "CCO", "CO"] + fragmentation_scheme = { + "CH2": "[CH2]", + "OH": "[OH]", + "CH3": "[CH3]", + "CH2-CH2": "[CH2][CH2]", + } + + checked_fragmentations_1 = { + "CCCCO": {"CH2-CH2": 1, "CH3": 1, "CH2": 1, "OH": 1}, + "CCCO": {"CH2-CH2": 1, "CH3": 1, "OH": 1}, + "CCO": {"CH3": 1, "CH2": 1, "OH": 1}, + "CO": {"CH3": 1, "OH": 1}, + } + + fragmentation_scheme_order_1 = ["CH2-CH2", "CH3", "CH2", "OH"] + + for smi in smiles: + frg = Fragmenter( + fragmentation_scheme, + fragmentation_scheme_order=fragmentation_scheme_order_1, + algorithm=algorithm, + ) + fragmentation, _, _ = frg.fragment(smi) + assert fragmentation == checked_fragmentations_1[smi] + + fragmentation_scheme_order_2 = ["CH3", "CH2", "CH2-CH2", "OH"] + checked_fragmentations_2 = { + "CCCCO": {"CH3": 1, "CH2": 3, "OH": 1}, + "CCCO": {"CH3": 
1, "CH2": 2, "OH": 1}, + "CCO": {"CH3": 1, "CH2": 1, "OH": 1}, + "CO": {"CH3": 1, "OH": 1}, + } + for smi in smiles: + frg = Fragmenter( + fragmentation_scheme, + fragmentation_scheme_order=fragmentation_scheme_order_2, + algorithm=algorithm, + ) + fragmentation, _, _ = frg.fragment(smi) + assert fragmentation == checked_fragmentations_2[smi] diff --git a/tests/test_minimal_case.py b/tests/test_minimal_case.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_name_to_properties.py b/tests/test_name_to_properties.py new file mode 100644 index 0000000..d9c92ce --- /dev/null +++ b/tests/test_name_to_properties.py @@ -0,0 +1,98 @@ +import pytest +from gcms_data_analysis import name_to_properties +from pandas.testing import assert_frame_equal +import pandas as pd +import numpy as np + + +def test_name_to_properties_wrong_input(dicts_classifications_codes_fractions): + dict_class_to_code, dict_class_to_mass_fraction = ( + dicts_classifications_codes_fractions + ) + compounds = [ + "wrong_name", # test for legit string that gives no pcp result + " ", # wrong entry or datatype + None, + False, + np.nan, + ] + for compound in compounds: + to_check = name_to_properties( + compound, dict_class_to_code, dict_class_to_mass_fraction, None + ) + assert to_check is None + + +@pytest.mark.parametrize( + "compound", + [ + "2-methylcyclopent-2-en-1-one", # Comment: small ketone + "hexadecanoic acid", # Comment: another compound + "n-hexadecanoic acid", # Comment: different names, same compounds + "phenol", # Comment: a ring structure + "phenol", # Comment: repeated compound to test idempotency + "2,4,5-trichlorophenol", # Comment: chlorine, unidentified + "phenoxytrimethylsilane", # Comment: silane, not listed in fg + "bromophenol", # Comment: Br not listed + "9-octadecenoic acid, 1,2,3-propanetriyl ester, (e,e,e)-", # Comment: large compound + ], +) +def test_name_to_properties_single_compounds( + compound, dicts_classifications_codes_fractions, 
checked_compounds_properties +): + dict_class_to_code, dict_class_to_mass_fraction = ( + dicts_classifications_codes_fractions + ) + + to_check = name_to_properties( + compound, dict_class_to_code, dict_class_to_mass_fraction, None + ) + to_check = to_check.loc[[compound], :] + to_check = to_check.loc[:, (to_check != 0).any(axis=0)] + checked = checked_compounds_properties.loc[[compound], :] + checked = checked.loc[:, (checked != 0).any(axis=0)] + assert_frame_equal( + to_check, + checked, + check_exact=False, + atol=1e-3, + rtol=1e-3, + ) + + +def test_name_to_properties_all_compounds( + dicts_classifications_codes_fractions, checked_compounds_properties +): + dict_class_to_code, dict_class_to_mass_fraction = ( + dicts_classifications_codes_fractions + ) + + compounds = [ + "2-methylcyclopent-2-en-1-one", # Comment: small ketone + "hexadecanoic acid", # Comment: another compound + "n-hexadecanoic acid", # Comment: different names, same compounds + "phenol", # Comment: a ring structure + "phenol", # Comment: repeated compound to test idempotency + "2,4,5-trichlorophenol", # Comment: chlorine, unidentified + "phenoxytrimethylsilane", # Comment: silane, not listed in fg + "bromophenol", # Comment: Br not listed + "9-octadecenoic acid, 1,2,3-propanetriyl ester, (e,e,e)-", # Comment: large compound + "wrong_name", # test for legit string that gives no pcp result + " ", # wrong entry or datatype + None, + False, + np.nan, + ] + to_check = pd.DataFrame() + for compound in compounds: + to_check = name_to_properties( + compound, dict_class_to_code, dict_class_to_mass_fraction, to_check + ) + checked = checked_compounds_properties + assert_frame_equal( + to_check, + checked, + check_exact=False, + atol=1e-3, + rtol=1e-3, + ) From 3646b50f072176d309fbf9d37be78e167bdda74e Mon Sep 17 00:00:00 2001 From: mpecchi Date: Tue, 26 Mar 2024 16:02:12 -0400 Subject: [PATCH 4/7] minimal_set tests for computations first commit --- .../example_name_to_properties.py | 223 +---------- 
scripts/utils.py | 3 + ...sis.py => AAAA_test_gcms_data_analysis.py} | 0 ...ties.py => ZZZ_test_name_to_properties.py} | 0 tests/conftest.py | 362 +++++++++++++----- .../S_1.txt | 0 .../S_2.txt | 0 .../T_1.txt | 0 .../T_2.txt | 0 .../T_3.txt | 0 .../cal_minimal.xlsx | Bin .../classifications_codes_fractions.xlsx | Bin 11719 -> 11780 bytes tests/data_minimal_case/files_info.xlsx | Bin 0 -> 9100 bytes ...xlsx => checked_compounds_properties.xlsx} | Bin tests/example.py | 37 ++ tests/minimal_set/files_info.xlsx | Bin 9014 -> 0 bytes tests/test_minimal_case.py | 70 ++++ 17 files changed, 372 insertions(+), 323 deletions(-) rename tests/{test_gcms_data_analysis.py => AAAA_test_gcms_data_analysis.py} (100%) rename tests/{test_name_to_properties.py => ZZZ_test_name_to_properties.py} (100%) rename tests/{minimal_set => data_minimal_case}/S_1.txt (100%) rename tests/{minimal_set => data_minimal_case}/S_2.txt (100%) rename tests/{minimal_set => data_minimal_case}/T_1.txt (100%) rename tests/{minimal_set => data_minimal_case}/T_2.txt (100%) rename tests/{minimal_set => data_minimal_case}/T_3.txt (100%) rename tests/{minimal_set => data_minimal_case}/cal_minimal.xlsx (100%) rename tests/{minimal_set => data_minimal_case}/classifications_codes_fractions.xlsx (50%) create mode 100644 tests/data_minimal_case/files_info.xlsx rename tests/data_name_to_properties/{checked_compounds_properties_correct.xlsx => checked_compounds_properties.xlsx} (100%) create mode 100644 tests/example.py delete mode 100644 tests/minimal_set/files_info.xlsx diff --git a/example/name_to_properties/example_name_to_properties.py b/example/name_to_properties/example_name_to_properties.py index 48f6ee6..31b2b10 100644 --- a/example/name_to_properties/example_name_to_properties.py +++ b/example/name_to_properties/example_name_to_properties.py @@ -11,209 +11,6 @@ from gcms_data_analysis import name_to_properties -# def get_compound_from_pubchempy(comp_name: str) -> pcp.Compound: -# if not isinstance(comp_name, 
str): -# return None -# if comp_name == " " or comp_name == "": -# return None -# cond = True -# while cond: # to deal with HTML issues on server sides (timeouts) -# try: -# # comp contains all info about the chemical from pubchem -# try: -# comp_inside_list = pcp.get_compounds(comp_name, "name") -# except ValueError: -# print(f"{comp_name = }") -# return None -# if comp_inside_list: -# comp = comp_inside_list[0] -# else: -# print( -# f"WARNING: name_to_properties {comp_name=} does not find an entry in pcp", -# ) -# return None -# cond = False -# except pcp.PubChemHTTPError: # timeout error, simply try again -# print("Caught: pcp.PubChemHTTPError (keep trying)") -# return comp - - -# def _order_columns_in_compounds_properties( -# unsorted_df: pd.DataFrame | None, -# ) -> pd.DataFrame | None: -# if unsorted_df is None: -# return None - -# # Define a custom sort key function -# def sort_key(col): -# if col.startswith("el_mf"): -# return (2, col) -# elif col.startswith("el_"): -# return (1, col) -# elif col.startswith("fg_mf_unclassified"): -# return (5, col) -# elif col.startswith("fg_mf"): -# return (4, col) -# elif col.startswith("fg_"): -# return (3, col) -# else: -# return (0, col) - -# # Sort columns using the custom key -# sorted_columns = sorted(unsorted_df.columns, key=sort_key) -# sorted_df = unsorted_df.reindex(sorted_columns, axis=1) -# sorted_df.index.name = "comp_name" -# # Reindex the DataFrame with the sorted columns -# return sorted_df - - -# def name_to_properties2( -# comp_name: str, -# dict_classes_to_codes: dict[str:str], -# dict_classes_to_mass_fractions: dict[str:float], -# df: pd.DataFrame | None = None, -# precision_sum_elements: float = 0.05, -# precision_sum_functional_group: float = 0.05, -# ) -> pd.DataFrame | None: -# """ -# used to retrieve chemical properties of the compound indicated by the -# comp_name and to store those properties in the df - -# Parameters -# ---------- -# GCname : str -# name from GC, used as a unique key. 
-# search_name : str -# name to be used to search on pubchem. -# df : pd.DataFrame -# that contains all searched compounds. -# df_class_code_frac : pd.DataFrame -# contains the list of functional group names, codes to be searched -# and the weight fraction of each one to automatically calculate the -# mass fraction of each compounds for each functional group. -# Classes are given as smarts and are looked into the smiles of the comp. - -# Returns -# ------- -# df : pd.DataFrame -# updated dataframe with the searched compound. -# CompNotFound : str -# if GCname did not yield anything CompNotFound=GCname. - -# """ -# # classes used to split compounds into functional groups -# comp = get_compound_from_pubchempy(comp_name) - -# if comp is None: -# if not isinstance(comp_name, str): -# return df -# else: -# if not comp_name or comp_name.isspace(): -# return df -# else: -# if df is not None: -# df.loc[comp_name, "iupac_name"] = "unidentified" -# return df -# if df is None: -# df = pd.DataFrame(dtype=float) -# try: -# df.loc[comp_name, "iupac_name"] = comp.iupac_name.lower() -# except AttributeError: # iupac_name not give -# df.loc[comp_name, "iupac_name"] = comp_name.lower() -# df.loc[comp_name, "molecular_formula"] = comp.molecular_formula -# df.loc[comp_name, "canonical_smiles"] = comp.canonical_smiles -# df.loc[comp_name, "molecular_weight"] = float(comp.molecular_weight) - -# try: -# df.loc[comp_name, "xlogp"] = float(comp.xlogp) -# except ( -# TypeError -# ): # float() argument must be a string or a real number, not 'NoneType' -# df.loc[comp_name, "xlogp"] = np.nan -# elements = set(comp.to_dict()["elements"]) -# el_dict = {} -# el_mf_dict = {} - -# for el in elements: -# el_count = comp.to_dict()["elements"].count(el) -# el_mass = ele.element_from_symbol(el).mass - -# # Using similar logic as in the fg_dict example -# if el not in el_dict: -# el_dict[el] = 0 -# el_mf_dict[el] = 0.0 - -# el_dict[el] += int(el_count) -# el_mf_dict[el] += ( -# float(el_count) * 
float(el_mass) / float(comp.molecular_weight) -# ) -# # Now, update the DataFrame in a similar way to the fg_dict example -# for key, value in el_dict.items(): -# df.at[comp_name, f"el_{key}"] = int(value) - -# for key, value in el_mf_dict.items(): -# df.at[comp_name, f"el_{key}"] = float(value) -# cols_el_mf = [col for col in df.columns if col.startswith("el_mf")] -# residual_els = df.loc[comp_name, cols_el_mf].sum() - 1 -# # check element sum -# try: -# assert residual_els <= precision_sum_elements -# except AssertionError: -# raise AssertionError( -# f"the total mass fraction of elements in {comp_name =} is > 0.001" -# ) -# # apply fragmentation using the Fragmenter class (thanks simonmb) -# frg = Fragmenter( -# dict_classes_to_codes, -# fragmentation_scheme_order=dict_classes_to_codes.keys(), -# algorithm="simple", -# ) -# fragmentation, _, _ = frg.fragment(comp.canonical_smiles) -# fg_dict = {} -# fg_mf_dict = {} -# # Iterate over each item in the dictionary -# for key, value in fragmentation.items(): -# # Determine the root key (the part before an underscore, if present) -# root_key = key.split("_")[0] -# # if root_key in hetero_atoms: -# # pass -# # Check if the root key is in the sum_dict; if not, initialize it -# if root_key not in fg_dict: -# fg_dict[root_key] = 0 -# fg_mf_dict[root_key] = 0 -# # Add the value to the corresponding root key in the sum_dict -# fg_dict[root_key] += int(fragmentation[key]) -# fg_mf_dict[root_key] += ( -# float(fragmentation[key]) -# * float(dict_classes_to_mass_fractions[key]) -# / df.loc[comp_name, "molecular_weight"].astype(float) -# ) # mass fraction of total - -# # Update df with fg_dict -# for key, value in fg_dict.items(): -# df.at[comp_name, f"fg_{key}"] = int(value) # Update the cell -# # Update df with fg_mf_dict -# for key, value in fg_mf_dict.items(): -# df.at[comp_name, f"fg_mf_{key}"] = float(value) # Update the cell -# cols_fg_mf = [col for col in df.columns if col.startswith("fg_mf")] -# residual_fgs = 
df.loc[comp_name, cols_fg_mf].sum() - 1 -# try: -# assert residual_fgs <= precision_sum_functional_group -# except AssertionError: -# print(f"{df.loc[comp_name, cols_fg_mf].sum()=}") -# raise AssertionError( -# f"the total mass fraction of functional groups in {comp_name =} is > 0.05" -# ) -# if residual_fgs < -precision_sum_functional_group: -# df.at[comp_name, f"fg_mf_unclassified"] = abs(residual_fgs) -# df.loc[df["iupac_name"] != "unidentified"] = df.loc[ -# df["iupac_name"] != "unidentified" -# ].fillna(0) -# df = _order_columns_in_compounds_properties(df) - -# return df - - folder_path = plib.Path( r"C:\Users\mp933\OneDrive - Cornell University\Python\gcms_data_analysis\tests\data_name_to_properties" ) @@ -269,18 +66,7 @@ compound, dict_cl_to_codes, dict_cl_to_mass_fractions, None ) list_of_compound_properties.append(n2p) - if n2p is not None: - to_check = n2p.loc[[compound], :] - to_check = to_check.loc[:, (to_check != 0).any(axis=0)] - checked = checked_compounds_properties.loc[[compound], :] - checked = checked.loc[:, (checked != 0).any(axis=0)] - pd.testing.assert_frame_equal( - to_check, - checked, - check_exact=False, - atol=1e-5, - rtol=1e-5, - ) + # %% to_check = pd.DataFrame() for compound in compounds: @@ -291,12 +77,5 @@ dict_cl_to_mass_fractions, to_check, ) -pd.testing.assert_frame_equal( - to_check, - checked_compounds_properties, - check_exact=False, - atol=1e-5, - rtol=1e-5, -) # %% diff --git a/scripts/utils.py b/scripts/utils.py index beae60e..12b08d9 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -13,6 +13,9 @@ # %% +from collections.abc import Iterable + + def print_checked_df_to_script_text_with_arrays(df): # Convert the DataFrame to a dictionary with 'list' orientation df_dict = df.to_dict(orient="list") diff --git a/tests/test_gcms_data_analysis.py b/tests/AAAA_test_gcms_data_analysis.py similarity index 100% rename from tests/test_gcms_data_analysis.py rename to tests/AAAA_test_gcms_data_analysis.py diff --git 
a/tests/test_name_to_properties.py b/tests/ZZZ_test_name_to_properties.py similarity index 100% rename from tests/test_name_to_properties.py rename to tests/ZZZ_test_name_to_properties.py diff --git a/tests/conftest.py b/tests/conftest.py index 6a45435..e8f227b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -40,54 +40,60 @@ def checked_compounds_properties(): properties = pd.read_excel( plib.Path( name_to_properties_dir, - "checked_compounds_properties_correct.xlsx", + "checked_compounds_properties.xlsx", ), index_col="comp_name", ) return properties -# Project class testing +# test minimal_case +minimal_case_dir = test_dir / "data_minimal_case" + + @pytest.fixture def gcms() -> Project: - - folder_path: plib.Path = plib.Path( - plib.Path(__file__).parent.parent, "tests/data_for_testing/" - ) - Project.set_folder_path(folder_path) - Project.auto_save_to_excel(False) + Project.set_folder_path(minimal_case_dir) + Project.set_auto_save_to_excel(False) return Project() -# fmt: off @pytest.fixture def checked_files_info(): files_info = pd.DataFrame( - index=pd.Index(['A_1', 'A_2', 'Ader_1', 'Ader_2', 'B_1', 'B_2'], name='filename'), + index=pd.Index(["S_1", "S_2", "T_1", "T_2", "T_3"], name="filename"), data={ - 'samplename': ['A', 'A', 'Ader', 'Ader', 'B', 'B'], - 'derivatized': [False, False, True, True, False, False], - 'dilution_factor': [25, 25, 125, 125, 1, 1], - 'total_sample_conc_in_vial_mg_L': [560.0000000000001, 560.0000000000001, 112.0, 112.0, 2800.0, 2800.0], - 'sample_yield_on_feedstock_basis_fr': [0.45, 0.46, 0.47, 0.48, 0.49, 0.5], - 'calibration_file': ['calibration', 'calibration', 'deriv_calibration', 'deriv_calibration', 'calibration', 'calibration'], - } + "samplename": ["S", "S", "T", "T", "T"], + "replicate_number": [1, 2, 1, 2, 3], + "derivatized": [False, False, False, False, False], + "calibration_file": [ + "cal_minimal", + "cal_minimal", + "cal_minimal", + "cal_minimal", + "cal_minimal", + ], + "dilution_factor": [1, 1, 1, 1, 1], + 
"total_sample_conc_in_vial_mg_L": [1, 1, 1, 1, 1], + "sample_yield_on_feedstock_basis_fr": [1, 1, 1, 1, 1], + }, ) return files_info + @pytest.fixture def checked_created_files_info(): created_files_info = pd.DataFrame( - index=pd.Index(['A_1', 'A_2', 'Ader_1', 'Ader_2', 'B_1', 'B_2'], name='filename'), + index=pd.Index(["S_1", "S_2", "T_1", "T_2", "T_3"], name="filename"), data={ - 'samplename': ['A', 'A', 'Ader', 'Ader', 'B', 'B'], - 'replicate_number': ['1', '2', '1', '2', '1', '2'], - 'derivatized': [False, False, False, False, False, False], - 'calibration_file': [False, False, False, False, False, False], - 'dilution_factor': [1, 1, 1, 1, 1, 1], - 'total_sample_conc_in_vial_mg_L': [1, 1, 1, 1, 1, 1], - 'sample_yield_on_feedstock_basis_fr': [1, 1, 1, 1, 1, 1], - } + "samplename": ["S", "S", "T", "T", "T"], + "replicate_number": ["1", "2", "1", "2", "3"], + "derivatized": [False, False, False, False, False], + "calibration_file": [False, False, False, False, False], + "dilution_factor": [1, 1, 1, 1, 1], + "total_sample_conc_in_vial_mg_L": [1, 1, 1, 1, 1], + "sample_yield_on_feedstock_basis_fr": [1, 1, 1, 1, 1], + }, ) return created_files_info @@ -95,79 +101,81 @@ def checked_created_files_info(): @pytest.fixture def checked_files(): files = { - 'A_1': pd.DataFrame( - index=pd.Index(['unidentified', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'n-hexadecanoic acid', '9,12-octadecadienoic acid (z,z)-', 'oleic acid'], name='A_1'), - data={ - 'iupac_name': ['n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.'], - 'retention_time': [6.025, 36.163, 40.052, 40.492, 43.847, 43.986], - 'area': [23386, 44389, 15068, 1878180, 1456119, 6379752], - 'height': [24797, 15019, 5705, 493759, 339605, 1147599], - 'area_if_undiluted': [584650, 1109725, 376700, 46954500, 36402975, 159493800], - }), - 'A_2': pd.DataFrame( - index=pd.Index(['unidentified', 'n-decanoic acid', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'n-hexadecanoic acid', '9,12-octadecadienoic acid (z,z)-', 
'oleic acid'], name='A_2'), + "S_1": pd.DataFrame( + index=pd.Index(["phenol", "naphthalene", "dodecane"], name="S_1"), data={ - 'iupac_name': ['n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.'], - 'retention_time': [6.025, 26.284, 36.158, 40.041, 40.494, 43.847, 43.988], - 'area': [25493, 10952, 50650, 21294, 1656756, 1371069, 6394708], - 'height': [25716, 4259, 14520, 6739, 461942, 324690, 1138647], - 'area_if_undiluted': [637325, 273800, 1266250, 532350, 41418900, 34276725, 159867700], - }), - 'Ader_1': pd.DataFrame( - index=pd.Index(['unidentified', 'myristic acid, tms derivative', 'palmitelaidic acid, tms derivative', 'palmitic acid, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'], name='Ader_1'), + "iupac_name": ["n.a.", "n.a.", "n.a."], + "retention_time": [13.703, 20.942, 21.426], + "area": [20, 200, 2000], + "height": [20, 200, 2000], + "area_if_undiluted": [20, 200, 2000], + }, + ), + "S_2": pd.DataFrame( + index=pd.Index(["phenol", "naphthalene", "dodecane"], name="S_2"), data={ - 'iupac_name': ['n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.'], - 'retention_time': [6.027, 38.123, 41.729, 42.157, 45.253, 45.369], - 'area': [16741, 49508, 27798, 1415205, 519476, 1724814], - 'height': [13451, 18415, 9132, 484890, 180850, 501749], - 'area_if_undiluted': [2092625, 6188500, 3474750, 176900625, 64934500, 215601750], - }), - 'Ader_2': pd.DataFrame( - index=pd.Index(['unidentified', 'myristic acid, tms derivative', 'palmitelaidic acid, tms derivative', 'palmitic acid, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'], name='Ader_2'), + "iupac_name": ["n.a.", "n.a.", "n.a."], + "retention_time": [13.703, 20.942, 21.426], + "area": [40, 400, 4000], + "height": [40, 400, 4000], + "area_if_undiluted": [40, 400, 4000], + }, + ), + "T_1": pd.DataFrame( + index=pd.Index(["phenol", "naphthalene", "dodecane"], name="T_1"), data={ - 
'iupac_name': ['n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.'], - 'retention_time': [6.027, 38.125, 41.744, 42.161, 45.258, 45.37], - 'area': [14698, 53613, 25213, 1402990, 605137, 1956560], - 'height': [12802, 18373, 8775, 496504, 202599, 594688], - 'area_if_undiluted': [1837250, 6701625, 3151625, 175373750, 75642125, 244570000], - }), - 'B_1': pd.DataFrame( - index=pd.Index(['2-butanone', '2-cyclopenten-1-one, 2-methyl-', 'trans-2-pentenoic acid', '2,5-hexanedione', '1-hexene, 4,5-dimethyl-', 'phenol'], name='B_1'), + "iupac_name": ["n.a.", "n.a.", "n.a."], + "retention_time": [13.703, 20.942, 21.426], + "area": [20, 50, 500], + "height": [20, 50, 500], + "area_if_undiluted": [20, 50, 500], + }, + ), + "T_2": pd.DataFrame( + index=pd.Index(["phenol", "naphthalene", "dodecane"], name="T_2"), data={ - 'iupac_name': ['n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.'], - 'retention_time': [8.527, 10.507, 11.071, 11.486, 12.214, 13.687], - 'area': [147566, 69223, 40376, 441077, 19522, 200947], - 'height': [39393, 18515, 12132, 112797, 7194, 64421], - 'area_if_undiluted': [147566, 69223, 40376, 441077, 19522, 200947], - }), - 'B_2': pd.DataFrame( - index=pd.Index(['2-butanone', '2-cyclopenten-1-one, 2-methyl-', 'trans-2-pentenoic acid', '2,5-hexanedione', 'phenol'], name='B_2'), + "iupac_name": ["n.a.", "n.a.", "n.a."], + "retention_time": [13.703, 20.942, 21.426], + "area": [10, 100, 1000], + "height": [10, 1000, 1000], + "area_if_undiluted": [10, 100, 1000], + }, + ), + "T_3": pd.DataFrame( + index=pd.Index(["phenol", "naphthalene", "dodecane"], name="T_3"), data={ - 'iupac_name': ['n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.'], - 'retention_time': [8.502, 10.474, 11.027, 11.456, 13.661], - 'area': [181021, 64531, 35791, 472362, 228750], - 'height': [44551, 19823, 12737, 120142, 75153], - 'area_if_undiluted': [181021, 64531, 35791, 472362, 228750], - }) + "iupac_name": ["n.a.", "n.a.", "n.a."], + "retention_time": [13.703, 20.942, 21.426], + "area": [0, 150, 1500], + "height": 
[0, 150, 1500], + "area_if_undiluted": [0, 150, 1500], + }, + ), } return files + @pytest.fixture def checked_is_files_deriv(): is_files_deriv = { - 'A_1': False, 'A_2': False, 'Ader_1': True, - 'Ader_2': True, 'B_1': False, 'B_2': False + "S_1": False, + "S_2": False, + "T_1": False, + "T_2": False, + "T_3": False, } return is_files_deriv + +# fmt: off @pytest.fixture def checked_load_class_code_fractions(): class_code_fractions = pd.DataFrame( - index=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79], + index=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80], data={ - 'classes': ['ester', 'ester_1', 'ester_2', 'ester_3', 'ester_4', 'ester_5', 'ester_6', 'carboxyl', 'ketone', 'ketone_1', 'ketone_2', 'ketone_3', 'ketone_4', 'ketone_5', 'ketone_6', 'ketone_7', 'ketone_8', 'ketone_9', 'ketone_10', 'ketone_11', 'ketone_12', 'ketone_13', 'ketone_14', 'ketone_15', 'ketone_16', 'ketone_17', 'ketone_18', 'ketone_19', 'ketone_20', 'ketone_21', 'ketone_22', 'ketone_23', 'ketone_24', 'ketone_25', 'ketone_26', 'ketone_27', 'aldehyde', 'ether', 'ether_1', 'ether_2', 'ether_3', 'ether_4', 'ether_5', 'ether_6', 'ether_7', 'ether_8', 'ether_9', 'ether_10', 'ether_11', 'ether_12', 'ether_13', 'ether_14', 'ether_15', 'ether_16', 'ether_17', 'ether_18', 'ether_19', 'ether_20', 'ether_21', 'ether_22', 'ether_23', 'ether_24', 'ether_25', 'ether_26', 'ether_27', 'alcohol', 'C-aliph', 'C-aliph_1', 'C-aliph_2', 'C-aliph_3', 'C-arom', 'C-arom_1', 'C-arom_2', 'N-aliph', 
'N-aliph_1', 'N-aliph_3', 'N-arom', 'N-arom_2', 'O-arom', 'O-aliph'], - 'codes': ['[CH0](=O)O[CH3]', '[CH0](=O)O[CH2]', '[CH0](=O)O[CH1]', '[CH0](=O)O[C]', '[CH0](=O)O[cH2]', '[CH0](=O)O[cH1]', '[CH0](=O)O[c]', '[CH0](=O)O', '[CH3]C(=O)[CH3]', '[CH3]C(=O)[CH2]', '[CH3]C(=O)[CH]', '[CH3]C(=O)[C]', '[CH3]C(=O)[cH2]', '[CH3]C(=O)[cH]', '[CH3]C(=O)[c]', '[CH2]C(=O)[CH2]', '[CH2]C(=O)[CH]', '[CH2]C(=O)[C]', '[CH2]C(=O)[cH2]', '[CH2]C(=O)[cH]', '[CH2]C(=O)[c]', '[CH]C(=O)[CH]', '[CH]C(=O)[C]', '[CH]C(=O)[cH2]', '[CH]C(=O)[cH]', '[CH]C(=O)[c]', '[C]C(=O)[C]', '[C]C(=O)[cH2]', '[C]C(=O)[cH]', '[C]C(=O)[c]', '[cH2]C(=O)[cH2]', '[cH2]C(=O)[cH]', '[cH2]C(=O)[c]', '[cH]C(=O)[cH]', '[cH]C(=O)[c]', '[c]C(=O)[c]', '[CH]=O', '[CH3]O[CH3]', '[CH3]O[CH2]', '[CH3]O[CH]', '[CH3]O[C]', '[CH3]O[cH2]', '[CH3]O[cH]', '[CH3]O[c]', '[CH2]O[CH2]', '[CH2]O[CH]', '[CH2]O[C]', '[CH2]O[cH2]', '[CH2]O[cH]', '[CH2]O[c]', '[CH]O[CH]', '[CH]O[C]', '[CH]O[cH2]', '[CH]O[cH]', '[CH]O[c]', '[C]O[C]', '[C]O[cH2]', '[C]O[cH]', '[C]O[c]', '[cH2]O[cH2]', '[cH2]O[cH]', '[cH2]O[c]', '[cH]O[cH]', '[cH]O[c]', '[c]O[c]', '[OH1]', '[CH3]', '[CH2]', '[CH1]', '[C]', '[cH2]', '[cH1]', '[c]', '[NH2]', '[NH1]', '[NH0]', '[nH1]', '[n]', '[o]', '[O]'], - 'mfs': [59.044, 58.035999999999994, 57.028, 56.019999999999996, 58.035999999999994, 57.028, 56.019999999999996, 45.017, 58.080000000000005, 57.072, 56.06400000000001, 55.056000000000004, 57.072, 56.06400000000001, 55.056000000000004, 56.06400000000001, 55.056000000000004, 57.072, 56.06400000000001, 55.056000000000004, 54.048, 54.048, 53.040000000000006, 55.056000000000004, 54.048, 53.040000000000006, 52.032000000000004, 54.048, 53.040000000000006, 52.032000000000004, 56.06400000000001, 55.056000000000004, 54.048, 54.048, 53.040000000000006, 52.032000000000004, 29.017999999999997, 46.069, 45.061, 44.053, 43.045, 45.061, 44.053, 43.045, 44.053, 43.045, 45.061, 44.053, 43.045, 42.037, 42.037, 41.029, 43.045, 42.037, 41.029, 40.021, 42.037, 41.029, 40.021, 44.053, 43.045, 
42.037, 42.037, 41.029, 40.021, 17.007, 15.035, 14.027, 13.018999999999998, 12.011, 14.027, 13.018999999999998, 12.011, 16.023, 15.015, 14.007, 15.015, 14.007, 15.999, 15.999], + 'classes': ['ester', 'ester_1', 'ester_2', 'ester_3', 'ester_4', 'ester_5', 'ester_6', 'carboxyl', 'ketone', 'ketone_1', 'ketone_2', 'ketone_3', 'ketone_4', 'ketone_5', 'ketone_6', 'ketone_7', 'ketone_8', 'ketone_9', 'ketone_10', 'ketone_11', 'ketone_12', 'ketone_13', 'ketone_14', 'ketone_15', 'ketone_16', 'ketone_17', 'ketone_18', 'ketone_19', 'ketone_20', 'ketone_21', 'ketone_22', 'ketone_23', 'ketone_24', 'ketone_25', 'ketone_26', 'ketone_27', 'aldehyde', 'ether', 'ether_1', 'ether_2', 'ether_3', 'ether_4', 'ether_5', 'ether_6', 'ether_7', 'ether_8', 'ether_9', 'ether_10', 'ether_11', 'ether_12', 'ether_13', 'ether_14', 'ether_15', 'ether_16', 'ether_17', 'ether_18', 'ether_19', 'ether_20', 'ether_21', 'ether_22', 'ether_23', 'ether_24', 'ether_25', 'ether_26', 'ether_27', 'alcohol', 'C-aliph', 'C-aliph_1', 'C-aliph_2', 'C-aliph_3', 'C-arom', 'C-arom_1', 'C-arom_2', 'N-aliph', 'N-aliph_1', 'N-aliph_3', 'N-arom', 'N-arom_2', 'O-arom', 'O-aliph', 'Cl'], + 'codes': ['[CH0](=O)O[CH3]', '[CH0](=O)O[CH2]', '[CH0](=O)O[CH1]', '[CH0](=O)O[C]', '[CH0](=O)O[cH2]', '[CH0](=O)O[cH1]', '[CH0](=O)O[c]', '[CH0](=O)O', '[CH3]C(=O)[CH3]', '[CH3]C(=O)[CH2]', '[CH3]C(=O)[CH]', '[CH3]C(=O)[C]', '[CH3]C(=O)[cH2]', '[CH3]C(=O)[cH]', '[CH3]C(=O)[c]', '[CH2]C(=O)[CH2]', '[CH2]C(=O)[CH]', '[CH2]C(=O)[C]', '[CH2]C(=O)[cH2]', '[CH2]C(=O)[cH]', '[CH2]C(=O)[c]', '[CH]C(=O)[CH]', '[CH]C(=O)[C]', '[CH]C(=O)[cH2]', '[CH]C(=O)[cH]', '[CH]C(=O)[c]', '[C]C(=O)[C]', '[C]C(=O)[cH2]', '[C]C(=O)[cH]', '[C]C(=O)[c]', '[cH2]C(=O)[cH2]', '[cH2]C(=O)[cH]', '[cH2]C(=O)[c]', '[cH]C(=O)[cH]', '[cH]C(=O)[c]', '[c]C(=O)[c]', '[CH]=O', '[CH3]O[CH3]', '[CH3]O[CH2]', '[CH3]O[CH]', '[CH3]O[C]', '[CH3]O[cH2]', '[CH3]O[cH]', '[CH3]O[c]', '[CH2]O[CH2]', '[CH2]O[CH]', '[CH2]O[C]', '[CH2]O[cH2]', '[CH2]O[cH]', '[CH2]O[c]', '[CH]O[CH]', 
'[CH]O[C]', '[CH]O[cH2]', '[CH]O[cH]', '[CH]O[c]', '[C]O[C]', '[C]O[cH2]', '[C]O[cH]', '[C]O[c]', '[cH2]O[cH2]', '[cH2]O[cH]', '[cH2]O[c]', '[cH]O[cH]', '[cH]O[c]', '[c]O[c]', '[OH1]', '[CH3]', '[CH2]', '[CH1]', '[C]', '[cH2]', '[cH1]', '[c]', '[NH2]', '[NH1]', '[NH0]', '[nH1]', '[n]', '[o]', '[O]', '[Cl]'], + 'mfs': [59.044, 58.035999999999994, 57.028, 56.019999999999996, 58.035999999999994, 57.028, 56.019999999999996, 45.017, 58.080000000000005, 57.072, 56.06400000000001, 55.056000000000004, 57.072, 56.06400000000001, 55.056000000000004, 56.06400000000001, 55.056000000000004, 57.072, 56.06400000000001, 55.056000000000004, 54.048, 54.048, 53.040000000000006, 55.056000000000004, 54.048, 53.040000000000006, 52.032000000000004, 54.048, 53.040000000000006, 52.032000000000004, 56.06400000000001, 55.056000000000004, 54.048, 54.048, 53.040000000000006, 52.032000000000004, 29.017999999999997, 46.069, 45.061, 44.053, 43.045, 45.061, 44.053, 43.045, 44.053, 43.045, 45.061, 44.053, 43.045, 42.037, 42.037, 41.029, 43.045, 42.037, 41.029, 40.021, 42.037, 41.029, 40.021, 44.053, 43.045, 42.037, 42.037, 41.029, 40.021, 17.007, 15.035, 14.027, 13.018999999999998, 12.011, 14.027, 13.018999999999998, 12.011, 16.023, 15.015, 14.007, 15.015, 14.007, 15.999, 15.999, 35.45], } ) return class_code_fractions @@ -193,29 +201,181 @@ def checked_load_calibrations(): 'Area 5': [np.nan, np.nan, np.nan, 2957268.0, 3164919.0, 741540.0, 5345977.0], 'Area 6': [np.nan, np.nan, np.nan, 11730886.0, 12451729.0, 3975200.0, 19779576.0], } - ), - 'deriv_calibration': pd.DataFrame( - index=pd.Index(['benzoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '9-octadecenoic acid, (e)-', 'phenol', '4-oxopentanoic acid', 'benzene-1,2-diol'], name='comp_name'), - data={ - 'iupac_name': ['n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.'], - 'MW': [122.1213, 256.4241, 280.4455, 282.4614, 94.1112, 116.1152, 110.1106], - 'PPM 1': [np.nan, 5.0, 5.0, 5.0, np.nan, 5.0, 5.0], - 'PPM 2': 
[np.nan, 10.0, 10.0, 10.0, np.nan, 10.0, 10.0], - 'PPM 3': [np.nan, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0], - 'PPM 4': [np.nan, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0], - 'PPM 5': [30, 30, 30, 30, 25, 25, 25], - 'PPM 6': [50, 50, 50, 50, 30, 30, 30], - 'Area 1': [np.nan, 403058.0, 126644.0, 467088.0, np.nan, 48330.0, 184752.0], - 'Area 2': [np.nan, 570479.0, 183307.0, 741971.0, np.nan, 206224.0, 729379.0], - 'Area 3': [np.nan, 694901.0, 241591.0, 953554.0, 17168.0, 620353.0, 1607583.0], - 'Area 4': [np.nan, 936570.0, 350170.0, 1408563.0, 21329.0, 885337.0, 2232039.0], - 'Area 5': [73458, 1474014, 475205, 2476003, 21557, 1096645, 2972508], - 'Area 6': [113812, 2605959, 824267, 4300414, 71706, 1394486, 3629582], - 'CAS': ['65-85-0', '57-10-3', '60-33-3', '112-79-8', '108-95-2', '123-76-2', '120-80-9'], - } ) } return calibrations +# fmt: on +# Project class testing +# @pytest.fixture +# def gcms() -> Project: + +# folder_path: plib.Path = plib.Path( +# plib.Path(__file__).parent.parent, "tests/data_for_testing/" +# ) +# Project.set_folder_path(folder_path) +# Project.set_auto_save_to_excel(False) +# return Project() + + +# fmt: off + + +# @pytest.fixture +# def checked_files_info(): +# files_info = pd.DataFrame( +# index=pd.Index(['A_1', 'A_2', 'Ader_1', 'Ader_2', 'B_1', 'B_2'], name='filename'), +# data={ +# 'samplename': ['A', 'A', 'Ader', 'Ader', 'B', 'B'], +# 'derivatized': [False, False, True, True, False, False], +# 'dilution_factor': [25, 25, 125, 125, 1, 1], +# 'total_sample_conc_in_vial_mg_L': [560.0000000000001, 560.0000000000001, 112.0, 112.0, 2800.0, 2800.0], +# 'sample_yield_on_feedstock_basis_fr': [0.45, 0.46, 0.47, 0.48, 0.49, 0.5], +# 'calibration_file': ['calibration', 'calibration', 'deriv_calibration', 'deriv_calibration', 'calibration', 'calibration'], +# } +# ) +# return files_info + +# @pytest.fixture +# def checked_created_files_info(): +# created_files_info = pd.DataFrame( +# index=pd.Index(['A_1', 'A_2', 'Ader_1', 'Ader_2', 'B_1', 'B_2'], 
name='filename'), +# data={ +# 'samplename': ['A', 'A', 'Ader', 'Ader', 'B', 'B'], +# 'replicate_number': ['1', '2', '1', '2', '1', '2'], +# 'derivatized': [False, False, False, False, False, False], +# 'calibration_file': [False, False, False, False, False, False], +# 'dilution_factor': [1, 1, 1, 1, 1, 1], +# 'total_sample_conc_in_vial_mg_L': [1, 1, 1, 1, 1, 1], +# 'sample_yield_on_feedstock_basis_fr': [1, 1, 1, 1, 1, 1], +# } +# ) +# return created_files_info + + +# @pytest.fixture +# def checked_files(): +# files = { +# 'A_1': pd.DataFrame( +# index=pd.Index(['unidentified', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'n-hexadecanoic acid', '9,12-octadecadienoic acid (z,z)-', 'oleic acid'], name='A_1'), +# data={ +# 'iupac_name': ['n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.'], +# 'retention_time': [6.025, 36.163, 40.052, 40.492, 43.847, 43.986], +# 'area': [23386, 44389, 15068, 1878180, 1456119, 6379752], +# 'height': [24797, 15019, 5705, 493759, 339605, 1147599], +# 'area_if_undiluted': [584650, 1109725, 376700, 46954500, 36402975, 159493800], +# }), +# 'A_2': pd.DataFrame( +# index=pd.Index(['unidentified', 'n-decanoic acid', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'n-hexadecanoic acid', '9,12-octadecadienoic acid (z,z)-', 'oleic acid'], name='A_2'), +# data={ +# 'iupac_name': ['n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.'], +# 'retention_time': [6.025, 26.284, 36.158, 40.041, 40.494, 43.847, 43.988], +# 'area': [25493, 10952, 50650, 21294, 1656756, 1371069, 6394708], +# 'height': [25716, 4259, 14520, 6739, 461942, 324690, 1138647], +# 'area_if_undiluted': [637325, 273800, 1266250, 532350, 41418900, 34276725, 159867700], +# }), +# 'Ader_1': pd.DataFrame( +# index=pd.Index(['unidentified', 'myristic acid, tms derivative', 'palmitelaidic acid, tms derivative', 'palmitic acid, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'], name='Ader_1'), +# data={ +# 'iupac_name': 
['n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.'], +# 'retention_time': [6.027, 38.123, 41.729, 42.157, 45.253, 45.369], +# 'area': [16741, 49508, 27798, 1415205, 519476, 1724814], +# 'height': [13451, 18415, 9132, 484890, 180850, 501749], +# 'area_if_undiluted': [2092625, 6188500, 3474750, 176900625, 64934500, 215601750], +# }), +# 'Ader_2': pd.DataFrame( +# index=pd.Index(['unidentified', 'myristic acid, tms derivative', 'palmitelaidic acid, tms derivative', 'palmitic acid, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'], name='Ader_2'), +# data={ +# 'iupac_name': ['n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.'], +# 'retention_time': [6.027, 38.125, 41.744, 42.161, 45.258, 45.37], +# 'area': [14698, 53613, 25213, 1402990, 605137, 1956560], +# 'height': [12802, 18373, 8775, 496504, 202599, 594688], +# 'area_if_undiluted': [1837250, 6701625, 3151625, 175373750, 75642125, 244570000], +# }), +# 'B_1': pd.DataFrame( +# index=pd.Index(['2-butanone', '2-cyclopenten-1-one, 2-methyl-', 'trans-2-pentenoic acid', '2,5-hexanedione', '1-hexene, 4,5-dimethyl-', 'phenol'], name='B_1'), +# data={ +# 'iupac_name': ['n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.'], +# 'retention_time': [8.527, 10.507, 11.071, 11.486, 12.214, 13.687], +# 'area': [147566, 69223, 40376, 441077, 19522, 200947], +# 'height': [39393, 18515, 12132, 112797, 7194, 64421], +# 'area_if_undiluted': [147566, 69223, 40376, 441077, 19522, 200947], +# }), +# 'B_2': pd.DataFrame( +# index=pd.Index(['2-butanone', '2-cyclopenten-1-one, 2-methyl-', 'trans-2-pentenoic acid', '2,5-hexanedione', 'phenol'], name='B_2'), +# data={ +# 'iupac_name': ['n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.'], +# 'retention_time': [8.502, 10.474, 11.027, 11.456, 13.661], +# 'area': [181021, 64531, 35791, 472362, 228750], +# 'height': [44551, 19823, 12737, 120142, 75153], +# 'area_if_undiluted': [181021, 64531, 35791, 472362, 228750], +# }) +# } +# return files + +# @pytest.fixture 
+# def checked_is_files_deriv(): +# is_files_deriv = { +# 'A_1': False, 'A_2': False, 'Ader_1': True, +# 'Ader_2': True, 'B_1': False, 'B_2': False +# } +# return is_files_deriv + +# @pytest.fixture +# def checked_load_class_code_fractions(): +# class_code_fractions = pd.DataFrame( +# index=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79], +# data={ +# 'classes': ['ester', 'ester_1', 'ester_2', 'ester_3', 'ester_4', 'ester_5', 'ester_6', 'carboxyl', 'ketone', 'ketone_1', 'ketone_2', 'ketone_3', 'ketone_4', 'ketone_5', 'ketone_6', 'ketone_7', 'ketone_8', 'ketone_9', 'ketone_10', 'ketone_11', 'ketone_12', 'ketone_13', 'ketone_14', 'ketone_15', 'ketone_16', 'ketone_17', 'ketone_18', 'ketone_19', 'ketone_20', 'ketone_21', 'ketone_22', 'ketone_23', 'ketone_24', 'ketone_25', 'ketone_26', 'ketone_27', 'aldehyde', 'ether', 'ether_1', 'ether_2', 'ether_3', 'ether_4', 'ether_5', 'ether_6', 'ether_7', 'ether_8', 'ether_9', 'ether_10', 'ether_11', 'ether_12', 'ether_13', 'ether_14', 'ether_15', 'ether_16', 'ether_17', 'ether_18', 'ether_19', 'ether_20', 'ether_21', 'ether_22', 'ether_23', 'ether_24', 'ether_25', 'ether_26', 'ether_27', 'alcohol', 'C-aliph', 'C-aliph_1', 'C-aliph_2', 'C-aliph_3', 'C-arom', 'C-arom_1', 'C-arom_2', 'N-aliph', 'N-aliph_1', 'N-aliph_3', 'N-arom', 'N-arom_2', 'O-arom', 'O-aliph'], +# 'codes': ['[CH0](=O)O[CH3]', '[CH0](=O)O[CH2]', '[CH0](=O)O[CH1]', '[CH0](=O)O[C]', '[CH0](=O)O[cH2]', '[CH0](=O)O[cH1]', '[CH0](=O)O[c]', '[CH0](=O)O', '[CH3]C(=O)[CH3]', '[CH3]C(=O)[CH2]', '[CH3]C(=O)[CH]', '[CH3]C(=O)[C]', '[CH3]C(=O)[cH2]', '[CH3]C(=O)[cH]', '[CH3]C(=O)[c]', '[CH2]C(=O)[CH2]', '[CH2]C(=O)[CH]', '[CH2]C(=O)[C]', '[CH2]C(=O)[cH2]', '[CH2]C(=O)[cH]', '[CH2]C(=O)[c]', '[CH]C(=O)[CH]', 
'[CH]C(=O)[C]', '[CH]C(=O)[cH2]', '[CH]C(=O)[cH]', '[CH]C(=O)[c]', '[C]C(=O)[C]', '[C]C(=O)[cH2]', '[C]C(=O)[cH]', '[C]C(=O)[c]', '[cH2]C(=O)[cH2]', '[cH2]C(=O)[cH]', '[cH2]C(=O)[c]', '[cH]C(=O)[cH]', '[cH]C(=O)[c]', '[c]C(=O)[c]', '[CH]=O', '[CH3]O[CH3]', '[CH3]O[CH2]', '[CH3]O[CH]', '[CH3]O[C]', '[CH3]O[cH2]', '[CH3]O[cH]', '[CH3]O[c]', '[CH2]O[CH2]', '[CH2]O[CH]', '[CH2]O[C]', '[CH2]O[cH2]', '[CH2]O[cH]', '[CH2]O[c]', '[CH]O[CH]', '[CH]O[C]', '[CH]O[cH2]', '[CH]O[cH]', '[CH]O[c]', '[C]O[C]', '[C]O[cH2]', '[C]O[cH]', '[C]O[c]', '[cH2]O[cH2]', '[cH2]O[cH]', '[cH2]O[c]', '[cH]O[cH]', '[cH]O[c]', '[c]O[c]', '[OH1]', '[CH3]', '[CH2]', '[CH1]', '[C]', '[cH2]', '[cH1]', '[c]', '[NH2]', '[NH1]', '[NH0]', '[nH1]', '[n]', '[o]', '[O]'], +# 'mfs': [59.044, 58.035999999999994, 57.028, 56.019999999999996, 58.035999999999994, 57.028, 56.019999999999996, 45.017, 58.080000000000005, 57.072, 56.06400000000001, 55.056000000000004, 57.072, 56.06400000000001, 55.056000000000004, 56.06400000000001, 55.056000000000004, 57.072, 56.06400000000001, 55.056000000000004, 54.048, 54.048, 53.040000000000006, 55.056000000000004, 54.048, 53.040000000000006, 52.032000000000004, 54.048, 53.040000000000006, 52.032000000000004, 56.06400000000001, 55.056000000000004, 54.048, 54.048, 53.040000000000006, 52.032000000000004, 29.017999999999997, 46.069, 45.061, 44.053, 43.045, 45.061, 44.053, 43.045, 44.053, 43.045, 45.061, 44.053, 43.045, 42.037, 42.037, 41.029, 43.045, 42.037, 41.029, 40.021, 42.037, 41.029, 40.021, 44.053, 43.045, 42.037, 42.037, 41.029, 40.021, 17.007, 15.035, 14.027, 13.018999999999998, 12.011, 14.027, 13.018999999999998, 12.011, 16.023, 15.015, 14.007, 15.015, 14.007, 15.999, 15.999], +# } +# ) +# return class_code_fractions + +# @pytest.fixture +# def checked_load_calibrations(): +# calibrations = { +# 'calibration': pd.DataFrame( +# index=pd.Index(['phenol', '2-methylcyclopent-2-en-1-one', '2,4,5-trichlorophenol', 'tetradecanoic acid', 'hexadecanoic acid', 
'(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], name='comp_name'), +# data={ +# 'iupac_name': ['n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.'], +# 'MW': [94.11, 96.1271, 197.4, 228.3709, 256.4241, 280.4455, 282.4614], +# 'PPM 1': [5.0, 10.0, 5.0, 10.0, 10.0, np.nan, 10.0], +# 'PPM 2': [10, 20, 10, 20, 20, 20, 20], +# 'PPM 3': [20, 30, 20, 35, 35, 35, 35], +# 'PPM 4': [50.0, 50.0, 50.0, 50.0, 50.0, np.nan, 50.0], +# 'PPM 5': [np.nan, np.nan, np.nan, 100.0, 100.0, 100.0, 100.0], +# 'PPM 6': [np.nan, np.nan, np.nan, 300.0, 300.0, 300.0, 300.0], +# 'Area 1': [135884.0, 175083.0, 155710.0, 70675.0, 51545.0, np.nan, 31509.0], +# 'Area 2': [304546, 759316, 343277, 203215, 130834, 22338, 133847], +# 'Area 3': [678618, 1070146, 805095, 500430, 361070, 63841, 551470], +# 'Area 4': [1866918.0, 1928385.0, 2302730.0, 469543.0, 430809.0, np.nan, 494928.0], +# 'Area 5': [np.nan, np.nan, np.nan, 2957268.0, 3164919.0, 741540.0, 5345977.0], +# 'Area 6': [np.nan, np.nan, np.nan, 11730886.0, 12451729.0, 3975200.0, 19779576.0], +# } +# ), +# 'deriv_calibration': pd.DataFrame( +# index=pd.Index(['benzoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '9-octadecenoic acid, (e)-', 'phenol', '4-oxopentanoic acid', 'benzene-1,2-diol'], name='comp_name'), +# data={ +# 'iupac_name': ['n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.', 'n.a.'], +# 'MW': [122.1213, 256.4241, 280.4455, 282.4614, 94.1112, 116.1152, 110.1106], +# 'PPM 1': [np.nan, 5.0, 5.0, 5.0, np.nan, 5.0, 5.0], +# 'PPM 2': [np.nan, 10.0, 10.0, 10.0, np.nan, 10.0, 10.0], +# 'PPM 3': [np.nan, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0], +# 'PPM 4': [np.nan, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0], +# 'PPM 5': [30, 30, 30, 30, 25, 25, 25], +# 'PPM 6': [50, 50, 50, 50, 30, 30, 30], +# 'Area 1': [np.nan, 403058.0, 126644.0, 467088.0, np.nan, 48330.0, 184752.0], +# 'Area 2': [np.nan, 570479.0, 183307.0, 741971.0, np.nan, 206224.0, 729379.0], +# 'Area 3': [np.nan, 694901.0, 241591.0, 953554.0, 17168.0, 
620353.0, 1607583.0], +# 'Area 4': [np.nan, 936570.0, 350170.0, 1408563.0, 21329.0, 885337.0, 2232039.0], +# 'Area 5': [73458, 1474014, 475205, 2476003, 21557, 1096645, 2972508], +# 'Area 6': [113812, 2605959, 824267, 4300414, 71706, 1394486, 3629582], +# 'CAS': ['65-85-0', '57-10-3', '60-33-3', '112-79-8', '108-95-2', '123-76-2', '120-80-9'], +# } +# ) +# } +# return calibrations @pytest.fixture def checked_is_calibrations_deriv(): diff --git a/tests/minimal_set/S_1.txt b/tests/data_minimal_case/S_1.txt similarity index 100% rename from tests/minimal_set/S_1.txt rename to tests/data_minimal_case/S_1.txt diff --git a/tests/minimal_set/S_2.txt b/tests/data_minimal_case/S_2.txt similarity index 100% rename from tests/minimal_set/S_2.txt rename to tests/data_minimal_case/S_2.txt diff --git a/tests/minimal_set/T_1.txt b/tests/data_minimal_case/T_1.txt similarity index 100% rename from tests/minimal_set/T_1.txt rename to tests/data_minimal_case/T_1.txt diff --git a/tests/minimal_set/T_2.txt b/tests/data_minimal_case/T_2.txt similarity index 100% rename from tests/minimal_set/T_2.txt rename to tests/data_minimal_case/T_2.txt diff --git a/tests/minimal_set/T_3.txt b/tests/data_minimal_case/T_3.txt similarity index 100% rename from tests/minimal_set/T_3.txt rename to tests/data_minimal_case/T_3.txt diff --git a/tests/minimal_set/cal_minimal.xlsx b/tests/data_minimal_case/cal_minimal.xlsx similarity index 100% rename from tests/minimal_set/cal_minimal.xlsx rename to tests/data_minimal_case/cal_minimal.xlsx diff --git a/tests/minimal_set/classifications_codes_fractions.xlsx b/tests/data_minimal_case/classifications_codes_fractions.xlsx similarity index 50% rename from tests/minimal_set/classifications_codes_fractions.xlsx rename to tests/data_minimal_case/classifications_codes_fractions.xlsx index 5f69cee2d5ac047d8db884a6ee183bf013179d74..f446d7e2e3fb65b9ef0779a0938e613229c44983 100644 GIT binary patch delta 4815 
zcmYjVWmweR(_T^u>5y2ETo4uzkXR6;dtoVwC6$np4vC+{f*`$wz|y6FE0U6u(%p?n zNjC}zFVFMg{Xg^Je3*M?uDQ}Ac6y8@t&m*?_lU#IWvrTNZR*6ssTeAzp=j1P zyDSFx!@lYHo?cis+Y5hv3!sAA9D#Q60kn-BY(Zh8&ZC#AlkvlQOTv9)XfS%wPm zqg(03ePgJ#tG+FutV&Dk2z-E1gB|56X)1CVQ`}Md5C*70=X)22Rcq$F@T18^wvHnj z(l5xP5SzSBrpYbv+ZT@~mYYj_iErb6U66yR7Nx~!6SiL- z_of!(TV_>%haYgRbw}}gd08BBUYC>!WMI(70!JQ@K<&kxdm|Gm0|7y)w6EDfWlduS7p8QlS_Vrj60{}hmFh<9ed^`>JI zd|A7ZD7OnjEyIL>QTC!C2e_BbX~iOenw$Zxd`%fvidVgOChKFA`Xtff-tP%HQbi5N zaZlEJ!f=Nv*)TrlZ-A@9y^I}CA$N>>Nw3?^)) zxG3LibsOpv7i_+8p&D%K>1V6kmI_MYf-|DBJXrp)kVR&rWUg)Z952>6>>N+OOI22h zX*WVU8RvnnEc)q5Z=@9G3GzesX%4Vbh%Kt2$64gLe0OL-07pL%mgjl&bAJ3f>YVk? zdOzuO&kqsOd>2>ZzsnQD1+5KMHKaI2XH75X@fQhgS*)E*g~jZ;w-ZTUGS|{p39OF_ zk6B8vO=sRGsTmPhCl{f>g!;dvD4fpt8k>!tKkNbUrCu^#z~k4w`?s8vt!P@Cr}=~U;-G~x|`B9tbN-%fX{ z*L&P4RE#|FGj(hir=1KWW*6x0S!P&y40f(RWrA>_Mhxi(6@<4vnxV2*L_b&L>p$HH zXi_}FyE7Bi!uK%1R9!B>3iS+_K8Y!xA((VYRXZ0R+7tvNxbKI0AAD~Ay(4Z0^@z0i z`YGx@kMISXDl0vkO{vY)(zu-|GYqDaeL7b2@&lQ`78GmJoL$5+qpUEI5A#VYkQAg}9I>_Rt8YHrMv9~tV{^N3^Yd?6g1k1XYrM|h(b+!u`-Z0;^ zuwXq}*xhbl*c}_v-Z_E1{u9}B-60gTl9J^Xp!3RCCko8^B~)eAMDxHi3mItq_xI&z z?Gvsi;JvA(EONn!jj8qvtF@~lkG=1ogG~Q0rHak%rd)4{nTgMbHOvXFU8>G$(c|$J z%T6>4sVo6kD~O!w5#-4-{H+k|vV0i)7oy_RUPKaE_iiIXfN>t-~xrqx+d8q^+g+2EPB$2l z1P*|L@l>RP1nnZ?ZI8YxtadY-zoECwO{J11vw9;sPgJb}q5*yIQ=*jfaX4~!LKhbNVi1PO zETl_v!x$zqDqsn#Q{a|q=L`xY{St$GL;wj_E|~)z#;hrZ)%|1^_tapNDHn6@X8BJQ=qBvK`B{QN*oVArbDY7NCOAi}2 z3_95aTJj>tZOFP(lnTO0H%9Bpx?Rwym(q6W(KbywcajS=(rjxKNXC2zMNJ=*1E{D% z>P`Ws_Aig9?MeM!y5)NOfNJW6IKIqX&()cbM)rz&_yoMNN~c1COZ<5Bod$KB^8M_s z7zJTMnOj%xex)htg*qZqT2P&zO4LOPF4RYFCI~+EDZA~k>5@nvF#eYRy9(96El8iQ z;7WazLzhu#B;q4^kFJcjwV%BPs6$CFBRQ)FUNiC|^+M8==4nvUmVyc6Vv&semj5`M zd`KjJxGOj?!R{4Ra>K>2?Ns?!JN1Ph5Ka?VBDyNxRwtSS=P*op6nphR^L>7&f0DwO z*j^=f7P^Y~D^<5>QAB8fM`Pk^w~(H^3;hchp(q!1SFloqC&wyKu)d;OpVn}Chh!mX zc_a@uyKb%XTq4-Hmna6-A`z2YobTTi2MB@}OsBbBYY=nI62Wckq4JQ-R~FK_k)9w- z?G(9C9?D7GRZEvyKGznXNjFtey1Jm3%@n(o!E=yLV5@$x`S9loyZ%Obs_K!jX|arc 
z0gLj{lwh(VNk5RZ40GLDLFs_S9`X35b?V_TYeWw1gU#&6ZqeJ0tmC$ebuzo!LRQH& z;bSojWI|62!w}0cT?VLC<(`)~@4In|o|Q(mFnvx*juf{WZ^=x-!VMOVImVyd6CLCBXX^5Zb36uNjd zTmyD&f$rH|M^5o0d$u&rSz&}3CvAm_N&cn{OIM$h=dT&<51fevj_N{v20K_a$p_ek zd3!)69g9ce9yx8UA%9C$PVrs?l4;J7w8&`-wEI(EjyeR^WSBd0lGGeKf15HF-`M~2 zqd2uxdR(l*8NI{u3#$yO{&fzzR$a1gv468SIB|rx7E_b(+{t>J-5;Au6ys01>5G30 z!?^i#R%?~mlB_3L+EPAJ>1I$#7zf0v(xFNJP^N&Hx73!m$Jx8fDHlW4FU`JoSCS(h zJ!{+AL)U%F)x6iwmAide1GGnCq)i?`NTLu^G=MY;dhcScx~F3;dw4;FU~fh+X`;`CCeazMR{%ya<1a15yTDptdL+RGVO~ zuk7F}V8m;$q{Tex8yBmZ;oIFe9LGXt2p0tkrWp+31Woy9BH7%WeQ1krNGWvwS6E5! zQ(e1hM(Q{?$9&5L4ZS&H4;ue3HXpV%j$r5!3CP?Es}+CWu>Ci6QJ$OY>qVPku1@Cb zf!NhJO$Cye6AYkSE?z@jQZlagw_CJ|LRF9mzYiAwh2_B?j^p7Fa8nDJlawm$kA!iH z={%*++s}SB-2Ajw-Pqolm&eGQ(|zS>rKLJNi`6l>2^;?f|9<+pv8-47xu8>H6AGg4 zY6eN-JH(}$w5RbOm>BY5>?4~n``R;N`ZX7SN)9omJa1sr8cyCBw47r^kwGLKIY4I3`y_Xg4W0(bGdlG-P#EE)4E>ZI)va@buvzAIm zFZoQp9JZ=i7b2P=+l`W0TT|6H$6e)Da8<5ba$M#7X)1aZ=5K1-u7>9KLDb^!91; zek_7`#ZZ))FJoekDXa?o^hCM74$N8%%tE4@S?&8QQ6gyVkcwpxjEhs<>qTz1>3{26A3emy;ccX0?hQfZ5M#N(eH#XFx7 zuM4K08RyWd^1(!l9?E=Qix&cZ1r*G@=1+SqE&Q8W(-Hg5IbTSY?Mb{oUZ&P1!CP+#<1E|N#zYV~cBC75 zF@91}^^~E;pI`jJ!sMeWsnZx@{VMBbxo)^f6|3rTepkeCLU<`_aoO!Jt(+tiq9u}} zbN<|UhRGp<5934ZacyT8*q+)DhV&s_(?|q7aMB#r8U^@D5QK@pk`lP-7m}3&x9Nq6 zjmbq!!4+Gzl?X2r6H%a@vdoA9VHdNBvxd7_OKjJMZ&^$1s2D5k^Uc^H8b|ybdA1rg zy7-If>wx{B-UK*>m#|dZgImOuI(#CqyQK`qtUU3f|18E*XTi@Fn*X%!xZhgY&wWEK zlX-CU-R$c12g@TR8~2U000OjNZ{l(Ls>WNY>nAT{ciw70s7R)Ebhy*r#T;<3w{rAd zX!you`UedMi|KJ^!;Wr0CD2jA#k|hd46m=loAe zC%_<(ualsMr=ODz`jDTN`Twsu3eauE{~DAHz0Cg|zE(`%M delta 4704 zcmY+IbyU<{*ZzlY#G3(zuA!v6ySp1?5b2O^_)3>b3|#_4Bi*3DAT^{2=+LDgjUb5R z>;3%J`n}J4{=D}7taaAj*E(xmC)Q@vy6Gz}!3(=~GD|EFND01yO9hyEzCcKj-A8`D z!y5{6D*$O4%u&QPQPo(gdN!`!m~?M|>tL-TB1vYy*Iy7buRhhQ@}Oc9E2fq&&T*dn zUS(@))uG2?mKi;DDC>TxZ{Gh9*qyciU2~j@ z2&5w1N>qP9R?5z8p$ZfbSqfw1zc!&*Xfm0puAxAL7zt1h!a%PLh*Bhv~< zW-M4*JDw^1W=hU7iMi259ylq~wfJp3seQV%gnDj@HAK$ey%to`Vd}VfW~rh2RtrB& zJ>0!&gKFR{46?EbH7KGp{i%ae^ZJFOfLPmuUlFLqzSl+`ivW1L%pn@h=3Z{WzqD`| 
ze?5_ShVuf3{UAhx;TB*+z#PJ;a-Q1PeYx;GKIgEnszLSOny6l|S z2a9puFb)Gx`p);*^|UbYrrCw`X?Gl=>If6JO6&y>zBt@kdYcDRs{r)55@u38?PdM< zWW^Iu91|VHl@j<|<)j;6-(GtNpI0zV<)R?Ai?LdhzS8m3<(}io_48h2$qkr?ALqkUxu8bXW;Bc`1P^64+7>8x} z)ynd;Pp6^OqVpGpmU^_H0iV3c?|;UX#~jFT=AemlC<$OBDYcnuG+ml`=xrq&G>R@z zaiziW)>ZV2&nvIzBP|yg6JEx`Evz>47k9xYwJz=t`AJ`uE^eoSPP&Eq$Q-^dMC($% zIS3*5DmQN@_Pqisk1n>3e=^H{m9QGt^zV!C_anax=)97mi@Fys z#djmMl{H}?dHYZYz)#pHTEhrcFbOB4DyI|)K_E|UIEt7BSP~$MKjFTh=UFG`VBRFd zs0%c&=DsM~|G6YV-jujKFP31s5VYP*4r43lwFS$(*?zcj-a;K7SG%>~IaGM68~6Q6F!aWL~x@S)wK zqzN~2MRa`xtXrKrLi6j&<`n0(eJzvZ)xYq z|EF^UEWT_jO=i`eXo0fosRC7~ByIVux_mSj{Su)g#5Xp(k+lT`w-6kd>uC`p>Kklg zpMUutZhN1pOHJkcIp9v*KI0WNS{rytWpdq-nQ=P_Xz@2~(%K`0C;Rz~51&g`0bBRt z1G!hseZ*DHz}_=6Eg5?|3PFRZy@c|vx6n*pr_!iMEeSN0-`3SW?K~4*iKbH0+>vwj zitM~-@@VE&yshhpi*fZ8azp|N;R!Mh9`+*;pxX?*kw#xjhPJfsP5j}Ca&$T4kIyvt zX?q0p?SJG*J9zz3r-k$5R>W`c_c<(!D9@A~!{WEeN6&UVV;j}{?Cp7A+*vD-)Z#R_9Xj5c7O z)+VGnlS$7Wuo#~0rS_O8GwOwT~0PUtgAol zdUt(cZnWLHvAAC@JfyOJHU45hA>~nOCvf|F@ANkK>g;Gc6kRToDDM*i{I*aA8k_m$ zqvU?;JY3(&t*?gn_xFa}{sEGT?p4y?q7U?6&2f1BpsvEZh{vly?>)u~R%sZ<*55@f z>@KACKlD`a@BKXWxx3gq4G+GKv{-K}TqmB6IIUYZyN}T>I11DT0t)OQ^0B=IGZ!}+ z%Tr`nT%+<+eFEz1w_AoKXq3&b4ZTDGshg%x^miPM$0Ji@|M*eqKLTt;x)A7{H+^R5 zPnCuyYeRaTAFj;WE)w?%1TD`UXCUc^#e$BO!}6X=^GT_v4u3eAQwqy`XpG6>is!c;E7x0*5EL4FEA1rwpb+HE=Wg}lk0Lx(b3rPi>!5qB>OHexS}mb#q!B4qKvCn z-adtTn0gAOZ<60R4NRbz%kyN8$RvdloHvFoe^lV=D?^w_fFLDa4d-$`+K&V?E|is% zbj27DXXad*6|s;Z{ZdM(h>^5oP297>0SG9o$RK6>Gd110n57#A^{6FVjo>#oL+w4{ z49UIIeV+D`SKDnF^AKOr>UMwJiT6_wE7*w7Q%ws_f&dc-i5RCfCt_ZSZHX4BX^LNf ziTfUL*s`?Iq&Gc*M3vl$)ro-Md~9D-cMv>u8Fu*e&IIa zU8$+zd7-XZ>_R9C=pGSB$a=jD=U zcwoT4iboKU19SWh=X$nAgBRi2K8iifY5dg$4r3wi=`dINdY0uqm0PeWnk> zNyZ)d4q z-r~pKIa4moic!VTe5Cd;)_0l507iU>W$9O?BX7fwf2<9PMYXuh&@@D zP4uBFzd5@Etrz36nKSmM0;b4mvYR!W_tp9EUxl+rIM}NL-lDF?`iDLTLe!x$n~~(U z2W}Z9j4rz=cWz1321vYUVM75X;|xFH-{hoWE#5jLP7FTGSuLZ8u#B7#8MEl-jz{OO zXtOEPG%-zC8p@3>@!g?8XD-Zn&?O;5TPQ_~x)tJwBSY1MhyUK`z 
z%W3Z?x0}Vu@$ysOm|i-&Z9KOxpqboZr9>K$j*0yT--Tib?XTrzV)>GyB78-yC8}^c zeN4prV+(!4pzZp#PDDHNFsH|Y>SNUEe|^@L&zt$LW6J5c>YTVx5rLx~rG+$;GA!Tm zB_&zY;7iU(6y?}oua<>6sEq|PhU?E3$yhOQ5G5Oa1y{ThAN&msvNF--mm?T6q@rkb zS%=xGx>dmiDJXz!>Njg$kSl3n=O%XRroWqJ<#=};w@>D#@j=3y-&t>q-Y_&EbFEC~ z&da{zlQxN$*?a@XBJLVrIhy+&9Z{${2YIBLq8-nGQA`2v%3f^kxPnn*lPDWQky=;! z1=sZZG8>+>|I}7u_va0%QRY`tUkLbL2t3NU;U884*hRbk44Rc16opQ``Sx2!ndmiG zj}=Y3OkirdU*6|TV})O?fr)G(ef|zlBXUNj4W|AW9p#Bv!y=P$O=R8ngTztv!;L&| zyno;HH1E)!U-N(}iC0=;XHabIHAne$kWf8v;b2k8tA)wpM65}(mLJsfy};k*iO489yqV|}Fj3_?uvzV-T6Ec|Iof~!^&xNuDDCkVlCLou?`u$MwrTh% zB`=OFb7|CVJh5$z9&VR>=pOf15o?xuu@%W`%HJz-DDL9|BD&B1K0f&V1wu~CYbvD}*$Lsvq{N{}CDh(9cd`kFPqmz4* zca;!32)nBnn5a9iC=3k{P%BMpsIM9|CanlR_=u~vUQ8BygT&0384D8~U#U~(FfRMB zY?DehEw`#>u^NpT1V$Uc)C%}(I%T2MvaFE01I=2`A?CZZ=ZpbjXzuAla=)ovF1bnG z*;c1QLC&!JWY-V@W=ARZmKybZmlJ>K&qFtm965X7^B;}Y3k%_ktHF&&0`+lx9j^y( zueR{jHNQq-Ahk+qf7He_3% z90F6TgRIlzzVQ;z!R+a9Tch55;=UAVOO9(cUi-yefzfA;diJVgIPgpE8l5mn{&YZN zi1Q0=z%)Fbme-khw@f!gIysaTxAiINY}G#&?93q~On2_&*S>t8BS1F0kGnA;mr+=* zvO<_xa&(9GWuf+YLalfnW?ooAp!W%z!v)i6X_Z;QQ3mPA*ENZg)BSR?9lDlHvA|jU zT*|Q=r5c1VT#t`?r6e{mX_oLV{>a0mD4TK$@W$pgQ=7MB4OQp{#n%t!31Fb>2Wip~ zldCVXX&eToWOHP@%i4mjalM4|&rJgEgoa@kuG7z*+y|R__=N=OND~YLBw8a_Atph^ zmJ^%?$1<(ChQ%u(1{?{(f6BzVA5rM^RqGtfz#Cy_s+PWyx#0wcgEPvPDfB-n5@y)gJ1Mo3D}_+z^e>H?T-TBxOJQdWmH%}TaiO- z2UeD>B*elnoCvi_=`ok4(t95^7g$Gsz2R^y`NwyfZSmT~97zS?rp-b~uyBHgX_*(8 z)%m}(+msGSRbtoC;7INdlm+;N!F_Z?9AM5gBqN$lQ5;;_-P5zPlDdK&xA>y1=|P}=v~t$`)%9oIIT#ob@=ieF?sqTCV`Y*O(=1os>~rF>f%^<{^Z}%{{Zes+nWFY diff --git a/tests/data_minimal_case/files_info.xlsx b/tests/data_minimal_case/files_info.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..687f990a5c691c3a0fbed64a49bf1427d4ff9970 GIT binary patch literal 9100 zcmeHtg;yNe_H`q{-GaNjYl0J`aSKjxcW5AZ2Z!JqBv=S8!QG)DNFYFPmxMI#68QDx z&3rSHneQ)nuX^>Z)vIcss=jshIp^+E>OcfU0ss;K1polh0L%|^Y)#+*fCNMU03Uz? 
zZz$v9>}lofX{zn(YUN?f>Eq-`U4RJBoCko1o&Ue>zjy{JQa-BoapTBcD%{CzaVRZ) zkVFyK2M^(~YKV9DCl8ic80Om9J>*8+;z;Kc+6&dPW!I&YDoAY!iNu39a~6hl zaaRXhFv<){L?DYjYpS{{xWXf)pK&%P@u%(WSo};*H#rm$V`%-Bh(IRq)M@mzSj;tB z+;7r>ogU21d^0aB;8(cA6FRV#$5)ZsI)x}z_(?yMMxT`#3??|Rb@?0lzwBa;i#b!j zX%!^2HRUkOl?rerrALmc|+GrlAjr*k6Nu z6Z3<9+BST3aGdrni1!`|0C;#n0I2_smJK@GbZ0PMQ--k)9Y#x2cPmE^F3#V^|IzWk zn1g@$>lH~VYJJ=o;fD%$5s=H7ZE;=dygCwiJ0Vlb$pzhZt>1vewDwO<)t(U30h%$64lMv!zZZluy4{@Y9zb4huvK6tbG0$}urS&znbnHHr&FaGc;m@I( z4bi0K$GhU4kRM{mI}b9b=Rf|K{(;sO7{3BM zwot67Jr=ZTf%46h*T70l!+g^tPU7uHZ)MId=-$Z7cbYssOoE-RPBAA&1hFoR7jLmM z@#oW~LLV35WvfD;>RhCkuLiX(pG(^4~aTgf8eYWp`lV#txCv9;lxO4PK;bn40RPJa4yV@KO3# zC9UD8V!j>$_5HP*>Z!86+*Ei4$K}Z&14foJEOnILki;!iUtM4Uusl`gE&U_oRPs9gTGNjkVt#iU$EieDdhuJ*Yy z4d$>$N%Bl=-5c%lZ06`rI$@Fh;s+_Wq$~_Nh&`3>!<|Jq4ID3tZTs{Tc=!7?X39x! zyR_LlEh|<)5N2xMO7t#oPq`2@ZiAZ40$F}DbFq0dF1vYH><~s_sX*`?|ZiNs_%^Pz?!j;VY|ayKaZyt&8K5a+w+Z< z=Hm%p9v$onJAPD5<$7mTJkBi`#!ck;-X-P0I*$k8vRI1WtHWpOie#LeYN(1YC%hAX zl#h#e^V$*DvRHOUCkcwNag!`08``>@@|DQoqo+=j>SzWZ%8D|^;570z!z&h5o(@9~LZ1QyXh)^7#hgla)6!Szi|)NfdPQM{WRJPi( z4>8>q<`b6_b3wY;HY=hN+N_MNc#o>$$ro@nYMo6Bs8*X|FhrC#t9mQH+D36|=SQ3m zVV5T-AkV0mQBZr9$-bTqSoe+QPqEmJ&IQ$Iq7T=Gcex*ynrIr$)33Y)uBttW(~qbM zl^!>XY_$$`Mig;h_{JtXMdVOoDp2v9?sc+=aNPF#99QJpQ)*C!)s1;RWWC5|!CQL6 z7(L+aD-`@)_Vu<P}w?JY!*|)Y__i zJ}V(X^0f4^hy3QU(1K$=X#T4eKxsYnz3z97FBXdWTCh0gO^m~UnvymK>8hcb6wZ7JX0Gpy- z%z5odi#>-8v9hzYOw|WN6msx8srJba+!F=dIg1$3I*dqsiyaYq8POhse$+W4vqLxXxYBD{;-`cC9E#GUoL|A#E!ALQ z$=CP_QxGZEgy}DN1f}KmXv|J7reyZLv0d#v=%H6G#6E(=0{&+|=ISjB%NBRN-NoMV zs)Scxh-MUS(x?Plu7HLvSBt4y??rQ`pry>bAGYmEMGbGv9p1AMJ9FC{S{3(qXcycv zvA#|4i=GCEs29PZFpP`W=JyXUHKzESM`hzJFhz&q4DQl;;nU2_YL+HF1q#qvclo5p zlVye%UnEi_w=%ALTFa3l10}}DK<9uvdW z>sSh>HuC@-PeKr^F6Nnm+m5c@XD}Ez8P`lngm2&jzK&ubhBSgX8<$iwvhinGQ2^B!(t_b@9;U~QM1 z#&UdxeFbCc!^6)h{=lN3tah%#t~4>qrVCM2VXM{Q{lq7vqscs?JQ3`nbb{`fijhi? 
zbOKa5@#qvKq>H&tm`F` z-*k$fp~2JE%E^lB=lC-!K=sF*$%Jq^AAc3Y@l#`;PckP-L;$932)dciDMgc)8r!Gi zdbd!sh_w<9*Gh&I*i8aMR$ekMASnc!Oucu@Do_b&myXy{GBD6~WJ0=mOyS6Jxp_C? z8s7SHUuxtN4Q|p&1)(x@sliN=9&zsdumXzVT(~2qQ$&4Dn9YtMfynUh$XRgQU zGl|B!gJ0ojV%$SEz*w9H<=Fq8sa9J(hx1w&Qw`#U29vBDavE)S>Bd%y8p?&MUOI59-i|m`9N&{4g-=u6|D1 zP^p9LB1vRw+Wlq0Fh{kohb&=0i0G(;ZS2|D0GWEPt)PLVL&U|)N9QD#komaBomfZb zN^oxrsY$IT-|1uc7N-puL?3!gA_ODxR@jD5(=*ACOA~mzisBGBAo;vnn<8r?7ztw- zLmyWEWYhr0_P`+Fd4IWgeOa9=AqV@41YSX}*42jMDJdlGCa2s557yW@UHq^2ZGr-S z@Z4;E6XK!@S|9qzd;f(>{CazL;)f(8uSJ%t)Bj|#j)|-D=Irv2(r?WW9(&|STqR1j z34*(@%ktG4V^c{LX~SRa2DNw`D;wa1{jExge614ITHGkBd9aNw&*v)m%g`l-J~keY z?sVVvvn0`O<8l2g^lCU~TN4KK%OQQJ4dA zFrMBSeSa<};+dF$li?eBh`*C@EK13ieWP&UHZp?L6{WL*fAWI1m$!>OBXihWDAELlPpkZo zj1R0M1Cb=w9822{H8+}X&!k-=2ICY?0h2C+1tklrKUC~4TFpn9wKu&7d|rw=^-OL7 zqjK;Wd`J#V^?aI+g%{~?zQ-U>w6)$=9Uwhix3;mTPZ&7uJ|cRvTCkSa-~9lVWIBWQa+ zL#I+3IJQ%!D^QTBxmzNrMdIIFLdrf~fohBbUU^ip6pU;95vvZ>ntlq@ikSvP1egpr*?h{=UtO2 zUq4z3eO*=6wERNdPZia(2m~ALgcxQOVbyk6Ch4umeeB_F`Rb@-7w;MCP`|VYW!=|Q z^s0n4+=6>mDIj6mi+YTL{G;Wxh3^5LjksX^8IyS-Rt@9EM#L zq;)_byMZMQVo439Ydu`I1X+38?m=^^SGQgzU~$}3S1~h~-V|KU@8onk!Z9v4mC99v zF!H z&C(WHtY>Eca8bv?%khK3EnU5}40yKtIsyOF?9{6^$|tYmv7SYC5YzFasKqnM2ftW{Q{*ki2>C zmv(7iA=Ns&-Y-$duot zIO#9J$62LxF4S{?_S3m>&?}AwR}F$AWgckjNZGj z6@?HFWtcXMNIW_232;3VG&+vYOuDdtfHOMoNihqe58i4T7cdlhfGuPGS+S1zP+8o< zj)0H>0L=fSB@a(uM=Ot?0)&o}OBOfIy|~>&&xyA^H4Tt9&;AXq&R((Neo=r|H@)r; zaBOm->g}Nzf+A$0qrns$(CK%#^mG^KKd%r18l5&OM?`)s{!DqhbIEpR36um|}5Ad|5zDv3$RaSBwrfxm^oddlJJEw~cQ{h#J>P}#uF%5iW z>{TVH4%;U4$ZEs>on74h&m4(0d@ zzXn!1zIN}>5$vJC@nXo3VEv(Db$~)}&@t5NNH8Wgq*jbsuzQ)0!mbumE97#;f{!2wG=qUEvp-&d-fu3P*MU zyP|v<&0=~MhxR_;BRRu9{JS64$7YS~!_qE$T*WIs{j@Gb9BC%}ohIz*^P|{5zEi!T zxemvjANRyzA)au6do_JFe8IBv))AG`QS_#AP|^<)K%{zC&V1D2xP`ppBmI8GlQ+7Z z9j$lUvb{O#gKhWIm)p$eia4G!^NI00X+XwiQClx{TiZv8FK#2~AyTOBH+{-M|H_gp zjRP}!*z!IAc9h_Eme`uPTUly(y4yM1{3MHGlGJZID@Sp3fvHfi)C{l<@l}YYd?0 z{$S0oE{r}ZzK#x>D5}&TxEwVoMXlK9yn$D@#A**?#war1t3o0xZBLk*5NO6 
z241p1i3=N4$s1Iu8C2=sCs-N}-fZ7je#eQY70PYey%fa@3VFgTda#N0Kxh_msP*r$ z$5OimpaGjzYuK#f!UPse7YlWF7grB13m12*-y3)T(^X*Z7Mf%V?BgaFSbfwbGwhdj zpV=DLh>-eJgjiU^AANY4$Z;w`wk^=_GpL*bP1?l$u%#>8L$UM82&# z>iwiB_td?}CvQe27Nhcj;^bwPth#)WvUuIt-eS7~#_vu(52%5L5>GERWb#{$c_gjI zNz9N+%2npKzwFnJ92d=T)D+BC{;2YXzX~v{D)mrDCVK&;cE2-R%MS<-!FY011xZ+6 z@*Sh3Y+}T@Ht#cItv(nO!WoU|%jVy5-*^TuGH|%`MjFxwJ_v9W?q41Gwm1>%m47Tg9w*%}+(Wj}>etf%)zz-0 zl5-J$=IHKZk~hp@6LFsDCB=U3Si!c5n{r24BF1fO>#>PvKQmSF2*_jgRb05yBC&%s z6ze@srQ$fEd4;MVf51LjQIVH}Llk?H|D3_USNPAiA?{=O(N Klo!;${rW#iPsDrx literal 0 HcmV?d00001 diff --git a/tests/data_name_to_properties/checked_compounds_properties_correct.xlsx b/tests/data_name_to_properties/checked_compounds_properties.xlsx similarity index 100% rename from tests/data_name_to_properties/checked_compounds_properties_correct.xlsx rename to tests/data_name_to_properties/checked_compounds_properties.xlsx diff --git a/tests/example.py b/tests/example.py new file mode 100644 index 0000000..4c44779 --- /dev/null +++ b/tests/example.py @@ -0,0 +1,37 @@ +import pathlib as plib +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from matplotlib.transforms import blended_transform_factory +import seaborn as sns +import ele +import pubchempy as pcp +from gcms_data_analysis.fragmenter import Fragmenter + +from gcms_data_analysis import name_to_properties, Project + + +folder_path = plib.Path( + r"C:\Users\mp933\OneDrive - Cornell University\Python\gcms_data_analysis\tests\data_minimal_case" +) + +Project.set_folder_path(folder_path) +Project.set_auto_save_to_excel(False) +gcms = Project() +# %% +to_check = gcms.create_files_info() + + +# %% +def test_load_files_info(gcms, checked_files_info): + to_check = gcms.load_files_info() + assert_frame_equal( + to_check, checked_files_info, check_exact=False, atol=1e-5, rtol=1e-5 + ) + + +# %% +to_check = gcms.create_files_info() +to_check = gcms.create_list_of_all_compounds() + +# %% diff --git a/tests/minimal_set/files_info.xlsx 
b/tests/minimal_set/files_info.xlsx deleted file mode 100644 index c489ddcf264cae19dd98093e95dc5cb1200c7e70..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9014 zcmeHN1y>wt(;Zv`gS!NG3vNM^VUVD~-Cc&@9xQlp4*`NZ!QCAKgamg8u3_--&Dz=B z?0&!C-JUbuXU@~N`b>4*s_I*0sw%=0F$FED+4$HAPx}#zyqMd>q*w+Vz_qV8GMg|UcY8a3S zaP|!x(034##JlKdALrncNpO=G8b(`W08*VBdQ^F3)+E7Yq*erk;sG03^8+t&R{EPT zOLU5a1{b>eypX&Y@*uaa+4z8 z4q^7fwN&$yDAJh4U9j*@Uig+J< z+R}Y}beg>D&+`Kr0C;#n04V>BmUS9jwBKQ|rT}9d28@=5F6MR+PL5yp|IzV(n1g@$ z>t*qZFM7BzLytjsVS|@5OL0Icd3Q-r%VTxF0J%l%y68MQvZW42a-cftYlP?iZGLyd z3rnKWdxMXm>%3)=xcDNp_3q`NDL2lqP*@mTlAk%3t@Yx#&0Net)1HGpncUi9SxOs= z-pdcI&_0_!m8k|!aA=aE;e8+vB@{~w)ah5$UN*U^fSZw0J1h^aY7ofTOPI*;|C(H| z`{-%7kix-K8o?mM#B#2}XVC8P#Wkshnw5}sl}WaXFpa07nO*0(OhyOZy$`!m=AbGK zFYXo3#Pb3AoC|-QkG!WNS)RQDXg#HaCqqGzR7K0M3iVHtSVia)F(3c{xUf=&4fBkL z9jCjai>-;HqwOzsD__IjF@p>EL)`iS?pg#1PYPFEXX{b9s;;cuyn|}~FcPB&(x}~Z ze#y;>duSa07_T`6*zQ2&so&92wUF3~S@Ee%h?yuhP`vuBz(htOY`>nY2f*D1b_=kj!bcP0~-lFFYQ?{5iiO}d}I_8aYPb5hNK)D z9#|t;C&u3U&nh%5`H$+DVyhHJ~liuJJ3EVX*jgk$LZ&zmo*w5r+uMgCR zoEo6>hY1N{4b>s~nly)syzIWEV|5JMY6~>J?U>{Ol}<%q58cw~cn^$8S{BMDPc7&) zBQrL?LuTZ#z1K*Y>}-m9y;#gh#a z)ugUtzj#Q}2oX~d44+M%GNi}DAIAcrb_XWQhKbyTL*;xSbruJ9Qh8Ip~u^t z5;;^0_GnM7E&~;Fhn)~|tC*cCC2pP|-o0j%%da(&WU#5fxE&}1Sg#38WiT0c-uCU_ zPCcc4p^VAo*(z2_6lcHKJVCp*aZ6+;(&jeX#c<>nT43w#TT3-B1`%{xiU|`j7PRV+ z;_5!9RRSm3?U%hr*pyPiR?sw?)~Y&8Ve`v7HF2a;yQE6*hn66G5twJsv%M}r731Z1 zXN$cSmSAwBF8X%A*37e9twm4G+%gTH;&nu3UW!O;YS*Ik&^iWwF9;R!LFS z5Zj4?Ym;mdsnQi2aFr%c=Qx_%Hd<}6@W!v7rw}ELS$JO&+kSf={Y=mA4CG#J!$)5d zT$>Xs$p?K!=3wcbFyduGc9WfXzY7Yh(Z3fMAn{pH^~#5Mc;|1N+}QZN%0J%n@bf34 z++XYz(s*3WxT);MrA7NbX8EynPbFQ2;2O9g>s6!u_7Z@Ht z@%z3)#?q{>^+lc&!7%VwI~7?(F@P+SOm#&?0gIX)#R|8GUz!H z9`3>3e(50wRyqW)Nj!d#EgN5O`-+L4BrLChM&^6dR4A{_IM*v48^|Y$VOSa-`M4Qy z6RdVn9`v!HfjXVpV(XzvdI5_3F36K=@|*-sPV|u{U~=d4W7exKSt#mR>z#$7=eTWG z-VNp!d8s^Q37?oxs4I2Y;BzW}=6J|fZd!}R@tUjD?J#r4!WrAh_hXCAp@Eqd*q#1o 
z7Un~CJZgbuVKYnsfEaeh?^zgPWp3^Y;r#W${YyH|Owf;Ar^XLG7MOdGXl(d6Q$Mf8 zm_fn##69Ca-8TO-uKX^%pLy+rFZS#n9YYLBUZ#%6yGwGxQ7Wt&Ns)?ZW&6N9>fZ1M zg``}4qN}eHg_HQSrq;Ss&+1w`pF5oNte+QN8|+3G3pJpkmNPrq+o#}}XpPbH1@3uZ zJV~00*SRYF+UuG=Jr!vtz0RfZCO@~2QL5tDxTpj->M2WoQfnP!`1Yf^xQ=33OXPI3 z%T>CW40{0+uv>uEn`g6mVJ39iKWZG33QCdp#AWb?KS}logsym0ogPPFb{kPy|+Z< zc9HfV32pNC;O0citSUvfHXyi?Pkd^b%C_C^?}yPY;F$gPLn&43+NjG(IG4*|ld8=I zVS8sOFub4BNoKfg_O!Cq8of0bJVtd~9d&NLCptK{d3bJgwdsA6@r=H5r@ne+=0{rO zEBh{?K(K!4=GN(3GGsCIWywrI7PUvzMl!06BgkUUunX;Et!)dj;@QZbSCLnjSA*N3 zcx`ud!NG07_4apRx?a+Ng<~Wj3f$s3S&$rOfFNqTQ950MYx>goBa1eI-rBPQjd=`e ztRS}3UO9eWN3t3Nv}(D&0=r87Ax^2AzQkr#JE_b9)_(651%n566Q8umQV5Y!rg zdk!YW$tR8XAWNj#Vl9Wy9Xhg=MjE|1S$pbwTpfZ%nc&_~e^}aF8F_blPbR7)6_ImW ze4OPeOeu4cD7Qr3VW6?Vci8sy&5~|n1dSuxt?`=k8)o4ItUeCxFdFO(f2pS6e3@Nb z9D0_NaAs$>H^I0>DIjL&S8t+l=jowOa7cY?dj<#IhZIYrMI=gEbm1Yk2R__&OVZ&ruWn+46SE`kAoMW=*L13n)j#JAr`^z%jZ_A%p zVawF?nC5&*tr#^xeY!FX{+U!>3sFnQ_G%8gYs$A#I3Z z@QlHVd<8PH+j-sHIq*l^=5h7}`?!o|zg^>P^>5cW@Kx8EEkPYSwrKU~*|LQG>D3x$ zkiv@#MK9yPc3S&3WE0=;a#oF?aeuV-T%t< z1MM*fG67&a(T)hv`vu!vya~x$1o`B3erJ4kVJ36=b^3CLzcYn1l&5Z%B`l# z8LU8VHPt(3Ts2u#g5yFXqL3p!ssnfO=1S$As4|?17K_IuI?Fa*nVo@t7g--eLGl6n z;eOhS>dIMQ^vg#taIh^DO51eKhoMpJa9nla`)(~+4zHd~0Gx<}3SK7jjOjSG&1X-s z1s;5rozDWdsX|WaZ)`Dm1Qrh6+O!8v-jK%K>*#h?YLeEKYoItv5E>eGex29NQtIg< zi|Z30JZWPcdp6cbrrd4CuOnd_2K7R^ATb-9iy>;qI=KMDz00E}wV3>%jpV1(z#S1R%A zt#1?e5`#HSGMw!`XA8BAob5N?FOMm`S9RgBhfl;6-^er!;>>?z-m$>k04pJ{`-t42 z6^vtL0_?GOE5PKduKE-AFJak*c9?zw&zFWjj=uAPBV3FlyC zK#y@bpnYJ;(Qm#e-XA8uOpz&6aTo(BN}tYgwOf%3r`@LPN$chw3YWT}pu$<7Jfi#% zqwj9b)VL`fa-{OP|FdnpIE7#!@5)DMwUd$F!Y7UM)niBcpSMQ~FZhLA-^Kw`J%f+& zwo^}qDOodbKv3u5VdRcC8tZsxMl{_#6|4jv!VFP)4l?=Q51ZH}R^q1}jfyl48jfq^ z_Gm(KX{p9CQqOr@GGc|%lpvL(yz;}d*qf+Q_<3R#J}3G|7U6!#Pgm`VTaH!N8*jf$ zIfeDdfX)Gvj{Uhs^Gf%M*3f2?Q6}{b_dX9VVf(JhP5C!jc=R45M}}G+4X1(&w7~sH zl!=z++lnLPhwJ7Rwv-7S`)^2u>6@O0cFmiv)(N8dQ4~dJv;rS?6X?kaRj>&a5gZE| z=RPGep42fNVQ~=79!Lh2#ukCGa5JT2_4#NG*)XQ6WrR|=7Shb0_{8IV921m#KgOS6 
ztUrNSPYEh>^E6BOI$XOJ6_ti>`ov#}q}&?b(!Abn>o8TLTwQ)_yW}NbZkp=1B7QXz zpT;6mw(&AFebk_3q_V{T9Q_fjS~Lr~DYpj5`Dga@i;!jzgCI>v?8oIu@e)!-MN@Oq zXOvXCLdW=2NsWAba}BF*gp5gZGs#sMUdQ&Wyc7=K45mE2X(+T`Dk&wu&wKf)tnv#2 z{(36`rg2$Fr8Sm8O7m$CTWHIBWi&EqAwwzI{~`B#x8hBw`GcegaB)?d{Kp*~ew4k~pg{$16;e85@}zgk=lU?A91! zEqPKIS<7*-UC)Od5=A;#7`8Y;NiM_>6Y7)S(DaDPWakkcFK(hoxwhBCbEtQ}N8dtw zbpHIQp=3PHXSuj?e26gUrjalw7zRAq` zK@FX@ESQ$#U}{FK1WmR^bI7D@vu%?1bm%`G~rlQ=9wI=k{1B;-H7X^Vo>E zIV#5Qf%D&Pm349Mz8EdABC7Zf@l%3`K)RGDp9JS*$rREP?X@C~pxp~hjc+-Gmq-i; zrv!FgC#(;&A=@<&m(z=-Z`V=!TC>%!nx-#ON#mhr5qnT8;X&eoRKvRAr=q7_zE0oy z^-g2c;-NMVaC)a*iN^kP0h_Mrh8-KdnXKJ+`R7C!o|EJ0j7?aU#+ zX#ook3tX7KR8{`K??=OK8*FHHM2cz%NX^C)B1V-F?2J;cxXnI5gb!ApU&l9EHDJ!k z)2Mu&e2gtOpc#$7la`ilo3y&xJrG{3N2F(udw0lvZb3YgToQy{t`&4y)~~rcfbDF5 z86;*$Qj1Ehp0!$DHxm>myuu^5kDy7}BO}1N7imkTF$X7k57RPF?`0I6typQi~;nXvXq#&OW)$nrrF=Ib|5jX~Alj-kI z82~sY&4(b+TSC`Dj*ny^H(?R%4{##Prm0s@XZtj!C> z9U7}t#07TzyEw<@+U6Gm&L?6wax2Nw@@x65pUAbczaIv9K1~*9ies>p5^XeUPYDxk zs<8p*yYVDID?nVMIGTt(xH7vb`VP&}BQwP1J(OLcmR-38qN@*Y)B$(Pv`El3{#*@u zXoX{@%#y=g@_@5H`_(hKFHQN}yx;~|UHdBxy;l^;JwmVH5KX_FEow_7)= zTQ!~K4vo3g4R8DHtlW`C?>rEHIwV3W;Z9|Px9~ zzt42ib})tAI+QrxI`^IDtcD3yb9(5Ee>fNCa%WWXD1mxM94*G@W0f9H*G$e#&Zk*) znl31(WSk_L%bi|f$G^&Inqwk@l&}>S7|rm1Cz6%1i@BMatBbXR<*$6Qi|>NX0)7!6 zTcwjkM)>Iviw?0snT_R{w4A5>k-f~-BHY>8N+Q&CBHOp$ZyU-!XS~BN#Up$_8m-Nq zTp$G%0#cEl&$T@j%>d&IN{mKD3+1IMdi!feTJc$ElS0%F4a*7qdl72}#m=mtO?015 zwXt5_1OYe0i9Pv~=xly^S?F z;%)@U@;`R|D#81`XQ2a^22GN$&BO$Q8qYf!H4gZH#UDFwRt_AS(DMm;u7mTS~MSUgiU#7MbOgR-@Y{Np5nbWqf!%JgeJzW3zaBee-OY?-wa4BI(Bk z33_HgJ2rhSf^}<8Z&wZ;Iy~@(X@Ha+TwjQ|sPZ{J8E6ymq#ji(JiRDQ?oEAYT^Cv< zf@4YQm(WHc4e#^IEiWjR93o_2EEfjW_y zX@kzb_Nphu5+2pDjjo$;RHaRF-%(Co6&9ahwjzw@ZjgNhC-`}+w48*#7~?S@ z3#vSThS{iOAkq)CjkpOYyOZmlWesWAccTw;jvdQhPEGWL0*gjw!Hw{g=t=&%mGOOMgOJVe-dM z)25%ne^=7~gu+Hu=)Z#hA6@-tJ3q^-e_E1)39kR2*!r`TpEa95tvmt#YUO7I=Vt>y zch`R!7$*GHz@NSL&(NRyi9exuq`yIb?k;||@b?P(CmsMWg=tX#sHZ=}|DN{#3O}Uy b3;g#as4R~JySo4YI_whwOO!d(zx?_?;9iPg diff --git a/tests/test_minimal_case.py 
b/tests/test_minimal_case.py index e69de29..c5a368e 100644 --- a/tests/test_minimal_case.py +++ b/tests/test_minimal_case.py @@ -0,0 +1,70 @@ +import pytest +from pandas.testing import assert_frame_equal + + +def test_load_files_info(gcms, checked_files_info): + to_check = gcms.load_files_info() + + assert_frame_equal( + to_check, checked_files_info, check_exact=False, atol=1e-5, rtol=1e-5 + ) + + +def test_create_files_info(gcms, checked_created_files_info): + to_check = gcms.create_files_info() + assert_frame_equal( + to_check, checked_created_files_info, check_exact=False, atol=1e-5, rtol=1e-5 + ) + + +def test_load_all_files(gcms, checked_files, checked_is_files_deriv): + + files_to_check, is_deriv_files_to_check = gcms.load_all_files() + for filename_to_check, checked_filename in zip(files_to_check, checked_files): + assert filename_to_check == checked_filename + + for file_to_check, checked_file in zip( + files_to_check.values(), checked_files.values() + ): + assert_frame_equal( + file_to_check, checked_file, check_exact=False, atol=1e-5, rtol=1e-5 + ) + + assert is_deriv_files_to_check == checked_is_files_deriv + + +def test_load_all_files_wrong_names(gcms): + wrong_files_info = gcms.create_files_info() + wrong_files_info.index = ["Wrong_filename"] + wrong_files_info.index.tolist()[1:] + gcms.files_info = wrong_files_info + with pytest.raises(FileNotFoundError): + gcms.load_all_files() + + +def test_load_class_code_fractions(gcms, checked_load_class_code_fractions): + to_check = gcms.load_class_code_frac() + assert_frame_equal( + to_check, + checked_load_class_code_fractions, + check_exact=False, + atol=1e-5, + rtol=1e-5, + ) + + +def test_load_calibrations( + gcms, checked_load_calibrations, checked_is_calibrations_deriv +): + files_info = gcms.create_files_info() + calib_to_check, is_calib_deriv_to_check = gcms.load_calibrations() + for to_check, checked in zip(calib_to_check, checked_load_calibrations): + assert to_check == checked + + for to_check, 
checked in zip( + calib_to_check.values(), checked_load_calibrations.values() + ): + assert_frame_equal( + to_check, checked, check_exact=False, atol=1e-5, rtol=1e-5 + ) + + assert is_calib_deriv_to_check == checked_is_calibrations_deriv From b2858a6d334c740fee51230903871104610633ec Mon Sep 17 00:00:00 2001 From: mpecchi Date: Thu, 28 Mar 2024 12:51:16 -0400 Subject: [PATCH 5/7] fixed comp_props issues and separated plotting --- example/example_gcms_data_analysis.py | 105 ++++--- .../checked_compounds_properties.xlsx | Bin 10552 -> 10801 bytes .../example_name_to_properties.py | 13 +- example/name_to_properties/ssss.xlsx | Bin 0 -> 7240 bytes src/gcms_data_analysis/main.py | 226 +++++++------- src/gcms_data_analysis/plotting.py | 295 +++++++++++++++++- .../checked_compounds_properties.xlsx | Bin 10552 -> 7239 bytes tests/test_create_compounds_properties.py | 0 ...operties.py => test_name_to_properties.py} | 99 ++++-- 9 files changed, 544 insertions(+), 194 deletions(-) create mode 100644 example/name_to_properties/ssss.xlsx create mode 100644 tests/test_create_compounds_properties.py rename tests/{ZZZ_test_name_to_properties.py => test_name_to_properties.py} (53%) diff --git a/example/example_gcms_data_analysis.py b/example/example_gcms_data_analysis.py index d1c418f..4a8131d 100644 --- a/example/example_gcms_data_analysis.py +++ b/example/example_gcms_data_analysis.py @@ -1,15 +1,16 @@ # Import necessary libraries import pathlib as plib # Used for handling file and directory paths -from gcms_data_analysis import ( - Project, -) # Import the Project class from the gcms_data_analysis package +from gcms_data_analysis import Project +from gcms_data_analysis.plotting import plot_ave_std # Define the folder path where your data is located. Change this path to where you've stored your data files. 
# folder_path = plib.Path(plib.Path(__file__).parent, "example\data") folder_path = plib.Path( r"C:\Users\mp933\OneDrive - Cornell University\Python\gcms_data_analysis\example\data" ) - +# folder_path: plib.Path = plib.Path( +# r"C:\Users\mp933\OneDrive - Cornell University\Python\GCMS\NNDNDD" +# ) # Set global configurations for the Project class. # These configurations affect all instances of the class. Project.set_folder_path( @@ -17,7 +18,7 @@ ) # Set the base folder path for the project's data files Project.set_plot_grid(False) # Disable grid lines in plots for a cleaner look Project.set_plot_font("Sans") # Set the font style for plots to 'Sans' - +Project.set_auto_save_to_excel(False) # Initialize a Project instance to manage and analyze GCMS data gcms = Project() @@ -41,8 +42,8 @@ list_of_all_deriv_compounds = gcms.create_list_of_all_deriv_compounds() # Load properties for standard and derivatized compounds from provided files -compounds_properties = gcms.load_compounds_properties() -deriv_compounds_properties = gcms.load_deriv_compounds_properties() +compounds_properties = gcms.create_compounds_properties() +deriv_compounds_properties = gcms.create_deriv_compounds_properties() # Flag indicating whether new compounds have been added, triggering a need to regenerate properties data new_files_with_new_compounds_added = False @@ -56,7 +57,7 @@ # Extract specific files for detailed analysis or further operations f11, f22, f33 = files["A_1"], files["Ader_1"], files["B_1"] -# Add statistical information to the files_info DataFrame, such as mean, median, and standard deviation for each file +# # Add statistical information to the files_info DataFrame, such as mean, median, and standard deviation for each file files_info = gcms.add_stats_to_files_info() # Create a samples_info DataFrame without applying calibration data, for initial analysis @@ -90,7 +91,8 @@ # Plotting results based on the generated reports, allowing for visual comparison of average values and 
standard deviations # Plot results for individual files or samples based -gcms.plot_ave_std( +plot_ave_std( + gcms, param="fraction_of_sample_fr", min_y_thresh=0, files_or_samples="files", @@ -98,7 +100,8 @@ only_samples_to_plot=["A_1", "A_2", "Ader_1", "Ader_2"], # y_lim=[0, 5000] ) # plot results bases on aggreport -gcms.plot_ave_std( +plot_ave_std( + gcms, param="fraction_of_sample_fr", aggr=True, files_or_samples="files", @@ -107,14 +110,16 @@ color_palette="Set2", ) -gcms.plot_ave_std( +plot_ave_std( + gcms, param="fraction_of_sample_fr", min_y_thresh=0, legend_location="outside", only_samples_to_plot=["A", "Ader"], # y_lim=[0, 5000] ) # plot results bases on aggreport -gcms.plot_ave_std( +plot_ave_std( + gcms, param="fraction_of_sample_fr", aggr=True, min_y_thresh=0.01, @@ -123,43 +128,43 @@ ) # %% -import pickle - -folder_path: plib.Path = plib.Path(r"C:\Users\mp933\Desktop\New folder") -pickle_path: plib.Path = plib.Path(folder_path, "pickle_object.pkl") -with open(pickle_path, "wb") as output_file: - pickle.dump(gcms, output_file) -# %% -import pickle -import pathlib as plib # Used for handling file and directory paths -from gcms_data_analysis import ( - Project, -) # Import the Project class from the gcms_data_analysis package - -folder_path: plib.Path = plib.Path(r"C:\Users\mp933\Desktop\New folder") -pickle_path: plib.Path = plib.Path(folder_path, "pickle_object.pkl") -with open(pickle_path, "rb") as input_file: - gcms: Project = pickle.load(input_file) -from gcms_data_analysis.plotting import plot_pave_std - -# %% -myfig = plot_pave_std( - gcms, - files_or_samples="files", - width=12, - height=5, - legend_location="outside", - y_lim=[0, 100], -) -# %% -myfig = plot_pave_std( - gcms, - files_or_samples="samples", - width=6, - height=6, - legend_location="best", - y_lim=[0, 100], - min_y_thresh=10, -) +# import pickle +# folder_path: plib.Path = plib.Path(r"C:\Users\mp933\Desktop\New folder") +# pickle_path: plib.Path = plib.Path(folder_path, 
"pickle_object.pkl") +# with open(pickle_path, "wb") as output_file: +# pickle.dump(gcms, output_file) # %% +# import pickle +# import pathlib as plib # Used for handling file and directory paths +# from gcms_data_analysis import ( +# Project, +# ) # Import the Project class from the gcms_data_analysis package + +# folder_path: plib.Path = plib.Path(r"C:\Users\mp933\Desktop\New folder") +# pickle_path: plib.Path = plib.Path(folder_path, "pickle_object.pkl") +# with open(pickle_path, "rb") as input_file: +# gcms: Project = pickle.load(input_file) +# from gcms_data_analysis.plotting import plot_pave_std + +# # %% +# myfig = plot_pave_std( +# gcms, +# files_or_samples="files", +# width=12, +# height=5, +# legend_location="outside", +# y_lim=[0, 100], +# ) +# # %% +# myfig = plot_pave_std( +# gcms, +# files_or_samples="samples", +# width=6, +# height=6, +# legend_location="best", +# y_lim=[0, 100], +# min_y_thresh=10, +# ) + +# # %% diff --git a/example/name_to_properties/data_name_to_properties/checked_compounds_properties.xlsx b/example/name_to_properties/data_name_to_properties/checked_compounds_properties.xlsx index f7d745ee69ef5595d3b006a4b755fd13c77c983d..360c5b1da9fc1b23e785e06dc5fb37eb093b0d65 100644 GIT binary patch delta 4329 zcmY+IXEYpK*M>*$Gr?eDh!SN4(M7M(J<2e;M2{%Z88I?ybP}URi58+0T}F@IOGGDv zQ4%dmNQT#&AJ6xG`{&tf?RD0=&c4sScQd@!xMqkH9pEDyxK0QFc#r}BAOHY>bQ22j z@OHKH@Nm74bayL4SbJm%gM;7JUx)nBi|0@Z(=OrSvM6$oovjRxXYmX}*o!TS$CTmE ztp_v~9YNM{bkN`3M@KEVH|6>twWUD1&?<*m&2d#ArsZDqQqeXpDN|M{vDTQ;7&Zv6 zf5Ji<+``(B$Gf(=OZp$*Y1@aWG##$5JaD1 zsh*v)!o17mGChi>e7S8FW$9#OX*$TSK85!wRubpOy}Os|>NvA-24H5>eEj__GUF8! 
z-S_(vNLDzT>sjX_ORo9Oc0xsgn8^vTIyRt?w#EdPO8hD4t_5;yt8lMeg6aF2jdj%Q z=>v;~Ou7Ly?abWMEmySKXcgnvpjW369dYic-1gluWL(Cx?~|rKagdTas?GEicbhwYuJk=u~^3v|}VQU{O*)$d+1R-g8 zt5ap%6@<}gV&Naju0^g#exH1pxaz@+d-V3R|58rr!LP1|OUg(~qih?9&VPQVa=3V@ zpftwO-+QE)p$exkPu5b5|CY~`XfFi4aFC>W`9eukin?T;J+a$4*|9JW1f4WY=3kgQ zc7Df*mQ)Mw#F7WU`)Cw_)ar@jrA#vNkclNLjFEja#@WQ?lq;L-DE8%N4?(8h?<1*b zbW@f)Xw-77acj``$He@ym-&15*d@R{NA#W7utQtBMYRK@txnC4iM_Wt1+jL%P*Do9 zA6&+!I)Z9RD@We>;|qO0Pdxob`DP8f$EEymLSmkK&r3#wW1lUEWpzg7{g91(U5N3g zkr&cT@0xa%X1Gw1b^oHU?bkKkw0;Q!U|th}>lP`*2!V!_TcwE<0KjcN0008OjPf$0 zvoSgzi^A0X2SjJA#LM;=NoEC%ou%10Ax~QwS46-0(J((p z*-6VKE?JW5)ya|wyZr(#5!d6{4Nj-|L6O-KnLiAt zM9$ci=XuG0QW|CL^WdLf2rd33DAhaaw3rxSBFJg-A#Hz7na_~ZvI^?{tE>+TG8`h2 zhG6Yc$;!o99dI}t;_kOY$W^DXwRckDHq zQ6SICwr#iKSsdalx$d5p=nmM3IuxaLz1~5YXfD~lWe_pgI^h_)lV+$nkDB$HsEcR| zwx{nm?Z_kj3P5mbqr$7|SCIYOdXWS5IpI&dzit5ARjhF*6g~}T+0+uL-wLP9-vy3m z?+`Otn_deSg(Q2po*Nx>grcoXfvMB%y>7uD9eR&wD>@a_PT7yl@?9O)G*eD+G>h?U zm!6Ja%d(5Vv1_dh$E=HR^VdA%kWZnjZO|)EU*VUjeXcEMgRNBz{G|Lw{i#$Sh-_~n zlD^REYkdFC#{RHucP0=bPCCNXOxx2+*LEXo*Vi|ic72^P%A^Zj|G7L}-1ylv zZkmq9?+LXMg;LW>ZpYFuFz9f_^ZKcmdNZR&Gzr+ zhtO9H7Ka``gss7#Dg^rTbd=3Qi3XjfXAy*Chq^FN0GUcC5qco!kCH8=9y>jQi&U7d z@B6OKOg>tH>=;Ig3@LgEml`jg=QB1Ny5Q2Tp-gA^+tELUge!czJLeDE60OK2X*9&> zhWK`$N@2V{lgsmZKxwjPj62j9HlUHpRrU2WBYw-@{2-Z3U{%QG3L{t*q1>gTFiFb@ zw!UM^;&!YIMKgqZRi2WX_~{2#Um_%U)IBWe{fs3u-xps4s!>v7h1jOzgf4X)rU^KE zrHM0V0$1Q4wm@T_@wFCi=N62EXf;`-y>G9dRu2|Fj1nvAS56?O6?k#yV-TpvB1kJA=O&Bi))U(X%bF`Mu zXYwksAIT*LrVAH+-FbCWTr{t8!7TA|Lhn^*jgH-wy`TYjJNCgmRRLJ*_ephQH*3vS zKfZ>0un}J8gU_{1Q&V4%sRf^ByVLHB2|yoq+;8lPtIiAA0q?EouIcEfVDq=r3S?vV zfA6@U;}necLkpzYnxMVl90WX?XMPS?<-hV6g}Il})20Xnqdo+Bj%k5kZ@Z;Ne^@bV z-zq#Ciz!@T|43I8oD?D}N)B(B~cJkt`4rj*M{g!8+4Lr5HR_&+i157ZYy=- z?UXuwqe;}# zd}&DFwMY~%ORhK4|HFko@yN$SEbv{otO$B{|fXy!t*bRSP+Pb`{ffFJN2_ zSGUY=r8U=~(an0BEB?SHKbspJf3Q|YyV~Ue6>Ja3mj@##Gx%>V|5WMZ3yB5=6PSA` ziC|MUcIA|xfMR<7=G~q-+hbkT5S)tTT0G-(mTN_46?6PEc)8ILUIynrIIxP4cw7$2?HA 
zWiu_pQWv4T;Zf7fRHb_=J)CCB9LQ(e>rFGdvO~*8Cz%Cgv0JF6bfC7*WOb_M{E*S} zLli4(jP^9-HQ@9a`}H$uXK0149rqmwcW_n8^$JKRBX|ubJ&@==|C~50)tJ zj{S}+oVq-p#2SXhgB}fQ4KKoIRNZrrXse`e@Qgl%u3IK6!B$E6K}~EF@@*R1thXb8 zOk0{T+yOo>`$l898_H-qzLuDNL-%cy?x3Dds^+JYj^Lm^9G{)bD9NRa*w$xOCCVSo9Qsc8?Ko zTnZH|6UW8B*Dfq0#1|E3R`>Sj6%nYi%Jp-*Mql-Ji+-Sh+`iNkj<+NR#dze^W94hI zx>&FdB}ERC(p=Kb_qvH$xxY@$gao!mHQRVvM~pL<-1sDl)JtmSUQou zf;r2s1bWp@q}9al1}B7PM6HfM$Ky@M(=Cu4=H)HPEpO#UP)S;4K&xEu>;7|(?ZNYU zj$@^WWPx+pVm$5?r_W!RK<4^x3_p~y<;vn}QTt|_?6WY%fMJ7#$RT%*ot}{(f*$S)F*xTfyK1fs1Ofo(HPC)uwN%O`ZOUF-|t(^l#k?+d;U^mN)lT zNhv;v*yk#phm@w;oCs)s+_Mi=oMyv{yeJ!ibddYG^r`ds3sv?+TT6ZwW&~R9)U+Mf z9w8TO9};`qdoWeDKw^eg2Ixx&jJNLy2f>P-m7ZPeDwqCjeB4c=y!A{9c_Ao6$HE5{XP*!r|f zZurc}CZVN2e>!?Vr$GEoA*m=Z6QGrI`0`FPHbm`^YSQdHNSY>921|n=$qLh%%MktZ zySD?x{`y&`3nRJH_^A_p2+p$EnF3!RQyUpt|Kn~XL@)#5da~A=&Y3BnHa|CLS-vqC z7~k3D8v^t@6G8ZWvm!1bjwiz@PwETzHwOS4`4jd&ZByfNvNr7zy`H+&*MH}Kl7$R< zRlT|}4juyZW1SvhaPKNfhw5NZ)>WoYL?cnEyfnnj5Jfe(%;<)B_3-11plSbEVDO>H zdG;UgG46}YnVBwL2!D!z0bMT)FbHNkHps)d&{rAQu%>3uYO93G)Hqu|dc3JiUrhZy ze##~r07WL(N9+)?=1ddY4<}9-Km(&r)*ofQltH7#$tw7QO2|5&vIuz*C3^3|_Cxb? 
zqqpNX+*Ec!Mj*hL@XSPrFC(vE7#a)IwZFy+!^EE4wJHfN@mZ42sx!=9;^pVDUu-nY zq7gw|5qCUed;791{}sLpk|&Cb7bcqFGdjcj1(f;7LC>fvO8qQ5_Syh1zNA|2vb?t; zi`QgTOGHEEe7A*@L&+2vZp)(hh34_94a$zpC2_)I-M)tJ_*%3EID_|AJDOKZSja}D zd(qM5_Xagvx#oYw|Du6KEu||K{vrJ9tMGS)h$#Nu7+@rL^@xXW92RDef?2>ohk#HJ z00xi($Ztk&4#5AnUpGhp#ue3#8GA_B&48hYDN>uW{9QWu_y)Q;V)9@NEdOc?8Q`Ye s|3(2DCJDyF@|P|F0N`{ZK(zmx2iS9BCSme~+gzAie4M0Ny#G!92LoFxvj6}9 delta 4063 zcmY*cRa6`3)(uY3;7;)pT#6K@!QHL6yQWABq(E^f5CW8W3LPM5 zarl|}*P46p_i$dm{jk?QXYI9jgV860`e9tyRFaD2HYNbzhYJ9Z0sw$uPr)D`e-C>f z9}j_GFV7NVly{C0WmrKoOd-6iAH<@_X-H1bKP9gG*LOJ;0m+`qhHS5oO0)N2G~`o( z3y`^@Z~cGzhK3$~-Z`1BB_GF{j^WK_^WR{N9kQgd^fov<9av^!8!+P`Bd9+jzX>wK{BMQz=irhttfWHQ--jF+{U%3;X%S zVr6fB27%Uos7p6}E!_->8Gb7k{}{zu!GO19#7RO3n4zm?hIpI zGaL}V3vLkUy~BO>#a;;VF80Q4H6cauD2hAwrn?W%3pyno>ehmFk+@Ijc=b_EVH~ne zIlyj*q0Z^3>>MTZUe#JMxyy4i{Yr-R3WIfnyK^|kk5Z+HF9#cIKJGo-64jrn%G&~~ z2h+dx8Gm83SDOM7bPGOK+CWVdRqcfe1>v1~Zj$`rxc$CJ5*mB#m`Ns|uhX<2)Uoh; zvU9`JBVTKdByWX3D|!(#Oc38z^(K5D&V7UoZne72q-oXC7fQk{`#h24qrsy6)lM3C zJ`2}G6$f=%?l!d^FF&`mnjVlcxTF)VI(&y4XoYTG&_%s^<4$o&*&J3T><^u)MbhBx z`OkM}8m0-hOa~rBS<~!yQ*X%w($lagn-+kgm;_5^4yFzG0DuigIyDgsOgm*!2o!_P zxPHR77QdBR4 z>vghQ3uO|(5hfoET9zX3FaOLR1^Zbym%3^Z;>bo6r&Ogp$;5LAs>MCR2G;Pp9)x9y z?l=-#d>VvKZfZGIQ!~S02brJ8Be`$1+wIgDCq7VDmSVBCT?lCzgMkw&hhoj)Z_Qla zsovF^9w z(T3y%P=`?gzR(_IIB&;7$06ZaA}6Pj{n};S&Jo>A>u-GZV#?^)(Vsu|=yJcoA$ka> z;z^`#6ptJIOcXy1#be1?&Em(1D4&|c(Qa}bPEUdpOLm*|U~u}QJ5k*+RJWsa_(O5{ z{TrQZ@do$xAI6@ne(5p#Z9x94Hh16do=Yq-EwW!NTiIMhyX=9c_j>w@5CC|1c(kpD zhZ1u)-E9@n9WU>n17~5?W*iee!a^5B0ih6VaDg}~QVn*3Hq398gM{eo7OEK z`ghm<`jwa^pI=~umr|FKMI(QIdoz;a<8rb;GGddmX0uyY$CDp+rTTH8tgib(IygM! z_5J3X_|! 
z+EBsHFAsx&aGz1n%#r5p*B2ewl+;U9h4t&TkB|$braoUggMuAu1(P80kVMZ6dhzya z(MyvP2LeOBPEI|f{V{(8$*k~~pUwKs9nn%wZRN1sQ)0!2lo@u7j+flOL$0&aLP%B_ z#BSUnujY|Oyc}zcaAiu*>c8kGegsX|cg2tM@<+{?(3V{}1G-d!#L*-@bjT=zDJ7=u zBZ*eI!;gL?0V;{>+Vuib4hseyv0F(L!_HC>%U~rJP#y$|f#Os$1v)jENi>NqzTqu0 z)PUiY)oh=SBj*a9S$bR?VF{@` zKP}Zh_BE>_Ow>N|*M&Q8gC#^8m7mSTJ_r9r(98TfL(9>2NNhtZO+w5f6`pji4`e7# zwQfwlqkf%2?^7{oDoq!q-!DY9%4FW39s~0Pt)fh~*dig1OuK(!!^=QoAPvQo1*@}j<7;+u1ZEY4nrBdDwPkVeXys z)Tp#`WZXJv>s_D{_K;!RIEz&5ZV~hQczl0Tkd;K!otDz%8!QyRQV?jR4Rx=iG}%#t zL#X_1gO6gtP|4`IDA};R?K#E;a#&wQ?A>#Qkpe_8r~;9RzBS zl3#ZL-hQN1=a3<9hw)i6?opGNT?j=K3z+cDwO1E`3b%=dXW<2PQHOXfVx^P}v0Z9v zpxVmwd$NO1c)3@Ie~z>k*{bX10&I|Jhokss#*%U4 zZlpR|YQs`Z%)-?Lr^bssBF$*&n0?i&w@E6vDlF+^W%sVw2)y{Zi=fI0!DZx>PeZ_% zNiIxjKr1DlN&pdxpn@wMcnp!hDPDW&$Ut7jpSy6m+C!MGN^h;ra%Tg2!I;&#*?^EAYKY>;`GL+nhTc)76^jHy zUN{GfUN3bJsn8*1-zXB&!M!fa^POo=>}>F#J}GHZaf1W>${Fo&#=XWIXt2k2_2Itd z5Va*qcM>iOKE!bhu5gJ4e+B83F$beF5la2VX8>yuE-Hk{G!xAd^bdVdm@A~F;%bZd zAauNZm#vNy`S+o4HAXldb{d|gRlsyoR=T`N@BSfwu>_v1EHdKBJBk?x$)__|jo&@w zv=3(wC1<3{7BLE*UK(3DZ?C)`9TeQ%RP<(@*9pt!tK@BST*q-vre5_>J_smoK~lVY z*6D=brJCHvfisO;Ycy%??q1a-{VDTlc{uUeF}QTm-L4@=QgT=l*{N$UAF=(fIXS&B zUo}@eY|r)v#ViIwMM_72RP6Fb@(O5Tr8dxtB9s)>=-K_EB&*U}zPh;C1M7x>liyth_a)2@J6-`6l|?k~;P7o(Xy0mlsTG?KSo4SE=i=Z=6?y z`>o}z+&Av*Yj!kt!cT-wWw|P(d_#us&~##Q2zIHdd=er5QhKZ7SV~wAb{7aN_dP(cr+oEc!t1a(pCXIXHw!hua*N$^9vYW1i@!KM`tvIUQ?H>oHu+K zIC~q^ubV-4N`urHqjj{uSe8=Q|N0J#kI`)Ji}fX~Onx9OSx?UJSaW3{nF$PJTbA1l z!8XqDA48U`RB8S!`XD~4LGbCShX|3(UVa^{I-CKm+Iwz_{Z`il)8B$RH3)$r%}G-J z=6W1XP$VYrA4u!MqAVtA{M1!dc})qerhhPI|73;epY&DSu@gF#CcXe==U}tS8~}&*|{%OCDAorYs8Jy zsOpacT*-P0z=|!>PVgt z%NXa1TPQ|L`MR3h*kp`?{^0kl7PsvJ(BQ!R-(?dpbxL4643Mx?XhuSC{K{%d1VU$RK z#)dS7RNm(JLT>Wrww#nO-BPReqJ13MiOZ$e-I!o(IPrHNR z=0d%VEhNt-)(VY%bW>|>h*nf(u-%8SXxuJ=3L&v+L^1>wgYz5Joz$c$OH`Go&uqTA zk^M~c%1_Tp;ybyxze~Ic8}?(-1>fs-vMYrwefF zV2q`Qaq~0&r*r@Sw2#U3zaCLKJrRBSIyVpIPs(&6usmisL%KHj3FE(pfd>Fk{udqq v7+_BS^9gl&J{ZLK&kivF0Ov<%A^RVY4{Q1`SQZn_o(|+;!!6?a`}6+*g>QeH diff --git 
a/example/name_to_properties/example_name_to_properties.py b/example/name_to_properties/example_name_to_properties.py index 31b2b10..a55f707 100644 --- a/example/name_to_properties/example_name_to_properties.py +++ b/example/name_to_properties/example_name_to_properties.py @@ -48,11 +48,12 @@ "n-hexadecanoic acid", # different names same compounds "phenol", # ring "phenol", # repeated compound + "carbolic acid", # same iupac but different comp_name "2,4,5-trichlorophenol", # clorine (udentified) "phenoxytrimethylsilane", # silane (not listed in fg) "bromophenol", # Br not listed "9-octadecenoic acid, 1,2,3-propanetriyl ester, (e,e,e)-", # large compound - "wrong_name", # test for legit string that gives no pcp result + "name_not_on_pcp", # test for legit string that gives no pcp result " ", # wrong entry or datatype None, False, @@ -62,9 +63,7 @@ list_of_compound_properties: list[pd.DataFrame] = [] for compound in compounds: print(compound) - n2p = name_to_properties( - compound, dict_cl_to_codes, dict_cl_to_mass_fractions, None - ) + n2p = name_to_properties(compound, dict_cl_to_codes, dict_cl_to_mass_fractions) list_of_compound_properties.append(n2p) # %% @@ -78,4 +77,10 @@ to_check, ) +to_check.to_excel( + plib.Path( + r"C:\Users\mp933\OneDrive - Cornell University\Python\gcms_data_analysis\tests\data_name_to_properties", + "checked_compounds_properties.xlsx", + ) +) # %% diff --git a/example/name_to_properties/ssss.xlsx b/example/name_to_properties/ssss.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..fb40a9577c3db2dca81ad6347429554189d0f189 GIT binary patch literal 7240 zcmZ`;1ymf{vK^ctfk6Vn2@>4hCAcTZ1h>I0xLa_y00}Mw1RLDlC3tWGB)A11B*-T> z|NZ+e|GnpF6=_&LD4$1)P(51U2r#?TIp&_yak%^6ZMB>hy_fS_5_ahkMD{_ve-H`6r z3T%+cA3omT8AGko)de9=aW5v37KA9V4=s@vl%176YCP@PmGA)?sM47;}jBEdm7m3R!xNs z_CSi619cNv*N>Fxhb5y zcIusbYu%?U-pkJ6N});93%OmgAwH#rkzGa>+}@14Yx_|6`Ds4!W6E&>K^oQ*n4MGB zbS@r&2ffp~>d*NJDe}7QO6BCY<~gTz^E~%#nTWL^2KSnN@c(4yqi#ACHZlN^{t^Jd 
zePG7jhSk{uYzzK#X8+C2fwqp*0w135A#~i`9A=B&#}iOjgX?7Jx>)lu=s=k~7zm2C z@i2`K@@tpj!5&V=JV;P#N)tk4+W9pqC9}27=W+Uz#dtd;Ea}AEaGrH>rn#PVc~y&p z8Ys^8ybyZSfd*03uhq5$bo2QP;^K5K1xBJOxZ@4a=&tGzu~N-DE%W=~=*g|=HL2>jOm8lD#mAy2JbhKz4 zl__htZ&NYheP!eBm~F=@q21--eI?`0dzQ2>Z_BOocb-{DNU!aA;^5bAh)q!dKn|`m zUnUWf_35u&l8EL>o4<+c9)W)HhXe2WdJ$(VC!Z&CNjql5Aw31~_Dm?MdPBwQB38Q+ zCUxUx#^`HRiBLZc1f0tRQuZ!EZ_iVj2lF{SGkgR_t&{vLOW+C*n$yM-Z=AN*vkDVu zl_W+TT9}?3|Dw_{3VmrOi0ddoVlLr527|>zXgJ0Wuh90Ch0Mt{lPmPb#?~{V9I2YW z!9YGR6&X9K+0Px-FJ_aZsim?8n48vte6@0z9Jr#W86l}wswD9FPuP=lLN06iN`b4m zKJ{&=11r&C7bW(egtc-&atlG1*wREl(w#P)CTxD8K%^&N8ZAqCDIk2f6v7p*Vs571 z8TStz$7Sh4q_{XEN$AMpAt}_fIG_1_m1J@67$FOH=~H4yNvzb@^6{~j)l`Mfv?JS` zM4i%5VwX^hNg>ZWq^oppG?I8?Qh0OXYH7+K%jP38D0R%IvMwN_goyET9wNuuX90EZ zJ_i|ULBQ+x6D+};uHd^#i<)^B8QF`dY){X z>d-tHR-Xd%M&M*h5Zmp%IhX%1y8&&aYY@^|-+|^hi!hB{2^nnFjG%K$);lWf)+M#o z((=ilReo_$CL6{OG#5%Bt#ivJ<^BcJb6ux`u)RrrV88e;%>XCsoUKYi?rV*H))mMc$!MGHHL(C57~^ui6{DoK9kx<*4b~Igp2w1C7v^%}kP1f!82f3!@5-%pC^eTcIDBn=K;L2!y3i^FFSURfjsIc{X%OmEMD9$gVh6_#GAnmJ#$fahW9ZeLH<3BK(0R z#%)bdl`EQb!5XjQ0E)DmnZKj;)A$Bb_!RO%BE=X7K#e>9rC__WxD=;BvQ3m7IBNj#E1`^P$-yme(Xp{2V+ z%!;m_nC}OX?Qs8`K+5A{NUYER0DA!dfbh=*;%4t??PLK4J3F!db@^)+fu#Ao`5{Em z*B-HEAV4+Q^75jH6O!G_tLnWnKQCT7v=n@j)KtE5C)Wv1i%V}BtA=i~&hqcbIdWH4 zTx4$Cj~XQA&s}oM6`tOX%;u)vPT7pgKnG3vRz}PQ)f2avvM|tZ!FQACV<)DUHP}j% zv`^ue(=qy|M&7sfUR+;w>ccei*DNQ;csK162ifi|Xe?oE?cqFrert|5C)!MH6GC%o zm<9>ruXV?2#Qf%Udp(F$5_Y`Pm3U9qJ^VOd-3RH_d}Uf#ib}lj*f^K=;E#O^v%iwM zu=L}k^ZJlKSVZ?8ANuBYT;p)qCe$Da;gt;^hHri%&`Ms^DcXV5(weG2kUv88*g@(4DFr z)gpWvPl zr)qr_La$;S`?d*Uurr_`?gmLy!0*^GM~ox-W>4pVObV0aWlNNi*5G)_=`t}B%jpp(22REYpyGkPb~wzb!-hW~$=di0{$rdcPdp!!EK%Mx%~TnDUop0&4U$C81Yy|42^|Lt}WZmCjLpOcUp-3ZB{4gKM(( z{|4B9X6OcJMwvrkD>t&xjKHA;W%KpDa_p*@ZbHg`LWNpKITx@>iI~HU)3;g6E_Jqz z668b;AHFZ|o*ez%+H}Jj@vL@sefEf+UWIopUSuFTOV>=g#mww(Vd2nL&~}7im2rM? 
zc|WWl@uFgQR2CwUdaP$P65DNs5X~$~bypctf-xV0j2c@K1-j*D7qHZz&2d4(e@z_x4i{5w8H z3-TOhhaH@&xpN-}m)HfMf{jao#)W;jiGeaacT)e1nNA(ClLt`{Tc`&44O?H$+OF3N zsZMRI91Yw)>@z0f?(yxRaXJf&slF7Z=)3mhP4pugg;O&Wrs$cs+O?fc5H-KsH3pq( zE-`mquywMT^IR1d#rE@#QA|y9OR?;>=csjD&CC{-Vv_-C=8Kbn@7x`B09W)1!931n z2wW42e{jy*VgZHs5>nVUk`)M@lGbe2Oqgi+lH=7j7z!qg3MSc*5&gu^rz9)J`s5R# zUv4cF?xpO0ozS;7E-dQ`ZH`jWl#tQyl($9;mpay1Glx=bvfAl_>mEL)iI`j0&G2`; zFc0Jkr}qa|&DiOx`ecZ~RigpI6v``6xDFBg-GuDLZ0Pwuxu!E1Sb^C>{b&V&{%`Ap z6(|k>hFW6uP`Gao`~^@pAUIzDyZ|6Vi3)JC+6&uriTh#1WUh+XG$39vW@iky?sZ+D zMfM@fwQopR)d4yhQrSngkZkj5Lc3lqyI$+eq>b3OXf~~v_cq!hkd>O2M(=`NYsHeG zS*6Hd)+A2%5j*gIIeg2w`M(ZNr#{0jwD0@O$z1IcZ3$$oL{cJTtp#s0uiSLu z*`Ulm+Ds}@z=+9%4f4Y1D`W3kTwD8>XFKfj;Zt+3TrwGqvaqm9()ZDZ=Ysr5jK3Rb z=us7eZ2*G6I}I1KvyI?BsAFsfq`8kI$05tI&KQA`cEpH~rL)QiwVRV)g6a9Rt_4DN(R%*LQ z%_&nB$7uh~_$}zob^6w#;f)XD!i=A1)~^Of;w?u}gD?$P+1O7TujUDut7LB&Ed3CE zXJB`I!<#?{!G-R2S_B@X3)cMdkVtq0Oy`fo&sb~U_-DX98N;kAxn*5NR4Zkc*U8xc z3R9>WXrpJJvF6OCqlUpaN{|UOt$W!ek}Oss5C4ar_V!7a5gTmRWiBD#Cyun&F#E+70>V@kdVh-Cq;lmk-D zhdy1&?zRV^kf`~@)_adzMe*gO(kbIjRQC@pAXLFd#Nae#oAC;-of-GZX$(NN_%lxG zg%D3QV9&lDd7KNNg3(a1{7;OqH~w3UV3Mi5CV0Kl#@8FM=mkYZn+`GTCu9BWlY$;| z9+e?9n3ySX}JtI^0Fzb9euFAa(HcvRn z`cLgw>YTGgS!;5LuC+xc4?X>^T3e)jT&&$e4J)~at*>Z}eZuzMZ8e{~AHtR&oO!w| zjnHD(iClk?iEKLaxfNY57PF|?p1(4HnUOyjK1=*)DX3WjTM}c+RyZ zUB0k^eI#4*D+=S;A6<53Ze_B&N=d6xM&R1EARMeOxjM=2NODk0cZ5zI)MfFbW&fGh zBe>TgIT~EgRSRO9;3!`kLuN9!0y|&C)rl{_?NG|b?#F9g;|yJBag^6jd~rP?#Msn% zT>z&g5{uZbaV0!fzssuPI{%bFQX?a z0L#M_6*$l16cxD0%OpR)-R_EIxwTl8(eiaZkm2=p-ji|jbv~5A^2G}QAegx|h_v3b z86t;v;5`lCCxK7oLo|E0o!O_rYN07o0P@8?wMlJ{Dm;zm@|Z#M*)mBZzB+M=9%hZO zsbh=iFQuy*{IOx3%S+=iFLt`c#HeYIWT~0OE_})Q&-LjAEcQtuJOE&e4gg^OS)V#N zd)R=TewQ}BX)ea3v0>lh_n;&xsj3x8bLoTPGZv+)ktS}>*UVGd{;i6~>mLUhzH!i7F?N|1 z1PLMEPFlN46Q%Kmg=p<`m4u{J7y(lVt!f}aJTunStF+#;@ebja{Q$wBq}k&DFIN6BEzQBP@qE#ETAI8+1iH)`L2s7>uwCg7lqZHR7!( ziO)bAdoJ|&bPFx#l>r79QZC~vmlCP&`ljs-O&?cWBw3?=h2lw0ySHCz07(+5fE_@eeX>y`Ia5tPu71Zpbfx8ScF^``Gi zm5#(TcCdtsPF_rS;!Qpw)yl9SDmSPH>&_K7!&EA622 
zxsCNG+iroEm2U7P)}AQEA3)8{LHI!P>ln$PH4q8jNZ+s}-*l@ywLtO(-I9&*W2=PXS6x2yu-=#cH zMTV6R8*Q%Nw_aiXQ_=M@dl4NEN`CgB>IYE+3>?8WPOL1y&lw2{cJS;NVo)5gf#T;L zc=pn{3du0^hVZStk3I1wJ!i1!xef{opu&A-CNmU^E8CUCX%S-6$JV1eugP5osIsOv z%zT#%V&GN~W6t4<8$X}JX4*WZ8DQ?Vo2gfHx{*6Wi{EQmUn-ml9$w; zf9KDsa6hs%hW7PS2DEhlrjYxn;LGVbMO?0olmpXQe7<6&YZ6o>^IKc~Da6FM#(ib` z{&c(uPxz`mgAEcsIV<&q*1qEEQPmda0$0rTP_dZgZc3Fb<| zjG|ASSTq#qBA+s6 z=CEkwaI8%3#1oFKze-c;HC6mf+ii%>1iFpqo!D$%zRC|yAsy1Hu>3cSeU?|Q8fUxLm4txPiDLz41;1w|7Rb@%n0dXflmb8iO z?O1aAg{e6!SA>_K+N>bXsRiv2xPPAvkS|DrFE#TWk2d-|?+1ksy7G>AEvhBSN+!M@x?MIfo=)uqx;O~kp6+#(a=!mC z4v-buGPZgr(a#ei8mhN+pw-AdCeu(xxq7zuo88%Eq#*?!C1YWN)sxZz{`DZ!gZY<1%kMC+HThI7sq$Tq z5PK`Wke|9`y}Z>S^1#ZuJ@lG^LK6E_FvGqJLq#77J#g@NfdAh0c~H$?oBspD|Gx+H z82z{h^9Kt6_`_BFJNjQ8naA+Qjf%hFLl3Y24{eLb1|Aox{$-#p03G!&1OKsX_1Mbe z<;1_N_`{Xp{AK0u#l>Uj1K-0Ha{vGU literal 0 HcmV?d00001 diff --git a/src/gcms_data_analysis/main.py b/src/gcms_data_analysis/main.py index d5ee66f..7d3177f 100644 --- a/src/gcms_data_analysis/main.py +++ b/src/gcms_data_analysis/main.py @@ -13,13 +13,18 @@ import seaborn as sns import ele import pubchempy as pcp +from rdkit import Chem +from rdkit.Chem import DataStructs +from rdkit.Chem import rdmolops +from rdkit.Chem.AllChem import ( + GetMorganFingerprintAsBitVect, +) # pylint: disable=no-name-in-module from gcms_data_analysis.fragmenter import Fragmenter def get_compound_from_pubchempy(comp_name: str) -> pcp.Compound: - if not isinstance(comp_name, str): - return None - if comp_name == " " or comp_name == "": + if not isinstance(comp_name, str) or comp_name.isspace(): + print(f"WARNING get_compound_from_pubchempy got an invalid {comp_name =}") return None cond = True while cond: # to deal with HTML issues on server sides (timeouts) @@ -48,9 +53,19 @@ def _order_columns_in_compounds_properties( ) -> pd.DataFrame | None: if unsorted_df is None: return None + priority_cols: list[str] = [ + "iupac_name", + "underiv_comp_name", + "molecular_formula", + "canonical_smiles", + 
"molecular_weight", + "xlogp", + ] # Define a custom sort key function def sort_key(col): + if col in priority_cols: + return (-1, priority_cols.index(col)) if col.startswith("el_mf"): return (2, col) elif col.startswith("el_"): @@ -76,10 +91,10 @@ def name_to_properties( comp_name: str, dict_classes_to_codes: dict[str:str], dict_classes_to_mass_fractions: dict[str:float], - df: pd.DataFrame | None = None, + df: pd.DataFrame = pd.DataFrame(), precision_sum_elements: float = 0.05, precision_sum_functional_group: float = 0.05, -) -> pd.DataFrame | None: +) -> pd.DataFrame: """ used to retrieve chemical properties of the compound indicated by the comp_name and to store those properties in the df @@ -106,25 +121,28 @@ def name_to_properties( if GCname did not yield anything CompNotFound=GCname. """ - # classes used to split compounds into functional groups + + if not isinstance(df, pd.DataFrame): + raise TypeError("The argument df must be a pd.DataFrame.") + + if not isinstance(comp_name, str) or comp_name.isspace(): + return _order_columns_in_compounds_properties(df) + + if comp_name in df.index.tolist(): + return _order_columns_in_compounds_properties(df) + comp = get_compound_from_pubchempy(comp_name) if comp is None: - if not isinstance(comp_name, str): - return df - else: - if not comp_name or comp_name.isspace(): - return df - else: - if df is not None: - df.loc[comp_name, "iupac_name"] = "unidentified" - return df - if df is None: - df = pd.DataFrame(dtype=float) + df.loc[comp_name, "iupac_name"] = "unidentified" + return _order_columns_in_compounds_properties(df) + try: - df.loc[comp_name, "iupac_name"] = comp.iupac_name.lower() + valid_iupac_name = comp.iupac_name.lower() except AttributeError: # iupac_name not give - df.loc[comp_name, "iupac_name"] = comp_name.lower() + valid_iupac_name = comp_name.lower() + + df.loc[comp_name, "iupac_name"] = valid_iupac_name df.loc[comp_name, "molecular_formula"] = comp.molecular_formula df.loc[comp_name, 
"canonical_smiles"] = comp.canonical_smiles df.loc[comp_name, "molecular_weight"] = float(comp.molecular_weight) @@ -157,16 +175,14 @@ def name_to_properties( df.at[comp_name, f"el_{key}"] = int(value) for key, value in el_mf_dict.items(): - df.at[comp_name, f"el_{key}"] = float(value) - cols_el_mf = [col for col in df.columns if col.startswith("el_mf")] + df.at[comp_name, f"el_mf_{key}"] = float(value) + cols_el_mf = [col for col in df.columns if col.startswith("el_mf_")] residual_els = df.loc[comp_name, cols_el_mf].sum() - 1 # check element sum try: assert residual_els <= precision_sum_elements except AssertionError: - raise AssertionError( - f"the total mass fraction of elements in {comp_name =} is > 0.001" - ) + print(f"the total mass fraction of elements in {comp_name =} is > 0.001") # apply fragmentation using the Fragmenter class (thanks simonmb) frg = Fragmenter( dict_classes_to_codes, @@ -206,17 +222,15 @@ def name_to_properties( assert residual_fgs <= precision_sum_functional_group except AssertionError: print(f"{df.loc[comp_name, cols_fg_mf].sum()=}") - raise AssertionError( + print( f"the total mass fraction of functional groups in {comp_name =} is > 0.05" ) if residual_fgs < -precision_sum_functional_group: - df.at[comp_name, f"fg_mf_unclassified"] = abs(residual_fgs) + df.at[comp_name, "fg_mf_unclassified"] = abs(residual_fgs) df.loc[df["iupac_name"] != "unidentified"] = df.loc[ df["iupac_name"] != "unidentified" ].fillna(0) - df = _order_columns_in_compounds_properties(df) - - return df + return _order_columns_in_compounds_properties(df) # %% @@ -340,10 +354,9 @@ class Project: } acceptable_params: list[str] = list(param_to_axis_label.keys()) string_in_deriv_names: list[str] = [ - "deriv.", - "derivative", - "TMS", - "TBDMS", + "deriv", + "tms", + "tbms", "trimethylsilyl", ] string_in_deriv_names = [s.lower() for s in string_in_deriv_names] @@ -798,20 +811,22 @@ def load_compounds_properties(self): """Attempts to load the 'compounds_properties.xlsx' 
file containing physical and chemical properties of compounds. If not found, it creates a new properties DataFrame and updates the 'compounds_properties_created' attribute.""" - try: + compounds_properties_path = plib.Path( + Project.in_path, "compounds_properties.xlsx" + ) + if compounds_properties_path.exists(): cpdf = pd.read_excel( - plib.Path(Project.in_path, "compounds_properties.xlsx"), + compounds_properties_path, index_col="comp_name", ) - cpdf = self._order_columns_in_compounds_properties(cpdf) - cpdf = cpdf.fillna(0) + # cpdf = _order_columns_in_compounds_properties(cpdf) + # cpdf = cpdf.fillna(0) self.compounds_properties = cpdf self.compounds_properties_created = True print("Info: compounds_properties loaded") - except FileNotFoundError: + else: print("Warning: compounds_properties.xlsx not found, creating it") cpdf = self.create_compounds_properties() - return self.compounds_properties def load_deriv_compounds_properties(self): @@ -819,17 +834,20 @@ def load_deriv_compounds_properties(self): for derivatized compounds. If not found, it creates a new properties DataFrame for derivatized compounds and updates the 'deriv_compounds_properties_created' attribute. 
""" - try: + compounds_deriv_properties_path = plib.Path( + Project.in_path, "deriv_compounds_properties.xlsx" + ) + if compounds_deriv_properties_path.exists(): dcpdf = pd.read_excel( - plib.Path(Project.in_path, "deriv_compounds_properties.xlsx"), + compounds_deriv_properties_path, index_col="comp_name", ) - dcpdf = self._order_columns_in_compounds_properties(dcpdf) - dcpdf = dcpdf.fillna(0) + # dcpdf = _order_columns_in_compounds_properties(dcpdf) + # cpdf = dcpdf.fillna(0) self.deriv_compounds_properties = dcpdf self.deriv_compounds_properties_created = True print("Info: deriv_compounds_properties loaded") - except FileNotFoundError: + else: print("Warning: deriv_compounds_properties.xlsx not found, creating it") dcpdf = self.create_deriv_compounds_properties() return self.deriv_compounds_properties @@ -844,18 +862,20 @@ def create_compounds_properties(self): self.load_class_code_frac() if not self.list_of_all_compounds_created: self.create_list_of_all_compounds() - cpdf = pd.DataFrame(index=pd.Index(self.list_of_all_compounds)) - cpdf.index.name = "comp_name" + # cpdf = pd.DataFrame(index=pd.Index(self.list_of_all_compounds)) + # + cpdf = pd.DataFrame() print("Info: create_compounds_properties: looping over names") - for name in cpdf.index: + for name in self.list_of_all_compounds: cpdf = name_to_properties( - name, - cpdf, - self.dict_classes_to_codes, - self.dict_classes_to_mass_fractions, + comp_name=name, + dict_classes_to_codes=self.dict_classes_to_codes, + dict_classes_to_mass_fractions=self.dict_classes_to_mass_fractions, + df=cpdf, ) - cpdf = self._order_columns_in_compounds_properties(cpdf) - cpdf = cpdf.fillna(0) + # cpdf = self._order_columns_in_compounds_properties(cpdf) + # cpdf = cpdf.fillna(0) + cpdf.index.name = "comp_name" self.compounds_properties = cpdf self.compounds_properties_created = True # save db in the project folder in the input @@ -874,82 +894,54 @@ def create_deriv_compounds_properties(self): self.load_class_code_frac() if not 
self.list_of_all_deriv_compounds_created: self.create_list_of_all_deriv_compounds() - - old_unique_deriv_compounds = self.list_of_all_deriv_compounds - # unique_underiv_compounds = [ - # ",".join(name.split(",")[:-1]) for name in unique_deriv_compounds - # ] - unique_deriv_compounds = [] - unique_underiv_compounds = [] - for name in old_unique_deriv_compounds: - underiv_name = ",".join(name.split(",")[:-1]) - deriv_string = name.split(",")[-1] - if underiv_name == "": - underiv_name = name + deriv_to_underiv = {} + for derivname in self.list_of_all_deriv_compounds: + parts = derivname.split(",") + is_der_str_in_part2: bool = any( + [ + der_str in parts[-1].strip() + for der_str in Project.string_in_deriv_names + ] + ) + if len(parts) > 1 and is_der_str_in_part2: + # If the suffix is a known derivatization, use the part before the comma + deriv_to_underiv[derivname] = ",".join(parts[:-1]) else: - if any([der in deriv_string for der in Project.string_in_deriv_names]): - unique_deriv_compounds.append(name) - unique_underiv_compounds.append(underiv_name) - dcpdf = pd.DataFrame(index=pd.Index(unique_underiv_compounds)) - dcpdf.index.name = "comp_name" - dcpdf["deriv_comp_name"] = unique_deriv_compounds + # In all other cases, mark as "unidentified" + deriv_to_underiv[derivname] = "unidentified" print("Info: create_deriv_compounds_properties: looping over names") - for name in dcpdf.index: + underiv_comps_to_search_for = [ + c for c in deriv_to_underiv.values() if c != "unidentified" + ] + dcpdf = pd.DataFrame() + for name in underiv_comps_to_search_for: dcpdf = name_to_properties( - name, - dcpdf, - self.dict_classes_to_codes, - self.dict_classes_to_mass_fractions, + comp_name=name, + dict_classes_to_codes=self.dict_classes_to_codes, + dict_classes_to_mass_fractions=self.dict_classes_to_mass_fractions, + df=dcpdf, ) - # remove duplicates that may come from the "made up" name in calibration - # dcpdf = dcpdf.drop_duplicates(subset='iupac_name') - 
dcpdf["underiv_comp_name"] = dcpdf.index - dcpdf.set_index("deriv_comp_name", inplace=True) - dcpdf.index.name = "comp_name" - dcpdf = self._order_columns_in_compounds_properties(dcpdf) - dcpdf = dcpdf.fillna(0) + dcpdf.index.name = "underiv_comp_name" + dcpdf.reset_index(inplace=True) + underiv_to_deriv = { + v: k for k, v in deriv_to_underiv.items() if v != "unidentified" + } + # Add a new column for the derivatized compound names + # If a name is not in the underiv_to_deriv (thus 'unidentified'), it will get a value of NaN + + dcpdf["comp_name"] = dcpdf["underiv_comp_name"].apply( + lambda x: underiv_to_deriv.get(x, "unidentified") + ) + dcpdf.set_index("comp_name", inplace=True) # save db in the project folder in the input self.deriv_compounds_properties = dcpdf dcpdf.to_excel(plib.Path(Project.in_path, "deriv_compounds_properties.xlsx")) self.compounds_properties_created = True print( - "Info: create_deriv_compounds_properties:" - + "deriv_compounds_properties created and saved" + "Info: create_deriv_compounds_properties: deriv_compounds_properties created and saved" ) return self.deriv_compounds_properties - def _order_columns_in_compounds_properties(self, comp_df): - ord_cols1, ord_cols2, ord_cols3, ord_cols4, ord_cols5, ord_cols6 = ( - [], - [], - [], - [], - [], - [], - ) - for c in comp_df.columns: - if not c.startswith(("el_", "fg_")): - ord_cols1.append(c) - elif c.startswith("el_mf"): - ord_cols3.append(c) - elif c.startswith("el_"): - ord_cols2.append(c) - elif c.startswith("fg_mf_total"): - ord_cols6.append(c) - elif c.startswith("fg_mf"): - ord_cols5.append(c) - elif c.startswith("fg_"): - ord_cols4.append(c) - comp_df = comp_df[ - ord_cols1 - + sorted(ord_cols2) - + sorted(ord_cols3) - + sorted(ord_cols4) - + sorted(ord_cols5) - + sorted(ord_cols6) - ] - return comp_df - # def add_iupac_to_calibrations(self): # """Adds the IUPAC name to each compound in the calibration data, # istinguishing between underivatized and derivatized calibrations, diff 
--git a/src/gcms_data_analysis/plotting.py b/src/gcms_data_analysis/plotting.py index 2c3f143..8ecee68 100644 --- a/src/gcms_data_analysis/plotting.py +++ b/src/gcms_data_analysis/plotting.py @@ -471,9 +471,10 @@ def add_legend(self) -> None: def annotate_letters(self) -> None: """_summary_""" - if self.kwargs["annotate_lttrs_xy"] is not None and isinstance( - self.kwargs["annotate_lttrs_xy"], - (list, tuple) and len(self.kwargs["annotate_lttrs_xy"]) >= 2, + if ( + self.kwargs["annotate_lttrs_xy"] is not None + and isinstance(self.kwargs["annotate_lttrs_xy"], (list, tuple)) + and len(self.kwargs["annotate_lttrs_xy"]) >= 2 ): xylttrs: list | tuple = self.kwargs["annotate_lttrs_xy"] x_lttrs = xylttrs[0] # pylint: disable=unsubscriptable-object @@ -492,6 +493,7 @@ def annotate_letters(self) -> None: xycoords="axes fraction", xy=(0, 0), xytext=(x_lttrs, y_lttrs), + size="large", weight="bold", ) @@ -639,7 +641,7 @@ def _broadcast_list_prop(self, prop: list | None, prop_name: str): ) -def plot_pave_std( +def plot_ave_std( proj: Project, filename: str = "plot", files_or_samples: Literal["files", "samples"] = "samples", @@ -906,7 +908,290 @@ def plot_pave_std( hnd_ax.append(hhhh[0]) lab_ax.append(aaaa[0]) if show_total_in_twinx: - hnd_axt, lab_axt = axt[0].get_legend_handles_labels() + hnd_axt, lab_axt = myfig.axt[0].get_legend_handles_labels() + else: + hnd_axt, lab_axt = [], [] + if legend_location == "outside": # legend goes outside of plot area + myfig.axs[0].legend( + hnd_ax + hnd_axt, + lab_ax + lab_axt, + loc="upper left", + ncol=legend_columns, + bbox_to_anchor=(legend_x_anchor, legend_y_anchor), + labelspacing=legend_labelspacing, + ) + else: # legend is inside of plot area + myfig.axs[0].legend( + hnd_ax + hnd_axt, + lab_ax + lab_axt, + loc=legend_location, + ncol=legend_columns, + labelspacing=legend_labelspacing, + ) + # annotate ave+-std at the top of outliers bar (exceeding y_lim) + if annotate_outliers and (y_lim is not None): # and (not df_std.empty): + 
_annotate_outliers_in_plot(myfig.axs[0], df_ave, df_std, y_lim) + myfig.save_figure(filename, out_path) + return myfig + + +def plot_df_ave_std( + proj: Project, + df_ave: pd.DataFrame, + df_std: pd.DataFrame = pd.DataFrame(), + filename: str = "plot", + show_total_in_twinx: bool = False, + annotate_outliers: bool = True, + min_y_thresh: float | None = None, + only_samples_to_plot: list[str] | None = None, + rename_samples: list[str] | None = None, + reorder_samples: list[str] | None = None, + item_to_color_to_hatch: pd.DataFrame | None = None, + yt_sum_label: str = "total\n(right axis)", + y_lim: tuple[float] | None = None, + y_lab: str | None = None, + yt_lab: str | None = None, + color_palette: str = "deep", + x_label_rotation: int = 0, + legend_location: Literal["best", "outside"] = "best", + legend_columns: int = 1, + legend_x_anchor: float = 1, + legend_y_anchor: float = 1.02, + legend_labelspacing: float = 0.5, + **kwargs, +) -> MyFigure: + """ + Generates a bar plot displaying average values with optional standard deviation + bars for a specified parameter from either files or samples. This function allows + for detailed customization of the plot, including aggregation by functional groups, + filtering based on minimum thresholds, renaming and reordering samples, and applying + specific color schemes and hatching patterns to items. + Additionally, it supports adjusting plot aesthetics such as size, figure height multiplier, + x-label rotation, and outlier annotation. The plot can include a secondary y-axis + to display the sum of values, with customizable limits, labels, ticks, and sum label. + The legend can be placed inside or outside the plot area, with adjustable location, + columns, anchor points, and label spacing. An optional note can be added to the plot + for additional context. + + Parameters: + + filename (str): Name for the output plot file. Default is 'plot'. 
+ + files_or_samples (str): Specifies whether to plot data from 'files' + or 'samples'. Default is 'samples'. + + param (str): The parameter to plot, such as 'conc_vial_mg_L'. + Default is 'conc_vial_mg_L'. + + aggr (bool): Boolean indicating whether to aggregate data by functional groups. + Default is False, meaning no aggregation. + + min_y_thresh (float, optional): Minimum y-value threshold for including data in the plot. + Default is None, including all data. + + only_samples_to_plot (list, optional): List of samples to include in the plot. + Default is None, including all samples. + + rename_samples (dict, optional): Dictionary to rename samples in the plot. + Default is None, using original names. + + reorder_samples (list, optional): List specifying the order of samples in the plot. + Default is None, using original order. + + item_to_color_to_hatch (DataFrame, optional): DataFrame mapping items to specific colors and hatching patterns. + Default is None, using default colors and no hatching. + + paper_col (float): Background color of the plot area. Default is .8, a light grey. + + fig_hgt_mlt (float): Multiplier for the figure height to adjust plot size. Default is 1.5. + + x_label_rotation (int): Rotation angle for x-axis labels. Default is 0, meaning no rotation. + + annotate_outliers (bool): Boolean indicating whether to annotate outliers exceeding y_lim. + Default is True. + + color_palette (str): Color palette for the plot. Default is 'deep'. + + y_lab (str, optional): Label for the y-axis. Default is None, using parameter name as label. + + y_lim (tuple[float, float], optional): Limits for the y-axis. Default is None, automatically determined. + + y_ticks (list[float], optional): Custom tick marks for the y-axis. Default is None, automatically determined. + + yt_sum (bool): Boolean indicating whether to display a sum on a secondary y-axis. Default is False. + + yt_lim (tuple[float, float], optional): Limits for the secondary y-axis. 
Default is None, automatically determined. + + yt_lab (str, optional): Label for the secondary y-axis. Default is None, using parameter name as label. + + yt_ticks (list[float], optional): Custom tick marks for the secondary y-axis. Default is None, automatically determined. + + yt_sum_label (str): Label for the sum on the secondary y-axis. Default is 'total (right axis)'. + + legend_location (str): Location of the legend within or outside the plot area. Default is 'best'. + + legend_columns (int): Number of columns in the legend. Default is 1. + + legend_x_anchor (float): X-anchor for the legend when placed outside the plot area. Default is 1. + + legend_y_anchor (float): Y-anchor for the legend when placed outside the plot area. Default is 1.02. + + legend_labelspacing (float): Spacing between labels in the legend. Default is 0.5. + + annotate_lttrs (bool): Boolean indicating whether to annotate letters for statistical significance. Default is False. + + note_plt (str, optional): Optional note to add to the plot for additional context. Default is None. 
+ + """ + + # create folder where Plots are stored + out_path = plib.Path(Project.out_path, "df_plots") + out_path.mkdir(parents=True, exist_ok=True) + if only_samples_to_plot is not None: + df_ave = df_ave.loc[only_samples_to_plot, :].copy() + if not df_std.empty: + df_std = df_std.loc[only_samples_to_plot, :].copy() + + if rename_samples is not None: + df_ave.index = rename_samples + if not df_std.empty: + df_std.index = rename_samples + + if reorder_samples is not None: + filtered_reorder_samples = [ + idx for idx in reorder_samples if idx in df_ave.index + ] + df_ave = df_ave.reindex(filtered_reorder_samples) + if not df_std.empty: + df_std = df_std.reindex(filtered_reorder_samples) + if reorder_samples is not None: + filtered_reorder_samples = [ + idx for idx in reorder_samples if idx in df_ave.index + ] + df_ave = df_ave.reindex(filtered_reorder_samples) + if not df_std.empty: + df_std = df_std.reindex(filtered_reorder_samples) + + if min_y_thresh is not None: + df_ave = df_ave.loc[:, (df_ave > min_y_thresh).any(axis=0)].copy() + if not df_std.empty: + df_std = df_std.loc[:, df_ave.columns].copy() + + if item_to_color_to_hatch is not None: # specific color and hatches to each fg + colors = [item_to_color_to_hatch.loc[item, "clr"] for item in df_ave.columns] + hatches = [item_to_color_to_hatch.loc[item, "htch"] for item in df_ave.columns] + else: # no specific colors and hatches specified + colors = sns.color_palette(color_palette, df_ave.shape[1]) + hatches = htchs + + if show_total_in_twinx: + plot_twinx: bool = True + else: + plot_twinx: bool = False + + if show_total_in_twinx: + legend_x_anchor += 0.14 + yt_lab = y_lab + + myfig = MyFigure( + rows=1, + cols=1, + twinx=plot_twinx, + text_font=Project.plot_font, + y_lab=y_lab, + yt_lab=yt_lab, + y_lim=y_lim, + legend=False, + grid=Project.plot_grid, + **kwargs, + ) + if df_std.isna().all().all() or df_std.empty: # means that no std is provided + df_ave.plot( + ax=myfig.axs[0], + kind="bar", + 
rot=x_label_rotation, + width=0.9, + edgecolor="k", + legend=False, + capsize=3, + color=colors, + ) + bars = myfig.axs[0].patches # needed to add patches to the bars + n_different_hatches = int(len(bars) / df_ave.shape[0]) + else: # no legend is represented but non-significant values are shaded + mask = (df_ave.abs() > df_std.abs()) | df_std.isna() + + df_ave[mask].plot( + ax=myfig.axs[0], + kind="bar", + rot=x_label_rotation, + width=0.9, + edgecolor="k", + legend=False, + yerr=df_std[mask], + capsize=3, + color=colors, + label="_nolegend", + ) + df_ave[~mask].plot( + ax=myfig.axs[0], + kind="bar", + rot=x_label_rotation, + width=0.9, + legend=False, + edgecolor="grey", + color=colors, + alpha=0.5, + label="_nolegend", + ) + bars = myfig.axs[0].patches # needed to add patches to the bars + n_different_hatches = int(len(bars) / df_ave.shape[0] / 2) + if show_total_in_twinx: + myfig.axts[0].scatter( + df_ave.index, + df_ave.sum(axis=1).values, + color="k", + linestyle="None", + edgecolor="k", + facecolor="grey", + s=100, + label=yt_sum_label, + alpha=0.5, + ) + if not df_std.empty: + myfig.axts[0].errorbar( + df_ave.index, + df_ave.sum(axis=1).values, + df_std.sum(axis=1).values, + capsize=3, + linestyle="None", + color="grey", + ecolor="k", + ) + bar_hatches = [] + # get a list with the hatches + for h in hatches[:n_different_hatches] + hatches[:n_different_hatches]: + for n in range(df_ave.shape[0]): # htcs repeated for samples + bar_hatches.append(h) # append based on samples number + for bar, hatch in zip(bars, bar_hatches): # assign hatches to each bar + bar.set_hatch(hatch) + myfig.axs[0].set(xlabel=None) + if x_label_rotation != 0: + myfig.axs[0].set_xticklabels( + df_ave.index, rotation=x_label_rotation, ha="right", rotation_mode="anchor" + ) + if legend_location is not None: + hnd_ax, lab_ax = myfig.axs[0].get_legend_handles_labels() + if not df_std.empty: + hnd_ax = hnd_ax[: len(hnd_ax) // 2] + lab_ax = lab_ax[: len(lab_ax) // 2] + if legend_labelspacing 
> 0.5: # large legend spacing for molecules + myfig.axs[0].plot(np.nan, np.nan, "-", color="None", label=" ") + hhhh, aaaa = myfig.axs[0].get_legend_handles_labels() + hnd_ax.append(hhhh[0]) + lab_ax.append(aaaa[0]) + if show_total_in_twinx: + hnd_axt, lab_axt = myfig.axt[0].get_legend_handles_labels() else: hnd_axt, lab_axt = [], [] if legend_location == "outside": # legend goes outside of plot area diff --git a/tests/data_name_to_properties/checked_compounds_properties.xlsx b/tests/data_name_to_properties/checked_compounds_properties.xlsx index f7d745ee69ef5595d3b006a4b755fd13c77c983d..bd814d57e02748dd9c00c5940d799956a036835a 100644 GIT binary patch literal 7239 zcmZ`;1yEeuvK`!AgA?4{Ay{w^Hb9Wz?hfZZwt17_2;sO8wL;#YC4^Ubcf*SaASM_vZK3%2`CaTU3jxKEP z935HT+u16`D4}$+W4?Zr-(9xXJfYyAm?~K7_m<`3jWd;W55DtJhr})ludqio;SB)0 zsXTZ>xLne}gf?Q{)=SydyoR72GCi z;aO+cEv*ku%W_S%=Ai$XYI7bFJljHE=Wz25u08PZZo5?>nutRwAO2K70_dn-|1f;_p4) z_*VBOKXne?3%&eP2TzZb12F;3Z7n~iHxlNltkySIiY3gWMI#=JQAYsyv=9SUDuo4t zZBwArMXz{gi%U=h(b*N1;6g%$<}vMe3KAav-pdI`{Ain&kbYnyqOLz%q%YvqB`we* zBGRTfNLL$KMw+0SEFK=fT`wL@aO1M7aZ(P{H}}WuV-7(|7|oek&uB@>JUkLFdC7E? 
zP^K6$&C0fQ|K>;O_4sAMBW+Fj=j%(+DD8kRYqRa2RsPLP`r9ljOn3kwiw*$5d1B_h zEt{*QnVs36mE$)zhq`($3;ei#$KbK|7FTvCy}SW+H8?I-?u#|)L5IrZ!C1g(TTj#Y zApdq5Ud+K1w8I3Yrc7a2=G|W-QZm~+{GR7Wtj0SbVM%B2jpo@FXPWETR@SsRsje0EMG1PTLq(b9Q6fbGNYa%4~F3kd!mPo}Cy-;XmT= zX;WD@?7K#1cZaowsFuyQ@d;T_V83q?T$9{Y!6Y6cCz_C8HgLq<=c@ZKH)>$PmD`SJ zZR7Mkf8F3jq$3IJnp$&CXZD3{d!aZ|O@Y;z>D<*qMSSs>XmypzYv0xVOv^0fXg!rl z8;|A5nDE~6vG-_grz)Xc72&<5W3KyFv~>3sHif(RmJ-q%`(D_1wOeA76ab*3`^?Y; zOo{=+ty>b&JZbZ?_}dfUkw7@sLvIi4jMc=86mDtf>^L}d0AKgGqN)#Aye?v`GhyOw z+{`FLttt^>Mqj|COdw^?68QcywYk5L%PZSgaKt9b->MYq(_wSwXyTpA&SvhX#91YY z5yuv0q|;widhbH%?1gZg1xYL(T_EfW6MEi*+RzllNnNpzQ!8=)XW2dOnk%R2W# z(I5?KOq%gUVCP5>+wZDh$|uimfg9-?g!MLe!TGMQm`87f4YzBCQMe@QofY=#lGaNVVXs9uPQgL zooyL4vvj>t!|W+D?wY9)OCG`pup0E)8%-k)yYY`#S&P)*DUUwrl=tPLVkzUJV9S~$5fsaR zJb$6yw#_8ht;ydm#Z`x$O4zdT#) zd;-o*)uV(scpRwVr2f|@(1E7KB%rigl-4uUMh3i*@>c@>Qp_Jhlno|muyG?QQQ;^F zoHve-6Rka>W^b7T?1D5nO{WP4B*P&hT+Whn8Z_q>3$!S~i|ZQ0%|vaqMhabC_Sr0S zkBbIVA~gge(&q)~Yh=}-E}32pol<3wz!|a|&Q$@&MZpyqgC97|l7)T{--rkhR;h7Y z6IkVjCQGQs`!s+e^KNFb@M?Jt)n}?yx5$(UOH={Dhdcq^Buejs+b|Z_xV*Tkymx^E z)e{6fN6k0qUk#PYAmW;iEE9-Eb`YNy+Y()zLbJR=<0grRGvG+C<1N@!MiyRrI7X}N z?2h?<7}*Z>?+rx1jUmPg831q<1ON#CY#<&E&NeQVW@fG~Y=8aywTqNz`l_!B6ZE!6 zY#0hsEws>Gmv9y3_3)|UPGyrKR57{`44}E`x8ElQ4)Rh8xLJ^UXs)I1Q;Wz|(AF*c zkD5P@7$#D${uEHAJO3~|E1dgKUC&3}M!Ny=;mf4OQOc>Fe;>DBU!B@|JPj8>dEBYAO>4_eMB&54UU97iSr4ujYi7 za?uSE>=RZMYsK1^^?E#sq7(all9YJO`_CcfVGl@pmY;Q!cB2!nJ$Emqo%myQt{hH_ zFI_8N;Filzn5Gek!{5%vQp-=&gcU8Gty;_tuTJ9+ob4NakHvp6hUP;ult{1etKie! 
zrIEV5y)ez_hkEYzft8KT^OCt^T3+~@HAnxHR)?X8E|Dd2d^Y@AHgrP*a-|4UxAXL3 zDt$vg{-NH<(0-$=gX66xj-F8OE)^@%l$e#`5BVy@kOik=uFDj3a||i}eS#Rw=!g^y z+IdrMl0uC0Q+lNzF^I!p-gV!2Wtku<{?1Wu?{q=aZjp2MsZ`q#OAu!< zIC+!}L(MKOrWlNVc&S}eZF>;6qM@LleyBOHurhPSRQtg-t<}83%Ymu33v$5m>*hQ5 zs%Au@XHG-!k#Eu2D)lHW>ns}6l7UU+_x)il;JX{9)@J`M>i z`>9_3hM{=M?Nc!Kq+0JEI6>giF64c~FZcHbPUygaRoX6B_T1PUnf}BVrYc|}Ed_)Y zT|pyu`ikhpyGtrkKh#O%)0A09mh>sr;pJYzhsW-4AE%dO3aLuGErE!nuhmW`X>3@{ zdaTdC<*zq~9UR}&v!M+L&uP+%@Rszib8gVuKs6S5qQF@&0S9~|0iwGs1x zsU+*T1vJCWk#-yf_@PO1sV5;qpnEuI6L3PxC*1Nf*aMAV0+kTS#4$6d-r157tG~YX z6XTb88P_)wWU)ZlCau*bEoSJgC7v=_a!({EXgfRs_%4@T zu9GotG&lju+XghV1CxuOe)~8#aqu;-2rAh0a!-L6Eb_)b%gI0MwknRxYz0< zD$}#^21@As>fpuvWYUYo-;?z&icm8RZem4^(NPF=jCY_bzi)sDbuR*caB zppRkXn}=c#wfv+8DEtu2vP`QaS-sXV(e#c4_$zm=KnWg-xtb=0TrEtri+QQsQB1Wu zU-A4rf#38E5p|XS94LR^Q}kutBn4W5?crPTmil|8L8cOJ7K3-XW^MMmOSv0qzbq6{ z#PNX87Ggs#AbrBJ&KeLuDVK7xReBMVB1%aeN~H}&G9Rb{q4L!jTs`Or-RL)Y zh%>|7TIu+>U!Wkx)B1SDaKV8;6bU8AxNJ}-v6&{ZPjat4cF;?=s>e@whRxIcduSdw z#Wqp$TBBYwI4z`We?Vqi6r^{L#)_CY-17fa6@JFA*||dgMCLqRN)?j7Xm_V*sjjrf z;2gl6@veFg)pGDDxlrpA_)%(_WhogZnq^C&?5f$R-mt$+dg)dDIY_g?1RoufN|ur_ zlC%i1a{57`wB|Kba+ak{Qo@@>V=#zn5Bh*;Cz+VyT!g}$d?#5?%mbm)8x%|lyRYSs z9I|z6@YTb4JlEB74#a4`K-!08(lmKlX8g9InH(1-1NaUVl0r;$M5ld3SIUs z!lpbTv)kdTqwe*xtWYnvs;-serD2q8Pgq@cy(4!^hQspsb-AV9qk})|QABO_Ai&J? 
z#JMf9JVr3#!Im6`nU#yl{E2@;w66z0HEusGyyzGSKxRX&E4^o1gHf-X)1(JlhB_cKdS_`4gA&Am1bwIz0BlYonHio)O6-hb-+)HwGjIyU(s0^{ z6=boMsLBh+li*^CqMXwuXCQH6uEfkj-gAFl>--u{aD9WB7^YNgOAl_w=-npH-ZXjX zXjY^scyrdk-KHvi*?om*PGf7FIm{pnSfR}dnynzBe>4qC_7AL0YTr;E7+Mg^g}%&& z;uuZlIg6iK4MHGM^Np$Z8Dos%FG!iUuoeuCbv;VPBIe!3&Fq-vp>r1kQ_SHlJWoXbCCg!#ohL z#DM4_uDlo%Bv_FgV6RD_g&DZe;-h#iUpBRH`Ia3BI{`17Ng)EgTeW;HPcGr)PqX3m zV%U~07Gl)KU`FJ)>q-4nYir%}W2jt;sA*#8uHzoJR_>J@rf-8YYv%I1v$3qT7?a74d5_nPkffxlRw7ZzJ0YLEU|qBv+LmDy zNMl&SQEJ-W##M)taDcviRh1WXN|M_sWGd27uo+9b%_>0uIh64*YhToCzH-08xC>#@ z1x?nb%oshzJr}KYtL#Nwp=n)9GZSY|kTRc*$QSRe5K{~3!!H-CN?(VYc)Z@QOy+@F z6;C(Q`x%!x8LXK)&5MJC;qNDG+@*;!`NKlAcRNc%QY+tKr4m}#6b13l*i^64`pm{V zhX4Eq5DH3~Jq<9fK1z34+Vcj&Cd{_=e!O!%ploY3`_^-2;srUua8xH=boAb$FX6T6 z*9%2uy2>WV+C5bx-j0&^jAiS3{BuL%RafQ%xuDGg!l7VT_W`9Kv$ z2^mhXrb>Pr`o>v*`k_qeL`-uRL%8JZ<+vB_1QMxswk1)8VLjkLNUKW)k_S4cbcX4?^1hsh|j0xc`u;6T4cfmIT`#jA(eb2Y{3lgEi5g(&Y-a1i#`=5B zPEfFi=0Ft#W1AT&e(r|mD4VO43`1!M-!4e+jyLJPxQd?Zps>VJc+AOR0b_9Ixs$jo z7TNZ)cfXz2;wcAwwP7&Iary{k)w6yPU&h{&7yz$I@j#Q?KZ9CwGAyzu)9i zW9EU3dkOWkqAP<+UQ&173B;xFIJ`89Je83RE<3pU#B)+Sbbd(@_fba5k@+INP%+Xy z3H&N&xvlWL$i%qj{p!x+`B(|A$W41Tdr|nrtP~QhL*>o0rtOs*)ED-rhAqQWpcsEr z_xGEnv4exn@8pe+8&`g6*-H5r4Iqr!2g)h#8>E5c5HTgaM|C}FQ@WEk>?^@^atw;= z!F1ndXZl+WvcfQgK1yD_i#~T@)nu9&K3~#i0Um-&UKX-KrTA}FgXhd; zE*d01oaubI;LYQVL}$s#XVuK-T%FjBCmh{;ovGAgs`#0<%Ls)TcpuL<{-b&2rZ8-! 
zwP&a(0}pDB!ds3w9Q>i5DrhTzKlo@I@kAa)nEjHp$+t6z@J1@eHKNK~S&v@xBP~XL z#Xck!c1%hz$As{t=|MUsi-^coRTLpE_CwqkWV&dY_~Kc<#KpBXz8|cXY)9d)_a?S2 zduJ&BRuR|!MpO9{slT2GCi+9}A0hoG#ea#aHeLk%13PLk7<&sqSTdClUCdgBM-Ucx zgB9?)sd*|0xE$PZ@cMq$vaAmfq&uPTMP9 zh;9OXToL{L+uN_og`h!fEH-$n*y`Oxf3G6ZQ2nJt?M9wanTB%8wTu1rXF+C$NCXi- z1vm@yr@5^s>0s|_X76gK;pu4RqW@b(<%y%eRn%)(wC85c#qTJ#&l<&a(18-vtzYmX z*Vw_;A$IYrd|nk=vO+=c)wRnVO=-Oe?2w>^h?b`$E_txnG$;`mf|x_)CaaA7Seza9 zrq&r@-LKIHlkq*Y3fv{pjaO}e?h%XGD5*mMQriFyaH#qdN0o(UW7IeiP58$8+9ihk zsT6&qb04Jl^;<`Yobc)}$_u9N8b7^2Nl_uTOgOEw->g1XL+@Hb4Gt1nG@-iL;qK&J z#VE)}ArK+fzA2e(sS=cdtj}iIIPQ>^%c6O6*lZ3;Qkt7JcgTKtbaI z{{NxRlWP9@fSwrszayaM=;tGtKUe?&1XcN;=)VUt&*9JK6#u{no*w^SrWVf)JnvKe zZJ;gy1@SKf|FvuN+{*Lg#NSpxP+zeBvhvU2;yLtr-TnvagZHm0{v7zc4*ml)!~b8; z|E`M9!O!#jA8;qZU*P{I>z~_so=X4N`SjHF`!9*5ssQ&iRssN!p1!Z1D){+}-*^85 DDeU-7 literal 10552 zcmeHt1y@|z(smQv8+Uhi3GVLh?k*i9!3h#1kPZ?wXmEE67F-((?jexi?tDFS=e{$O zx!*6i_w?$sSFb)*tIpo_JXO1PX(+?O;Q|lODeE{Oyek-2%iT?XKTXH%~hugr#|QaP~$9==$}R@UNx9@bia z3tFjB@$ z<s#5+CB@AtMU@pmc*Qw8uceSvvdZS1ib8Id>ebNaofs| z=U~oeoGlSb6*e^w&~T2e$FJaan+W_Oj+d`8a@<3?a!0p;L;7Wn#HKYnA<;&SklK}X zy>K8~M*X<}b68N(;83$*m->Mbr(ISr&?xO~;W>*q{Qp&!P3T;$6K_QCEm<{pyv?ME?45UCE;&52`;EtgC z;iVOk_#cB*mz%uhvDmo6G>yI$k*T+yFA$mOy;9^n%QwDY`OIF-UZ%+_`7`=-BrumX z7w0MutzP# zX%AgI`nk&fR2tr(x0U@uWx${d)x{05uC~K-$7-u=FCl7wa~qegGntG|oQD85wah^+ zYF_MX?g{w;x}1w(qk7(xku3i&0?55(gU3T*u@uFt&oF85gyJSSF($FknVy zPO@QIz>+`wuZYvFnV{%FHS0&-7@xSC{bRM=%l9 zC3k)PF58@Ek_Ri5XC9tA=bT-q@=dj`Ks3aGz8L=Y({{rir1YN818??9^1*+??{fuqGFr{Ir>Vj{`kw7{MHB`!TyegY1G_> zfKfvl_8`{Gd|8;WUIAld?qV$gPHRaKIsx!vtBZnKL~Oyo#1hc*5ox5pAZ0A6-lBo# z;%c$9H#-?cWk0~alem&-2EzcU5teNX=cRqhOp025pCm1;ZZZp78n8hx!uWtgMUk%z zFBLI(HYXIUvtCLL-~HqII0g^>kfm?^+iHr^rm9(*t|XhOrk}NYbGM6pnGq+eWM{H2 zivc{4%^txQ^U?Y-?P||I_aW-eaWy7E@;IC^>aMd7(G@f$9N^dhdlkEnYIBn=CNVC( zjXOYN38P47BWoAV`BvUcFuucO^Zkto;SCJc24nkhq$jRi9cu<0?0ocFunD9;MV_Uh znu$2XeO$_XuQCD1;plfFvjLgNE87d;^hG>#*~Iu{v;8(t5EOM{lY%9ltyZ`1+qQ5y 
z*}mc8oUQT|BWr~%HDd8uAP16#{9VvKnDIE*uhH~Ag`iPIlQRyXsAVF~U5Qe)$5I&i zViv3n$@gtH*{y3lS$=L}Iz1qybxl-u_!_~-^r(J89rEIp6V5emeP9un7ig+7mjHgx zYrZo@`yEHaw9i4f8Nq%h{+2k@-Tw)QP_d-cGw469P*4y9kYJ#I_*dZgD=hvUL|~vf z9TZmo-B-Dms`3|3U74<6P6h=Y1x_a`ROP(9NUJrz&T`5FmxtEs`9G6$@C zr2o;_fX@{l#wIMzMK?=v3^v?L9G4?uc=xfhF<2z4g9`F6d3YS`{U1Ne4p1@Dy>VK^ zz=N#WxPm*^jC4d%`Jbp|&ReD;d7Z{NUvN8lPm&EoNpz%sJLLAY_I^cJeNz)9qacPSt>m4y9Qn9f4a<)fwg`ObSq#^3jh#8 zpZJ4Ay&Y_ALEap{4_v>IY36&?ghftZeP4w0v zKWg!h;3GjQjW^N@@z}366YN({;!+djR}%HB)zTVsL}Hji_0qK(mckydKWC3Jd79N1 zI;fz+i-zVWlqFE4AX@uYA{@gZSFku71g7%r*q|FV4}vB)Rcy=g$%7A4TE@c|?^IhY z6-g&P;FlJ{QZ-$1s^~HyPskne)dz(bIJ}mBsMO0MaWq;DpYA!lT#+GOp|Z*+{-!RP zgw1xA>grrR&r5jqu1E@;-zU)RF4Vd=;4b1wSK;~T6brnNbHjfRZw#f-f~D)Dp5r7# zKV7O&eVtP=MUl2@FpJBr+Wh`A)B)SG4rbI_VKH%>eSe2dXP)tg+%nq|&R!~Jt6b(? zoQaQOCm(8OYpZfTqF@|SWcbzGAfPK??nY>1qkybO*;xa+bAwpg7TOL`&tvK6bgb5{ zt9FiwW*RS%6!UQ-qDH@e+at>S0+!ZDvdy2&RS#!&B%TRpgD{)Wl~a0>CKXS8g;%X} z7=9lIMi=bV>xQB8I%M&jL%5+MymBlT~zQ@_!Ato|@YvD($GbE$>RTba3U`AE+f z`!1|sAx+khTeawkp!atA2K6_8_}wWA3?0?C<$w%ZFZ_l|O#Oriy2OG)GLRN=PLTf4sjNNpQD6-5(h- zPgpbGt*Tqh=|!9tqm=Ur?M68Jx^sD8h>gAd+)c2<^%44al#iw zRQAg+h`8*R!=r0*-oix4zIevSTCOc&>6rzCg1S$tVPHcL(j(Odhl{_2ms7o734P=$ zS6P#_l|;f4vhoZi_7FqE0CW}7lV_}VYK@}J+D@mDYjwgFiZRR8^}SxRzAZw?wy8Mt z3|+D&VTMMj&6x4h|2FNNKgKEv-<^~6i}~C<7TPsZuq>`i`A_1BZ@$yj9dE{2*uuZ+ z5f3FeZ`tyNN=2U%h;ioAD3Gf*ug5xeUkxB9s&1&Nx@LwhnyO#{=3loKF_H*K`k{R~Dk8}a9LiD$& z!=#~hcK(0^7Xh;*K^PEOQ4{huy6sBJRyR8mMr>V;JWQc<%I27yD}+4b=tIg@_xyv3 z23o5{l6YFnZEa@dUDK(su~{&Ricu*=^k61+-f-HRXtot{%^>!?(LvcHqL%2=+nWRI z7{;_#Y9&De&G;Z@>RnN%c5!^jyNlfDb>P-(9~rnI?dWkzp{U(F^0#l0y!3&l0(B26 zGLx@hA#5_fz?CM*qm0aCTLC<(?3WEzoCR$eo##AQC)SjjZ>g<}LE=2z6Owi|Z!8_O z741=m>V1=0Yb_Gm)C9k3#N#Isw@>%$>ZT=#)!v>$=K|ZvR@y!pJSSsYVurc`t|W?z zfu|}RhAc6@p1psKI6x!~(n`p#x?Bum%PJ)E;9!fBV0~kx4dNe; zyLUNXb+}#_B-^3Q|DL?0Te};kcqFu8qQGb5U=Ls6iXHk+XX%7y)wQ=y$geCgF1@sK zmv01|f7^jlW}8H>W1CHYLYjijg{ws<#Gi;F%^8Wi3|SWqIN_3EK(@V`t;bNTOeG5F 
zY0m~s<8QN5wTA5y=V7{Di$>g=+AJ8&-&(O?cIpws^pWX%m{X`DB5(MT1EU1RH3~j{Z-+uwOPhAKrhp?^2{`V z|2KuCeCYiHkzpjUja(xvhBS80*NF8(Ywi*?Oqz{6PBnzvqAZV@*4XwMuj$i*Iypx$ zvS%@=CH%PSm^A_1*sdnnqgYyDNze&{-iS5nBocwEK#8pcb4r(%)D8`zDAfX>@V%t97>a|?&rg^N7o99? zG6V&O1#{cg#Y4CMkCJSq3|9>$4_njRfRT$nAnw8uWIP&i9dU^xbeSgLiX;dVQtQ$w zRghZhCSIOj?+kl~LdRw_f3oh8(o`TQtB81HqLk9tYVP^gNJuYak8eDo>GBo| zr11m8C|Pdy+>$SBr62Wg^5|=@iu^87&zXe5$N;>fJpWH}fTF zhC0zHXS4qP9-B~pZCtKXq?55tkcO|}3&wv;bM`*bHQPbAiP|K;l{tS;b3hKZuC^S% z-+#|=_6^6}NCbeN33i2nuhdx=z*a=>V3kug`8}=Xm10OsOr27(d|N1)gj?~3Y9vDQ z93je~t1lTB5fnnqriwf>@>E0HrJ}Z#jEr<#7!Ym=$XwX2w(ci9B3oY`NDfa@VS!J} z@pL%!pUKoF%Jaq=cV`71O}z{$=rMdvG@-!q0Z3%B6!6qh<|j=@ONVr6?*-cwCQ}k% zZ7UmD9rw*P`#st!dW%9$m^@aM%rgp@m86PtJ?8ji+-X(`BDK){gN?*UxC_1^3vZ?N zk##O@EsobJqz+SG7A&hF#idwY)WRw`(xXN&$8WARKZsg!jXoef{ceTLy|i2mu*mgV+BZUW#z81_jog?r4% zy<;(ZiZyiqtL$QyQiqoJ3EizTD!0JWflr6wpw%1V#0Mket}1=vh6-IocL{uR^RBOp z##w5;-6V;90{F)rEMszGeIyz^4*W(E&QX^y;V+161{dBEe8xDwP=fiGPf2VCDKtd$ z|CG{a6m#Safek_6E_aBWp=FRJmBRJ)5CY;lBlv}{Pm#3Y4oA?9p$=(GnlyxAdZQ6> z6SF>kdukuL#Dj=57$}Q-2-k8?1O_IxNf)h1UM*yHwH$y zAHPzF-F*8#@gOmn(;~z1IpB1uj)CLz?fKOa`Kxu~XPCpsVybUsng+2JzccOHp=~Lt zA#4T+-y(k+$H)Y@V(wKck*-%D+liTEH21f$yzVGsMK^(wphMkpl~LnT{J~ zpjN@SIhfI*UJV%T+q3uEE{pX?iLH`lKCe7@>s_2aoda@NQ;eqBA@5D=;TnpTx+SB) z+JqdE7rr(1b!2SbmX0{ooavu&28)pihVrh}OKTsGd@15-UaT2AG@aQQExO=;4tkde z{NVrk2xsTRi4ZwU=B>h|=kPE>=NsKkoKp+x9_~sOJU=1&xO_L6g7bqGHiO>rh&&pKv5XIA++G<8Lda^~RinJh!}FNiNK&}@qLl&1riXUXK?vgOE+uV8 zTAR&x=TaU~{cjb{01)^7yy8W*2UW+*R;y7)oh{!!zn4O;-H>hNH(5Azenf}n27XN^ zf=e{OpRtG&ZLN2ehX_wMt!=ES6GpDz;qlYA{morkw?U5YMGE4`ijipq3isaAk>YD& z;;X{Ce`;QsOky~0U_8WN$DiMq3@b}0R>Hu}luj_^qcLYiovM*}p31qDX3G-*#;G3@ zRLmXYe{X3zf!0W_Q10VzllpbIZX+%(4cD3{SdFN{@tM7CqtCa454sgP%40jFdVG0l zTHlNLwTS|ni-}ps%aKiy!dBtSmqV~jM=yD!(CX98rfik z&XNH=KksN6HJpK6qzBED6WQoXO_X_sU|6QPIV!?Oex|OsmCi#R3R1d0K8}6MnuHRX zN;d{ro{2K@4qg3L6ybL-io?XPl0Aeiq2=n3GG15LvthPz*{NiXYP9i6>lB7&#wH=z zqBNHc`UHFK4-707SeT%*VYejVPG|A)~)~#@?cG5aJN})4&g168>svr;{@3wsY*ca&G~jN}$unegzG|1RZLmh45ON 
zydzwz+0Ff0T0_Y}868EFG~*h??ZoQOm7(Z?jfjk)8UnKgjIap4+>guOslyR!+`Jx3 zvJzBx&KeP0g{#?8R)YJ@3iOz73-fdKMDJA8`B)IEp3hJC&n(6?3p# zg`OqEBrZ@eoQE!cztb?rx}URHVu8~P9O5TeAW$$SN1PN~kR?$`PjWSgIlSyyVr+iL zF0?{qJ~$R~8z!S72^UvLPNJbA5IV=EhjtBst|0^tc zgZy1=z5i9eTyk6DL<^*-4hEhd4*P7eBD)Z%YQw^}T1p68RK>6{NWI{)DfAZFUz5K9 zH#;<;EhtlK&OoRz6$kX=ad*?w(w&pn*Lwz{OH2q%T(R#DxX$bdXH!bUP$~?mondq`ETp_=;Iwh_`q2{V=LC)v zvPn#TPMHDNA+|%_mnnytqsU_k3LydHT0kuE0K8Y$<1*>M-t3n5m_?o?$rYNFzQgo_ zaCz+Tu?&U5#iL}QJstYQUpaQuP^#fR6nIuf@DGN85?ki-ifyt6(D4FXo4r{|wyoCE zr0pGM4`p22L_a*A^;1zfyJO|r(W*Qh2h$WnTrhr_^G&z&wCN+dO2 zrq@mYzKAI8m(`PFzcyK;y^bX*wvzGg*y{1RP*Z}bb9hUAbEx_=(wb#$18=g_f^G%* z&QDO~*z+L`FM#1YVpY|>;SxS|_rOKR&sd8es}}JcRkY zS>n}bAi3JZPsZbp{VB}$p``Klg>&xHT1F)O>7i5p;XJIX-BHQI_msP0$ZrGct4+AO zXLDwACg)M8yDvE;6D5(o?oEn1|Km{riMMr%4-{esP$1#{0VD@YFIyXJke8#I{qJ~s zt*e@l0j>4v{|J`Il;}~WL|)OMVyP&MA&FYa*3;Y;RI7G!iOF)iz9Lg-t*BPKNn96w zWw+|rZzgW)wDDk7v7@vTbjo=qN?#)6;XnLvRJAslf8;2=9g0L}c<8Hgr=BtKan6VY zb4MxbE}3iWtKeL{6(0n%)DrtczLT@ndXs2ht0U3#-54(ISi!8Z+Gcu_TXJ1{M=$Y= zyH&%}kM@yOUEF5^&SL`3b-6OkLrhr0rrzD>Li`yomc8|}KaxoKf@fStXOuW!;uox3 z)N~D;hxqoZClj3!nA^)3@Y1ut`8TY?)=Ql2tUy`2}YO> zqxjYBI2a|5Pu$Cg(4JD3k4KlmL0)!^1EDZlF^N*XkyFhJO&@Hm8&m9^_jl#6wEqcY-1 zr&IRn06{PWw>!fE%Zx(5a>rnlEui}4Uf0MAMP7n0?1uN3Jxwqe8%*v>FR8Zn0}PPe z?0S*C%cu>Le4>tWcaL`AQp0r=EF9dWm|6uySyCn{3WFkW`2t(Iz$?Rar(nkTo#N0q zX?h7QhW1=UY7PsmWh&yIkBmjRT2Y<3r52hV$f(+p!E*OxfHxo3}sa)?AsN`bl`xjpxpDMo_#kb}6IbCJ;_adk9;3UhD&I(k~U5TUb^Wb(1 zg3|gw3BUZt+C&lR!FEtz#e!DoZQQLjyxcv!Ijr5iZ2xGs_+Mcj3g9@fsd6tTVZ;&q zlk`wvMv%(8NuQir7DQNv7+)&?Cio0Yu*272_eHk`X^!^MS8CarXJ9Y?B-L~HIphlF zBs!)T39hk-Mg3jdwwNQ0Z4uo}ej~{UArA*bbMxd;;3qbha(;^r00BJM>vb9mS-Sw7 z^#BHCiE@}Uyk}8Gff(rlImxitF1bhcrzV52qRf{uuPj{XsfDPXXC&@S^TUSlprI>h ztZuLl9VdJZPd=PL4d?b&+7QMO%Gw-X$c*3I78BwkT542Xw2H<$wZC?KI2X~0kNDx+ z2biU^e{Gm!w$ys$-fFGAxlnCxA)RF&WqJg6d{=3veIzMQVzE!DbiV}5k&b$YCW2Bj zIKNTej*pqJgjagTYyQO%>wBzg_WO)D*3+xUhuFKoVNXhRrbqR58d-jtY?zD0yHop_ z|6J~8!o27Yg!7wkN>$J)bIYdGQof8brF4mf|NVVB_;CZnM@(R0g3&T 
zL9@>`QC>=wJvbT+rM$D|RBVV^ZyF;Tx{<43-Ag~rMK-Il5S0xZHov_LL=WBe z%>Jl24}6V1oNs5(h%jKN{KUtWe{g;5-{MMWP&V@EWSn@@_y?lB_7?+9pSqg$WKs^i zX|}G<4D!ZVEYIC$dx$ahUCLRuu#)b{i-kE&9lW=&oo1)X;gz{;cf|x7t>1MJzs4v6 zRw$0cS(U5t^9CM}6cm_Sz3e3p(NM%k>-|jXJkTIKJ8jn0_qm#fP{OY!p&7IdLs8bE zc#e1iQd#CfYlj`r+aeh!J-g7~c^~Ar)^;4JKE`cE5>~Z;8Revz0Y^8`G2$bj;Y(_K znl+^B+JiF8F?J+-H8s)eJ2zdY7z93}d26Q>6m0T<{BO#HdBz3}A&>W{3`fshryqs?a*SX#Z!#KMw4E)$(fv>`yJ Date: Thu, 28 Mar 2024 22:28:24 -0400 Subject: [PATCH 6/7] added tests in minimal case --- example/example_minimal_case.py | 220 ++ example/name_to_properties/ssss.xlsx | Bin 7240 -> 0 bytes tests/conftest.py | 2287 +++++++++++------ .../compounds_properties.xlsx | Bin 0 -> 5495 bytes tests/test_minimal_case.py | 184 +- tests/test_name_to_properties.py | 26 +- 6 files changed, 1851 insertions(+), 866 deletions(-) create mode 100644 example/example_minimal_case.py delete mode 100644 example/name_to_properties/ssss.xlsx create mode 100644 tests/data_minimal_case/compounds_properties.xlsx diff --git a/example/example_minimal_case.py b/example/example_minimal_case.py new file mode 100644 index 0000000..ce81bd2 --- /dev/null +++ b/example/example_minimal_case.py @@ -0,0 +1,220 @@ +# %% Import necessary libraries +import pathlib as plib # Used for handling file and directory paths +from gcms_data_analysis import Project +from gcms_data_analysis.plotting import plot_ave_std + +# Define the folder path where your data is located. Change this path to where you've stored your data files. +# folder_path = plib.Path(plib.Path(__file__).parent, "example\data") +folder_path = plib.Path( + r"C:\Users\mp933\OneDrive - Cornell University\Python\gcms_data_analysis\tests\data_minimal_case" +) +# folder_path: plib.Path = plib.Path( +# r"C:\Users\mp933\OneDrive - Cornell University\Python\GCMS\NNDNDD" +# ) +# Set global configurations for the Project class. +# These configurations affect all instances of the class. 
+Project.set_folder_path( + folder_path +) # Set the base folder path for the project's data files +Project.set_plot_grid(False) # Disable grid lines in plots for a cleaner look +Project.set_plot_font("Sans") # Set the font style for plots to 'Sans' +Project.set_auto_save_to_excel(False) +# Initialize a Project instance to manage and analyze GCMS data +gcms = Project() + +# Load metadata from a user-provided 'files_info.xlsx' file, or generate it from .txt GC-MS files if not provided +files_info = gcms.load_files_info() +# Load individual GCMS .txt files as pandas DataFrames +files = gcms.load_all_files() +files = gcms.add_iupac_to_files() +list_of_all_compounds = gcms.create_list_of_all_compounds() +files, is_files_deriv = gcms.apply_calibration_to_files() +samples_info, samples_info_std = gcms.create_samples_info() +samples, samples_std = gcms.create_samples_from_files() + +params = [ + "height", + "area", + "area_if_undiluted", + "conc_vial_mg_L", + "conc_vial_if_undiluted_mg_L", + "fraction_of_sample_fr", + "fraction_of_feedstock_fr", +] +for param in params: + _ = gcms.create_files_param_report(param) + _ = gcms.create_files_param_aggrrep(param) + + _, _ = gcms.create_samples_param_report(param) + _, _ = gcms.create_samples_param_aggrrep(param) + +# %% +for param in params: + print(f"'{param}': ") + print_checked_df_to_script_text_with_arrays(gcms.files_reports[param]) +# %% + +for param in params: + print(f"'{param}': ") + print_checked_df_to_script_text_with_arrays(gcms.files_aggrreps[param]) +# %% +for param in params: + print(f"'{param}': ") + print_checked_df_to_script_text_with_arrays(gcms.samples_reports[param]) +# %% +for param in params: + print(f"'{param}': ") + print_checked_df_to_script_text_with_arrays(gcms.samples_reports_std[param]) +# %% + +for param in params: + print(f"'{param}': ") + print_checked_df_to_script_text_with_arrays(gcms.samples_aggrreps[param]) +# %% + +for param in params: + print(f"'{param}': ") + 
print_checked_df_to_script_text_with_arrays(gcms.samples_aggrreps_std[param]) +# %% + + +# Load classification codes and mass fractions for functional groups from a provided file +class_code_frac = gcms.load_class_code_frac() + +# Load calibration data for standard and derivatized samples, and determine if they are derivatized +calibrations, is_calibr_deriv = gcms.load_calibrations() +# c1, c2 = calibrations["calibration"], calibrations["deriv_calibration"] + +# Generate a comprehensive list of all compounds found across samples +list_of_all_compounds = gcms.create_list_of_all_compounds() + +# Similarly, create a list of all derivatized compounds found across samples +list_of_all_deriv_compounds = gcms.create_list_of_all_deriv_compounds() + +# Load properties for standard and derivatized compounds from provided files +compounds_properties = gcms.create_compounds_properties() +deriv_compounds_properties = gcms.create_deriv_compounds_properties() + +# Flag indicating whether new compounds have been added, triggering a need to regenerate properties data +new_files_with_new_compounds_added = False +if new_files_with_new_compounds_added: + compounds_properties = gcms.create_compounds_properties() + deriv_compounds_properties = gcms.create_deriv_compounds_properties() + +# Apply calibration data to all loaded files, adjusting compound concentrations based on calibration curves +files, is_files_deriv = gcms.apply_calibration_to_files() + +# Extract specific files for detailed analysis or further operations +f11, f22, f33 = files["A_1"], files["Ader_1"], files["B_1"] + +# # Add statistical information to the files_info DataFrame, such as mean, median, and standard deviation for each file +files_info = gcms.add_stats_to_files_info() + +# Create a samples_info DataFrame without applying calibration data, for initial analysis +samples_info_0 = gcms.create_samples_info() + +# Create samples and their standard deviations from the files, storing the results in dictionaries 
+samples, samples_std = gcms.create_samples_from_files() +s1, s2, s3 = samples["A"], samples["Ader"], samples["B"] +sd1, sd2, sd3 = samples_std["A"], samples_std["Ader"], samples_std["B"] + +# Generate reports for specific parameters (e.g., concentration, mass fraction) for files and samples +rep_files_conc = gcms.create_files_param_report(param="conc_vial_mg_L") +rep_files_fr = gcms.create_files_param_report(param="fraction_of_sample_fr") +rep_samples_conc, rep_samples_conc_std = gcms.create_samples_param_report( + param="conc_vial_mg_L" +) +rep_samples_fr, rep_samples_fr_std = gcms.create_samples_param_report( + param="fraction_of_sample_fr" +) + +# Generate aggregated reports based on functional groups for files and samples, for specific parameters +agg_files_conc = gcms.create_files_param_aggrrep(param="conc_vial_mg_L") +agg_files_fr = gcms.create_files_param_aggrrep(param="fraction_of_sample_fr") +agg_samples_conc, agg_samples_conc_std = gcms.create_samples_param_aggrrep( + param="conc_vial_mg_L" +) +agg_samples_fr, agg_samples_fr_std = gcms.create_samples_param_aggrrep( + param="fraction_of_sample_fr" +) + +# Plotting results based on the generated reports, allowing for visual comparison of average values and standard deviations +# Plot results for individual files or samples based + +plot_ave_std( + gcms, + param="fraction_of_sample_fr", + min_y_thresh=0, + files_or_samples="files", + legend_location="outside", + only_samples_to_plot=["A_1", "A_2", "Ader_1", "Ader_2"], # y_lim=[0, 5000] +) +# plot results bases on aggreport +plot_ave_std( + gcms, + param="fraction_of_sample_fr", + aggr=True, + files_or_samples="files", + min_y_thresh=0.01, + y_lim=[0, 0.5], + color_palette="Set2", +) + +plot_ave_std( + gcms, + param="fraction_of_sample_fr", + min_y_thresh=0, + legend_location="outside", + only_samples_to_plot=["A", "Ader"], # y_lim=[0, 5000] +) +# plot results bases on aggreport +plot_ave_std( + gcms, + param="fraction_of_sample_fr", + aggr=True, + 
min_y_thresh=0.01, + y_lim=[0, 0.5], + color_palette="Set2", +) + +# %% +# import pickle + +# folder_path: plib.Path = plib.Path(r"C:\Users\mp933\Desktop\New folder") +# pickle_path: plib.Path = plib.Path(folder_path, "pickle_object.pkl") +# with open(pickle_path, "wb") as output_file: +# pickle.dump(gcms, output_file) +# %% +# import pickle +# import pathlib as plib # Used for handling file and directory paths +# from gcms_data_analysis import ( +# Project, +# ) # Import the Project class from the gcms_data_analysis package + +# folder_path: plib.Path = plib.Path(r"C:\Users\mp933\Desktop\New folder") +# pickle_path: plib.Path = plib.Path(folder_path, "pickle_object.pkl") +# with open(pickle_path, "rb") as input_file: +# gcms: Project = pickle.load(input_file) +# from gcms_data_analysis.plotting import plot_pave_std + +# # %% +# myfig = plot_pave_std( +# gcms, +# files_or_samples="files", +# width=12, +# height=5, +# legend_location="outside", +# y_lim=[0, 100], +# ) +# # %% +# myfig = plot_pave_std( +# gcms, +# files_or_samples="samples", +# width=6, +# height=6, +# legend_location="best", +# y_lim=[0, 100], +# min_y_thresh=10, +# ) + +# # %% diff --git a/example/name_to_properties/ssss.xlsx b/example/name_to_properties/ssss.xlsx deleted file mode 100644 index fb40a9577c3db2dca81ad6347429554189d0f189..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 7240 zcmZ`;1ymf{vK^ctfk6Vn2@>4hCAcTZ1h>I0xLa_y00}Mw1RLDlC3tWGB)A11B*-T> z|NZ+e|GnpF6=_&LD4$1)P(51U2r#?TIp&_yak%^6ZMB>hy_fS_5_ahkMD{_ve-H`6r z3T%+cA3omT8AGko)de9=aW5v37KA9V4=s@vl%176YCP@PmGA)?sM47;}jBEdm7m3R!xNs z_CSi619cNv*N>Fxhb5y zcIusbYu%?U-pkJ6N});93%OmgAwH#rkzGa>+}@14Yx_|6`Ds4!W6E&>K^oQ*n4MGB zbS@r&2ffp~>d*NJDe}7QO6BCY<~gTz^E~%#nTWL^2KSnN@c(4yqi#ACHZlN^{t^Jd zePG7jhSk{uYzzK#X8+C2fwqp*0w135A#~i`9A=B&#}iOjgX?7Jx>)lu=s=k~7zm2C z@i2`K@@tpj!5&V=JV;P#N)tk4+W9pqC9}27=W+Uz#dtd;Ea}AEaGrH>rn#PVc~y&p z8Ys^8ybyZSfd*03uhq5$bo2QP;^K5K1xBJOxZ@4a=&tGzu~N-DE%W=~=*g|=HL2>jOm8lD#mAy2JbhKz4 
zl__htZ&NYheP!eBm~F=@q21--eI?`0dzQ2>Z_BOocb-{DNU!aA;^5bAh)q!dKn|`m zUnUWf_35u&l8EL>o4<+c9)W)HhXe2WdJ$(VC!Z&CNjql5Aw31~_Dm?MdPBwQB38Q+ zCUxUx#^`HRiBLZc1f0tRQuZ!EZ_iVj2lF{SGkgR_t&{vLOW+C*n$yM-Z=AN*vkDVu zl_W+TT9}?3|Dw_{3VmrOi0ddoVlLr527|>zXgJ0Wuh90Ch0Mt{lPmPb#?~{V9I2YW z!9YGR6&X9K+0Px-FJ_aZsim?8n48vte6@0z9Jr#W86l}wswD9FPuP=lLN06iN`b4m zKJ{&=11r&C7bW(egtc-&atlG1*wREl(w#P)CTxD8K%^&N8ZAqCDIk2f6v7p*Vs571 z8TStz$7Sh4q_{XEN$AMpAt}_fIG_1_m1J@67$FOH=~H4yNvzb@^6{~j)l`Mfv?JS` zM4i%5VwX^hNg>ZWq^oppG?I8?Qh0OXYH7+K%jP38D0R%IvMwN_goyET9wNuuX90EZ zJ_i|ULBQ+x6D+};uHd^#i<)^B8QF`dY){X z>d-tHR-Xd%M&M*h5Zmp%IhX%1y8&&aYY@^|-+|^hi!hB{2^nnFjG%K$);lWf)+M#o z((=ilReo_$CL6{OG#5%Bt#ivJ<^BcJb6ux`u)RrrV88e;%>XCsoUKYi?rV*H))mMc$!MGHHL(C57~^ui6{DoK9kx<*4b~Igp2w1C7v^%}kP1f!82f3!@5-%pC^eTcIDBn=K;L2!y3i^FFSURfjsIc{X%OmEMD9$gVh6_#GAnmJ#$fahW9ZeLH<3BK(0R z#%)bdl`EQb!5XjQ0E)DmnZKj;)A$Bb_!RO%BE=X7K#e>9rC__WxD=;BvQ3m7IBNj#E1`^P$-yme(Xp{2V+ z%!;m_nC}OX?Qs8`K+5A{NUYER0DA!dfbh=*;%4t??PLK4J3F!db@^)+fu#Ao`5{Em z*B-HEAV4+Q^75jH6O!G_tLnWnKQCT7v=n@j)KtE5C)Wv1i%V}BtA=i~&hqcbIdWH4 zTx4$Cj~XQA&s}oM6`tOX%;u)vPT7pgKnG3vRz}PQ)f2avvM|tZ!FQACV<)DUHP}j% zv`^ue(=qy|M&7sfUR+;w>ccei*DNQ;csK162ifi|Xe?oE?cqFrert|5C)!MH6GC%o zm<9>ruXV?2#Qf%Udp(F$5_Y`Pm3U9qJ^VOd-3RH_d}Uf#ib}lj*f^K=;E#O^v%iwM zu=L}k^ZJlKSVZ?8ANuBYT;p)qCe$Da;gt;^hHri%&`Ms^DcXV5(weG2kUv88*g@(4DFr z)gpWvPl zr)qr_La$;S`?d*Uurr_`?gmLy!0*^GM~ox-W>4pVObV0aWlNNi*5G)_=`t}B%jpp(22REYpyGkPb~wzb!-hW~$=di0{$rdcPdp!!EK%Mx%~TnDUop0&4U$C81Yy|42^|Lt}WZmCjLpOcUp-3ZB{4gKM(( z{|4B9X6OcJMwvrkD>t&xjKHA;W%KpDa_p*@ZbHg`LWNpKITx@>iI~HU)3;g6E_Jqz z668b;AHFZ|o*ez%+H}Jj@vL@sefEf+UWIopUSuFTOV>=g#mww(Vd2nL&~}7im2rM? 
zc|WWl@uFgQR2CwUdaP$P65DNs5X~$~bypctf-xV0j2c@K1-j*D7qHZz&2d4(e@z_x4i{5w8H z3-TOhhaH@&xpN-}m)HfMf{jao#)W;jiGeaacT)e1nNA(ClLt`{Tc`&44O?H$+OF3N zsZMRI91Yw)>@z0f?(yxRaXJf&slF7Z=)3mhP4pugg;O&Wrs$cs+O?fc5H-KsH3pq( zE-`mquywMT^IR1d#rE@#QA|y9OR?;>=csjD&CC{-Vv_-C=8Kbn@7x`B09W)1!931n z2wW42e{jy*VgZHs5>nVUk`)M@lGbe2Oqgi+lH=7j7z!qg3MSc*5&gu^rz9)J`s5R# zUv4cF?xpO0ozS;7E-dQ`ZH`jWl#tQyl($9;mpay1Glx=bvfAl_>mEL)iI`j0&G2`; zFc0Jkr}qa|&DiOx`ecZ~RigpI6v``6xDFBg-GuDLZ0Pwuxu!E1Sb^C>{b&V&{%`Ap z6(|k>hFW6uP`Gao`~^@pAUIzDyZ|6Vi3)JC+6&uriTh#1WUh+XG$39vW@iky?sZ+D zMfM@fwQopR)d4yhQrSngkZkj5Lc3lqyI$+eq>b3OXf~~v_cq!hkd>O2M(=`NYsHeG zS*6Hd)+A2%5j*gIIeg2w`M(ZNr#{0jwD0@O$z1IcZ3$$oL{cJTtp#s0uiSLu z*`Ulm+Ds}@z=+9%4f4Y1D`W3kTwD8>XFKfj;Zt+3TrwGqvaqm9()ZDZ=Ysr5jK3Rb z=us7eZ2*G6I}I1KvyI?BsAFsfq`8kI$05tI&KQA`cEpH~rL)QiwVRV)g6a9Rt_4DN(R%*LQ z%_&nB$7uh~_$}zob^6w#;f)XD!i=A1)~^Of;w?u}gD?$P+1O7TujUDut7LB&Ed3CE zXJB`I!<#?{!G-R2S_B@X3)cMdkVtq0Oy`fo&sb~U_-DX98N;kAxn*5NR4Zkc*U8xc z3R9>WXrpJJvF6OCqlUpaN{|UOt$W!ek}Oss5C4ar_V!7a5gTmRWiBD#Cyun&F#E+70>V@kdVh-Cq;lmk-D zhdy1&?zRV^kf`~@)_adzMe*gO(kbIjRQC@pAXLFd#Nae#oAC;-of-GZX$(NN_%lxG zg%D3QV9&lDd7KNNg3(a1{7;OqH~w3UV3Mi5CV0Kl#@8FM=mkYZn+`GTCu9BWlY$;| z9+e?9n3ySX}JtI^0Fzb9euFAa(HcvRn z`cLgw>YTGgS!;5LuC+xc4?X>^T3e)jT&&$e4J)~at*>Z}eZuzMZ8e{~AHtR&oO!w| zjnHD(iClk?iEKLaxfNY57PF|?p1(4HnUOyjK1=*)DX3WjTM}c+RyZ zUB0k^eI#4*D+=S;A6<53Ze_B&N=d6xM&R1EARMeOxjM=2NODk0cZ5zI)MfFbW&fGh zBe>TgIT~EgRSRO9;3!`kLuN9!0y|&C)rl{_?NG|b?#F9g;|yJBag^6jd~rP?#Msn% zT>z&g5{uZbaV0!fzssuPI{%bFQX?a z0L#M_6*$l16cxD0%OpR)-R_EIxwTl8(eiaZkm2=p-ji|jbv~5A^2G}QAegx|h_v3b z86t;v;5`lCCxK7oLo|E0o!O_rYN07o0P@8?wMlJ{Dm;zm@|Z#M*)mBZzB+M=9%hZO zsbh=iFQuy*{IOx3%S+=iFLt`c#HeYIWT~0OE_})Q&-LjAEcQtuJOE&e4gg^OS)V#N zd)R=TewQ}BX)ea3v0>lh_n;&xsj3x8bLoTPGZv+)ktS}>*UVGd{;i6~>mLUhzH!i7F?N|1 z1PLMEPFlN46Q%Kmg=p<`m4u{J7y(lVt!f}aJTunStF+#;@ebja{Q$wBq}k&DFIN6BEzQBP@qE#ETAI8+1iH)`L2s7>uwCg7lqZHR7!( ziO)bAdoJ|&bPFx#l>r79QZC~vmlCP&`ljs-O&?cWBw3?=h2lw0ySHCz07(+5fE_@eeX>y`Ia5tPu71Zpbfx8ScF^``Gi zm5#(TcCdtsPF_rS;!Qpw)yl9SDmSPH>&_K7!&EA622 
zxsCNG+iroEm2U7P)}AQEA3)8{LHI!P>ln$PH4q8jNZ+s}-*l@ywLtO(-I9&*W2=PXS6x2yu-=#cH zMTV6R8*Q%Nw_aiXQ_=M@dl4NEN`CgB>IYE+3>?8WPOL1y&lw2{cJS;NVo)5gf#T;L zc=pn{3du0^hVZStk3I1wJ!i1!xef{opu&A-CNmU^E8CUCX%S-6$JV1eugP5osIsOv z%zT#%V&GN~W6t4<8$X}JX4*WZ8DQ?Vo2gfHx{*6Wi{EQmUn-ml9$w; zf9KDsa6hs%hW7PS2DEhlrjYxn;LGVbMO?0olmpXQe7<6&YZ6o>^IKc~Da6FM#(ib` z{&c(uPxz`mgAEcsIV<&q*1qEEQPmda0$0rTP_dZgZc3Fb<| zjG|ASSTq#qBA+s6 z=CEkwaI8%3#1oFKze-c;HC6mf+ii%>1iFpqo!D$%zRC|yAsy1Hu>3cSeU?|Q8fUxLm4txPiDLz41;1w|7Rb@%n0dXflmb8iO z?O1aAg{e6!SA>_K+N>bXsRiv2xPPAvkS|DrFE#TWk2d-|?+1ksy7G>AEvhBSN+!M@x?MIfo=)uqx;O~kp6+#(a=!mC z4v-buGPZgr(a#ei8mhN+pw-AdCeu(xxq7zuo88%Eq#*?!C1YWN)sxZz{`DZ!gZY<1%kMC+HThI7sq$Tq z5PK`Wke|9`y}Z>S^1#ZuJ@lG^LK6E_FvGqJLq#77J#g@NfdAh0c~H$?oBspD|Gx+H z82z{h^9Kt6_`_BFJNjQ8naA+Qjf%hFLl3Y24{eLb1|Aox{$-#p03G!&1OKsX_1Mbe z<;1_N_`{Xp{AK0u#l>Uj1K-0Ha{vGU diff --git a/tests/conftest.py b/tests/conftest.py index e8f227b..62c24e6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,7 +2,6 @@ import pandas as pd import numpy as np import pytest -import rdkit from gcms_data_analysis.main import Project test_dir: plib.Path = plib.Path(__file__).parent @@ -36,7 +35,7 @@ def dicts_classifications_codes_fractions(): @pytest.fixture -def checked_compounds_properties(): +def checked_n2p_compounds_properties(): properties = pd.read_excel( plib.Path( name_to_properties_dir, @@ -204,7 +203,592 @@ def checked_load_calibrations(): ) } return calibrations -# fmt: on + +@pytest.fixture +def checked_list_of_all_compounds(): + list_of_all_compounds = ['phenol', 'naphthalene', 'dodecane'] + return list_of_all_compounds + +@pytest.fixture +def checked_compounds_properties(): + compounds_properties = pd.DataFrame( + index=pd.Index(['phenol', 'naphthalene', 'dodecane'], name='comp_name'), + data={ + 'iupac_name': ['phenol', 'naphthalene', 'dodecane'], + 'molecular_formula': ['C6H6O', 'C10H8', 'C12H26'], + 'canonical_smiles': ['C1=CC=C(C=C1)O', 'C1=CC=C2C=CC=CC2=C1', 'CCCCCCCCCCCC'], + 'molecular_weight': 
[94.11, 128.17, 170.33], + 'xlogp': [1.5, 3.3, 6.1], + 'el_C': [6, 10, 12], + 'el_H': [6, 8, 26], + 'el_O': [1, 0, 0], + 'el_mf_C': [0.765763468281798, 0.9371147694468284, 0.846192684788352], + 'el_mf_H': [0.06426522154925088, 0.06291643910431459, 0.1538660247754359], + 'el_mf_O': [0.1700031877590054, 0.0, 0.0], + 'fg_C-aliph': [0, 0, 12], + 'fg_C-arom': [6, 10, 0], + 'fg_alcohol': [1, 0, 0], + 'fg_mf_C-aliph': [0.0, 0.0, 1.000058709563788], + 'fg_mf_C-arom': [0.8193178195728402, 1.000031208551143, 0.0], + 'fg_mf_alcohol': [0.1807140580172139, 0.0, 0.0], + } + ) + return compounds_properties + +@pytest.fixture +def checked_samples_info(): + samples_info = pd.DataFrame( + index=pd.Index(['S', 'T'], name='samplename'), + data={ + 'filename': [('S_1', 'S_2'), ('T_1', 'T_2', 'T_3')], + 'replicate_number': [(1, 2), (1, 2, 3)], + 'derivatized': [(False, False), (False, False, False)], + 'calibration_file': [('cal_minimal', 'cal_minimal'), ('cal_minimal', 'cal_minimal', 'cal_minimal')], + 'dilution_factor': [(1, 1), (1, 1, 1)], + 'total_sample_conc_in_vial_mg_L': [(1, 1), (1, 1, 1)], + 'sample_yield_on_feedstock_basis_fr': [(1, 1), (1, 1, 1)], + 'compound_with_max_area': [('dodecane', 'dodecane'), ('dodecane', 'dodecane', 'dodecane')], + 'compound_with_max_conc': [('naphthalene', 'dodecane'), ('phenol', 'dodecane', 'dodecane')], + 'max_height': [3000.0, 1000.0], + 'max_area': [3000.0, 1000.0], + 'max_area_if_undiluted': [3000.0, 1000.0], + 'max_conc_vial_mg_L': [6.000000000000002, 3.0000000000000013], + 'max_conc_vial_if_undiluted_mg_L': [6.000000000000002, 3.0000000000000013], + 'max_fraction_of_sample_fr': [6.000000000000002, 3.0000000000000013], + 'max_fraction_of_feedstock_fr': [6.000000000000002, 3.0000000000000013], + 'total_height': [3330.0, 1410.0], + 'total_area': [3330.0, 1110.0], + 'total_area_if_undiluted': [3330.0, 1110.0], + 'total_conc_vial_mg_L': [18.0, 6.000000000000003], + 'total_conc_vial_if_undiluted_mg_L': [18.0, 6.000000000000003], + 
'total_fraction_of_sample_fr': [18.0, 6.000000000000003], + 'total_fraction_of_feedstock_fr': [18.0, 6.000000000000003], + } + ) + return samples_info + +@pytest.fixture +def checked_samples_info_std(): + samples_info = pd.DataFrame( + index=pd.Index(['S', 'T'], name='samplename'), + data={ + 'filename': [('S_1', 'S_2'), ('T_1', 'T_2', 'T_3')], + 'replicate_number': [(1, 2), (1, 2, 3)], + 'derivatized': [(False, False), (False, False, False)], + 'calibration_file': [('cal_minimal', 'cal_minimal'), ('cal_minimal', 'cal_minimal', 'cal_minimal')], + 'dilution_factor': [(1, 1), (1, 1, 1)], + 'total_sample_conc_in_vial_mg_L': [(1, 1), (1, 1, 1)], + 'sample_yield_on_feedstock_basis_fr': [(1, 1), (1, 1, 1)], + 'compound_with_max_area': [('dodecane', 'dodecane'), ('dodecane', 'dodecane', 'dodecane')], + 'compound_with_max_conc': [('naphthalene', 'dodecane'), ('phenol', 'dodecane', 'dodecane')], + 'max_height': [1414.213562373095, 500.0], + 'max_area': [1414.213562373095, 500.0], + 'max_area_if_undiluted': [1414.213562373095, 500.0], + 'max_conc_vial_mg_L': [2.8284271247461903, 0.9999999999999994], + 'max_conc_vial_if_undiluted_mg_L': [2.8284271247461903, 0.9999999999999994], + 'max_fraction_of_sample_fr': [2.8284271247461903, 0.9999999999999994], + 'max_fraction_of_feedstock_fr': [2.8284271247461903, 0.9999999999999994], + 'total_height': [1569.7770542341354, 749.3997598078078], + 'total_area': [1569.7770542341354, 540.0], + 'total_area_if_undiluted': [1569.7770542341354, 540.0], + 'total_conc_vial_mg_L': [8.48528137423857, 8.881784197001252e-16], + 'total_conc_vial_if_undiluted_mg_L': [8.48528137423857, 8.881784197001252e-16], + 'total_fraction_of_sample_fr': [8.48528137423857, 8.881784197001252e-16], + 'total_fraction_of_feedstock_fr': [8.48528137423857, 8.881784197001252e-16], + } + ) + return samples_info + + +@pytest.fixture +def checked_samples(): + samples = { + 'S': pd.DataFrame( + index=pd.Index(['phenol', 'naphthalene', 'dodecane'], name='S'), + data={ + 
'iupac_name': ['phenol', 'naphthalene', 'dodecane'], + 'retention_time': [13.703, 20.942, 21.426], + 'area': [30.0, 300.0, 3000.0], + 'height': [30.0, 300.0, 3000.0], + 'area_if_undiluted': [30.0, 300.0, 3000.0], + 'conc_vial_mg_L': [6.0, 6.0, 6.000000000000002], + 'conc_vial_if_undiluted_mg_L': [6.0, 6.0, 6.000000000000002], + 'fraction_of_sample_fr': [6.0, 6.0, 6.000000000000002], + 'fraction_of_feedstock_fr': [6.0, 6.0, 6.000000000000002], + 'compound_used_for_calibration': ['self', 'self', 'self'], + } + ), + 'T': pd.DataFrame( + index=pd.Index(['phenol', 'naphthalene', 'dodecane'], name='T'), + data={ + 'iupac_name': ['phenol', 'naphthalene', 'dodecane'], + 'retention_time': [13.702999999999998, 20.942, 21.426], + 'area': [10.0, 100.0, 1000.0], + 'height': [10.0, 400.0, 1000.0], + 'area_if_undiluted': [10.0, 100.0, 1000.0], + 'conc_vial_mg_L': [2.0000000000000004, 2.000000000000001, 2.0000000000000013], + 'conc_vial_if_undiluted_mg_L': [2.0000000000000004, 2.000000000000001, 2.0000000000000013], + 'fraction_of_sample_fr': [2.0000000000000004, 2.000000000000001, 2.0000000000000013], + 'fraction_of_feedstock_fr': [2.0000000000000004, 2.000000000000001, 2.0000000000000013], + 'compound_used_for_calibration': ['self', 'self', 'self'], + } + ) + } + return samples + +@pytest.fixture +def checked_samples_std(): + samples_std = { + 'S': pd.DataFrame( + index=pd.Index(['phenol', 'naphthalene', 'dodecane'], name='S'), + data={ + 'iupac_name': ['phenol', 'naphthalene', 'dodecane'], + 'retention_time': [0.0, 0.0, 0.0], + 'area': [14.142135623730951, 141.4213562373095, 1414.213562373095], + 'height': [14.142135623730951, 141.4213562373095, 1414.213562373095], + 'area_if_undiluted': [14.142135623730951, 141.4213562373095, 1414.213562373095], + 'conc_vial_mg_L': [2.8284271247461903, 2.82842712474619, 2.8284271247461903], + 'conc_vial_if_undiluted_mg_L': [2.8284271247461903, 2.82842712474619, 2.8284271247461903], + 'fraction_of_sample_fr': [2.8284271247461903, 
2.82842712474619, 2.8284271247461903], + 'fraction_of_feedstock_fr': [2.8284271247461903, 2.82842712474619, 2.8284271247461903], + 'compound_used_for_calibration': ['self', 'self', 'self'], + } + ), + 'T': pd.DataFrame( + index=pd.Index(['phenol', 'naphthalene', 'dodecane'], name='T'), + data={ + 'iupac_name': ['phenol', 'naphthalene', 'dodecane'], + 'retention_time': [0.0, 0.0, 0.0], + 'area': [10.0, 50.0, 500.0], + 'height': [10.0, 522.0153254455275, 500.0], + 'area_if_undiluted': [10.0, 50.0, 500.0], + 'conc_vial_mg_L': [1.9999999999999996, 1.0, 1.0], + 'conc_vial_if_undiluted_mg_L': [1.9999999999999996, 1.0, 1.0], + 'fraction_of_sample_fr': [1.9999999999999996, 1.0, 1.0], + 'fraction_of_feedstock_fr': [1.9999999999999996, 1.0, 1.0], + 'compound_used_for_calibration': ['self', 'self', 'self'], + } +) + } + return samples_std + +@pytest.fixture +def checked_files_param_reports(): + reports = { + 'height': +pd.DataFrame( + index=pd.Index(['dodecane', 'naphthalene', 'phenol'], name='height'), + data={ + 'S_1': [2000.0, 200.0, 20.0], + 'S_2': [4000.0, 400.0, 40.0], + 'T_1': [500.0, 50.0, 20.0], + 'T_2': [1000.0, 1000.0, 10.0], + 'T_3': [1500.0, 150.0, 0.0], + } +), +'area': +pd.DataFrame( + index=pd.Index(['dodecane', 'naphthalene', 'phenol'], name='area'), + data={ + 'S_1': [2000.0, 200.0, 20.0], + 'S_2': [4000.0, 400.0, 40.0], + 'T_1': [500.0, 50.0, 20.0], + 'T_2': [1000.0, 100.0, 10.0], + 'T_3': [1500.0, 150.0, 0.0], + } +), +'area_if_undiluted': +pd.DataFrame( + index=pd.Index(['dodecane', 'naphthalene', 'phenol'], name='area_if_undiluted'), + data={ + 'S_1': [2000.0, 200.0, 20.0], + 'S_2': [4000.0, 400.0, 40.0], + 'T_1': [500.0, 50.0, 20.0], + 'T_2': [1000.0, 100.0, 10.0], + 'T_3': [1500.0, 150.0, 0.0], + } +), +'conc_vial_mg_L': +pd.DataFrame( + index=pd.Index(['dodecane', 'phenol', 'naphthalene'], name='conc_vial_mg_L'), + data={ + 'S_1': [4.000000000000001, 4.0, 4.000000000000001], + 'S_2': [8.000000000000002, 8.0, 8.0], + 'T_1': [1.0000000000000013, 4.0, 
1.000000000000001], + 'T_2': [2.0000000000000013, 2.0000000000000004, 2.000000000000001], + 'T_3': [3.0000000000000013, 4.61113287893387e-16, 3.000000000000001], + } +), +'conc_vial_if_undiluted_mg_L': +pd.DataFrame( + index=pd.Index(['dodecane', 'phenol', 'naphthalene'], name='conc_vial_if_undiluted_mg_L'), + data={ + 'S_1': [4.000000000000001, 4.0, 4.000000000000001], + 'S_2': [8.000000000000002, 8.0, 8.0], + 'T_1': [1.0000000000000013, 4.0, 1.000000000000001], + 'T_2': [2.0000000000000013, 2.0000000000000004, 2.000000000000001], + 'T_3': [3.0000000000000013, 4.61113287893387e-16, 3.000000000000001], + } +), +'fraction_of_sample_fr': +pd.DataFrame( + index=pd.Index(['dodecane', 'phenol', 'naphthalene'], name='fraction_of_sample_fr'), + data={ + 'S_1': [4.000000000000001, 4.0, 4.000000000000001], + 'S_2': [8.000000000000002, 8.0, 8.0], + 'T_1': [1.0000000000000013, 4.0, 1.000000000000001], + 'T_2': [2.0000000000000013, 2.0000000000000004, 2.000000000000001], + 'T_3': [3.0000000000000013, 4.61113287893387e-16, 3.000000000000001], + } +), +'fraction_of_feedstock_fr': +pd.DataFrame( + index=pd.Index(['dodecane', 'phenol', 'naphthalene'], name='fraction_of_feedstock_fr'), + data={ + 'S_1': [4.000000000000001, 4.0, 4.000000000000001], + 'S_2': [8.000000000000002, 8.0, 8.0], + 'T_1': [1.0000000000000013, 4.0, 1.000000000000001], + 'T_2': [2.0000000000000013, 2.0000000000000004, 2.000000000000001], + 'T_3': [3.0000000000000013, 4.61113287893387e-16, 3.000000000000001], + } +), + + } + return reports + +@pytest.fixture +def checked_files_param_aggrreps(): + reports = { + 'height': +pd.DataFrame( + index=pd.Index(['C-aliph', 'C-arom', 'alcohol'], name='height'), + data={ + 'S_1': [2000.117419127576, 216.3925981016854, 3.6142811603442784], + 'S_2': [4000.234838255152, 432.7851962033708, 7.228562320688557], + 'T_1': [500.029354781894, 66.38791681901395, 3.6142811603442784], + 'T_2': [1000.058709563788, 1008.2243867468712, 1.8071405801721392], + 'T_3': [1500.0880643456821, 
150.00468128267144, 0.0], + } +), +'area': +pd.DataFrame( + index=pd.Index(['C-aliph', 'C-arom', 'alcohol'], name='area'), + data={ + 'S_1': [2000.117419127576, 216.3925981016854, 3.6142811603442784], + 'S_2': [4000.234838255152, 432.7851962033708, 7.228562320688557], + 'T_1': [500.029354781894, 66.38791681901395, 3.6142811603442784], + 'T_2': [1000.058709563788, 108.1962990508427, 1.8071405801721392], + 'T_3': [1500.0880643456821, 150.00468128267144, 0.0], + } +), +'area_if_undiluted': +pd.DataFrame( + index=pd.Index(['C-aliph', 'C-arom', 'alcohol'], name='area_if_undiluted'), + data={ + 'S_1': [2000.117419127576, 216.3925981016854, 3.6142811603442784], + 'S_2': [4000.234838255152, 432.7851962033708, 7.228562320688557], + 'T_1': [500.029354781894, 66.38791681901395, 3.6142811603442784], + 'T_2': [1000.058709563788, 108.1962990508427, 1.8071405801721392], + 'T_3': [1500.0880643456821, 150.00468128267144, 0.0], + } +), +'conc_vial_mg_L': +pd.DataFrame( + index=pd.Index(['C-arom', 'C-aliph', 'alcohol'], name='conc_vial_mg_L'), + data={ + 'S_1': [7.277396112495934, 4.000234838255153, 0.7228562320688556], + 'S_2': [14.554792224991864, 8.000469676510306, 1.4457124641377113], + 'T_1': [4.277302486842505, 1.0000587095637894, 0.7228562320688556], + 'T_2': [3.638698056247968, 2.0001174191275775, 0.36142811603442787], + 'T_3': [3.00009362565343, 3.0001761286913657, 8.332965346087379e-17], + } +), +'conc_vial_if_undiluted_mg_L': +pd.DataFrame( + index=pd.Index(['C-arom', 'C-aliph', 'alcohol'], name='conc_vial_if_undiluted_mg_L'), + data={ + 'S_1': [7.277396112495934, 4.000234838255153, 0.7228562320688556], + 'S_2': [14.554792224991864, 8.000469676510306, 1.4457124641377113], + 'T_1': [4.277302486842505, 1.0000587095637894, 0.7228562320688556], + 'T_2': [3.638698056247968, 2.0001174191275775, 0.36142811603442787], + 'T_3': [3.00009362565343, 3.0001761286913657, 8.332965346087379e-17], + } +), +'fraction_of_sample_fr': +pd.DataFrame( + index=pd.Index(['C-arom', 'C-aliph', 
'alcohol'], name='fraction_of_sample_fr'), + data={ + 'S_1': [7.277396112495934, 4.000234838255153, 0.7228562320688556], + 'S_2': [14.554792224991864, 8.000469676510306, 1.4457124641377113], + 'T_1': [4.277302486842505, 1.0000587095637894, 0.7228562320688556], + 'T_2': [3.638698056247968, 2.0001174191275775, 0.36142811603442787], + 'T_3': [3.00009362565343, 3.0001761286913657, 8.332965346087379e-17], + } +), +'fraction_of_feedstock_fr': +pd.DataFrame( + index=pd.Index(['C-arom', 'C-aliph', 'alcohol'], name='fraction_of_feedstock_fr'), + data={ + 'S_1': [7.277396112495934, 4.000234838255153, 0.7228562320688556], + 'S_2': [14.554792224991864, 8.000469676510306, 1.4457124641377113], + 'T_1': [4.277302486842505, 1.0000587095637894, 0.7228562320688556], + 'T_2': [3.638698056247968, 2.0001174191275775, 0.36142811603442787], + 'T_3': [3.00009362565343, 3.0001761286913657, 8.332965346087379e-17], + } +) + + } + return reports + +@pytest.fixture +def checked_samples_param_reports(): + reports = { + 'height': +pd.DataFrame( + index=pd.Index(['dodecane', 'naphthalene', 'phenol'], name='height'), + data={ + 'S': [3000.0, 300.0, 30.0], + 'T': [1000.0, 400.0, 10.0], + } +), +'area': +pd.DataFrame( + index=pd.Index(['dodecane', 'naphthalene', 'phenol'], name='area'), + data={ + 'S': [3000.0, 300.0, 30.0], + 'T': [1000.0, 100.0, 10.0], + } +), +'area_if_undiluted': +pd.DataFrame( + index=pd.Index(['dodecane', 'naphthalene', 'phenol'], name='area_if_undiluted'), + data={ + 'S': [3000.0, 300.0, 30.0], + 'T': [1000.0, 100.0, 10.0], + } +), +'conc_vial_mg_L': +pd.DataFrame( + index=pd.Index(['dodecane', 'phenol', 'naphthalene'], name='conc_vial_mg_L'), + data={ + 'S': [6.000000000000002, 6.0, 6.0], + 'T': [2.0000000000000013, 2.0000000000000004, 2.000000000000001], + } +), +'conc_vial_if_undiluted_mg_L': +pd.DataFrame( + index=pd.Index(['dodecane', 'phenol', 'naphthalene'], name='conc_vial_if_undiluted_mg_L'), + data={ + 'S': [6.000000000000002, 6.0, 6.0], + 'T': [2.0000000000000013, 
2.0000000000000004, 2.000000000000001], + } +), +'fraction_of_sample_fr': +pd.DataFrame( + index=pd.Index(['dodecane', 'phenol', 'naphthalene'], name='fraction_of_sample_fr'), + data={ + 'S': [6.000000000000002, 6.0, 6.0], + 'T': [2.0000000000000013, 2.0000000000000004, 2.000000000000001], + } +), +'fraction_of_feedstock_fr': +pd.DataFrame( + index=pd.Index(['dodecane', 'phenol', 'naphthalene'], name='fraction_of_feedstock_fr'), + data={ + 'S': [6.000000000000002, 6.0, 6.0], + 'T': [2.0000000000000013, 2.0000000000000004, 2.000000000000001], + } +) + } + return reports + +@pytest.fixture +def checked_samples_param_reports_std(): + reports = { + 'height': +pd.DataFrame( + index=pd.Index(['dodecane', 'naphthalene', 'phenol'], name='height'), + data={ + 'S': [1414.213562373095, 141.4213562373095, 14.142135623730951], + 'T': [500.0, 522.0153254455275, 10.0], + } +), +'area': +pd.DataFrame( + index=pd.Index(['dodecane', 'naphthalene', 'phenol'], name='area'), + data={ + 'S': [1414.213562373095, 141.4213562373095, 14.142135623730951], + 'T': [500.0, 50.0, 10.0], + } +), +'area_if_undiluted': +pd.DataFrame( + index=pd.Index(['dodecane', 'naphthalene', 'phenol'], name='area_if_undiluted'), + data={ + 'S': [1414.213562373095, 141.4213562373095, 14.142135623730951], + 'T': [500.0, 50.0, 10.0], + } +), +'conc_vial_mg_L': +pd.DataFrame( + index=pd.Index(['dodecane', 'phenol', 'naphthalene'], name='conc_vial_mg_L'), + data={ + 'S': [2.8284271247461903, 2.8284271247461903, 2.82842712474619], + 'T': [1.0, 1.9999999999999996, 1.0], + } +), +'conc_vial_if_undiluted_mg_L': +pd.DataFrame( + index=pd.Index(['dodecane', 'phenol', 'naphthalene'], name='conc_vial_if_undiluted_mg_L'), + data={ + 'S': [2.8284271247461903, 2.8284271247461903, 2.82842712474619], + 'T': [1.0, 1.9999999999999996, 1.0], + } +), +'fraction_of_sample_fr': +pd.DataFrame( + index=pd.Index(['dodecane', 'phenol', 'naphthalene'], name='fraction_of_sample_fr'), + data={ + 'S': [2.8284271247461903, 2.8284271247461903, 
2.82842712474619], + 'T': [1.0, 1.9999999999999996, 1.0], + } +), +'fraction_of_feedstock_fr': +pd.DataFrame( + index=pd.Index(['dodecane', 'phenol', 'naphthalene'], name='fraction_of_feedstock_fr'), + data={ + 'S': [2.8284271247461903, 2.8284271247461903, 2.82842712474619], + 'T': [1.0, 1.9999999999999996, 1.0], + } +), + } + return reports + +@pytest.fixture +def checked_samples_param_aggrreps(): + reports = { + 'height': +pd.DataFrame( + index=pd.Index(['C-aliph', 'C-arom', 'alcohol'], name='height'), + data={ + 'S': [3000.1761286913643, 324.5888971525281, 5.421421740516418], + 'T': [1000.058709563788, 408.2056616161856, 1.8071405801721392], + } +), +'area': +pd.DataFrame( + index=pd.Index(['C-aliph', 'C-arom', 'alcohol'], name='area'), + data={ + 'S': [3000.1761286913643, 324.5888971525281, 5.421421740516418], + 'T': [1000.058709563788, 108.1962990508427, 1.8071405801721392], + } +), +'area_if_undiluted': +pd.DataFrame( + index=pd.Index(['C-aliph', 'C-arom', 'alcohol'], name='area_if_undiluted'), + data={ + 'S': [3000.1761286913643, 324.5888971525281, 5.421421740516418], + 'T': [1000.058709563788, 108.1962990508427, 1.8071405801721392], + } +), +'conc_vial_mg_L': +pd.DataFrame( + index=pd.Index(['C-arom', 'C-aliph', 'alcohol'], name='conc_vial_mg_L'), + data={ + 'S': [10.9160941687439, 6.00035225738273, 1.0842843481032833], + 'T': [3.638698056247968, 2.0001174191275775, 0.36142811603442787], + } +), +'conc_vial_if_undiluted_mg_L': +pd.DataFrame( + index=pd.Index(['C-arom', 'C-aliph', 'alcohol'], name='conc_vial_if_undiluted_mg_L'), + data={ + 'S': [10.9160941687439, 6.00035225738273, 1.0842843481032833], + 'T': [3.638698056247968, 2.0001174191275775, 0.36142811603442787], + } +), +'fraction_of_sample_fr': +pd.DataFrame( + index=pd.Index(['C-arom', 'C-aliph', 'alcohol'], name='fraction_of_sample_fr'), + data={ + 'S': [10.9160941687439, 6.00035225738273, 1.0842843481032833], + 'T': [3.638698056247968, 2.0001174191275775, 0.36142811603442787], + } +), 
+'fraction_of_feedstock_fr': +pd.DataFrame( + index=pd.Index(['C-arom', 'C-aliph', 'alcohol'], name='fraction_of_feedstock_fr'), + data={ + 'S': [10.9160941687439, 6.00035225738273, 1.0842843481032833], + 'T': [3.638698056247968, 2.0001174191275775, 0.36142811603442787], + } +), + } + return reports + +@pytest.fixture +def checked_samples_param_aggrreps_std(): + reports = { + 'height': +pd.DataFrame( + index=pd.Index(['C-aliph', 'C-arom', 'alcohol'], name='height'), + data={ + 'S': [1414.2965902344451, 153.01267351627698, 2.5556827175942227], + 'T': [500.029354781894, 521.310649906016, 1.8071405801721392], + } +), +'area': +pd.DataFrame( + index=pd.Index(['C-aliph', 'C-arom', 'alcohol'], name='area'), + data={ + 'S': [1414.2965902344451, 153.01267351627698, 2.5556827175942227], + 'T': [500.029354781894, 41.808382231828745, 1.8071405801721392], + } +), +'area_if_undiluted': +pd.DataFrame( + index=pd.Index(['C-aliph', 'C-arom', 'alcohol'], name='area_if_undiluted'), + data={ + 'S': [1414.2965902344451, 153.01267351627698, 2.5556827175942227], + 'T': [500.029354781894, 41.808382231828745, 1.8071405801721392], + } +), +'conc_vial_mg_L': +pd.DataFrame( + index=pd.Index(['C-arom', 'C-aliph', 'alcohol'], name='conc_vial_mg_L'), + data={ + 'S': [5.1458961405264905, 2.8285931804688906, 0.5111365435188446], + 'T': [0.6386044305945372, 1.0000587095637883, 0.3614281160344278], + } +), +'conc_vial_if_undiluted_mg_L': +pd.DataFrame( + index=pd.Index(['C-arom', 'C-aliph', 'alcohol'], name='conc_vial_if_undiluted_mg_L'), + data={ + 'S': [5.1458961405264905, 2.8285931804688906, 0.5111365435188446], + 'T': [0.6386044305945372, 1.0000587095637883, 0.3614281160344278], + } +), +'fraction_of_sample_fr': +pd.DataFrame( + index=pd.Index(['C-arom', 'C-aliph', 'alcohol'], name='fraction_of_sample_fr'), + data={ + 'S': [5.1458961405264905, 2.8285931804688906, 0.5111365435188446], + 'T': [0.6386044305945372, 1.0000587095637883, 0.3614281160344278], + } +), +'fraction_of_feedstock_fr': 
+pd.DataFrame( + index=pd.Index(['C-arom', 'C-aliph', 'alcohol'], name='fraction_of_feedstock_fr'), + data={ + 'S': [5.1458961405264905, 2.8285931804688906, 0.5111365435188446], + 'T': [0.6386044305945372, 1.0000587095637883, 0.3614281160344278], + } +), + } + return reports + + # Project class testing # @pytest.fixture # def gcms() -> Project: @@ -217,9 +801,6 @@ def checked_load_calibrations(): # return Project() -# fmt: off - - # @pytest.fixture # def checked_files_info(): # files_info = pd.DataFrame( @@ -377,33 +958,33 @@ def checked_load_calibrations(): # } # return calibrations -@pytest.fixture -def checked_is_calibrations_deriv(): - is_calibrations_deriv = {'calibration': False, 'deriv_calibration': True} - return is_calibrations_deriv +# @pytest.fixture +# def checked_is_calibrations_deriv(): +# is_calibrations_deriv = {'calibration': False, 'deriv_calibration': True} +# return is_calibrations_deriv -@pytest.fixture -def checked_list_of_all_compounds(): - list_of_all_compounds = ['tetradecanoic acid', 'oxacycloheptadecan-2-one', 'n-hexadecanoic acid', - '9,12-octadecadienoic acid (z,z)-', 'oleic acid', - 'n-decanoic acid', '2-butanone', '2-cyclopenten-1-one, 2-methyl-', - 'trans-2-pentenoic acid', '2,5-hexanedione', - '1-hexene, 4,5-dimethyl-', 'phenol', - '2-methylcyclopent-2-en-1-one', '2,4,5-trichlorophenol', 'hexadecanoic acid', - '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid' - ] - return list_of_all_compounds +# @pytest.fixture +# def checked_list_of_all_compounds(): +# list_of_all_compounds = ['tetradecanoic acid', 'oxacycloheptadecan-2-one', 'n-hexadecanoic acid', +# '9,12-octadecadienoic acid (z,z)-', 'oleic acid', +# 'n-decanoic acid', '2-butanone', '2-cyclopenten-1-one, 2-methyl-', +# 'trans-2-pentenoic acid', '2,5-hexanedione', +# '1-hexene, 4,5-dimethyl-', 'phenol', +# '2-methylcyclopent-2-en-1-one', '2,4,5-trichlorophenol', 'hexadecanoic acid', +# '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid' +# ] +# 
return list_of_all_compounds -@pytest.fixture -def checked_list_of_all_deriv_compounds(): - list_of_all_deriv_compounds = ['myristic acid, tms derivative', 'palmitelaidic acid, tms derivative', - 'palmitic acid, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', - '9-octadecenoic acid, (z)-, tms derivative', 'benzoic acid, deriv.', - 'hexadecanoic acid, deriv.', '(9z,12z)-octadeca-9,12-dienoic acid, deriv.', - '9-octadecenoic acid, (e)-, deriv.', 'phenol, deriv.', - '4-oxopentanoic acid, deriv.', 'benzene-1,2-diol, deriv.' - ] - return list_of_all_deriv_compounds +# @pytest.fixture +# def checked_list_of_all_deriv_compounds(): +# list_of_all_deriv_compounds = ['myristic acid, tms derivative', 'palmitelaidic acid, tms derivative', +# 'palmitic acid, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', +# '9-octadecenoic acid, (z)-, tms derivative', 'benzoic acid, deriv.', +# 'hexadecanoic acid, deriv.', '(9z,12z)-octadeca-9,12-dienoic acid, deriv.', +# '9-octadecenoic acid, (e)-, deriv.', 'phenol, deriv.', +# '4-oxopentanoic acid, deriv.', 'benzene-1,2-diol, deriv.' 
+# ] +# return list_of_all_deriv_compounds # @pytest.fixture # def checked_compounds_properties(): @@ -444,847 +1025,847 @@ def checked_list_of_all_deriv_compounds(): # ) # return compounds_properties -@pytest.fixture -def checked_deriv_compounds_properties(): - deriv_compounds_properties = pd.DataFrame( - index=pd.Index(['myristic acid, tms derivative', 'palmitelaidic acid, tms derivative', 'palmitic acid, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative', 'benzoic acid, deriv.', 'hexadecanoic acid, deriv.', '(9z,12z)-octadeca-9,12-dienoic acid, deriv.', '9-octadecenoic acid, (e)-, deriv.', 'phenol, deriv.', '4-oxopentanoic acid, deriv.', 'benzene-1,2-diol, deriv.'], name='comp_name'), - data={ - 'iupac_name': ['tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid', 'benzoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(e)-octadec-9-enoic acid', 'phenol', '4-oxopentanoic acid', 'benzene-1,2-diol'], - 'molecular_formula': ['C14H28O2', 'C16H30O2', 'C16H32O2', 'C18H32O2', 'C18H34O2', 'C7H6O2', 'C16H32O2', 'C18H32O2', 'C18H34O2', 'C6H6O', 'C5H8O3', 'C6H6O2'], - 'canonical_smiles': ['CCCCCCCCCCCCCC(=O)O', 'CCCCCCC=CCCCCCCCC(=O)O', 'CCCCCCCCCCCCCCCC(=O)O', 'CCCCCC=CCC=CCCCCCCCC(=O)O', 'CCCCCCCCC=CCCCCCCCC(=O)O', 'C1=CC=C(C=C1)C(=O)O', 'CCCCCCCCCCCCCCCC(=O)O', 'CCCCCC=CCC=CCCCCCCCC(=O)O', 'CCCCCCCCC=CCCCCCCCC(=O)O', 'C1=CC=C(C=C1)O', 'CC(=O)CCC(=O)O', 'C1=CC=C(C(=C1)O)O'], - 'molecular_weight': [228.37, 254.41, 256.42, 280.4, 282.5, 122.12, 256.42, 280.4, 282.5, 94.11, 116.11, 110.11], - 'xlogp': [5.3, 6.4, 6.4, 6.8, 6.5, 1.9, 6.4, 6.8, 6.5, 1.5, -0.5, 0.9], - 'underiv_comp_name': ['myristic acid', 'palmitelaidic acid', 'palmitic acid', '9,12-octadecadienoic acid (z,z)-', '9-octadecenoic acid, (z)-', 'benzoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '9-octadecenoic acid, (e)-', 
'phenol', '4-oxopentanoic acid', 'benzene-1,2-diol'], - 'el_C': [14, 16, 16, 18, 18, 7, 16, 18, 18, 6, 5, 6], - 'el_H': [28, 30, 32, 32, 34, 6, 32, 32, 34, 6, 8, 6], - 'el_O': [2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 3, 2], - 'el_mf_C': [0.7363226343214958, 0.7553791124562713, 0.7494579205990172, 0.7710342368045648, 0.7653026548672566, 0.6884785456927611, 0.7494579205990172, 0.7710342368045648, 0.7653026548672566, 0.765763468281798, 0.5172250452157436, 0.6544909635818728], - 'el_mf_H': [0.12358891272934273, 0.11886325223065132, 0.12579361984244597, 0.11503566333808846, 0.12131681415929203, 0.04952505732066819, 0.12579361984244597, 0.11503566333808846, 0.12131681415929203, 0.06426522154925088, 0.06945138230987856, 0.054926891290527656], - 'el_mf_O': [0.1401147261023777, 0.12577335796548877, 0.12478745807659308, 0.11411554921540658, 0.11326725663716815, 0.26202096298722566, 0.12478745807659308, 0.11411554921540658, 0.11326725663716815, 0.17000318775900541, 0.413375247610025, 0.29060030878212695], - 'fg_C-aliph': [13, 15, 15, 17, 17, 0, 15, 17, 17, 0, 1, 0], - 'fg_C-arom': [0, 0, 0, 0, 0, 6, 0, 0, 0, 6, 0, 6], - 'fg_alcohol': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2], - 'fg_carboxyl': [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0], - 'fg_ketone': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], - 'fg_mf_C-aliph': [0.8029031834303979, 0.8230690617507173, 0.8244793697839481, 0.8396398002853066, 0.8405345132743363, 0.0, 0.8244793697839481, 0.8396398002853066, 0.8405345132743363, 0.0, 0.12080785462061837, 0.0], - 'fg_mf_C-arom': [0.0, 0.0, 0.0, 0.0, 0.0, 0.6313953488372093, 0.0, 0.0, 0.0, 0.8193178195728402, 0.0, 0.6911088911088911], - 'fg_mf_alcohol': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1807140580172139, 0.0, 0.3089092725456362], - 'fg_mf_carboxyl': [0.19712308972281825, 0.17694666090169414, 0.1755596287341081, 0.16054564907275323, 0.15935221238938055, 0.3686292171634458, 0.1755596287341081, 0.16054564907275323, 0.15935221238938055, 0.0, 0.3877099302385669, 0.0], - 'fg_mf_ketone': [0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.491533890276462, 0.0], - 'fg_mf_total': [1.0000181636545273, 1.0000181636545273, 1.0000181636545273, 1.0000181636545273, 1.0000181636545273, 1.0000181636545273, 1.0000181636545273, 1.0000181636545273, 1.0000181636545273, 1.0000181636545273, 1.0000181636545273, 1.0000181636545273], - } - ) - return deriv_compounds_properties +# @pytest.fixture +# def checked_deriv_compounds_properties(): +# deriv_compounds_properties = pd.DataFrame( +# index=pd.Index(['myristic acid, tms derivative', 'palmitelaidic acid, tms derivative', 'palmitic acid, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative', 'benzoic acid, deriv.', 'hexadecanoic acid, deriv.', '(9z,12z)-octadeca-9,12-dienoic acid, deriv.', '9-octadecenoic acid, (e)-, deriv.', 'phenol, deriv.', '4-oxopentanoic acid, deriv.', 'benzene-1,2-diol, deriv.'], name='comp_name'), +# data={ +# 'iupac_name': ['tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid', 'benzoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(e)-octadec-9-enoic acid', 'phenol', '4-oxopentanoic acid', 'benzene-1,2-diol'], +# 'molecular_formula': ['C14H28O2', 'C16H30O2', 'C16H32O2', 'C18H32O2', 'C18H34O2', 'C7H6O2', 'C16H32O2', 'C18H32O2', 'C18H34O2', 'C6H6O', 'C5H8O3', 'C6H6O2'], +# 'canonical_smiles': ['CCCCCCCCCCCCCC(=O)O', 'CCCCCCC=CCCCCCCCC(=O)O', 'CCCCCCCCCCCCCCCC(=O)O', 'CCCCCC=CCC=CCCCCCCCC(=O)O', 'CCCCCCCCC=CCCCCCCCC(=O)O', 'C1=CC=C(C=C1)C(=O)O', 'CCCCCCCCCCCCCCCC(=O)O', 'CCCCCC=CCC=CCCCCCCCC(=O)O', 'CCCCCCCCC=CCCCCCCCC(=O)O', 'C1=CC=C(C=C1)O', 'CC(=O)CCC(=O)O', 'C1=CC=C(C(=C1)O)O'], +# 'molecular_weight': [228.37, 254.41, 256.42, 280.4, 282.5, 122.12, 256.42, 280.4, 282.5, 94.11, 116.11, 110.11], +# 'xlogp': [5.3, 6.4, 6.4, 6.8, 6.5, 1.9, 6.4, 6.8, 6.5, 1.5, -0.5, 0.9], +# 'underiv_comp_name': ['myristic acid', 'palmitelaidic acid', 'palmitic 
acid', '9,12-octadecadienoic acid (z,z)-', '9-octadecenoic acid, (z)-', 'benzoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '9-octadecenoic acid, (e)-', 'phenol', '4-oxopentanoic acid', 'benzene-1,2-diol'], +# 'el_C': [14, 16, 16, 18, 18, 7, 16, 18, 18, 6, 5, 6], +# 'el_H': [28, 30, 32, 32, 34, 6, 32, 32, 34, 6, 8, 6], +# 'el_O': [2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 3, 2], +# 'el_mf_C': [0.7363226343214958, 0.7553791124562713, 0.7494579205990172, 0.7710342368045648, 0.7653026548672566, 0.6884785456927611, 0.7494579205990172, 0.7710342368045648, 0.7653026548672566, 0.765763468281798, 0.5172250452157436, 0.6544909635818728], +# 'el_mf_H': [0.12358891272934273, 0.11886325223065132, 0.12579361984244597, 0.11503566333808846, 0.12131681415929203, 0.04952505732066819, 0.12579361984244597, 0.11503566333808846, 0.12131681415929203, 0.06426522154925088, 0.06945138230987856, 0.054926891290527656], +# 'el_mf_O': [0.1401147261023777, 0.12577335796548877, 0.12478745807659308, 0.11411554921540658, 0.11326725663716815, 0.26202096298722566, 0.12478745807659308, 0.11411554921540658, 0.11326725663716815, 0.17000318775900541, 0.413375247610025, 0.29060030878212695], +# 'fg_C-aliph': [13, 15, 15, 17, 17, 0, 15, 17, 17, 0, 1, 0], +# 'fg_C-arom': [0, 0, 0, 0, 0, 6, 0, 0, 0, 6, 0, 6], +# 'fg_alcohol': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2], +# 'fg_carboxyl': [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0], +# 'fg_ketone': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], +# 'fg_mf_C-aliph': [0.8029031834303979, 0.8230690617507173, 0.8244793697839481, 0.8396398002853066, 0.8405345132743363, 0.0, 0.8244793697839481, 0.8396398002853066, 0.8405345132743363, 0.0, 0.12080785462061837, 0.0], +# 'fg_mf_C-arom': [0.0, 0.0, 0.0, 0.0, 0.0, 0.6313953488372093, 0.0, 0.0, 0.0, 0.8193178195728402, 0.0, 0.6911088911088911], +# 'fg_mf_alcohol': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1807140580172139, 0.0, 0.3089092725456362], +# 'fg_mf_carboxyl': [0.19712308972281825, 0.17694666090169414, 
0.1755596287341081, 0.16054564907275323, 0.15935221238938055, 0.3686292171634458, 0.1755596287341081, 0.16054564907275323, 0.15935221238938055, 0.0, 0.3877099302385669, 0.0], +# 'fg_mf_ketone': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.491533890276462, 0.0], +# 'fg_mf_total': [1.0000181636545273, 1.0000181636545273, 1.0000181636545273, 1.0000181636545273, 1.0000181636545273, 1.0000181636545273, 1.0000181636545273, 1.0000181636545273, 1.0000181636545273, 1.0000181636545273, 1.0000181636545273, 1.0000181636545273], +# } +# ) +# return deriv_compounds_properties -@pytest.fixture -def checked_calibrations_added_iupac_only_iupac_and_mw(): - calibrations = { - 'calibration': pd.DataFrame( - index=pd.Index(['phenol', '2-methylcyclopent-2-en-1-one', '2,4,5-trichlorophenol', 'tetradecanoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], name='comp_name'), - data={ - 'iupac_name': ['phenol', '2-methylcyclopent-2-en-1-one', '2,4,5-trichlorophenol', 'tetradecanoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], - 'MW': [94.11, 96.1271, 197.4, 228.3709, 256.4241, 280.4455, 282.4614], - }), - 'deriv_calibration': pd.DataFrame( - index=pd.Index(['benzoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '9-octadecenoic acid, (e)-', 'phenol', '4-oxopentanoic acid', 'benzene-1,2-diol'], name='comp_name'), - data={ - 'iupac_name': ['benzoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(e)-octadec-9-enoic acid', 'phenol', '4-oxopentanoic acid', 'benzene-1,2-diol'], - 'MW': [122.1213, 256.4241, 280.4455, 282.4614, 94.1112, 116.1152, 110.1106], - }) - } - return calibrations +# @pytest.fixture +# def checked_calibrations_added_iupac_only_iupac_and_mw(): +# calibrations = { +# 'calibration': pd.DataFrame( +# index=pd.Index(['phenol', '2-methylcyclopent-2-en-1-one', '2,4,5-trichlorophenol', 'tetradecanoic acid', 'hexadecanoic acid', 
'(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], name='comp_name'), +# data={ +# 'iupac_name': ['phenol', '2-methylcyclopent-2-en-1-one', '2,4,5-trichlorophenol', 'tetradecanoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], +# 'MW': [94.11, 96.1271, 197.4, 228.3709, 256.4241, 280.4455, 282.4614], +# }), +# 'deriv_calibration': pd.DataFrame( +# index=pd.Index(['benzoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '9-octadecenoic acid, (e)-', 'phenol', '4-oxopentanoic acid', 'benzene-1,2-diol'], name='comp_name'), +# data={ +# 'iupac_name': ['benzoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(e)-octadec-9-enoic acid', 'phenol', '4-oxopentanoic acid', 'benzene-1,2-diol'], +# 'MW': [122.1213, 256.4241, 280.4455, 282.4614, 94.1112, 116.1152, 110.1106], +# }) +# } +# return calibrations -@pytest.fixture -def checked_files_added_iupac_only_iupac_and_time(): - files = { - 'A_1': pd.DataFrame( - index=pd.Index(['unidentified', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'n-hexadecanoic acid', '9,12-octadecadienoic acid (z,z)-', 'oleic acid'], name='A_1'), - data={ - 'iupac_name': ['unidentified', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], - 'retention_time': [6.025, 36.163, 40.052, 40.492, 43.847, 43.986], - }), - 'A_2': pd.DataFrame( - index=pd.Index(['unidentified', 'n-decanoic acid', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'n-hexadecanoic acid', '9,12-octadecadienoic acid (z,z)-', 'oleic acid'], name='A_2'), - data={ - 'iupac_name': ['unidentified', 'decanoic acid', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], - 'retention_time': [6.025, 26.284, 36.158, 40.041, 40.494, 43.847, 43.988], - }), - 'Ader_1': pd.DataFrame( - index=pd.Index(['unidentified', 
'myristic acid, tms derivative', 'palmitelaidic acid, tms derivative', 'palmitic acid, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'], name='Ader_1'), - data={ - 'iupac_name': ['unidentified', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], - 'retention_time': [6.027, 38.123, 41.729, 42.157, 45.253, 45.369], - }), - 'Ader_2': pd.DataFrame( - index=pd.Index(['unidentified', 'myristic acid, tms derivative', 'palmitelaidic acid, tms derivative', 'palmitic acid, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'], name='Ader_2'), - data={ - 'iupac_name': ['unidentified', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], - 'retention_time': [6.027, 38.125, 41.744, 42.161, 45.258, 45.37], - }), - 'B_1': pd.DataFrame( - index=pd.Index(['2-butanone', '2-cyclopenten-1-one, 2-methyl-', 'trans-2-pentenoic acid', '2,5-hexanedione', '1-hexene, 4,5-dimethyl-', 'phenol'], name='B_1'), - data={ - 'iupac_name': ['butan-2-one', '2-methylcyclopent-2-en-1-one', '(e)-pent-2-enoic acid', 'hexane-2,5-dione', '4,5-dimethylhex-1-ene', 'phenol'], - 'retention_time': [8.527, 10.507, 11.071, 11.486, 12.214, 13.687], - }), - 'B_2': pd.DataFrame( - index=pd.Index(['2-butanone', '2-cyclopenten-1-one, 2-methyl-', 'trans-2-pentenoic acid', '2,5-hexanedione', 'phenol'], name='B_2'), - data={ - 'iupac_name': ['butan-2-one', '2-methylcyclopent-2-en-1-one', '(e)-pent-2-enoic acid', 'hexane-2,5-dione', 'phenol'], - 'retention_time': [8.502, 10.474, 11.027, 11.456, 13.661], - }) - } - return files +# @pytest.fixture +# def checked_files_added_iupac_only_iupac_and_time(): +# files = { +# 'A_1': pd.DataFrame( +# index=pd.Index(['unidentified', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 
'n-hexadecanoic acid', '9,12-octadecadienoic acid (z,z)-', 'oleic acid'], name='A_1'), +# data={ +# 'iupac_name': ['unidentified', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], +# 'retention_time': [6.025, 36.163, 40.052, 40.492, 43.847, 43.986], +# }), +# 'A_2': pd.DataFrame( +# index=pd.Index(['unidentified', 'n-decanoic acid', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'n-hexadecanoic acid', '9,12-octadecadienoic acid (z,z)-', 'oleic acid'], name='A_2'), +# data={ +# 'iupac_name': ['unidentified', 'decanoic acid', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], +# 'retention_time': [6.025, 26.284, 36.158, 40.041, 40.494, 43.847, 43.988], +# }), +# 'Ader_1': pd.DataFrame( +# index=pd.Index(['unidentified', 'myristic acid, tms derivative', 'palmitelaidic acid, tms derivative', 'palmitic acid, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'], name='Ader_1'), +# data={ +# 'iupac_name': ['unidentified', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], +# 'retention_time': [6.027, 38.123, 41.729, 42.157, 45.253, 45.369], +# }), +# 'Ader_2': pd.DataFrame( +# index=pd.Index(['unidentified', 'myristic acid, tms derivative', 'palmitelaidic acid, tms derivative', 'palmitic acid, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'], name='Ader_2'), +# data={ +# 'iupac_name': ['unidentified', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], +# 'retention_time': [6.027, 38.125, 41.744, 42.161, 45.258, 45.37], +# }), +# 'B_1': pd.DataFrame( +# index=pd.Index(['2-butanone', 
'2-cyclopenten-1-one, 2-methyl-', 'trans-2-pentenoic acid', '2,5-hexanedione', '1-hexene, 4,5-dimethyl-', 'phenol'], name='B_1'), +# data={ +# 'iupac_name': ['butan-2-one', '2-methylcyclopent-2-en-1-one', '(e)-pent-2-enoic acid', 'hexane-2,5-dione', '4,5-dimethylhex-1-ene', 'phenol'], +# 'retention_time': [8.527, 10.507, 11.071, 11.486, 12.214, 13.687], +# }), +# 'B_2': pd.DataFrame( +# index=pd.Index(['2-butanone', '2-cyclopenten-1-one, 2-methyl-', 'trans-2-pentenoic acid', '2,5-hexanedione', 'phenol'], name='B_2'), +# data={ +# 'iupac_name': ['butan-2-one', '2-methylcyclopent-2-en-1-one', '(e)-pent-2-enoic acid', 'hexane-2,5-dione', 'phenol'], +# 'retention_time': [8.502, 10.474, 11.027, 11.456, 13.661], +# }) +# } +# return files -@pytest.fixture -def checked_files_applied_calibration(): - files = { - 'A_1': pd.DataFrame( - index=pd.Index(['unidentified', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'n-hexadecanoic acid', '9,12-octadecadienoic acid (z,z)-', 'oleic acid'], name='A_1'), - data={ - 'iupac_name': ['unidentified', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], - 'retention_time': [6.025, 36.163, 40.052, 40.492, 43.847, 43.986], - 'area': [23386, 44389, 15068, 1878180, 1456119, 6379752], - 'height': [24797, 15019, 5705, 493759, 339605, 1147599], - 'area_if_undiluted': [584650, 1109725, 376700, 46954500, 36402975, 159493800], - 'conc_vial_mg_L': [np.nan, 23.581503644987627, np.nan, 66.05436178187291, 131.18800047103497, 113.61850020825628], - 'conc_vial_if_undiluted_mg_L': [np.nan, 589.5375911246907, np.nan, 1651.3590445468228, 3279.7000117758744, 2840.462505206407], - 'fraction_of_sample_fr': [np.nan, 0.042109827937477896, np.nan, 0.11795421746763018, 0.23426428655541953, 0.20289017894331474], - 'fraction_of_feedstock_fr': [np.nan, 0.018949422571865052, np.nan, 0.053079397860433586, 0.10541892894993879, 0.09130058052449164], - 
'compound_used_for_calibration': ['n.a.', 'self', 'n.a.', 'self', 'self', 'self'], - }), - 'A_2': pd.DataFrame( - index=pd.Index(['unidentified', 'n-decanoic acid', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'n-hexadecanoic acid', '9,12-octadecadienoic acid (z,z)-', 'oleic acid'], name='A_2'), - data={ - 'iupac_name': ['unidentified', 'decanoic acid', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], - 'retention_time': [6.025, 26.284, 36.158, 40.041, 40.494, 43.847, 43.988], - 'area': [25493, 10952, 50650, 21294, 1656756, 1371069, 6394708], - 'height': [25716, 4259, 14520, 6739, 461942, 324690, 1138647], - 'area_if_undiluted': [637325, 273800, 1266250, 532350, 41418900, 34276725, 159867700], - 'conc_vial_mg_L': [np.nan, 22.78427785050836, 23.730782309318595, np.nan, 61.11672684588226, 125.38077898437679, 113.82730072166243], - 'conc_vial_if_undiluted_mg_L': [np.nan, 569.606946262709, 593.2695577329649, np.nan, 1527.9181711470565, 3134.51947460942, 2845.682518041561], - 'fraction_of_sample_fr': [np.nan, 0.04068621044733635, 0.04237639698092605, np.nan, 0.10913701222478973, 0.2238942481863871, 0.20326303700296858], - 'fraction_of_feedstock_fr': [np.nan, 0.018715656805774722, 0.019493142611225985, np.nan, 0.05020302562340328, 0.10299135416573807, 0.09350099702136555], - 'compound_used_for_calibration': ['n.a.', 'tetradecanoic acid (sim=1.0; dwt=56)', 'self', 'n.a.', 'self', 'self', 'self'], - }), - 'Ader_1': pd.DataFrame( - index=pd.Index(['unidentified', 'myristic acid, tms derivative', 'palmitelaidic acid, tms derivative', 'palmitic acid, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'], name='Ader_1'), - data={ - 'iupac_name': ['unidentified', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], - 'retention_time': 
[6.027, 38.123, 41.729, 42.157, 45.253, 45.369], - 'area': [16741, 49508, 27798, 1415205, 519476, 1724814], - 'height': [13451, 18415, 9132, 484890, 180850, 501749], - 'area_if_undiluted': [2092625, 6188500, 3474750, 176900625, 64934500, 215601750], - 'conc_vial_mg_L': [np.nan, 0.600983241036704, 2.5980281295127825, 27.623189632994073, 31.36776718294773, 21.669084708496513], - 'conc_vial_if_undiluted_mg_L': [np.nan, 75.12290512958799, 324.7535161890978, 3452.898704124259, 3920.970897868466, 2708.635588562064], - 'fraction_of_sample_fr': [np.nan, 0.005365921794970571, 0.023196679727792702, 0.24663562172316136, 0.2800693498477476, 0.193473970611576], - 'fraction_of_feedstock_fr': [np.nan, 0.0025219832436361683, 0.01090243947206257, 0.11591874220988584, 0.13163259442844139, 0.09093276618744071], - 'compound_used_for_calibration': ['n.a.', 'hexadecanoic acid (sim=1.0; dwt=28)', '(e)-octadec-9-enoic acid (sim=1.0; dwt=28)', 'self', 'self', '(e)-octadec-9-enoic acid (sim=1.0; dwt=0)'], - }), - 'Ader_2': pd.DataFrame( - index=pd.Index(['unidentified', 'myristic acid, tms derivative', 'palmitelaidic acid, tms derivative', 'palmitic acid, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'], name='Ader_2'), - data={ - 'iupac_name': ['unidentified', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], - 'retention_time': [6.027, 38.125, 41.744, 42.161, 45.258, 45.37], - 'area': [14698, 53613, 25213, 1402990, 605137, 1956560], - 'height': [12802, 18373, 8775, 496504, 202599, 594688], - 'area_if_undiluted': [1837250, 6701625, 3151625, 175373750, 75642125, 244570000], - 'conc_vial_mg_L': [np.nan, 0.6822063507301317, 2.5689779135709925, 27.38149894239597, 36.81298755438084, 24.27344499617392], - 'conc_vial_if_undiluted_mg_L': [np.nan, 85.27579384126646, 321.12223919637404, 3422.6873677994963, 4601.623444297605, 3034.1806245217404], - 
'fraction_of_sample_fr': [np.nan, 0.006091128131519033, 0.022937302799741006, 0.24447766912853547, 0.3286873888784004, 0.21672718746583858], - 'fraction_of_feedstock_fr': [np.nan, 0.0029237415031291357, 0.011009905343875682, 0.11734928118169702, 0.1577699466616322, 0.10402904998360252], - 'compound_used_for_calibration': ['n.a.', 'hexadecanoic acid (sim=1.0; dwt=28)', '(e)-octadec-9-enoic acid (sim=1.0; dwt=28)', 'self', 'self', '(e)-octadec-9-enoic acid (sim=1.0; dwt=0)'], - }), - 'B_1': pd.DataFrame( - index=pd.Index(['2-butanone', '2-cyclopenten-1-one, 2-methyl-', 'trans-2-pentenoic acid', '2,5-hexanedione', '1-hexene, 4,5-dimethyl-', 'phenol'], name='B_1'), - data={ - 'iupac_name': ['butan-2-one', '2-methylcyclopent-2-en-1-one', '(e)-pent-2-enoic acid', 'hexane-2,5-dione', '4,5-dimethylhex-1-ene', 'phenol'], - 'retention_time': [8.527, 10.507, 11.071, 11.486, 12.214, 13.687], - 'area': [147566, 69223, 40376, 441077, 19522, 200947], - 'height': [39393, 18515, 12132, 112797, 7194, 64421], - 'area_if_undiluted': [147566, 69223, 40376, 441077, 19522, 200947], - 'conc_vial_mg_L': [np.nan, 6.243800844792131, np.nan, np.nan, np.nan, 7.167230535550548], - 'conc_vial_if_undiluted_mg_L': [np.nan, 6.243800844792131, np.nan, np.nan, np.nan, 7.167230535550548], - 'fraction_of_sample_fr': [np.nan, 0.0022299288731400468, np.nan, np.nan, np.nan, 0.0025597251912680527], - 'fraction_of_feedstock_fr': [np.nan, 0.001092665147838623, np.nan, np.nan, np.nan, 0.0012542653437213457], - 'compound_used_for_calibration': ['n.a.', 'self', 'n.a.', 'n.a.', 'n.a.', 'self'], - }), - 'B_2': pd.DataFrame( - index=pd.Index(['2-butanone', '2-cyclopenten-1-one, 2-methyl-', 'trans-2-pentenoic acid', '2,5-hexanedione', 'phenol'], name='B_2'), - data={ - 'iupac_name': ['butan-2-one', '2-methylcyclopent-2-en-1-one', '(e)-pent-2-enoic acid', 'hexane-2,5-dione', 'phenol'], - 'retention_time': [8.502, 10.474, 11.027, 11.456, 13.661], - 'area': [181021, 64531, 35791, 472362, 228750], - 'height': [44551, 
19823, 12737, 120142, 75153], - 'area_if_undiluted': [181021, 64531, 35791, 472362, 228750], - 'conc_vial_mg_L': [np.nan, 6.134683722446865, np.nan, np.nan, 7.884941445329839], - 'conc_vial_if_undiluted_mg_L': [np.nan, 6.134683722446865, np.nan, np.nan, 7.884941445329839], - 'fraction_of_sample_fr': [np.nan, 0.0021909584723024517, np.nan, np.nan, 0.002816050516189228], - 'fraction_of_feedstock_fr': [np.nan, 0.0010954792361512259, np.nan, np.nan, 0.001408025258094614], - 'compound_used_for_calibration': ['n.a.', 'self', 'n.a.', 'n.a.', 'self'], - }) - } - return files +# @pytest.fixture +# def checked_files_applied_calibration(): +# files = { +# 'A_1': pd.DataFrame( +# index=pd.Index(['unidentified', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'n-hexadecanoic acid', '9,12-octadecadienoic acid (z,z)-', 'oleic acid'], name='A_1'), +# data={ +# 'iupac_name': ['unidentified', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], +# 'retention_time': [6.025, 36.163, 40.052, 40.492, 43.847, 43.986], +# 'area': [23386, 44389, 15068, 1878180, 1456119, 6379752], +# 'height': [24797, 15019, 5705, 493759, 339605, 1147599], +# 'area_if_undiluted': [584650, 1109725, 376700, 46954500, 36402975, 159493800], +# 'conc_vial_mg_L': [np.nan, 23.581503644987627, np.nan, 66.05436178187291, 131.18800047103497, 113.61850020825628], +# 'conc_vial_if_undiluted_mg_L': [np.nan, 589.5375911246907, np.nan, 1651.3590445468228, 3279.7000117758744, 2840.462505206407], +# 'fraction_of_sample_fr': [np.nan, 0.042109827937477896, np.nan, 0.11795421746763018, 0.23426428655541953, 0.20289017894331474], +# 'fraction_of_feedstock_fr': [np.nan, 0.018949422571865052, np.nan, 0.053079397860433586, 0.10541892894993879, 0.09130058052449164], +# 'compound_used_for_calibration': ['n.a.', 'self', 'n.a.', 'self', 'self', 'self'], +# }), +# 'A_2': pd.DataFrame( +# index=pd.Index(['unidentified', 'n-decanoic acid', 
'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'n-hexadecanoic acid', '9,12-octadecadienoic acid (z,z)-', 'oleic acid'], name='A_2'), +# data={ +# 'iupac_name': ['unidentified', 'decanoic acid', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], +# 'retention_time': [6.025, 26.284, 36.158, 40.041, 40.494, 43.847, 43.988], +# 'area': [25493, 10952, 50650, 21294, 1656756, 1371069, 6394708], +# 'height': [25716, 4259, 14520, 6739, 461942, 324690, 1138647], +# 'area_if_undiluted': [637325, 273800, 1266250, 532350, 41418900, 34276725, 159867700], +# 'conc_vial_mg_L': [np.nan, 22.78427785050836, 23.730782309318595, np.nan, 61.11672684588226, 125.38077898437679, 113.82730072166243], +# 'conc_vial_if_undiluted_mg_L': [np.nan, 569.606946262709, 593.2695577329649, np.nan, 1527.9181711470565, 3134.51947460942, 2845.682518041561], +# 'fraction_of_sample_fr': [np.nan, 0.04068621044733635, 0.04237639698092605, np.nan, 0.10913701222478973, 0.2238942481863871, 0.20326303700296858], +# 'fraction_of_feedstock_fr': [np.nan, 0.018715656805774722, 0.019493142611225985, np.nan, 0.05020302562340328, 0.10299135416573807, 0.09350099702136555], +# 'compound_used_for_calibration': ['n.a.', 'tetradecanoic acid (sim=1.0; dwt=56)', 'self', 'n.a.', 'self', 'self', 'self'], +# }), +# 'Ader_1': pd.DataFrame( +# index=pd.Index(['unidentified', 'myristic acid, tms derivative', 'palmitelaidic acid, tms derivative', 'palmitic acid, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'], name='Ader_1'), +# data={ +# 'iupac_name': ['unidentified', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], +# 'retention_time': [6.027, 38.123, 41.729, 42.157, 45.253, 45.369], +# 'area': [16741, 49508, 27798, 1415205, 519476, 1724814], +# 'height': [13451, 18415, 9132, 484890, 
180850, 501749], +# 'area_if_undiluted': [2092625, 6188500, 3474750, 176900625, 64934500, 215601750], +# 'conc_vial_mg_L': [np.nan, 0.600983241036704, 2.5980281295127825, 27.623189632994073, 31.36776718294773, 21.669084708496513], +# 'conc_vial_if_undiluted_mg_L': [np.nan, 75.12290512958799, 324.7535161890978, 3452.898704124259, 3920.970897868466, 2708.635588562064], +# 'fraction_of_sample_fr': [np.nan, 0.005365921794970571, 0.023196679727792702, 0.24663562172316136, 0.2800693498477476, 0.193473970611576], +# 'fraction_of_feedstock_fr': [np.nan, 0.0025219832436361683, 0.01090243947206257, 0.11591874220988584, 0.13163259442844139, 0.09093276618744071], +# 'compound_used_for_calibration': ['n.a.', 'hexadecanoic acid (sim=1.0; dwt=28)', '(e)-octadec-9-enoic acid (sim=1.0; dwt=28)', 'self', 'self', '(e)-octadec-9-enoic acid (sim=1.0; dwt=0)'], +# }), +# 'Ader_2': pd.DataFrame( +# index=pd.Index(['unidentified', 'myristic acid, tms derivative', 'palmitelaidic acid, tms derivative', 'palmitic acid, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'], name='Ader_2'), +# data={ +# 'iupac_name': ['unidentified', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], +# 'retention_time': [6.027, 38.125, 41.744, 42.161, 45.258, 45.37], +# 'area': [14698, 53613, 25213, 1402990, 605137, 1956560], +# 'height': [12802, 18373, 8775, 496504, 202599, 594688], +# 'area_if_undiluted': [1837250, 6701625, 3151625, 175373750, 75642125, 244570000], +# 'conc_vial_mg_L': [np.nan, 0.6822063507301317, 2.5689779135709925, 27.38149894239597, 36.81298755438084, 24.27344499617392], +# 'conc_vial_if_undiluted_mg_L': [np.nan, 85.27579384126646, 321.12223919637404, 3422.6873677994963, 4601.623444297605, 3034.1806245217404], +# 'fraction_of_sample_fr': [np.nan, 0.006091128131519033, 0.022937302799741006, 0.24447766912853547, 0.3286873888784004, 
0.21672718746583858], +# 'fraction_of_feedstock_fr': [np.nan, 0.0029237415031291357, 0.011009905343875682, 0.11734928118169702, 0.1577699466616322, 0.10402904998360252], +# 'compound_used_for_calibration': ['n.a.', 'hexadecanoic acid (sim=1.0; dwt=28)', '(e)-octadec-9-enoic acid (sim=1.0; dwt=28)', 'self', 'self', '(e)-octadec-9-enoic acid (sim=1.0; dwt=0)'], +# }), +# 'B_1': pd.DataFrame( +# index=pd.Index(['2-butanone', '2-cyclopenten-1-one, 2-methyl-', 'trans-2-pentenoic acid', '2,5-hexanedione', '1-hexene, 4,5-dimethyl-', 'phenol'], name='B_1'), +# data={ +# 'iupac_name': ['butan-2-one', '2-methylcyclopent-2-en-1-one', '(e)-pent-2-enoic acid', 'hexane-2,5-dione', '4,5-dimethylhex-1-ene', 'phenol'], +# 'retention_time': [8.527, 10.507, 11.071, 11.486, 12.214, 13.687], +# 'area': [147566, 69223, 40376, 441077, 19522, 200947], +# 'height': [39393, 18515, 12132, 112797, 7194, 64421], +# 'area_if_undiluted': [147566, 69223, 40376, 441077, 19522, 200947], +# 'conc_vial_mg_L': [np.nan, 6.243800844792131, np.nan, np.nan, np.nan, 7.167230535550548], +# 'conc_vial_if_undiluted_mg_L': [np.nan, 6.243800844792131, np.nan, np.nan, np.nan, 7.167230535550548], +# 'fraction_of_sample_fr': [np.nan, 0.0022299288731400468, np.nan, np.nan, np.nan, 0.0025597251912680527], +# 'fraction_of_feedstock_fr': [np.nan, 0.001092665147838623, np.nan, np.nan, np.nan, 0.0012542653437213457], +# 'compound_used_for_calibration': ['n.a.', 'self', 'n.a.', 'n.a.', 'n.a.', 'self'], +# }), +# 'B_2': pd.DataFrame( +# index=pd.Index(['2-butanone', '2-cyclopenten-1-one, 2-methyl-', 'trans-2-pentenoic acid', '2,5-hexanedione', 'phenol'], name='B_2'), +# data={ +# 'iupac_name': ['butan-2-one', '2-methylcyclopent-2-en-1-one', '(e)-pent-2-enoic acid', 'hexane-2,5-dione', 'phenol'], +# 'retention_time': [8.502, 10.474, 11.027, 11.456, 13.661], +# 'area': [181021, 64531, 35791, 472362, 228750], +# 'height': [44551, 19823, 12737, 120142, 75153], +# 'area_if_undiluted': [181021, 64531, 35791, 472362, 228750], +# 
'conc_vial_mg_L': [np.nan, 6.134683722446865, np.nan, np.nan, 7.884941445329839], +# 'conc_vial_if_undiluted_mg_L': [np.nan, 6.134683722446865, np.nan, np.nan, 7.884941445329839], +# 'fraction_of_sample_fr': [np.nan, 0.0021909584723024517, np.nan, np.nan, 0.002816050516189228], +# 'fraction_of_feedstock_fr': [np.nan, 0.0010954792361512259, np.nan, np.nan, 0.001408025258094614], +# 'compound_used_for_calibration': ['n.a.', 'self', 'n.a.', 'n.a.', 'self'], +# }) +# } +# return files -@pytest.fixture -def checked_files_info_added_stats(): - files_info = pd.DataFrame( - index=pd.Index(['A_1', 'A_2', 'Ader_1', 'Ader_2', 'B_1', 'B_2'], name='filename'), - data={ - 'samplename': ['A', 'A', 'Ader', 'Ader', 'B', 'B'], - 'derivatized': [False, False, True, True, False, False], - 'dilution_factor': [25, 25, 125, 125, 1, 1], - 'total_sample_conc_in_vial_mg_L': [560.0000000000001, 560.0000000000001, 112.0, 112.0, 2800.0, 2800.0], - 'sample_yield_on_feedstock_basis_fr': [0.45, 0.46, 0.47, 0.48, 0.49, 0.5], - 'calibration_file': ['calibration', 'calibration', 'deriv_calibration', 'deriv_calibration', 'calibration', 'calibration'], - 'max_height': [1147599.0, 1138647.0, 501749.0, 594688.0, 112797.0, 120142.0], - 'total_height': [2026484.0, 1976513.0, 1208487.0, 1333741.0, 254452.0, 272406.0], - 'max_area': [6379752.0, 6394708.0, 1724814.0, 1956560.0, 441077.0, 472362.0], - 'total_area': [9796894.0, 9530922.0, 3753542.0, 4058211.0, 918711.0, 982455.0], - 'max_area_if_undiluted': [159493800.0, 159867700.0, 215601750.0, 244570000.0, 441077.0, 472362.0], - 'total_area_if_undiluted': [244922350.0, 238273050.0, 469192750.0, 507276375.0, 918711.0, 982455.0], - 'max_conc_vial_mg_L': [131.18800047103497, 125.38077898437679, 31.36776718294773, 36.81298755438084, 7.167230535550548, 7.884941445329839], - 'total_conc_vial_mg_L': [334.4423661061518, 346.8398667117484, 83.8590528949878, 91.71911575725186, 13.411031380342678, 14.019625167776704], - 'max_conc_vial_if_undiluted_mg_L': 
[3279.7000117758744, 3134.51947460942, 3920.970897868466, 4601.623444297605, 7.167230535550548, 7.884941445329839], - 'total_conc_vial_if_undiluted_mg_L': [8361.059152653796, 8670.996667793712, 10482.381611873476, 11464.889469656482, 13.411031380342678, 14.019625167776704], - 'max_fraction_of_sample_fr': [0.23426428655541953, 0.2238942481863871, 0.2800693498477476, 0.3286873888784004, 0.0025597251912680527, 0.002816050516189228], - 'total_fraction_of_sample_fr': [0.5972185109038424, 0.6193569048424078, 0.7487415437052483, 0.8189206764040344, 0.0047896540644080995, 0.005007008988491679], - 'max_fraction_of_feedstock_fr': [0.10541892894993879, 0.10299135416573807, 0.13163259442844139, 0.1577699466616322, 0.0012542653437213457, 0.001408025258094614], - 'total_fraction_of_feedstock_fr': [0.26874832990672903, 0.28490417622750763, 0.3519085255414667, 0.39308192467393654, 0.0023469304915599686, 0.0025035044942458397], - 'compound_with_max_area': ['oleic acid', 'oleic acid', '9-octadecenoic acid, (z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative', '2,5-hexanedione', '2,5-hexanedione'], - 'compound_with_max_conc': ['9,12-octadecadienoic acid (z,z)-', '9,12-octadecadienoic acid (z,z)-', '9,12-octadecadienoic acid (z,z)-, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', 'phenol', 'phenol'], - } - ) - return files_info +# @pytest.fixture +# def checked_files_info_added_stats(): +# files_info = pd.DataFrame( +# index=pd.Index(['A_1', 'A_2', 'Ader_1', 'Ader_2', 'B_1', 'B_2'], name='filename'), +# data={ +# 'samplename': ['A', 'A', 'Ader', 'Ader', 'B', 'B'], +# 'derivatized': [False, False, True, True, False, False], +# 'dilution_factor': [25, 25, 125, 125, 1, 1], +# 'total_sample_conc_in_vial_mg_L': [560.0000000000001, 560.0000000000001, 112.0, 112.0, 2800.0, 2800.0], +# 'sample_yield_on_feedstock_basis_fr': [0.45, 0.46, 0.47, 0.48, 0.49, 0.5], +# 'calibration_file': ['calibration', 'calibration', 'deriv_calibration', 'deriv_calibration', 
'calibration', 'calibration'], +# 'max_height': [1147599.0, 1138647.0, 501749.0, 594688.0, 112797.0, 120142.0], +# 'total_height': [2026484.0, 1976513.0, 1208487.0, 1333741.0, 254452.0, 272406.0], +# 'max_area': [6379752.0, 6394708.0, 1724814.0, 1956560.0, 441077.0, 472362.0], +# 'total_area': [9796894.0, 9530922.0, 3753542.0, 4058211.0, 918711.0, 982455.0], +# 'max_area_if_undiluted': [159493800.0, 159867700.0, 215601750.0, 244570000.0, 441077.0, 472362.0], +# 'total_area_if_undiluted': [244922350.0, 238273050.0, 469192750.0, 507276375.0, 918711.0, 982455.0], +# 'max_conc_vial_mg_L': [131.18800047103497, 125.38077898437679, 31.36776718294773, 36.81298755438084, 7.167230535550548, 7.884941445329839], +# 'total_conc_vial_mg_L': [334.4423661061518, 346.8398667117484, 83.8590528949878, 91.71911575725186, 13.411031380342678, 14.019625167776704], +# 'max_conc_vial_if_undiluted_mg_L': [3279.7000117758744, 3134.51947460942, 3920.970897868466, 4601.623444297605, 7.167230535550548, 7.884941445329839], +# 'total_conc_vial_if_undiluted_mg_L': [8361.059152653796, 8670.996667793712, 10482.381611873476, 11464.889469656482, 13.411031380342678, 14.019625167776704], +# 'max_fraction_of_sample_fr': [0.23426428655541953, 0.2238942481863871, 0.2800693498477476, 0.3286873888784004, 0.0025597251912680527, 0.002816050516189228], +# 'total_fraction_of_sample_fr': [0.5972185109038424, 0.6193569048424078, 0.7487415437052483, 0.8189206764040344, 0.0047896540644080995, 0.005007008988491679], +# 'max_fraction_of_feedstock_fr': [0.10541892894993879, 0.10299135416573807, 0.13163259442844139, 0.1577699466616322, 0.0012542653437213457, 0.001408025258094614], +# 'total_fraction_of_feedstock_fr': [0.26874832990672903, 0.28490417622750763, 0.3519085255414667, 0.39308192467393654, 0.0023469304915599686, 0.0025035044942458397], +# 'compound_with_max_area': ['oleic acid', 'oleic acid', '9-octadecenoic acid, (z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative', '2,5-hexanedione', 
'2,5-hexanedione'], +# 'compound_with_max_conc': ['9,12-octadecadienoic acid (z,z)-', '9,12-octadecadienoic acid (z,z)-', '9,12-octadecadienoic acid (z,z)-, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', 'phenol', 'phenol'], +# } +# ) +# return files_info -@pytest.fixture -def checked_samples_info(): - samples_info = pd.DataFrame( - index=pd.Index(['A', 'Ader', 'B'], name='samplename'), - data={ - 'filename': [('A_1', 'A_2'), ('Ader_1', 'Ader_2'), ('B_1', 'B_2')], - 'derivatized': [(False, False), (True, True), (False, False)], - 'dilution_factor': [(25, 25), (125, 125), (1, 1)], - 'total_sample_conc_in_vial_mg_L': [(560.0000000000001, 560.0000000000001), (112.0, 112.0), (2800.0, 2800.0)], - 'sample_yield_on_feedstock_basis_fr': [(0.45, 0.46), (0.47, 0.48), (0.49, 0.5)], - 'calibration_file': [('calibration', 'calibration'), ('deriv_calibration', 'deriv_calibration'), ('calibration', 'calibration')], - 'compound_with_max_area': [('oleic acid', 'oleic acid'), ('9-octadecenoic acid, (z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'), ('2,5-hexanedione', '2,5-hexanedione')], - 'compound_with_max_conc': [('9,12-octadecadienoic acid (z,z)-', '9,12-octadecadienoic acid (z,z)-'), ('9,12-octadecadienoic acid (z,z)-, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative'), ('phenol', 'phenol')], - 'max_height': [1143123.0, 548218.5, 116469.5], - 'max_area': [6387230.0, 1840687.0, 456719.5], - 'max_area_if_undiluted': [159680750.0, 230085875.0, 456719.5], - 'max_conc_vial_mg_L': [128.28438972770587, 34.090377368664285, 7.526085990440194], - 'max_conc_vial_if_undiluted_mg_L': [3207.1097431926473, 4261.297171083035, 7.526085990440194], - 'max_fraction_of_sample_fr': [0.2290792673709033, 0.304378369363074, 0.0026878878537286406], - 'max_fraction_of_feedstock_fr': [0.10420514155783843, 0.1447012705450368, 0.00133114530090798], - 'total_height': [2001498.5, 1271114.0, 263429.0], - 'total_area': [9663908.0, 3905876.5, 950583.0], 
- 'total_area_if_undiluted': [241597700.0, 488234562.5, 950583.0], - 'total_conc_vial_mg_L': [340.6411164089501, 87.78908432611982, 13.71532827405969], - 'total_conc_vial_if_undiluted_mg_L': [8516.027910223755, 10973.635540764979, 13.71532827405969], - 'total_fraction_of_sample_fr': [0.608287707873125, 0.7838311100546413, 0.0048983315264498895], - 'total_fraction_of_feedstock_fr': [0.27682625306711833, 0.3724952251077016, 0.002425217492902904], - } - ) - return samples_info +# @pytest.fixture +# def checked_samples_info(): +# samples_info = pd.DataFrame( +# index=pd.Index(['A', 'Ader', 'B'], name='samplename'), +# data={ +# 'filename': [('A_1', 'A_2'), ('Ader_1', 'Ader_2'), ('B_1', 'B_2')], +# 'derivatized': [(False, False), (True, True), (False, False)], +# 'dilution_factor': [(25, 25), (125, 125), (1, 1)], +# 'total_sample_conc_in_vial_mg_L': [(560.0000000000001, 560.0000000000001), (112.0, 112.0), (2800.0, 2800.0)], +# 'sample_yield_on_feedstock_basis_fr': [(0.45, 0.46), (0.47, 0.48), (0.49, 0.5)], +# 'calibration_file': [('calibration', 'calibration'), ('deriv_calibration', 'deriv_calibration'), ('calibration', 'calibration')], +# 'compound_with_max_area': [('oleic acid', 'oleic acid'), ('9-octadecenoic acid, (z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'), ('2,5-hexanedione', '2,5-hexanedione')], +# 'compound_with_max_conc': [('9,12-octadecadienoic acid (z,z)-', '9,12-octadecadienoic acid (z,z)-'), ('9,12-octadecadienoic acid (z,z)-, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative'), ('phenol', 'phenol')], +# 'max_height': [1143123.0, 548218.5, 116469.5], +# 'max_area': [6387230.0, 1840687.0, 456719.5], +# 'max_area_if_undiluted': [159680750.0, 230085875.0, 456719.5], +# 'max_conc_vial_mg_L': [128.28438972770587, 34.090377368664285, 7.526085990440194], +# 'max_conc_vial_if_undiluted_mg_L': [3207.1097431926473, 4261.297171083035, 7.526085990440194], +# 'max_fraction_of_sample_fr': [0.2290792673709033, 0.304378369363074, 
0.0026878878537286406], +# 'max_fraction_of_feedstock_fr': [0.10420514155783843, 0.1447012705450368, 0.00133114530090798], +# 'total_height': [2001498.5, 1271114.0, 263429.0], +# 'total_area': [9663908.0, 3905876.5, 950583.0], +# 'total_area_if_undiluted': [241597700.0, 488234562.5, 950583.0], +# 'total_conc_vial_mg_L': [340.6411164089501, 87.78908432611982, 13.71532827405969], +# 'total_conc_vial_if_undiluted_mg_L': [8516.027910223755, 10973.635540764979, 13.71532827405969], +# 'total_fraction_of_sample_fr': [0.608287707873125, 0.7838311100546413, 0.0048983315264498895], +# 'total_fraction_of_feedstock_fr': [0.27682625306711833, 0.3724952251077016, 0.002425217492902904], +# } +# ) +# return samples_info -@pytest.fixture -def checked_samples_info_std(): - samples_info_std = pd.DataFrame( - index=pd.Index(['A', 'Ader', 'B'], name='samplename'), - data={ - 'filename': [('A_1', 'A_2'), ('Ader_1', 'Ader_2'), ('B_1', 'B_2')], - 'derivatized': [(False, False), (True, True), (False, False)], - 'dilution_factor': [(25, 25), (125, 125), (1, 1)], - 'total_sample_conc_in_vial_mg_L': [(560.0000000000001, 560.0000000000001), (112.0, 112.0), (2800.0, 2800.0)], - 'sample_yield_on_feedstock_basis_fr': [(0.45, 0.46), (0.47, 0.48), (0.49, 0.5)], - 'calibration_file': [('calibration', 'calibration'), ('deriv_calibration', 'deriv_calibration'), ('calibration', 'calibration')], - 'compound_with_max_area': [('oleic acid', 'oleic acid'), ('9-octadecenoic acid, (z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'), ('2,5-hexanedione', '2,5-hexanedione')], - 'compound_with_max_conc': [('9,12-octadecadienoic acid (z,z)-', '9,12-octadecadienoic acid (z,z)-'), ('9,12-octadecadienoic acid (z,z)-, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative'), ('phenol', 'phenol')], - 'max_height': [6330.019905181974, 65717.79713669654, 5193.699307815192], - 'max_area': [10575.489019426004, 163869.16811285765, 22121.83564942114], - 'max_area_if_undiluted': 
[264387.2254856501, 20483646.014107205, 22121.83564942114], - 'max_conc_vial_mg_L': [4.106325693068217, 3.8503522496954825, 0.5074982512365029], - 'max_conc_vial_if_undiluted_mg_L': [102.65814232670576, 481.29403121193536, 0.5074982512365029], - 'max_fraction_of_sample_fr': [0.007332724451907525, 0.03437814508656681, 0.00018124937544160814], - 'max_fraction_of_feedstock_fr': [0.001716554591745799, 0.01848189900635057, 0.00010872467812800095], - 'total_height': [35334.83296267297, 88567.95277073982, 12695.395149423275], - 'total_area': [188070.60480574841, 215433.51591732426, 45073.814659955286], - 'total_area_if_undiluted': [4701765.120143711, 26929189.48966553, 45073.814659955286], - 'total_conc_vial_mg_L': [8.766356747981709, 5.557903750459465, 0.4303407940826049], - 'total_conc_vial_if_undiluted_mg_L': [219.1589186995424, 694.7379688074318, 0.4303407940826049], - 'total_fraction_of_sample_fr': [0.015654208478538812, 0.049624140629102274, 0.00015369314074378663], - 'total_fraction_of_feedstock_fr': [0.011423908489230281, 0.029113989731069757, 0.00011071453905670015], - } - ) - return samples_info_std +# @pytest.fixture +# def checked_samples_info_std(): +# samples_info_std = pd.DataFrame( +# index=pd.Index(['A', 'Ader', 'B'], name='samplename'), +# data={ +# 'filename': [('A_1', 'A_2'), ('Ader_1', 'Ader_2'), ('B_1', 'B_2')], +# 'derivatized': [(False, False), (True, True), (False, False)], +# 'dilution_factor': [(25, 25), (125, 125), (1, 1)], +# 'total_sample_conc_in_vial_mg_L': [(560.0000000000001, 560.0000000000001), (112.0, 112.0), (2800.0, 2800.0)], +# 'sample_yield_on_feedstock_basis_fr': [(0.45, 0.46), (0.47, 0.48), (0.49, 0.5)], +# 'calibration_file': [('calibration', 'calibration'), ('deriv_calibration', 'deriv_calibration'), ('calibration', 'calibration')], +# 'compound_with_max_area': [('oleic acid', 'oleic acid'), ('9-octadecenoic acid, (z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'), ('2,5-hexanedione', '2,5-hexanedione')], +# 
'compound_with_max_conc': [('9,12-octadecadienoic acid (z,z)-', '9,12-octadecadienoic acid (z,z)-'), ('9,12-octadecadienoic acid (z,z)-, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative'), ('phenol', 'phenol')], +# 'max_height': [6330.019905181974, 65717.79713669654, 5193.699307815192], +# 'max_area': [10575.489019426004, 163869.16811285765, 22121.83564942114], +# 'max_area_if_undiluted': [264387.2254856501, 20483646.014107205, 22121.83564942114], +# 'max_conc_vial_mg_L': [4.106325693068217, 3.8503522496954825, 0.5074982512365029], +# 'max_conc_vial_if_undiluted_mg_L': [102.65814232670576, 481.29403121193536, 0.5074982512365029], +# 'max_fraction_of_sample_fr': [0.007332724451907525, 0.03437814508656681, 0.00018124937544160814], +# 'max_fraction_of_feedstock_fr': [0.001716554591745799, 0.01848189900635057, 0.00010872467812800095], +# 'total_height': [35334.83296267297, 88567.95277073982, 12695.395149423275], +# 'total_area': [188070.60480574841, 215433.51591732426, 45073.814659955286], +# 'total_area_if_undiluted': [4701765.120143711, 26929189.48966553, 45073.814659955286], +# 'total_conc_vial_mg_L': [8.766356747981709, 5.557903750459465, 0.4303407940826049], +# 'total_conc_vial_if_undiluted_mg_L': [219.1589186995424, 694.7379688074318, 0.4303407940826049], +# 'total_fraction_of_sample_fr': [0.015654208478538812, 0.049624140629102274, 0.00015369314074378663], +# 'total_fraction_of_feedstock_fr': [0.011423908489230281, 0.029113989731069757, 0.00011071453905670015], +# } +# ) +# return samples_info_std -@pytest.fixture -def checked_samples_info_no_calibrations(): - samples_info = pd.DataFrame( - index=pd.Index(['A', 'Ader', 'B'], name='samplename'), - data={ - 'filename': [('A_1', 'A_2'), ('Ader_1', 'Ader_2'), ('B_1', 'B_2')], - 'replicate_number': [('1', '2'), ('1', '2'), ('1', '2')], - 'derivatized': [(False, False), (False, False), (False, False)], - 'calibration_file': [(False, False), (False, False), (False, False)], - 'dilution_factor': [(1, 
1), (1, 1), (1, 1)], - 'total_sample_conc_in_vial_mg_L': [(1, 1), (1, 1), (1, 1)], - 'sample_yield_on_feedstock_basis_fr': [(1, 1), (1, 1), (1, 1)], - 'compound_with_max_area': [('oleic acid', 'oleic acid'), ('9-octadecenoic acid, (z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'), ('2,5-hexanedione', '2,5-hexanedione')], - 'max_height': [1143123.0, 548218.5, 116469.5], - 'max_area': [6387230.0, 1840687.0, 456719.5], - 'max_area_if_undiluted': [6387230.0, 1840687.0, 456719.5], - 'total_height': [2001498.5, 1271114.0, 263429.0], - 'total_area': [9663908.0, 3905876.5, 950583.0], - 'total_area_if_undiluted': [9663908.0, 3905876.5, 950583.0], - } -) - return samples_info +# @pytest.fixture +# def checked_samples_info_no_calibrations(): +# samples_info = pd.DataFrame( +# index=pd.Index(['A', 'Ader', 'B'], name='samplename'), +# data={ +# 'filename': [('A_1', 'A_2'), ('Ader_1', 'Ader_2'), ('B_1', 'B_2')], +# 'replicate_number': [('1', '2'), ('1', '2'), ('1', '2')], +# 'derivatized': [(False, False), (False, False), (False, False)], +# 'calibration_file': [(False, False), (False, False), (False, False)], +# 'dilution_factor': [(1, 1), (1, 1), (1, 1)], +# 'total_sample_conc_in_vial_mg_L': [(1, 1), (1, 1), (1, 1)], +# 'sample_yield_on_feedstock_basis_fr': [(1, 1), (1, 1), (1, 1)], +# 'compound_with_max_area': [('oleic acid', 'oleic acid'), ('9-octadecenoic acid, (z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'), ('2,5-hexanedione', '2,5-hexanedione')], +# 'max_height': [1143123.0, 548218.5, 116469.5], +# 'max_area': [6387230.0, 1840687.0, 456719.5], +# 'max_area_if_undiluted': [6387230.0, 1840687.0, 456719.5], +# 'total_height': [2001498.5, 1271114.0, 263429.0], +# 'total_area': [9663908.0, 3905876.5, 950583.0], +# 'total_area_if_undiluted': [9663908.0, 3905876.5, 950583.0], +# } +# ) +# return samples_info -@pytest.fixture -def checked_samples_info_no_calibrations_std(): - samples_info_std = pd.DataFrame( - index=pd.Index(['A', 'Ader', 
'B'], name='samplename'), - data={ - 'filename': [('A_1', 'A_2'), ('Ader_1', 'Ader_2'), ('B_1', 'B_2')], - 'replicate_number': [('1', '2'), ('1', '2'), ('1', '2')], - 'derivatized': [(False, False), (False, False), (False, False)], - 'calibration_file': [(False, False), (False, False), (False, False)], - 'dilution_factor': [(1, 1), (1, 1), (1, 1)], - 'total_sample_conc_in_vial_mg_L': [(1, 1), (1, 1), (1, 1)], - 'sample_yield_on_feedstock_basis_fr': [(1, 1), (1, 1), (1, 1)], - 'compound_with_max_area': [('oleic acid', 'oleic acid'), ('9-octadecenoic acid, (z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'), ('2,5-hexanedione', '2,5-hexanedione')], - 'max_height': [6330.019905181974, 65717.79713669654, 5193.699307815192], - 'max_area': [10575.489019426004, 163869.16811285765, 22121.83564942114], - 'max_area_if_undiluted': [10575.489019426004, 163869.16811285765, 22121.83564942114], - 'total_height': [35334.83296267297, 88567.95277073982, 12695.395149423275], - 'total_area': [188070.60480574841, 215433.51591732426, 45073.814659955286], - 'total_area_if_undiluted': [188070.60480574841, 215433.51591732426, 45073.814659955286], - } -) - return samples_info_std +# @pytest.fixture +# def checked_samples_info_no_calibrations_std(): +# samples_info_std = pd.DataFrame( +# index=pd.Index(['A', 'Ader', 'B'], name='samplename'), +# data={ +# 'filename': [('A_1', 'A_2'), ('Ader_1', 'Ader_2'), ('B_1', 'B_2')], +# 'replicate_number': [('1', '2'), ('1', '2'), ('1', '2')], +# 'derivatized': [(False, False), (False, False), (False, False)], +# 'calibration_file': [(False, False), (False, False), (False, False)], +# 'dilution_factor': [(1, 1), (1, 1), (1, 1)], +# 'total_sample_conc_in_vial_mg_L': [(1, 1), (1, 1), (1, 1)], +# 'sample_yield_on_feedstock_basis_fr': [(1, 1), (1, 1), (1, 1)], +# 'compound_with_max_area': [('oleic acid', 'oleic acid'), ('9-octadecenoic acid, (z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'), ('2,5-hexanedione', 
'2,5-hexanedione')], +# 'max_height': [6330.019905181974, 65717.79713669654, 5193.699307815192], +# 'max_area': [10575.489019426004, 163869.16811285765, 22121.83564942114], +# 'max_area_if_undiluted': [10575.489019426004, 163869.16811285765, 22121.83564942114], +# 'total_height': [35334.83296267297, 88567.95277073982, 12695.395149423275], +# 'total_area': [188070.60480574841, 215433.51591732426, 45073.814659955286], +# 'total_area_if_undiluted': [188070.60480574841, 215433.51591732426, 45073.814659955286], +# } +# ) +# return samples_info_std -@pytest.fixture -def checked_samples(): - samples = { - 'A': pd.DataFrame( - index=pd.Index(['unidentified', 'n-decanoic acid', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'n-hexadecanoic acid', '9,12-octadecadienoic acid (z,z)-', 'oleic acid'], name='A'), - data={ - 'iupac_name': ['unidentified', 'decanoic acid', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], - 'retention_time': [6.025, 26.284, 36.1605, 40.046499999999995, 40.492999999999995, 43.847, 43.986999999999995], - 'area': [24439.5, 10952.0, 47519.5, 18181.0, 1767468.0, 1413594.0, 6387230.0], - 'height': [25256.5, 4259.0, 14769.5, 6222.0, 477850.5, 332147.5, 1143123.0], - 'area_if_undiluted': [610987.5, 273800.0, 1187987.5, 454525.0, 44186700.0, 35339850.0, 159680750.0], - 'conc_vial_mg_L': [0.0, 22.78427785050836, 23.65614297715311, 0.0, 63.58554431387759, 128.28438972770587, 113.72290046495935], - 'conc_vial_if_undiluted_mg_L': [0.0, 569.606946262709, 591.4035744288278, 0.0, 1589.6386078469395, 3207.1097431926473, 2843.0725116239837], - 'fraction_of_sample_fr': [0.0, 0.04068621044733635, 0.042243112459201974, 0.0, 0.11354561484620995, 0.2290792673709033, 0.20307660797314164], - 'fraction_of_feedstock_fr': [0.0, 0.018715656805774722, 0.01922128259154552, 0.0, 0.051641211741918436, 0.10420514155783843, 0.0924007887729286], - 'compound_used_for_calibration': ['n.a.', 
'tetradecanoic acid (sim=1.0; dwt=56)', 'self', 'n.a.', 'self', 'self', 'self'], - } - ), - 'Ader': pd.DataFrame( - index=pd.Index(['unidentified', 'myristic acid, tms derivative', 'palmitelaidic acid, tms derivative', 'palmitic acid, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'], name='Ader'), - data={ - 'iupac_name': ['unidentified', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], - 'retention_time': [6.027, 38.123999999999995, 41.7365, 42.159, 45.2555, 45.3695], - 'area': [15719.5, 51560.5, 26505.5, 1409097.5, 562306.5, 1840687.0], - 'height': [13126.5, 18394.0, 8953.5, 490697.0, 191724.5, 548218.5], - 'area_if_undiluted': [1964937.5, 6445062.5, 3313187.5, 176137187.5, 70288312.5, 230085875.0], - 'conc_vial_mg_L': [0.0, 0.6415947958834178, 2.5835030215418877, 27.502344287695024, 34.090377368664285, 22.971264852335217], - 'conc_vial_if_undiluted_mg_L': [0.0, 80.19934948542723, 322.9378776927359, 3437.7930359618776, 4261.297171083035, 2871.4081065419023], - 'fraction_of_sample_fr': [0.0, 0.005728524963244802, 0.023066991263766854, 0.24555664542584843, 0.304378369363074, 0.2051005790387073], - 'fraction_of_feedstock_fr': [0.0, 0.002722862373382652, 0.010956172407969126, 0.11663401169579143, 0.1447012705450368, 0.09748090808552162], - 'compound_used_for_calibration': ['n.a.', 'hexadecanoic acid (sim=1.0; dwt=28)', '(e)-octadec-9-enoic acid (sim=1.0; dwt=28)', 'self', 'self', '(e)-octadec-9-enoic acid (sim=1.0; dwt=0)'], - } - ) -, - 'B': pd.DataFrame( - index=pd.Index(['1-hexene, 4,5-dimethyl-', '2-butanone', '2-cyclopenten-1-one, 2-methyl-', 'trans-2-pentenoic acid', '2,5-hexanedione', 'phenol'], name='B'), - data={ - 'iupac_name': ['4,5-dimethylhex-1-ene', 'butan-2-one', '2-methylcyclopent-2-en-1-one', '(e)-pent-2-enoic acid', 'hexane-2,5-dione', 'phenol'], - 'retention_time': [6.107, 8.5145, 10.4905, 
11.049, 11.471, 13.674], - 'area': [9761.0, 164293.5, 66877.0, 38083.5, 456719.5, 214848.5], - 'height': [3597.0, 41972.0, 19169.0, 12434.5, 116469.5, 69787.0], - 'area_if_undiluted': [9761.0, 164293.5, 66877.0, 38083.5, 456719.5, 214848.5], - 'conc_vial_mg_L': [0.0, 0.0, 6.189242283619498, 0.0, 0.0, 7.526085990440194], - 'conc_vial_if_undiluted_mg_L': [0.0, 0.0, 6.189242283619498, 0.0, 0.0, 7.526085990440194], - 'fraction_of_sample_fr': [0.0, 0.0, 0.0022104436727212492, 0.0, 0.0, 0.0026878878537286406], - 'fraction_of_feedstock_fr': [0.0, 0.0, 0.0010940721919949245, 0.0, 0.0, 0.00133114530090798], - 'compound_used_for_calibration': ['n.a.', 'n.a.', 'self', 'n.a.', 'n.a.', 'self'], - } - ) - } - return samples +# @pytest.fixture +# def checked_samples(): +# samples = { +# 'A': pd.DataFrame( +# index=pd.Index(['unidentified', 'n-decanoic acid', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'n-hexadecanoic acid', '9,12-octadecadienoic acid (z,z)-', 'oleic acid'], name='A'), +# data={ +# 'iupac_name': ['unidentified', 'decanoic acid', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], +# 'retention_time': [6.025, 26.284, 36.1605, 40.046499999999995, 40.492999999999995, 43.847, 43.986999999999995], +# 'area': [24439.5, 10952.0, 47519.5, 18181.0, 1767468.0, 1413594.0, 6387230.0], +# 'height': [25256.5, 4259.0, 14769.5, 6222.0, 477850.5, 332147.5, 1143123.0], +# 'area_if_undiluted': [610987.5, 273800.0, 1187987.5, 454525.0, 44186700.0, 35339850.0, 159680750.0], +# 'conc_vial_mg_L': [0.0, 22.78427785050836, 23.65614297715311, 0.0, 63.58554431387759, 128.28438972770587, 113.72290046495935], +# 'conc_vial_if_undiluted_mg_L': [0.0, 569.606946262709, 591.4035744288278, 0.0, 1589.6386078469395, 3207.1097431926473, 2843.0725116239837], +# 'fraction_of_sample_fr': [0.0, 0.04068621044733635, 0.042243112459201974, 0.0, 0.11354561484620995, 0.2290792673709033, 0.20307660797314164], +# 
'fraction_of_feedstock_fr': [0.0, 0.018715656805774722, 0.01922128259154552, 0.0, 0.051641211741918436, 0.10420514155783843, 0.0924007887729286], +# 'compound_used_for_calibration': ['n.a.', 'tetradecanoic acid (sim=1.0; dwt=56)', 'self', 'n.a.', 'self', 'self', 'self'], +# } +# ), +# 'Ader': pd.DataFrame( +# index=pd.Index(['unidentified', 'myristic acid, tms derivative', 'palmitelaidic acid, tms derivative', 'palmitic acid, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'], name='Ader'), +# data={ +# 'iupac_name': ['unidentified', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], +# 'retention_time': [6.027, 38.123999999999995, 41.7365, 42.159, 45.2555, 45.3695], +# 'area': [15719.5, 51560.5, 26505.5, 1409097.5, 562306.5, 1840687.0], +# 'height': [13126.5, 18394.0, 8953.5, 490697.0, 191724.5, 548218.5], +# 'area_if_undiluted': [1964937.5, 6445062.5, 3313187.5, 176137187.5, 70288312.5, 230085875.0], +# 'conc_vial_mg_L': [0.0, 0.6415947958834178, 2.5835030215418877, 27.502344287695024, 34.090377368664285, 22.971264852335217], +# 'conc_vial_if_undiluted_mg_L': [0.0, 80.19934948542723, 322.9378776927359, 3437.7930359618776, 4261.297171083035, 2871.4081065419023], +# 'fraction_of_sample_fr': [0.0, 0.005728524963244802, 0.023066991263766854, 0.24555664542584843, 0.304378369363074, 0.2051005790387073], +# 'fraction_of_feedstock_fr': [0.0, 0.002722862373382652, 0.010956172407969126, 0.11663401169579143, 0.1447012705450368, 0.09748090808552162], +# 'compound_used_for_calibration': ['n.a.', 'hexadecanoic acid (sim=1.0; dwt=28)', '(e)-octadec-9-enoic acid (sim=1.0; dwt=28)', 'self', 'self', '(e)-octadec-9-enoic acid (sim=1.0; dwt=0)'], +# } +# ) +# , +# 'B': pd.DataFrame( +# index=pd.Index(['1-hexene, 4,5-dimethyl-', '2-butanone', '2-cyclopenten-1-one, 2-methyl-', 'trans-2-pentenoic acid', '2,5-hexanedione', 'phenol'], 
name='B'), +# data={ +# 'iupac_name': ['4,5-dimethylhex-1-ene', 'butan-2-one', '2-methylcyclopent-2-en-1-one', '(e)-pent-2-enoic acid', 'hexane-2,5-dione', 'phenol'], +# 'retention_time': [6.107, 8.5145, 10.4905, 11.049, 11.471, 13.674], +# 'area': [9761.0, 164293.5, 66877.0, 38083.5, 456719.5, 214848.5], +# 'height': [3597.0, 41972.0, 19169.0, 12434.5, 116469.5, 69787.0], +# 'area_if_undiluted': [9761.0, 164293.5, 66877.0, 38083.5, 456719.5, 214848.5], +# 'conc_vial_mg_L': [0.0, 0.0, 6.189242283619498, 0.0, 0.0, 7.526085990440194], +# 'conc_vial_if_undiluted_mg_L': [0.0, 0.0, 6.189242283619498, 0.0, 0.0, 7.526085990440194], +# 'fraction_of_sample_fr': [0.0, 0.0, 0.0022104436727212492, 0.0, 0.0, 0.0026878878537286406], +# 'fraction_of_feedstock_fr': [0.0, 0.0, 0.0010940721919949245, 0.0, 0.0, 0.00133114530090798], +# 'compound_used_for_calibration': ['n.a.', 'n.a.', 'self', 'n.a.', 'n.a.', 'self'], +# } +# ) +# } +# return samples -@pytest.fixture -def checked_samples_applied_calibration(): - samples = { - 'A': pd.DataFrame( - index=pd.Index(['unidentified', 'n-decanoic acid', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'n-hexadecanoic acid', '9,12-octadecadienoic acid (z,z)-', 'oleic acid'], name='A'), - data={ - 'iupac_name': ['unidentified', 'decanoic acid', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], - 'retention_time': [6.025, 26.284, 36.1605, 40.046499999999995, 40.492999999999995, 43.847, 43.986999999999995], - 'area': [24439.5, 10952.0, 47519.5, 18181.0, 1767468.0, 1413594.0, 6387230.0], - 'height': [25256.5, 4259.0, 14769.5, 6222.0, 477850.5, 332147.5, 1143123.0], - 'area_if_undiluted': [610987.5, 273800.0, 1187987.5, 454525.0, 44186700.0, 35339850.0, 159680750.0], - 'conc_vial_mg_L': [0.0, 22.78427785050836, 23.65614297715311, 0.0, 63.58554431387759, 128.28438972770587, 113.72290046495935], - 'conc_vial_if_undiluted_mg_L': [0.0, 569.606946262709, 
591.4035744288278, 0.0, 1589.6386078469395, 3207.1097431926473, 2843.0725116239837], - 'fraction_of_sample_fr': [0.0, 0.04068621044733635, 0.042243112459201974, 0.0, 0.11354561484620995, 0.2290792673709033, 0.20307660797314164], - 'fraction_of_feedstock_fr': [0.0, 0.018715656805774722, 0.01922128259154552, 0.0, 0.051641211741918436, 0.10420514155783843, 0.0924007887729286], - 'compound_used_for_calibration': ['n.a.', 'tetradecanoic acid (sim=1.0; dwt=56)', 'self', 'n.a.', 'self', 'self', 'self'], - } - ), - 'Ader': pd.DataFrame( - index=pd.Index(['unidentified', 'myristic acid, tms derivative', 'palmitelaidic acid, tms derivative', 'palmitic acid, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'], name='Ader'), - data={ - 'iupac_name': ['unidentified', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], - 'retention_time': [6.027, 38.123999999999995, 41.7365, 42.159, 45.2555, 45.3695], - 'area': [15719.5, 51560.5, 26505.5, 1409097.5, 562306.5, 1840687.0], - 'height': [13126.5, 18394.0, 8953.5, 490697.0, 191724.5, 548218.5], - 'area_if_undiluted': [1964937.5, 6445062.5, 3313187.5, 176137187.5, 70288312.5, 230085875.0], - 'conc_vial_mg_L': [0.0, 0.6415947958834178, 2.5835030215418877, 27.502344287695024, 34.090377368664285, 22.971264852335217], - 'conc_vial_if_undiluted_mg_L': [0.0, 80.19934948542723, 322.9378776927359, 3437.7930359618776, 4261.297171083035, 2871.4081065419023], - 'fraction_of_sample_fr': [0.0, 0.005728524963244802, 0.023066991263766854, 0.24555664542584843, 0.304378369363074, 0.2051005790387073], - 'fraction_of_feedstock_fr': [0.0, 0.002722862373382652, 0.010956172407969126, 0.11663401169579143, 0.1447012705450368, 0.09748090808552162], - 'compound_used_for_calibration': ['n.a.', 'hexadecanoic acid (sim=1.0; dwt=28)', '(e)-octadec-9-enoic acid (sim=1.0; dwt=28)', 'self', 'self', '(e)-octadec-9-enoic 
acid (sim=1.0; dwt=0)'], - } - ) -, - 'B': pd.DataFrame( - index=pd.Index(['1-hexene, 4,5-dimethyl-', '2-butanone', '2-cyclopenten-1-one, 2-methyl-', 'trans-2-pentenoic acid', '2,5-hexanedione', 'phenol'], name='B'), - data={ - 'iupac_name': ['4,5-dimethylhex-1-ene', 'butan-2-one', '2-methylcyclopent-2-en-1-one', '(e)-pent-2-enoic acid', 'hexane-2,5-dione', 'phenol'], - 'retention_time': [6.107, 8.5145, 10.4905, 11.049, 11.471, 13.674], - 'area': [9761.0, 164293.5, 66877.0, 38083.5, 456719.5, 214848.5], - 'height': [3597.0, 41972.0, 19169.0, 12434.5, 116469.5, 69787.0], - 'area_if_undiluted': [9761.0, 164293.5, 66877.0, 38083.5, 456719.5, 214848.5], - 'conc_vial_mg_L': [0.0, 0.0, 6.189242283619498, 0.0, 0.0, 7.526085990440194], - 'conc_vial_if_undiluted_mg_L': [0.0, 0.0, 6.189242283619498, 0.0, 0.0, 7.526085990440194], - 'fraction_of_sample_fr': [0.0, 0.0, 0.0022104436727212492, 0.0, 0.0, 0.0026878878537286406], - 'fraction_of_feedstock_fr': [0.0, 0.0, 0.0010940721919949245, 0.0, 0.0, 0.00133114530090798], - 'compound_used_for_calibration': ['n.a.', 'n.a.', 'self', 'n.a.', 'n.a.', 'self'], - } - ) - } - return samples +# @pytest.fixture +# def checked_samples_applied_calibration(): +# samples = { +# 'A': pd.DataFrame( +# index=pd.Index(['unidentified', 'n-decanoic acid', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'n-hexadecanoic acid', '9,12-octadecadienoic acid (z,z)-', 'oleic acid'], name='A'), +# data={ +# 'iupac_name': ['unidentified', 'decanoic acid', 'tetradecanoic acid', 'oxacycloheptadecan-2-one', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], +# 'retention_time': [6.025, 26.284, 36.1605, 40.046499999999995, 40.492999999999995, 43.847, 43.986999999999995], +# 'area': [24439.5, 10952.0, 47519.5, 18181.0, 1767468.0, 1413594.0, 6387230.0], +# 'height': [25256.5, 4259.0, 14769.5, 6222.0, 477850.5, 332147.5, 1143123.0], +# 'area_if_undiluted': [610987.5, 273800.0, 1187987.5, 454525.0, 44186700.0, 35339850.0, 
159680750.0], +# 'conc_vial_mg_L': [0.0, 22.78427785050836, 23.65614297715311, 0.0, 63.58554431387759, 128.28438972770587, 113.72290046495935], +# 'conc_vial_if_undiluted_mg_L': [0.0, 569.606946262709, 591.4035744288278, 0.0, 1589.6386078469395, 3207.1097431926473, 2843.0725116239837], +# 'fraction_of_sample_fr': [0.0, 0.04068621044733635, 0.042243112459201974, 0.0, 0.11354561484620995, 0.2290792673709033, 0.20307660797314164], +# 'fraction_of_feedstock_fr': [0.0, 0.018715656805774722, 0.01922128259154552, 0.0, 0.051641211741918436, 0.10420514155783843, 0.0924007887729286], +# 'compound_used_for_calibration': ['n.a.', 'tetradecanoic acid (sim=1.0; dwt=56)', 'self', 'n.a.', 'self', 'self', 'self'], +# } +# ), +# 'Ader': pd.DataFrame( +# index=pd.Index(['unidentified', 'myristic acid, tms derivative', 'palmitelaidic acid, tms derivative', 'palmitic acid, tms derivative', '9,12-octadecadienoic acid (z,z)-, tms derivative', '9-octadecenoic acid, (z)-, tms derivative'], name='Ader'), +# data={ +# 'iupac_name': ['unidentified', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid'], +# 'retention_time': [6.027, 38.123999999999995, 41.7365, 42.159, 45.2555, 45.3695], +# 'area': [15719.5, 51560.5, 26505.5, 1409097.5, 562306.5, 1840687.0], +# 'height': [13126.5, 18394.0, 8953.5, 490697.0, 191724.5, 548218.5], +# 'area_if_undiluted': [1964937.5, 6445062.5, 3313187.5, 176137187.5, 70288312.5, 230085875.0], +# 'conc_vial_mg_L': [0.0, 0.6415947958834178, 2.5835030215418877, 27.502344287695024, 34.090377368664285, 22.971264852335217], +# 'conc_vial_if_undiluted_mg_L': [0.0, 80.19934948542723, 322.9378776927359, 3437.7930359618776, 4261.297171083035, 2871.4081065419023], +# 'fraction_of_sample_fr': [0.0, 0.005728524963244802, 0.023066991263766854, 0.24555664542584843, 0.304378369363074, 0.2051005790387073], +# 'fraction_of_feedstock_fr': [0.0, 0.002722862373382652, 0.010956172407969126, 
0.11663401169579143, 0.1447012705450368, 0.09748090808552162], +# 'compound_used_for_calibration': ['n.a.', 'hexadecanoic acid (sim=1.0; dwt=28)', '(e)-octadec-9-enoic acid (sim=1.0; dwt=28)', 'self', 'self', '(e)-octadec-9-enoic acid (sim=1.0; dwt=0)'], +# } +# ) +# , +# 'B': pd.DataFrame( +# index=pd.Index(['1-hexene, 4,5-dimethyl-', '2-butanone', '2-cyclopenten-1-one, 2-methyl-', 'trans-2-pentenoic acid', '2,5-hexanedione', 'phenol'], name='B'), +# data={ +# 'iupac_name': ['4,5-dimethylhex-1-ene', 'butan-2-one', '2-methylcyclopent-2-en-1-one', '(e)-pent-2-enoic acid', 'hexane-2,5-dione', 'phenol'], +# 'retention_time': [6.107, 8.5145, 10.4905, 11.049, 11.471, 13.674], +# 'area': [9761.0, 164293.5, 66877.0, 38083.5, 456719.5, 214848.5], +# 'height': [3597.0, 41972.0, 19169.0, 12434.5, 116469.5, 69787.0], +# 'area_if_undiluted': [9761.0, 164293.5, 66877.0, 38083.5, 456719.5, 214848.5], +# 'conc_vial_mg_L': [0.0, 0.0, 6.189242283619498, 0.0, 0.0, 7.526085990440194], +# 'conc_vial_if_undiluted_mg_L': [0.0, 0.0, 6.189242283619498, 0.0, 0.0, 7.526085990440194], +# 'fraction_of_sample_fr': [0.0, 0.0, 0.0022104436727212492, 0.0, 0.0, 0.0026878878537286406], +# 'fraction_of_feedstock_fr': [0.0, 0.0, 0.0010940721919949245, 0.0, 0.0, 0.00133114530090798], +# 'compound_used_for_calibration': ['n.a.', 'n.a.', 'self', 'n.a.', 'n.a.', 'self'], +# } +# ) +# } +# return samples -@pytest.fixture -def checked_files_param_reports(): - reports = { - 'height': pd.DataFrame( - index=pd.Index(['(z)-octadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', 'hexane-2,5-dione', 'phenol', 'butan-2-one', '2-methylcyclopent-2-en-1-one', 'tetradecanoic acid', '(e)-pent-2-enoic acid', '(e)-hexadec-9-enoic acid', '4,5-dimethylhex-1-ene', 'oxacycloheptadecan-2-one', 'decanoic acid'], name='height'), - data={ - 'A_1': [1147599.0, 493759.0, 339605.0, 0.0, 0.0, 0.0, 0.0, 15019.0, 0.0, 0.0, 0.0, 5705.0, 0.0], - 'A_2': [1138647.0, 461942.0, 324690.0, 0.0, 0.0, 0.0, 0.0, 
14520.0, 0.0, 0.0, 0.0, 6739.0, 4259.0], - 'Ader_1': [501749.0, 484890.0, 180850.0, 0.0, 0.0, 0.0, 0.0, 18415.0, 0.0, 9132.0, 0.0, 0.0, 0.0], - 'Ader_2': [594688.0, 496504.0, 202599.0, 0.0, 0.0, 0.0, 0.0, 18373.0, 0.0, 8775.0, 0.0, 0.0, 0.0], - 'B_1': [0.0, 0.0, 0.0, 112797.0, 64421.0, 39393.0, 18515.0, 0.0, 12132.0, 0.0, 7194.0, 0.0, 0.0], - 'B_2': [0.0, 0.0, 0.0, 120142.0, 75153.0, 44551.0, 19823.0, 0.0, 12737.0, 0.0, 0.0, 0.0, 0.0], - } - ), - 'area': pd.DataFrame( - index=pd.Index(['(z)-octadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', 'hexane-2,5-dione', 'phenol', 'butan-2-one', '2-methylcyclopent-2-en-1-one', 'tetradecanoic acid', '(e)-pent-2-enoic acid', '(e)-hexadec-9-enoic acid', 'oxacycloheptadecan-2-one', '4,5-dimethylhex-1-ene', 'decanoic acid'], name='area'), - data={ - 'A_1': [6379752.0, 1878180.0, 1456119.0, 0.0, 0.0, 0.0, 0.0, 44389.0, 0.0, 0.0, 15068.0, 0.0, 0.0], - 'A_2': [6394708.0, 1656756.0, 1371069.0, 0.0, 0.0, 0.0, 0.0, 50650.0, 0.0, 0.0, 21294.0, 0.0, 10952.0], - 'Ader_1': [1724814.0, 1415205.0, 519476.0, 0.0, 0.0, 0.0, 0.0, 49508.0, 0.0, 27798.0, 0.0, 0.0, 0.0], - 'Ader_2': [1956560.0, 1402990.0, 605137.0, 0.0, 0.0, 0.0, 0.0, 53613.0, 0.0, 25213.0, 0.0, 0.0, 0.0], - 'B_1': [0.0, 0.0, 0.0, 441077.0, 200947.0, 147566.0, 69223.0, 0.0, 40376.0, 0.0, 0.0, 19522.0, 0.0], - 'B_2': [0.0, 0.0, 0.0, 472362.0, 228750.0, 181021.0, 64531.0, 0.0, 35791.0, 0.0, 0.0, 0.0, 0.0], - } - ), - 'area_if_undiluted': pd.DataFrame( - index=pd.Index(['(z)-octadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'oxacycloheptadecan-2-one', 'hexane-2,5-dione', 'decanoic acid', 'phenol', 'butan-2-one', '2-methylcyclopent-2-en-1-one', '(e)-pent-2-enoic acid', '4,5-dimethylhex-1-ene'], name='area_if_undiluted'), - data={ - 'A_1': [159493800.0, 46954500.0, 36402975.0, 1109725.0, 0.0, 376700.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - 'A_2': [159867700.0, 41418900.0, 
34276725.0, 1266250.0, 0.0, 532350.0, 0.0, 273800.0, 0.0, 0.0, 0.0, 0.0, 0.0], - 'Ader_1': [215601750.0, 176900625.0, 64934500.0, 6188500.0, 3474750.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - 'Ader_2': [244570000.0, 175373750.0, 75642125.0, 6701625.0, 3151625.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - 'B_1': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 441077.0, 0.0, 200947.0, 147566.0, 69223.0, 40376.0, 19522.0], - 'B_2': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 472362.0, 0.0, 228750.0, 181021.0, 64531.0, 35791.0, 0.0], - } - ), - 'conc_vial_mg_L': pd.DataFrame( - index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid', 'hexadecanoic acid', 'tetradecanoic acid', 'decanoic acid', 'phenol', '2-methylcyclopent-2-en-1-one', '(e)-hexadec-9-enoic acid'], name='conc_vial_mg_L'), - data={ - 'A_1': [131.18800047103497, 113.61850020825628, 66.05436178187291, 23.581503644987627, 0.0, 0.0, 0.0, 0.0], - 'A_2': [125.38077898437679, 113.82730072166243, 61.11672684588226, 23.730782309318595, 22.78427785050836, 0.0, 0.0, 0.0], - 'Ader_1': [31.36776718294773, 21.669084708496513, 27.623189632994073, 0.600983241036704, 0.0, 0.0, 0.0, 2.5980281295127825], - 'Ader_2': [36.81298755438084, 24.27344499617392, 27.38149894239597, 0.6822063507301317, 0.0, 0.0, 0.0, 2.5689779135709925], - 'B_1': [0.0, 0.0, 0.0, 0.0, 0.0, 7.167230535550548, 6.243800844792131, 0.0], - 'B_2': [0.0, 0.0, 0.0, 0.0, 0.0, 7.884941445329839, 6.134683722446865, 0.0], - } - ), - 'conc_vial_if_undiluted_mg_L': pd.DataFrame( - index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', 'hexadecanoic acid', '(z)-octadec-9-enoic acid', 'tetradecanoic acid', 'decanoic acid', '(e)-hexadec-9-enoic acid', 'phenol', '2-methylcyclopent-2-en-1-one'], name='conc_vial_if_undiluted_mg_L'), - data={ - 'A_1': [3279.7000117758744, 1651.3590445468228, 2840.462505206407, 589.5375911246907, 0.0, 0.0, 0.0, 0.0], - 'A_2': [3134.51947460942, 1527.9181711470565, 2845.682518041561, 593.2695577329649, 569.606946262709, 0.0, 0.0, 0.0], - 
'Ader_1': [3920.970897868466, 3452.898704124259, 2708.635588562064, 75.12290512958799, 0.0, 324.7535161890978, 0.0, 0.0], - 'Ader_2': [4601.623444297605, 3422.6873677994963, 3034.1806245217404, 85.27579384126646, 0.0, 321.12223919637404, 0.0, 0.0], - 'B_1': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.167230535550548, 6.243800844792131], - 'B_2': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.884941445329839, 6.134683722446865], - } - ), - 'fraction_of_sample_fr': pd.DataFrame( - index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', 'hexadecanoic acid', '(z)-octadec-9-enoic acid', 'tetradecanoic acid', 'decanoic acid', '(e)-hexadec-9-enoic acid', 'phenol', '2-methylcyclopent-2-en-1-one'], name='fraction_of_sample_fr'), - data={ - 'A_1': [0.23426428655541953, 0.11795421746763018, 0.20289017894331474, 0.042109827937477896, 0.0, 0.0, 0.0, 0.0], - 'A_2': [0.2238942481863871, 0.10913701222478973, 0.20326303700296858, 0.04237639698092605, 0.04068621044733635, 0.0, 0.0, 0.0], - 'Ader_1': [0.2800693498477476, 0.24663562172316136, 0.193473970611576, 0.005365921794970571, 0.0, 0.023196679727792702, 0.0, 0.0], - 'Ader_2': [0.3286873888784004, 0.24447766912853547, 0.21672718746583858, 0.006091128131519033, 0.0, 0.022937302799741006, 0.0, 0.0], - 'B_1': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0025597251912680527, 0.0022299288731400468], - 'B_2': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.002816050516189228, 0.0021909584723024517], - } - ), - 'fraction_of_feedstock_fr': pd.DataFrame( - index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', 'hexadecanoic acid', '(z)-octadec-9-enoic acid', 'tetradecanoic acid', 'decanoic acid', '(e)-hexadec-9-enoic acid', 'phenol', '2-methylcyclopent-2-en-1-one'], name='fraction_of_feedstock_fr'), - data={ - 'A_1': [0.10541892894993879, 0.053079397860433586, 0.09130058052449164, 0.018949422571865052, 0.0, 0.0, 0.0, 0.0], - 'A_2': [0.10299135416573807, 0.05020302562340328, 0.09350099702136555, 0.019493142611225985, 0.018715656805774722, 0.0, 0.0, 0.0], - 'Ader_1': [0.13163259442844139, 
0.11591874220988584, 0.09093276618744071, 0.0025219832436361683, 0.0, 0.01090243947206257, 0.0, 0.0], - 'Ader_2': [0.1577699466616322, 0.11734928118169702, 0.10402904998360252, 0.0029237415031291357, 0.0, 0.011009905343875682, 0.0, 0.0], - 'B_1': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0012542653437213457, 0.001092665147838623], - 'B_2': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.001408025258094614, 0.0010954792361512259], - } - ), +# @pytest.fixture +# def checked_files_param_reports(): +# reports = { +# 'height': pd.DataFrame( +# index=pd.Index(['(z)-octadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', 'hexane-2,5-dione', 'phenol', 'butan-2-one', '2-methylcyclopent-2-en-1-one', 'tetradecanoic acid', '(e)-pent-2-enoic acid', '(e)-hexadec-9-enoic acid', '4,5-dimethylhex-1-ene', 'oxacycloheptadecan-2-one', 'decanoic acid'], name='height'), +# data={ +# 'A_1': [1147599.0, 493759.0, 339605.0, 0.0, 0.0, 0.0, 0.0, 15019.0, 0.0, 0.0, 0.0, 5705.0, 0.0], +# 'A_2': [1138647.0, 461942.0, 324690.0, 0.0, 0.0, 0.0, 0.0, 14520.0, 0.0, 0.0, 0.0, 6739.0, 4259.0], +# 'Ader_1': [501749.0, 484890.0, 180850.0, 0.0, 0.0, 0.0, 0.0, 18415.0, 0.0, 9132.0, 0.0, 0.0, 0.0], +# 'Ader_2': [594688.0, 496504.0, 202599.0, 0.0, 0.0, 0.0, 0.0, 18373.0, 0.0, 8775.0, 0.0, 0.0, 0.0], +# 'B_1': [0.0, 0.0, 0.0, 112797.0, 64421.0, 39393.0, 18515.0, 0.0, 12132.0, 0.0, 7194.0, 0.0, 0.0], +# 'B_2': [0.0, 0.0, 0.0, 120142.0, 75153.0, 44551.0, 19823.0, 0.0, 12737.0, 0.0, 0.0, 0.0, 0.0], +# } +# ), +# 'area': pd.DataFrame( +# index=pd.Index(['(z)-octadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', 'hexane-2,5-dione', 'phenol', 'butan-2-one', '2-methylcyclopent-2-en-1-one', 'tetradecanoic acid', '(e)-pent-2-enoic acid', '(e)-hexadec-9-enoic acid', 'oxacycloheptadecan-2-one', '4,5-dimethylhex-1-ene', 'decanoic acid'], name='area'), +# data={ +# 'A_1': [6379752.0, 1878180.0, 1456119.0, 0.0, 0.0, 0.0, 0.0, 44389.0, 0.0, 0.0, 15068.0, 0.0, 0.0], +# 'A_2': [6394708.0, 
1656756.0, 1371069.0, 0.0, 0.0, 0.0, 0.0, 50650.0, 0.0, 0.0, 21294.0, 0.0, 10952.0], +# 'Ader_1': [1724814.0, 1415205.0, 519476.0, 0.0, 0.0, 0.0, 0.0, 49508.0, 0.0, 27798.0, 0.0, 0.0, 0.0], +# 'Ader_2': [1956560.0, 1402990.0, 605137.0, 0.0, 0.0, 0.0, 0.0, 53613.0, 0.0, 25213.0, 0.0, 0.0, 0.0], +# 'B_1': [0.0, 0.0, 0.0, 441077.0, 200947.0, 147566.0, 69223.0, 0.0, 40376.0, 0.0, 0.0, 19522.0, 0.0], +# 'B_2': [0.0, 0.0, 0.0, 472362.0, 228750.0, 181021.0, 64531.0, 0.0, 35791.0, 0.0, 0.0, 0.0, 0.0], +# } +# ), +# 'area_if_undiluted': pd.DataFrame( +# index=pd.Index(['(z)-octadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'oxacycloheptadecan-2-one', 'hexane-2,5-dione', 'decanoic acid', 'phenol', 'butan-2-one', '2-methylcyclopent-2-en-1-one', '(e)-pent-2-enoic acid', '4,5-dimethylhex-1-ene'], name='area_if_undiluted'), +# data={ +# 'A_1': [159493800.0, 46954500.0, 36402975.0, 1109725.0, 0.0, 376700.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], +# 'A_2': [159867700.0, 41418900.0, 34276725.0, 1266250.0, 0.0, 532350.0, 0.0, 273800.0, 0.0, 0.0, 0.0, 0.0, 0.0], +# 'Ader_1': [215601750.0, 176900625.0, 64934500.0, 6188500.0, 3474750.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], +# 'Ader_2': [244570000.0, 175373750.0, 75642125.0, 6701625.0, 3151625.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], +# 'B_1': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 441077.0, 0.0, 200947.0, 147566.0, 69223.0, 40376.0, 19522.0], +# 'B_2': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 472362.0, 0.0, 228750.0, 181021.0, 64531.0, 35791.0, 0.0], +# } +# ), +# 'conc_vial_mg_L': pd.DataFrame( +# index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid', 'hexadecanoic acid', 'tetradecanoic acid', 'decanoic acid', 'phenol', '2-methylcyclopent-2-en-1-one', '(e)-hexadec-9-enoic acid'], name='conc_vial_mg_L'), +# data={ +# 'A_1': [131.18800047103497, 113.61850020825628, 66.05436178187291, 23.581503644987627, 0.0, 0.0, 0.0, 0.0], +# 'A_2': 
[125.38077898437679, 113.82730072166243, 61.11672684588226, 23.730782309318595, 22.78427785050836, 0.0, 0.0, 0.0], +# 'Ader_1': [31.36776718294773, 21.669084708496513, 27.623189632994073, 0.600983241036704, 0.0, 0.0, 0.0, 2.5980281295127825], +# 'Ader_2': [36.81298755438084, 24.27344499617392, 27.38149894239597, 0.6822063507301317, 0.0, 0.0, 0.0, 2.5689779135709925], +# 'B_1': [0.0, 0.0, 0.0, 0.0, 0.0, 7.167230535550548, 6.243800844792131, 0.0], +# 'B_2': [0.0, 0.0, 0.0, 0.0, 0.0, 7.884941445329839, 6.134683722446865, 0.0], +# } +# ), +# 'conc_vial_if_undiluted_mg_L': pd.DataFrame( +# index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', 'hexadecanoic acid', '(z)-octadec-9-enoic acid', 'tetradecanoic acid', 'decanoic acid', '(e)-hexadec-9-enoic acid', 'phenol', '2-methylcyclopent-2-en-1-one'], name='conc_vial_if_undiluted_mg_L'), +# data={ +# 'A_1': [3279.7000117758744, 1651.3590445468228, 2840.462505206407, 589.5375911246907, 0.0, 0.0, 0.0, 0.0], +# 'A_2': [3134.51947460942, 1527.9181711470565, 2845.682518041561, 593.2695577329649, 569.606946262709, 0.0, 0.0, 0.0], +# 'Ader_1': [3920.970897868466, 3452.898704124259, 2708.635588562064, 75.12290512958799, 0.0, 324.7535161890978, 0.0, 0.0], +# 'Ader_2': [4601.623444297605, 3422.6873677994963, 3034.1806245217404, 85.27579384126646, 0.0, 321.12223919637404, 0.0, 0.0], +# 'B_1': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.167230535550548, 6.243800844792131], +# 'B_2': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.884941445329839, 6.134683722446865], +# } +# ), +# 'fraction_of_sample_fr': pd.DataFrame( +# index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', 'hexadecanoic acid', '(z)-octadec-9-enoic acid', 'tetradecanoic acid', 'decanoic acid', '(e)-hexadec-9-enoic acid', 'phenol', '2-methylcyclopent-2-en-1-one'], name='fraction_of_sample_fr'), +# data={ +# 'A_1': [0.23426428655541953, 0.11795421746763018, 0.20289017894331474, 0.042109827937477896, 0.0, 0.0, 0.0, 0.0], +# 'A_2': [0.2238942481863871, 0.10913701222478973, 0.20326303700296858, 
0.04237639698092605, 0.04068621044733635, 0.0, 0.0, 0.0], +# 'Ader_1': [0.2800693498477476, 0.24663562172316136, 0.193473970611576, 0.005365921794970571, 0.0, 0.023196679727792702, 0.0, 0.0], +# 'Ader_2': [0.3286873888784004, 0.24447766912853547, 0.21672718746583858, 0.006091128131519033, 0.0, 0.022937302799741006, 0.0, 0.0], +# 'B_1': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0025597251912680527, 0.0022299288731400468], +# 'B_2': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.002816050516189228, 0.0021909584723024517], +# } +# ), +# 'fraction_of_feedstock_fr': pd.DataFrame( +# index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', 'hexadecanoic acid', '(z)-octadec-9-enoic acid', 'tetradecanoic acid', 'decanoic acid', '(e)-hexadec-9-enoic acid', 'phenol', '2-methylcyclopent-2-en-1-one'], name='fraction_of_feedstock_fr'), +# data={ +# 'A_1': [0.10541892894993879, 0.053079397860433586, 0.09130058052449164, 0.018949422571865052, 0.0, 0.0, 0.0, 0.0], +# 'A_2': [0.10299135416573807, 0.05020302562340328, 0.09350099702136555, 0.019493142611225985, 0.018715656805774722, 0.0, 0.0, 0.0], +# 'Ader_1': [0.13163259442844139, 0.11591874220988584, 0.09093276618744071, 0.0025219832436361683, 0.0, 0.01090243947206257, 0.0, 0.0], +# 'Ader_2': [0.1577699466616322, 0.11734928118169702, 0.10402904998360252, 0.0029237415031291357, 0.0, 0.011009905343875682, 0.0, 0.0], +# 'B_1': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0012542653437213457, 0.001092665147838623], +# 'B_2': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.001408025258094614, 0.0010954792361512259], +# } +# ), - } - return reports +# } +# return reports -@pytest.fixture -def checked_files_param_aggrreps(): - reports = { - 'height': pd.DataFrame( - index=pd.Index(['C-aliph', 'carboxyl', 'ketone', 'C-arom', 'alcohol', 'ester'], name='height'), - data={ - 'A_1': [1673299.018636137, 327039.28314786457, 0.0, 0.0, 0.0, 1301.4243936952162], - 'A_2': [1630562.5435198732, 318647.09040130535, 0.0, 0.0, 0.0, 1537.300436303604], - 'Ader_1': [995669.7397917995, 199362.50083044838, 
0.0, 0.0, 0.0, 0.0], - 'Ader_2': [1101297.4487493301, 219631.74381979596, 0.0, 0.0, 0.0, 0.0], - 'B_1': [30189.53968239826, 5454.916540151818, 154971.12017291295, 52781.27325470194, 11641.780331526938, 0.0], - 'B_2': [24976.13637228884, 5726.94295844986, 167175.26975375, 61574.19209435766, 13581.203602167678, 0.0], - } - ), - 'area': pd.DataFrame( - index=pd.Index(['C-aliph', 'carboxyl', 'ketone', 'C-arom', 'alcohol', 'ester'], name='area'), - data={ - 'A_1': [8180808.865926539, 1588883.8460032765, 0.0, 0.0, 0.0, 3437.311615109468], - 'A_2': [7957332.218705356, 1542835.8875348074, 0.0, 0.0, 0.0, 4857.58650996423], - 'Ader_1': [3115375.5519706816, 621383.3360462904, 0.0, 0.0, 0.0, 0.0], - 'Ader_2': [3373187.7167576416, 670272.3970037951, 0.0, 0.0, 0.0, 0.0], - 'B_1': [102813.63412565118, 18154.278785457453, 598982.0949258526, 164639.4578897035, 36313.94781638508, 0.0], - 'B_2': [85688.75337144051, 16092.723202157411, 653960.780006639, 187418.9512272872, 41338.34077143768, 0.0], - } - ), - 'area_if_undiluted': pd.DataFrame( - index=pd.Index(['C-aliph', 'carboxyl', 'ketone', 'C-arom', 'ester', 'alcohol'], name='area_if_undiluted'), - data={ - 'A_1': [204520221.64816347, 39722096.15008192, 0.0, 0.0, 85932.7903777367, 0.0], - 'A_2': [198933305.4676339, 38570897.18837019, 0.0, 0.0, 121439.66274910574, 0.0], - 'Ader_1': [389421943.99633527, 77672917.00578631, 0.0, 0.0, 0.0, 0.0], - 'Ader_2': [421648464.59470516, 83784049.6254744, 0.0, 0.0, 0.0, 0.0], - 'B_1': [102813.63412565118, 18154.278785457453, 598982.0949258526, 164639.4578897035, 0.0, 36313.94781638508], - 'B_2': [85688.75337144051, 16092.723202157411, 653960.780006639, 187418.9512272872, 0.0, 41338.34077143768], - } - ), - 'conc_vial_mg_L': pd.DataFrame( - index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='conc_vial_mg_L'), - data={ - 'A_1': [279.04506020687086, 55.41196015223182, 0.0, 0.0, 0.0], - 'A_2': [287.2245498713612, 59.62973999202588, 0.0, 0.0, 0.0], - 'Ader_1': 
[69.94687725385289, 13.916672123312216, 0.0, 0.0, 0.0], - 'Ader_2': [76.54999067692155, 15.17432861960896, 0.0, 0.0, 0.0], - 'B_1': [2.7332298278341587, 0.0, 5.872239694763155, 3.706919814979471, 1.2952193148242288], - 'B_2': [2.6854637025308077, 0.0, 6.460273032447163, 3.6421374119160257, 1.424919765813671], - } - ), - 'conc_vial_if_undiluted_mg_L': pd.DataFrame( - index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='conc_vial_if_undiluted_mg_L'), - data={ - 'A_1': [6976.126505171772, 1385.2990038057956, 0.0, 0.0, 0.0], - 'A_2': [7180.61374678403, 1490.7434998006472, 0.0, 0.0, 0.0], - 'Ader_1': [8743.35965673161, 1739.5840154140271, 0.0, 0.0, 0.0], - 'Ader_2': [9568.748834615195, 1896.79107745112, 0.0, 0.0, 0.0], - 'B_1': [2.7332298278341587, 0.0, 5.872239694763155, 3.706919814979471, 1.2952193148242288], - 'B_2': [2.6854637025308077, 0.0, 6.460273032447163, 3.6421374119160257, 1.424919765813671], - } - ), - 'fraction_of_sample_fr': pd.DataFrame( - index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='fraction_of_sample_fr'), - data={ - 'A_1': [0.4982947503694122, 0.09894992884327108, 0.0, 0.0, 0.0], - 'A_2': [0.5129009819131448, 0.10648167855718907, 0.0, 0.0, 0.0], - 'Ader_1': [0.6245256897665437, 0.12425600110100192, 0.0, 0.0, 0.0], - 'Ader_2': [0.6834820596153709, 0.1354850769607943, 0.0, 0.0, 0.0], - 'B_1': [0.0009761535099407709, 0.0, 0.0020972284624154124, 0.0013238999339212397, 0.00046257832672293885], - 'B_2': [0.0009590941794752884, 0.0, 0.0023072403687311297, 0.0013007633613985805, 0.0005088999163620254], - } - ), - 'fraction_of_feedstock_fr': pd.DataFrame( - index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='fraction_of_feedstock_fr'), - data={ - 'A_1': [0.22423263766623547, 0.04452746797947199, 0.0, 0.0, 0.0], - 'A_2': [0.23593445168004665, 0.04898157213630697, 0.0, 0.0, 0.0], - 'Ader_1': [0.29352707419027546, 0.058400320517470905, 0.0, 0.0, 0.0], - 'Ader_2': [0.32807138861537805, 
0.06503283694118125, 0.0, 0.0, 0.0], - 'B_1': [0.00047831521987097775, 0.0, 0.001027641946583552, 0.0006487109676214074, 0.00022666338009424], - 'B_2': [0.0004795470897376442, 0.0, 0.0011536201843655649, 0.0006503816806992902, 0.0002544499581810127], - } - ) +# @pytest.fixture +# def checked_files_param_aggrreps(): +# reports = { +# 'height': pd.DataFrame( +# index=pd.Index(['C-aliph', 'carboxyl', 'ketone', 'C-arom', 'alcohol', 'ester'], name='height'), +# data={ +# 'A_1': [1673299.018636137, 327039.28314786457, 0.0, 0.0, 0.0, 1301.4243936952162], +# 'A_2': [1630562.5435198732, 318647.09040130535, 0.0, 0.0, 0.0, 1537.300436303604], +# 'Ader_1': [995669.7397917995, 199362.50083044838, 0.0, 0.0, 0.0, 0.0], +# 'Ader_2': [1101297.4487493301, 219631.74381979596, 0.0, 0.0, 0.0, 0.0], +# 'B_1': [30189.53968239826, 5454.916540151818, 154971.12017291295, 52781.27325470194, 11641.780331526938, 0.0], +# 'B_2': [24976.13637228884, 5726.94295844986, 167175.26975375, 61574.19209435766, 13581.203602167678, 0.0], +# } +# ), +# 'area': pd.DataFrame( +# index=pd.Index(['C-aliph', 'carboxyl', 'ketone', 'C-arom', 'alcohol', 'ester'], name='area'), +# data={ +# 'A_1': [8180808.865926539, 1588883.8460032765, 0.0, 0.0, 0.0, 3437.311615109468], +# 'A_2': [7957332.218705356, 1542835.8875348074, 0.0, 0.0, 0.0, 4857.58650996423], +# 'Ader_1': [3115375.5519706816, 621383.3360462904, 0.0, 0.0, 0.0, 0.0], +# 'Ader_2': [3373187.7167576416, 670272.3970037951, 0.0, 0.0, 0.0, 0.0], +# 'B_1': [102813.63412565118, 18154.278785457453, 598982.0949258526, 164639.4578897035, 36313.94781638508, 0.0], +# 'B_2': [85688.75337144051, 16092.723202157411, 653960.780006639, 187418.9512272872, 41338.34077143768, 0.0], +# } +# ), +# 'area_if_undiluted': pd.DataFrame( +# index=pd.Index(['C-aliph', 'carboxyl', 'ketone', 'C-arom', 'ester', 'alcohol'], name='area_if_undiluted'), +# data={ +# 'A_1': [204520221.64816347, 39722096.15008192, 0.0, 0.0, 85932.7903777367, 0.0], +# 'A_2': [198933305.4676339, 
38570897.18837019, 0.0, 0.0, 121439.66274910574, 0.0], +# 'Ader_1': [389421943.99633527, 77672917.00578631, 0.0, 0.0, 0.0, 0.0], +# 'Ader_2': [421648464.59470516, 83784049.6254744, 0.0, 0.0, 0.0, 0.0], +# 'B_1': [102813.63412565118, 18154.278785457453, 598982.0949258526, 164639.4578897035, 0.0, 36313.94781638508], +# 'B_2': [85688.75337144051, 16092.723202157411, 653960.780006639, 187418.9512272872, 0.0, 41338.34077143768], +# } +# ), +# 'conc_vial_mg_L': pd.DataFrame( +# index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='conc_vial_mg_L'), +# data={ +# 'A_1': [279.04506020687086, 55.41196015223182, 0.0, 0.0, 0.0], +# 'A_2': [287.2245498713612, 59.62973999202588, 0.0, 0.0, 0.0], +# 'Ader_1': [69.94687725385289, 13.916672123312216, 0.0, 0.0, 0.0], +# 'Ader_2': [76.54999067692155, 15.17432861960896, 0.0, 0.0, 0.0], +# 'B_1': [2.7332298278341587, 0.0, 5.872239694763155, 3.706919814979471, 1.2952193148242288], +# 'B_2': [2.6854637025308077, 0.0, 6.460273032447163, 3.6421374119160257, 1.424919765813671], +# } +# ), +# 'conc_vial_if_undiluted_mg_L': pd.DataFrame( +# index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='conc_vial_if_undiluted_mg_L'), +# data={ +# 'A_1': [6976.126505171772, 1385.2990038057956, 0.0, 0.0, 0.0], +# 'A_2': [7180.61374678403, 1490.7434998006472, 0.0, 0.0, 0.0], +# 'Ader_1': [8743.35965673161, 1739.5840154140271, 0.0, 0.0, 0.0], +# 'Ader_2': [9568.748834615195, 1896.79107745112, 0.0, 0.0, 0.0], +# 'B_1': [2.7332298278341587, 0.0, 5.872239694763155, 3.706919814979471, 1.2952193148242288], +# 'B_2': [2.6854637025308077, 0.0, 6.460273032447163, 3.6421374119160257, 1.424919765813671], +# } +# ), +# 'fraction_of_sample_fr': pd.DataFrame( +# index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='fraction_of_sample_fr'), +# data={ +# 'A_1': [0.4982947503694122, 0.09894992884327108, 0.0, 0.0, 0.0], +# 'A_2': [0.5129009819131448, 0.10648167855718907, 0.0, 0.0, 0.0], +# 'Ader_1': 
[0.6245256897665437, 0.12425600110100192, 0.0, 0.0, 0.0], +# 'Ader_2': [0.6834820596153709, 0.1354850769607943, 0.0, 0.0, 0.0], +# 'B_1': [0.0009761535099407709, 0.0, 0.0020972284624154124, 0.0013238999339212397, 0.00046257832672293885], +# 'B_2': [0.0009590941794752884, 0.0, 0.0023072403687311297, 0.0013007633613985805, 0.0005088999163620254], +# } +# ), +# 'fraction_of_feedstock_fr': pd.DataFrame( +# index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='fraction_of_feedstock_fr'), +# data={ +# 'A_1': [0.22423263766623547, 0.04452746797947199, 0.0, 0.0, 0.0], +# 'A_2': [0.23593445168004665, 0.04898157213630697, 0.0, 0.0, 0.0], +# 'Ader_1': [0.29352707419027546, 0.058400320517470905, 0.0, 0.0, 0.0], +# 'Ader_2': [0.32807138861537805, 0.06503283694118125, 0.0, 0.0, 0.0], +# 'B_1': [0.00047831521987097775, 0.0, 0.001027641946583552, 0.0006487109676214074, 0.00022666338009424], +# 'B_2': [0.0004795470897376442, 0.0, 0.0011536201843655649, 0.0006503816806992902, 0.0002544499581810127], +# } +# ) - } - return reports +# } +# return reports -@pytest.fixture -def checked_samples_param_reports(): - reports = { - 'height': pd.DataFrame( - index=pd.Index(['(z)-octadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', 'hexane-2,5-dione', 'phenol', 'butan-2-one', '2-methylcyclopent-2-en-1-one', 'tetradecanoic acid', '(e)-pent-2-enoic acid', '(e)-hexadec-9-enoic acid', 'oxacycloheptadecan-2-one', '4,5-dimethylhex-1-ene', 'decanoic acid'], name='height'), - data={ - 'A': [1143123.0, 477850.5, 332147.5, 0.0, 0.0, 0.0, 0.0, 14769.5, 0.0, 0.0, 6222.0, 0.0, 2129.5], - 'Ader': [548218.5, 490697.0, 191724.5, 0.0, 0.0, 0.0, 0.0, 18394.0, 0.0, 8953.5, 0.0, 0.0, 0.0], - 'B': [0.0, 0.0, 0.0, 116469.5, 69787.0, 41972.0, 19169.0, 0.0, 12434.5, 0.0, 0.0, 3597.0, 0.0], - } - ), - 'area': pd.DataFrame( - index=pd.Index(['(z)-octadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', 'hexane-2,5-dione', 'phenol', 
'butan-2-one', '2-methylcyclopent-2-en-1-one', 'tetradecanoic acid', '(e)-pent-2-enoic acid', '(e)-hexadec-9-enoic acid', 'oxacycloheptadecan-2-one', '4,5-dimethylhex-1-ene', 'decanoic acid'], name='area'), - data={ - 'A': [6387230.0, 1767468.0, 1413594.0, 0.0, 0.0, 0.0, 0.0, 47519.5, 0.0, 0.0, 18181.0, 0.0, 5476.0], - 'Ader': [1840687.0, 1409097.5, 562306.5, 0.0, 0.0, 0.0, 0.0, 51560.5, 0.0, 26505.5, 0.0, 0.0, 0.0], - 'B': [0.0, 0.0, 0.0, 456719.5, 214848.5, 164293.5, 66877.0, 0.0, 38083.5, 0.0, 0.0, 9761.0, 0.0], - } - ), - 'area_if_undiluted': pd.DataFrame( - index=pd.Index(['(z)-octadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'hexane-2,5-dione', 'oxacycloheptadecan-2-one', 'phenol', 'butan-2-one', 'decanoic acid', '2-methylcyclopent-2-en-1-one', '(e)-pent-2-enoic acid', '4,5-dimethylhex-1-ene'], name='area_if_undiluted'), - data={ - 'A': [159680750.0, 44186700.0, 35339850.0, 1187987.5, 0.0, 0.0, 454525.0, 0.0, 0.0, 136900.0, 0.0, 0.0, 0.0], - 'Ader': [230085875.0, 176137187.5, 70288312.5, 6445062.5, 3313187.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - 'B': [0.0, 0.0, 0.0, 0.0, 0.0, 456719.5, 0.0, 214848.5, 164293.5, 0.0, 66877.0, 38083.5, 9761.0], - } - ), - 'conc_vial_mg_L': pd.DataFrame( - index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid', 'hexadecanoic acid', 'tetradecanoic acid', 'decanoic acid', 'phenol', '2-methylcyclopent-2-en-1-one', '(e)-hexadec-9-enoic acid'], name='conc_vial_mg_L'), - data={ - 'A': [128.28438972770587, 113.72290046495935, 63.58554431387759, 23.65614297715311, 11.39213892525418, 0.0, 0.0, 0.0], - 'Ader': [34.090377368664285, 22.971264852335217, 27.502344287695024, 0.6415947958834178, 0.0, 0.0, 0.0, 2.5835030215418877], - 'B': [0.0, 0.0, 0.0, 0.0, 0.0, 7.526085990440194, 6.189242283619498, 0.0], - } - ), - 'conc_vial_if_undiluted_mg_L': pd.DataFrame( - index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', 'hexadecanoic 
acid', '(z)-octadec-9-enoic acid', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'decanoic acid', 'phenol', '2-methylcyclopent-2-en-1-one'], name='conc_vial_if_undiluted_mg_L'), - data={ - 'A': [3207.1097431926473, 1589.6386078469395, 2843.0725116239837, 591.4035744288278, 0.0, 284.8034731313545, 0.0, 0.0], - 'Ader': [4261.297171083035, 3437.7930359618776, 2871.4081065419023, 80.19934948542723, 322.9378776927359, 0.0, 0.0, 0.0], - 'B': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.526085990440194, 6.189242283619498], - } - ), - 'fraction_of_sample_fr': pd.DataFrame( - index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', 'hexadecanoic acid', '(z)-octadec-9-enoic acid', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'decanoic acid', 'phenol', '2-methylcyclopent-2-en-1-one'], name='fraction_of_sample_fr'), - data={ - 'A': [0.2290792673709033, 0.11354561484620995, 0.20307660797314164, 0.042243112459201974, 0.0, 0.020343105223668174, 0.0, 0.0], - 'Ader': [0.304378369363074, 0.24555664542584843, 0.2051005790387073, 0.005728524963244802, 0.023066991263766854, 0.0, 0.0, 0.0], - 'B': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0026878878537286406, 0.0022104436727212492], - } - ), - 'fraction_of_feedstock_fr': pd.DataFrame( - index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', 'hexadecanoic acid', '(z)-octadec-9-enoic acid', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'decanoic acid', 'phenol', '2-methylcyclopent-2-en-1-one'], name='fraction_of_feedstock_fr'), - data={ - 'A': [0.10420514155783843, 0.051641211741918436, 0.0924007887729286, 0.01922128259154552, 0.0, 0.009357828402887361, 0.0, 0.0], - 'Ader': [0.1447012705450368, 0.11663401169579143, 0.09748090808552162, 0.002722862373382652, 0.010956172407969126, 0.0, 0.0, 0.0], - 'B': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.00133114530090798, 0.0010940721919949245], - } - ) - } - return reports +# @pytest.fixture +# def checked_samples_param_reports(): +# reports = { +# 'height': pd.DataFrame( +# index=pd.Index(['(z)-octadec-9-enoic 
acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', 'hexane-2,5-dione', 'phenol', 'butan-2-one', '2-methylcyclopent-2-en-1-one', 'tetradecanoic acid', '(e)-pent-2-enoic acid', '(e)-hexadec-9-enoic acid', 'oxacycloheptadecan-2-one', '4,5-dimethylhex-1-ene', 'decanoic acid'], name='height'), +# data={ +# 'A': [1143123.0, 477850.5, 332147.5, 0.0, 0.0, 0.0, 0.0, 14769.5, 0.0, 0.0, 6222.0, 0.0, 2129.5], +# 'Ader': [548218.5, 490697.0, 191724.5, 0.0, 0.0, 0.0, 0.0, 18394.0, 0.0, 8953.5, 0.0, 0.0, 0.0], +# 'B': [0.0, 0.0, 0.0, 116469.5, 69787.0, 41972.0, 19169.0, 0.0, 12434.5, 0.0, 0.0, 3597.0, 0.0], +# } +# ), +# 'area': pd.DataFrame( +# index=pd.Index(['(z)-octadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', 'hexane-2,5-dione', 'phenol', 'butan-2-one', '2-methylcyclopent-2-en-1-one', 'tetradecanoic acid', '(e)-pent-2-enoic acid', '(e)-hexadec-9-enoic acid', 'oxacycloheptadecan-2-one', '4,5-dimethylhex-1-ene', 'decanoic acid'], name='area'), +# data={ +# 'A': [6387230.0, 1767468.0, 1413594.0, 0.0, 0.0, 0.0, 0.0, 47519.5, 0.0, 0.0, 18181.0, 0.0, 5476.0], +# 'Ader': [1840687.0, 1409097.5, 562306.5, 0.0, 0.0, 0.0, 0.0, 51560.5, 0.0, 26505.5, 0.0, 0.0, 0.0], +# 'B': [0.0, 0.0, 0.0, 456719.5, 214848.5, 164293.5, 66877.0, 0.0, 38083.5, 0.0, 0.0, 9761.0, 0.0], +# } +# ), +# 'area_if_undiluted': pd.DataFrame( +# index=pd.Index(['(z)-octadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'hexane-2,5-dione', 'oxacycloheptadecan-2-one', 'phenol', 'butan-2-one', 'decanoic acid', '2-methylcyclopent-2-en-1-one', '(e)-pent-2-enoic acid', '4,5-dimethylhex-1-ene'], name='area_if_undiluted'), +# data={ +# 'A': [159680750.0, 44186700.0, 35339850.0, 1187987.5, 0.0, 0.0, 454525.0, 0.0, 0.0, 136900.0, 0.0, 0.0, 0.0], +# 'Ader': [230085875.0, 176137187.5, 70288312.5, 6445062.5, 3313187.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], +# 'B': [0.0, 0.0, 0.0, 0.0, 0.0, 456719.5, 
0.0, 214848.5, 164293.5, 0.0, 66877.0, 38083.5, 9761.0], +# } +# ), +# 'conc_vial_mg_L': pd.DataFrame( +# index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid', 'hexadecanoic acid', 'tetradecanoic acid', 'decanoic acid', 'phenol', '2-methylcyclopent-2-en-1-one', '(e)-hexadec-9-enoic acid'], name='conc_vial_mg_L'), +# data={ +# 'A': [128.28438972770587, 113.72290046495935, 63.58554431387759, 23.65614297715311, 11.39213892525418, 0.0, 0.0, 0.0], +# 'Ader': [34.090377368664285, 22.971264852335217, 27.502344287695024, 0.6415947958834178, 0.0, 0.0, 0.0, 2.5835030215418877], +# 'B': [0.0, 0.0, 0.0, 0.0, 0.0, 7.526085990440194, 6.189242283619498, 0.0], +# } +# ), +# 'conc_vial_if_undiluted_mg_L': pd.DataFrame( +# index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', 'hexadecanoic acid', '(z)-octadec-9-enoic acid', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'decanoic acid', 'phenol', '2-methylcyclopent-2-en-1-one'], name='conc_vial_if_undiluted_mg_L'), +# data={ +# 'A': [3207.1097431926473, 1589.6386078469395, 2843.0725116239837, 591.4035744288278, 0.0, 284.8034731313545, 0.0, 0.0], +# 'Ader': [4261.297171083035, 3437.7930359618776, 2871.4081065419023, 80.19934948542723, 322.9378776927359, 0.0, 0.0, 0.0], +# 'B': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.526085990440194, 6.189242283619498], +# } +# ), +# 'fraction_of_sample_fr': pd.DataFrame( +# index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', 'hexadecanoic acid', '(z)-octadec-9-enoic acid', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'decanoic acid', 'phenol', '2-methylcyclopent-2-en-1-one'], name='fraction_of_sample_fr'), +# data={ +# 'A': [0.2290792673709033, 0.11354561484620995, 0.20307660797314164, 0.042243112459201974, 0.0, 0.020343105223668174, 0.0, 0.0], +# 'Ader': [0.304378369363074, 0.24555664542584843, 0.2051005790387073, 0.005728524963244802, 0.023066991263766854, 0.0, 0.0, 0.0], +# 'B': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0026878878537286406, 0.0022104436727212492], +# } +# 
), +# 'fraction_of_feedstock_fr': pd.DataFrame( +# index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', 'hexadecanoic acid', '(z)-octadec-9-enoic acid', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'decanoic acid', 'phenol', '2-methylcyclopent-2-en-1-one'], name='fraction_of_feedstock_fr'), +# data={ +# 'A': [0.10420514155783843, 0.051641211741918436, 0.0924007887729286, 0.01922128259154552, 0.0, 0.009357828402887361, 0.0, 0.0], +# 'Ader': [0.1447012705450368, 0.11663401169579143, 0.09748090808552162, 0.002722862373382652, 0.010956172407969126, 0.0, 0.0, 0.0], +# 'B': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.00133114530090798, 0.0010940721919949245], +# } +# ) +# } +# return reports -@pytest.fixture -def checked_samples_param_reports_std(): - reports = { - 'height': pd.DataFrame( - index=pd.Index(['(z)-octadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', 'hexane-2,5-dione', 'phenol', 'butan-2-one', '2-methylcyclopent-2-en-1-one', 'tetradecanoic acid', '(e)-pent-2-enoic acid', '(e)-hexadec-9-enoic acid', 'oxacycloheptadecan-2-one', '4,5-dimethylhex-1-ene', 'decanoic acid'], name='height'), - data={ - 'A': [6330.019905181974, 22498.01645701238, 10546.497641397356, np.nan, np.nan, np.nan, np.nan, 352.8462838120872, np.nan, np.nan, 731.1484117468901, np.nan, 3011.567781073506], - 'Ader': [65717.79713669654, 8212.338156700564, 15378.865384026221, np.nan, np.nan, np.nan, np.nan, 29.698484809834994, np.nan, 252.43712088359746, np.nan, np.nan, np.nan], - 'B': [np.nan, np.nan, np.nan, 5193.699307815192, 7588.669975694028, 3647.2567773602123, 924.8956697920041, np.nan, 427.79960261786124, np.nan, np.nan, 5086.926183856023, np.nan], - } - ), - 'area': pd.DataFrame( - index=pd.Index(['(z)-octadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', 'hexane-2,5-dione', 'phenol', 'butan-2-one', '2-methylcyclopent-2-en-1-one', 'tetradecanoic acid', '(e)-pent-2-enoic acid', '(e)-hexadec-9-enoic acid', 'oxacycloheptadecan-2-one', 
'4,5-dimethylhex-1-ene', 'decanoic acid'], name='area'), - data={ - 'A': [10575.489019426004, 156570.4119174501, 60139.43173991587, np.nan, np.nan, np.nan, np.nan, 4427.195557008974, np.nan, np.nan, 4402.446819667445, np.nan, 7744.233467555068], - 'Ader': [163869.16811285765, 8637.309332193678, 60571.47398322085, np.nan, np.nan, np.nan, np.nan, 2902.6733367707775, np.nan, 1827.8710293672254, np.nan, np.nan, np.nan], - 'B': [np.nan, np.nan, np.nan, 22121.83564942114, 19659.68983732958, 23656.25736459595, 3317.745017327281, np.nan, 3242.0845917403203, np.nan, np.nan, 13804.138582323782, np.nan], - } - ), - 'area_if_undiluted': pd.DataFrame( - index=pd.Index(['(z)-octadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'hexane-2,5-dione', 'oxacycloheptadecan-2-one', 'phenol', 'butan-2-one', 'decanoic acid', '2-methylcyclopent-2-en-1-one', '(e)-pent-2-enoic acid', '4,5-dimethylhex-1-ene'], name='area_if_undiluted'), - data={ - 'A': [264387.2254856501, 3914260.2979362523, 1503485.7934978968, 110679.88892522435, np.nan, np.nan, 110061.17049168612, np.nan, np.nan, 193605.8366888767, np.nan, np.nan, np.nan], - 'Ader': [20483646.014107205, 1079663.6665242098, 7571434.247902606, 362834.1670963472, 228483.87867090316, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], - 'B': [np.nan, np.nan, np.nan, np.nan, np.nan, 22121.83564942114, np.nan, 19659.68983732958, 23656.25736459595, np.nan, 3317.745017327281, 3242.0845917403203, 13804.138582323782], - } - ), - 'conc_vial_mg_L': pd.DataFrame( - index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid', 'hexadecanoic acid', 'tetradecanoic acid', 'decanoic acid', 'phenol', '2-methylcyclopent-2-en-1-one', '(e)-hexadec-9-enoic acid'], name='conc_vial_mg_L'), - data={ - 'A': [4.106325693068217, 0.14764425894471855, 3.4914351462625968, 0.10555595583489899, 16.110917372532917, np.nan, np.nan, np.nan], - 'Ader': 
[3.8503522496954825, 1.8415608200696434, 0.1709011262715786, 0.05743341165328154, np.nan, np.nan, np.nan, 0.02054160468737343], - 'B': [np.nan, np.nan, np.nan, np.nan, np.nan, 0.5074982512365029, 0.07715745715389961, np.nan], - } - ), - 'conc_vial_if_undiluted_mg_L': pd.DataFrame( - index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', 'hexadecanoic acid', '(z)-octadec-9-enoic acid', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'decanoic acid', 'phenol', '2-methylcyclopent-2-en-1-one'], name='conc_vial_if_undiluted_mg_L'), - data={ - 'A': [102.65814232670576, 87.28587865656482, 3.691106473618185, 2.638898895872451, np.nan, 402.77293431332293, np.nan, np.nan], - 'Ader': [481.29403121193536, 21.362640783947153, 230.19510250870547, 7.179176456660192, 2.5677005859216706, np.nan, np.nan, np.nan], - 'B': [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 0.5074982512365029, 0.07715745715389961], - } - ), - 'fraction_of_sample_fr': pd.DataFrame( - index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', 'hexadecanoic acid', '(z)-octadec-9-enoic acid', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'decanoic acid', 'phenol', '2-methylcyclopent-2-en-1-one'], name='fraction_of_sample_fr'), - data={ - 'A': [0.007332724451907525, 0.006234705618326057, 0.00026365046240129915, 0.00018849277827660252, np.nan, 0.028769495308094487, np.nan, np.nan], - 'Ader': [0.03437814508656681, 0.001525902913139085, 0.01644250732205038, 0.0005127983183328709, 0.00018340718470868995, np.nan, np.nan, np.nan], - 'B': [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 0.00018124937544160814, 2.7556234697821363e-05], - } - ), - 'fraction_of_feedstock_fr': pd.DataFrame( - index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', 'hexadecanoic acid', '(z)-octadec-9-enoic acid', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'decanoic acid', 'phenol', '2-methylcyclopent-2-en-1-one'], name='fraction_of_feedstock_fr'), - data={ - 'A': [0.001716554591745799, 0.002033902314020852, 0.001555929426374292, 
0.0003844681268991305, np.nan, 0.013233967841723464, np.nan, np.nan], - 'Ader': [0.01848189900635057, 0.0010115438077193133, 0.009260471080609506, 0.00028408598968518184, 7.598984670517598e-05, np.nan, np.nan, np.nan], - 'B': [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 0.00010872467812800095, 1.9898609286992866e-06], - } - ) - } - return reports +# @pytest.fixture +# def checked_samples_param_reports_std(): +# reports = { +# 'height': pd.DataFrame( +# index=pd.Index(['(z)-octadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', 'hexane-2,5-dione', 'phenol', 'butan-2-one', '2-methylcyclopent-2-en-1-one', 'tetradecanoic acid', '(e)-pent-2-enoic acid', '(e)-hexadec-9-enoic acid', 'oxacycloheptadecan-2-one', '4,5-dimethylhex-1-ene', 'decanoic acid'], name='height'), +# data={ +# 'A': [6330.019905181974, 22498.01645701238, 10546.497641397356, np.nan, np.nan, np.nan, np.nan, 352.8462838120872, np.nan, np.nan, 731.1484117468901, np.nan, 3011.567781073506], +# 'Ader': [65717.79713669654, 8212.338156700564, 15378.865384026221, np.nan, np.nan, np.nan, np.nan, 29.698484809834994, np.nan, 252.43712088359746, np.nan, np.nan, np.nan], +# 'B': [np.nan, np.nan, np.nan, 5193.699307815192, 7588.669975694028, 3647.2567773602123, 924.8956697920041, np.nan, 427.79960261786124, np.nan, np.nan, 5086.926183856023, np.nan], +# } +# ), +# 'area': pd.DataFrame( +# index=pd.Index(['(z)-octadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', 'hexane-2,5-dione', 'phenol', 'butan-2-one', '2-methylcyclopent-2-en-1-one', 'tetradecanoic acid', '(e)-pent-2-enoic acid', '(e)-hexadec-9-enoic acid', 'oxacycloheptadecan-2-one', '4,5-dimethylhex-1-ene', 'decanoic acid'], name='area'), +# data={ +# 'A': [10575.489019426004, 156570.4119174501, 60139.43173991587, np.nan, np.nan, np.nan, np.nan, 4427.195557008974, np.nan, np.nan, 4402.446819667445, np.nan, 7744.233467555068], +# 'Ader': [163869.16811285765, 8637.309332193678, 60571.47398322085, 
np.nan, np.nan, np.nan, np.nan, 2902.6733367707775, np.nan, 1827.8710293672254, np.nan, np.nan, np.nan], +# 'B': [np.nan, np.nan, np.nan, 22121.83564942114, 19659.68983732958, 23656.25736459595, 3317.745017327281, np.nan, 3242.0845917403203, np.nan, np.nan, 13804.138582323782, np.nan], +# } +# ), +# 'area_if_undiluted': pd.DataFrame( +# index=pd.Index(['(z)-octadec-9-enoic acid', 'hexadecanoic acid', '(9z,12z)-octadeca-9,12-dienoic acid', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'hexane-2,5-dione', 'oxacycloheptadecan-2-one', 'phenol', 'butan-2-one', 'decanoic acid', '2-methylcyclopent-2-en-1-one', '(e)-pent-2-enoic acid', '4,5-dimethylhex-1-ene'], name='area_if_undiluted'), +# data={ +# 'A': [264387.2254856501, 3914260.2979362523, 1503485.7934978968, 110679.88892522435, np.nan, np.nan, 110061.17049168612, np.nan, np.nan, 193605.8366888767, np.nan, np.nan, np.nan], +# 'Ader': [20483646.014107205, 1079663.6665242098, 7571434.247902606, 362834.1670963472, 228483.87867090316, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], +# 'B': [np.nan, np.nan, np.nan, np.nan, np.nan, 22121.83564942114, np.nan, 19659.68983732958, 23656.25736459595, np.nan, 3317.745017327281, 3242.0845917403203, 13804.138582323782], +# } +# ), +# 'conc_vial_mg_L': pd.DataFrame( +# index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', '(z)-octadec-9-enoic acid', 'hexadecanoic acid', 'tetradecanoic acid', 'decanoic acid', 'phenol', '2-methylcyclopent-2-en-1-one', '(e)-hexadec-9-enoic acid'], name='conc_vial_mg_L'), +# data={ +# 'A': [4.106325693068217, 0.14764425894471855, 3.4914351462625968, 0.10555595583489899, 16.110917372532917, np.nan, np.nan, np.nan], +# 'Ader': [3.8503522496954825, 1.8415608200696434, 0.1709011262715786, 0.05743341165328154, np.nan, np.nan, np.nan, 0.02054160468737343], +# 'B': [np.nan, np.nan, np.nan, np.nan, np.nan, 0.5074982512365029, 0.07715745715389961, np.nan], +# } +# ), +# 'conc_vial_if_undiluted_mg_L': pd.DataFrame( +# 
index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', 'hexadecanoic acid', '(z)-octadec-9-enoic acid', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'decanoic acid', 'phenol', '2-methylcyclopent-2-en-1-one'], name='conc_vial_if_undiluted_mg_L'), +# data={ +# 'A': [102.65814232670576, 87.28587865656482, 3.691106473618185, 2.638898895872451, np.nan, 402.77293431332293, np.nan, np.nan], +# 'Ader': [481.29403121193536, 21.362640783947153, 230.19510250870547, 7.179176456660192, 2.5677005859216706, np.nan, np.nan, np.nan], +# 'B': [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 0.5074982512365029, 0.07715745715389961], +# } +# ), +# 'fraction_of_sample_fr': pd.DataFrame( +# index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', 'hexadecanoic acid', '(z)-octadec-9-enoic acid', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'decanoic acid', 'phenol', '2-methylcyclopent-2-en-1-one'], name='fraction_of_sample_fr'), +# data={ +# 'A': [0.007332724451907525, 0.006234705618326057, 0.00026365046240129915, 0.00018849277827660252, np.nan, 0.028769495308094487, np.nan, np.nan], +# 'Ader': [0.03437814508656681, 0.001525902913139085, 0.01644250732205038, 0.0005127983183328709, 0.00018340718470868995, np.nan, np.nan, np.nan], +# 'B': [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 0.00018124937544160814, 2.7556234697821363e-05], +# } +# ), +# 'fraction_of_feedstock_fr': pd.DataFrame( +# index=pd.Index(['(9z,12z)-octadeca-9,12-dienoic acid', 'hexadecanoic acid', '(z)-octadec-9-enoic acid', 'tetradecanoic acid', '(e)-hexadec-9-enoic acid', 'decanoic acid', 'phenol', '2-methylcyclopent-2-en-1-one'], name='fraction_of_feedstock_fr'), +# data={ +# 'A': [0.001716554591745799, 0.002033902314020852, 0.001555929426374292, 0.0003844681268991305, np.nan, 0.013233967841723464, np.nan, np.nan], +# 'Ader': [0.01848189900635057, 0.0010115438077193133, 0.009260471080609506, 0.00028408598968518184, 7.598984670517598e-05, np.nan, np.nan, np.nan], +# 'B': [np.nan, np.nan, np.nan, np.nan, 
np.nan, np.nan, 0.00010872467812800095, 1.9898609286992866e-06], +# } +# ) +# } +# return reports -@pytest.fixture -def checked_samples_param_aggrreps(): - reports = { - 'height': pd.DataFrame( - index=pd.Index(['C-aliph', 'carboxyl', 'ketone', 'C-arom', 'alcohol', 'ester'], name='height'), - data={ - 'A': [1651930.781078005, 322843.186774585, 0.0, 0.0, 0.0, 1419.36241499941], - 'Ader': [1048483.5942705647, 209497.12232512212, 0.0, 0.0, 0.0, 0.0], - 'B': [27582.83802734355, 5590.929749300839, 161073.19496333148, 57177.7326745298, 12611.491966847307, 0.0], - } - ), - 'area': pd.DataFrame( - index=pd.Index(['C-aliph', 'carboxyl', 'ketone', 'C-arom', 'alcohol', 'ester'], name='area'), - data={ - 'A': [8069070.542315948, 1565859.8667690419, 0.0, 0.0, 0.0, 4147.449062536849], - 'Ader': [3244281.634364161, 645827.8665250427, 0.0, 0.0, 0.0, 0.0], - 'B': [94251.19374854585, 17123.500993807433, 626471.4374662458, 176029.20455849537, 38826.144293911384, 0.0], - } - ), - 'area_if_undiluted': pd.DataFrame( - index=pd.Index(['C-aliph', 'carboxyl', 'ketone', 'C-arom', 'ester', 'alcohol'], name='area_if_undiluted'), - data={ - 'A': [201726763.55789867, 39146496.66922605, 0.0, 0.0, 103686.22656342122, 0.0], - 'Ader': [405535204.2955202, 80728483.31563035, 0.0, 0.0, 0.0, 0.0], - 'B': [94251.19374854585, 17123.500993807433, 626471.4374662458, 176029.20455849537, 0.0, 38826.144293911384], - } - ), - 'conc_vial_mg_L': pd.DataFrame( - index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='conc_vial_mg_L'), - data={ - 'A': [283.134805039116, 57.52085007212886, 0.0, 0.0, 0.0], - 'Ader': [73.2484339653872, 14.545500371460587, 0.0, 0.0, 0.0], - 'B': [2.709346765182483, 0.0, 6.166256363605159, 3.674528613447748, 1.36006954031895], - } - ), - 'conc_vial_if_undiluted_mg_L': pd.DataFrame( - index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='conc_vial_if_undiluted_mg_L'), - data={ - 'A': [7078.3701259779, 1438.0212518032215, 0.0, 0.0, 0.0], - 
'Ader': [9156.054245673402, 1818.1875464325735, 0.0, 0.0, 0.0], - 'B': [2.709346765182483, 0.0, 6.166256363605159, 3.674528613447748, 1.36006954031895], - } - ), - 'fraction_of_sample_fr': pd.DataFrame( - index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='fraction_of_sample_fr'), - data={ - 'A': [0.5055978661412784, 0.10271580370023008, 0.0, 0.0, 0.0], - 'Ader': [0.6540038746909574, 0.12987053903089812, 0.0, 0.0, 0.0], - 'B': [0.0009676238447080297, 0.0, 0.002202234415573271, 0.0013123316476599102, 0.00048573912154248214], - } - ), - 'fraction_of_feedstock_fr': pd.DataFrame( - index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='fraction_of_feedstock_fr'), - data={ - 'A': [0.23008354467314104, 0.04675452005788948, 0.0, 0.0, 0.0], - 'Ader': [0.3107992314028268, 0.06171657872932608, 0.0, 0.0, 0.0], - 'B': [0.000478931154804311, 0.0, 0.0010906310654745584, 0.0006495463241603489, 0.00024055666913762634], - } - ) - } - return reports +# @pytest.fixture +# def checked_samples_param_aggrreps(): +# reports = { +# 'height': pd.DataFrame( +# index=pd.Index(['C-aliph', 'carboxyl', 'ketone', 'C-arom', 'alcohol', 'ester'], name='height'), +# data={ +# 'A': [1651930.781078005, 322843.186774585, 0.0, 0.0, 0.0, 1419.36241499941], +# 'Ader': [1048483.5942705647, 209497.12232512212, 0.0, 0.0, 0.0, 0.0], +# 'B': [27582.83802734355, 5590.929749300839, 161073.19496333148, 57177.7326745298, 12611.491966847307, 0.0], +# } +# ), +# 'area': pd.DataFrame( +# index=pd.Index(['C-aliph', 'carboxyl', 'ketone', 'C-arom', 'alcohol', 'ester'], name='area'), +# data={ +# 'A': [8069070.542315948, 1565859.8667690419, 0.0, 0.0, 0.0, 4147.449062536849], +# 'Ader': [3244281.634364161, 645827.8665250427, 0.0, 0.0, 0.0, 0.0], +# 'B': [94251.19374854585, 17123.500993807433, 626471.4374662458, 176029.20455849537, 38826.144293911384, 0.0], +# } +# ), +# 'area_if_undiluted': pd.DataFrame( +# index=pd.Index(['C-aliph', 'carboxyl', 'ketone', 'C-arom', 'ester', 
'alcohol'], name='area_if_undiluted'), +# data={ +# 'A': [201726763.55789867, 39146496.66922605, 0.0, 0.0, 103686.22656342122, 0.0], +# 'Ader': [405535204.2955202, 80728483.31563035, 0.0, 0.0, 0.0, 0.0], +# 'B': [94251.19374854585, 17123.500993807433, 626471.4374662458, 176029.20455849537, 0.0, 38826.144293911384], +# } +# ), +# 'conc_vial_mg_L': pd.DataFrame( +# index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='conc_vial_mg_L'), +# data={ +# 'A': [283.134805039116, 57.52085007212886, 0.0, 0.0, 0.0], +# 'Ader': [73.2484339653872, 14.545500371460587, 0.0, 0.0, 0.0], +# 'B': [2.709346765182483, 0.0, 6.166256363605159, 3.674528613447748, 1.36006954031895], +# } +# ), +# 'conc_vial_if_undiluted_mg_L': pd.DataFrame( +# index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='conc_vial_if_undiluted_mg_L'), +# data={ +# 'A': [7078.3701259779, 1438.0212518032215, 0.0, 0.0, 0.0], +# 'Ader': [9156.054245673402, 1818.1875464325735, 0.0, 0.0, 0.0], +# 'B': [2.709346765182483, 0.0, 6.166256363605159, 3.674528613447748, 1.36006954031895], +# } +# ), +# 'fraction_of_sample_fr': pd.DataFrame( +# index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='fraction_of_sample_fr'), +# data={ +# 'A': [0.5055978661412784, 0.10271580370023008, 0.0, 0.0, 0.0], +# 'Ader': [0.6540038746909574, 0.12987053903089812, 0.0, 0.0, 0.0], +# 'B': [0.0009676238447080297, 0.0, 0.002202234415573271, 0.0013123316476599102, 0.00048573912154248214], +# } +# ), +# 'fraction_of_feedstock_fr': pd.DataFrame( +# index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='fraction_of_feedstock_fr'), +# data={ +# 'A': [0.23008354467314104, 0.04675452005788948, 0.0, 0.0, 0.0], +# 'Ader': [0.3107992314028268, 0.06171657872932608, 0.0, 0.0, 0.0], +# 'B': [0.000478931154804311, 0.0, 0.0010906310654745584, 0.0006495463241603489, 0.00024055666913762634], +# } +# ) +# } +# return reports -@pytest.fixture -def 
checked_samples_param_aggrreps_std(): - reports = { - 'height': pd.DataFrame( - index=pd.Index(['C-aliph', 'carboxyl', 'ketone', 'C-arom', 'alcohol', 'ester'], name='height'), - data={ - 'A': [35797.371056783835, 7508.212703385821, 0.0, 0.0, 0.0, 166.78954924783815], - 'Ader': [75153.30566953593, 14433.563492713163, 0.0, 0.0, 0.0, 0.0], - 'B': [6487.963541864086, 192.35172504043408, 8629.636927224863, 6217.532537943509, 1371.3793462610597, 0.0], - } - ), - 'area': pd.DataFrame( - index=pd.Index(['C-aliph', 'carboxyl', 'ketone', 'C-arom', 'alcohol', 'ester'], name='area'), - data={ - 'A': [201147.21731284214, 41724.311288199315, 0.0, 0.0, 0.0, 1004.2860093008128], - 'Ader': [199552.22488919983, 38249.3835185481, 0.0, 0.0, 0.0, 0.0], - 'B': [21973.821276880837, 1457.7399327444466, 42815.265175930195, 16107.534210999198, 3552.7823298636085, 0.0], - } - ), - 'area_if_undiluted': pd.DataFrame( - index=pd.Index(['C-aliph', 'carboxyl', 'ketone', 'C-arom', 'ester', 'alcohol'], name='area_if_undiluted'), - data={ - 'A': [5028680.432821053, 1043107.7822049828, 0.0, 0.0, 25107.15023252032, 0.0], - 'Ader': [24944028.111149978, 4781172.939818514, 0.0, 0.0, 0.0, 0.0], - 'B': [21973.821276880837, 1457.7399327444466, 42815.26517593019, 16107.534210999198, 0.0, 3552.7823298636085], - } - ), - 'conc_vial_mg_L': pd.DataFrame( - index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='conc_vial_mg_L'), - data={ - 'A': [18.436674076139045, 5.526836289719974, 0.0, 0.0, 0.0], - 'Ader': [4.984729502757059, 0.9565736502096863, 0.0, 0.0, 0.0], - 'B': [0.03377575111300582, 0.0, 0.41580236064012105, 0.04580807650772246, 0.09171206841758799], - } - ), - 'conc_vial_if_undiluted_mg_L': pd.DataFrame( - index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='conc_vial_if_undiluted_mg_L'), - data={ - 'A': [460.9168519034765, 138.1709072429994, 0.0, 0.0, 0.0], - 'Ader': [623.0911878446323, 119.57170627621078, 0.0, 0.0, 0.0], - 'B': [0.03377575111300582, 0.0, 
0.41580236064012105, 0.04580807650772246, 0.09171206841758799], - } - ), - 'fraction_of_sample_fr': pd.DataFrame( - index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='fraction_of_sample_fr'), - data={ - 'A': [0.03292263227881972, 0.009869350517357094, 0.0, 0.0, 0.0], - 'Ader': [0.04450651341747372, 0.00854083616258648, 0.0, 0.0, 0.0], - 'B': [1.2062768254644968e-05, 0.0, 0.0001485008430857575, 1.6360027324186634e-05, 3.275431014913856e-05], - } - ), - 'fraction_of_feedstock_fr': pd.DataFrame( - index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='fraction_of_feedstock_fr'), - data={ - 'A': [0.014510828146666238, 0.004414840265872383, 0.0, 0.0, 0.0], - 'Ader': [0.024426518981430268, 0.004689897339536736, 0.0, 0.0, 0.0], - 'B': [8.710635362591769e-07, 0.0, 8.908006621759261e-05, 1.1813725467879507e-06, 1.9648077791126472e-05], - } - ) - } - return reports +# @pytest.fixture +# def checked_samples_param_aggrreps_std(): +# reports = { +# 'height': pd.DataFrame( +# index=pd.Index(['C-aliph', 'carboxyl', 'ketone', 'C-arom', 'alcohol', 'ester'], name='height'), +# data={ +# 'A': [35797.371056783835, 7508.212703385821, 0.0, 0.0, 0.0, 166.78954924783815], +# 'Ader': [75153.30566953593, 14433.563492713163, 0.0, 0.0, 0.0, 0.0], +# 'B': [6487.963541864086, 192.35172504043408, 8629.636927224863, 6217.532537943509, 1371.3793462610597, 0.0], +# } +# ), +# 'area': pd.DataFrame( +# index=pd.Index(['C-aliph', 'carboxyl', 'ketone', 'C-arom', 'alcohol', 'ester'], name='area'), +# data={ +# 'A': [201147.21731284214, 41724.311288199315, 0.0, 0.0, 0.0, 1004.2860093008128], +# 'Ader': [199552.22488919983, 38249.3835185481, 0.0, 0.0, 0.0, 0.0], +# 'B': [21973.821276880837, 1457.7399327444466, 42815.265175930195, 16107.534210999198, 3552.7823298636085, 0.0], +# } +# ), +# 'area_if_undiluted': pd.DataFrame( +# index=pd.Index(['C-aliph', 'carboxyl', 'ketone', 'C-arom', 'ester', 'alcohol'], name='area_if_undiluted'), +# data={ +# 'A': 
[5028680.432821053, 1043107.7822049828, 0.0, 0.0, 25107.15023252032, 0.0], +# 'Ader': [24944028.111149978, 4781172.939818514, 0.0, 0.0, 0.0, 0.0], +# 'B': [21973.821276880837, 1457.7399327444466, 42815.26517593019, 16107.534210999198, 0.0, 3552.7823298636085], +# } +# ), +# 'conc_vial_mg_L': pd.DataFrame( +# index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='conc_vial_mg_L'), +# data={ +# 'A': [18.436674076139045, 5.526836289719974, 0.0, 0.0, 0.0], +# 'Ader': [4.984729502757059, 0.9565736502096863, 0.0, 0.0, 0.0], +# 'B': [0.03377575111300582, 0.0, 0.41580236064012105, 0.04580807650772246, 0.09171206841758799], +# } +# ), +# 'conc_vial_if_undiluted_mg_L': pd.DataFrame( +# index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='conc_vial_if_undiluted_mg_L'), +# data={ +# 'A': [460.9168519034765, 138.1709072429994, 0.0, 0.0, 0.0], +# 'Ader': [623.0911878446323, 119.57170627621078, 0.0, 0.0, 0.0], +# 'B': [0.03377575111300582, 0.0, 0.41580236064012105, 0.04580807650772246, 0.09171206841758799], +# } +# ), +# 'fraction_of_sample_fr': pd.DataFrame( +# index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='fraction_of_sample_fr'), +# data={ +# 'A': [0.03292263227881972, 0.009869350517357094, 0.0, 0.0, 0.0], +# 'Ader': [0.04450651341747372, 0.00854083616258648, 0.0, 0.0, 0.0], +# 'B': [1.2062768254644968e-05, 0.0, 0.0001485008430857575, 1.6360027324186634e-05, 3.275431014913856e-05], +# } +# ), +# 'fraction_of_feedstock_fr': pd.DataFrame( +# index=pd.Index(['C-aliph', 'carboxyl', 'C-arom', 'ketone', 'alcohol'], name='fraction_of_feedstock_fr'), +# data={ +# 'A': [0.014510828146666238, 0.004414840265872383, 0.0, 0.0, 0.0], +# 'Ader': [0.024426518981430268, 0.004689897339536736, 0.0, 0.0, 0.0], +# 'B': [8.710635362591769e-07, 0.0, 8.908006621759261e-05, 1.1813725467879507e-06, 1.9648077791126472e-05], +# } +# ) +# } +# return reports # fmt: on diff --git a/tests/data_minimal_case/compounds_properties.xlsx 
b/tests/data_minimal_case/compounds_properties.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..030484487f7061649b49d338f5a4e0dfe3ba7fc4 GIT binary patch literal 5495 zcmZ`-1yq!4*BwfRW(YwVNu|46Is~a91O!GvhLE94z@emvl5S9m0coTgk(QQ_5D<`b z2#J5Zu5aCY`M+;wz3;nbt=Z>2&t7Mr=R8|i6APOH00001T7~{#71IvF5cIDq^hJuk zY+Y=0-CSH@d{10mdA*&W+OgUMo&2N^t~It6oDKJx1eiV-ED!jpiHXMBDtU%p`0IDX z%}X!xM>kP^1b4G|iG+2R1cC-s0d)i_T}l&g@2IlTIDi@Chkc^RXYIOKD{1?%tSGgF zhBGeccWNa!=$b`V?roLUhrZ5qPkko9d7A2Y6dO9-%vk4g_5`Ob@f=6@U-=S`aT$F_ z=Q)T00D%6PuZ@cvxXmKcb_C2D*XJeKHjLKmow&@qkNTmdu&(MAlS&RfvH} z%>#i8M;u92sy)&~@Z(iiQqL;Coy;d>tjfSK#qhKr(+j!Fx7*1nUDo!{W2D&H<@3y#4#Orl`-n;g!;zVTbk>%Jl$5C^KW9(%rjX?ZvgM(|a zz%fuC+XVQFHdRs5!4#rhgmzPgG&c9vuVH1?jZJZ%qaVE1n_&@2hu&7ReDhNu>iHIz zjRjdj3jB9Uko)cUMOqfMrVfBE@qhtxvaT;7Q9w;^ioq%KWiuK+mZ|SG^9x3B2WTF( zs;yOk1-HHYsDYlTizQRdI-}4j<)A?fqyU55Sb@DUqUP`n+H*0tC<__clB5?5cIKxA zGrbsgDOuAojreNN+1>ukPR_`&Z){vGC}bcY38zUNRWLyb6eN<8%mNR4`lGscbD}@a zxeGhdEUzATi`NZ~N4e5rFYA5C?#w!dwiPPi)f704+RmWnDiVs{#ppkrI0;zV&altK zk1=~V;pnw65gXZCKI%=>dhjs3t0Jb5T#W$n8B1QP)4 z>N)jk94p1*-nmB-%`E+g1qJhc@DGVd&{b~__LRf;ofKgex2$*^Vt`opn3k?TQlakE zawlTkJbr5AUac+-Fugzcr)mgu&llw7&(seCg+jhr0g}UxNiQ5qF-mqnWQ-(Uz&6)% zN)o5F6^C7$x$zEuv6wvx=Wv!Hcax;EQ}iD}p%RMN1V{GH@V9lO?HG+xDj$!GtYt^L zv3yuSfdfzvRh?O;-?mrZola6=RnF<>X<7q`*Qyfe5~gh32_e9EG6#j zLz_Z09V$p#k8t~OY2PcHvrG|ux&&H!Ogq|Y6=t}@hkXVlYixRXCx}Z?*7}Ypja%*Q z;JW8;;Z~+(y<^@w<29fQlQNM+S*!k?p#+cNsRUvW!a@xF-{GiuP#Z&-xJLSJ{&O() zWwu3^ddFtS;M2>7z_3XnD0l3M>SrpAH4^p`xh+9q+=6ryycW38vz&L~!m@7|1o3eW z8X08QeZBF;QhJ+pD?E_23ISvHe5t~+`r$A zw`ps}hBs(10TFvemaRzZyrutBBYAoq*~r--ZML?B%yXCJ9yyb?+^88M5K^jl)BIkS z)LP3fu5n!T$yJA8kWk7_8bM#@l}j)33-#D@jircRKUJxLMzIyksVVD=q{ zNAWIQXInkqG%FIb*o^_Njh!}T#>Reabm&zU>`AB4_eC7$Gylj7dp**n6z=0^`PweI zS67(A30lU=tx|8*Fnz#Hv21I_lZ*QjsfT&^$k0l_tQU(bKHpy}T6xoTBh}n;bqujO zUks^rM&9$f<8=3tM5n`Ra=X%xi;$`gL;4Xzv%|{PXEHUEIwMym<^4H?Af4L;WNJ1^ zRK*(kM|bpFH@MWh4aM7(h3d#sshd~7^*z_oZy@umBf%7G{2=kx7b0L+e7M9zh{tUf 
zVM4Z|{djkBBO|lI_pax8s#k~A;ic<4ILL6SN!>Hr}HgE|-;fq0Id{gmx zzRp=Z9M|&}pJi&OWK@m{(v}&j!(kb|4V}tm*Wf9JGr=VZ*LlfBEQ@dC+)9Ok9RaUi z1%XPfTbqy%&)71hYWxm@nKCY><_l2^%Y^=)OHGSxxj=H7xc-cYTQ<>V$HJCz6xQX% zRpq^N0)#$6;3LAkr{YWD%IP#hlTl?7G59VDlk)3w^Ph?AQAEOY2{;x4X?3CnYdZMS zU#|9uDm%MlzwSo0Vf=do2^q=KPsaiP6o~-<>OUKZmy4St%pL-9hw=UO_-hx*N;QM6 zh*I>m;dkl((k;tyk!sgkg(~u5ck0Zms$c}maG6!`bfV^sMNOt2K{`^>um?6SVuus1 zB_n%P^#aQEd)JoDhNFB1wTigqd@NejO?0*sI8E}nrS#Sf-Z|8se1C84KCdWoLJ9~@ zohYN$>D#S_h|ov9Tr=rNnaq`4lPJGxd}2<=p(K8BWtF_EKP$Umyo&fSOT~W8Nts)` zC^0NLXrgDAPi+7dyK_Y>SFWa6!|>wi4%n;rW$|k<=2p?f@LgMAdM33U?JPX^`*s#v z^btGaEBoy5MWMGQ zTUIRBb2W*J2j>CJzOVc_I2N*%92F%w+FUV*4CSHC20K~u}w%Hi!yG;&<+u^ zEgV-xW(wRVp}q}jJlvmGmG3W^B>Feqs#!ntC$iV{_;JhYm&YOPx=EVpOseCI z#@cQ7=b$2gmPZ{tJBN4Ycbr|Okj5*hZsY0EsV_2<(!=!FSyDi^UM~Bzk4bJuU_aow zC3rlXjr7x9v+7M;c6C8@-SI#+dxX4MiTKwc#*~wL55|CDHhV4ez$RjExW{%&7}kRL zHVw-HYP9ywmx8CXJ>u|qxPq1%X>07QWkKxY3Eh zOJ8Tqvk81d5(a$Giu^7j>I;c0z@y3ujmqle(KiA=+#|X&xx2-Ju)00tJLkmbirD7+ zj(hctMQ(MNxZ4&xkJ+GXx7Y<(>qtbHB?%=lHBc%6Ct6USJ_$05>EstZzJe2_kx`ri z6>_@!aTC5?micv6CaX|{1ri3iWClriZcgdVb?VKHgN_Uz@`eb6<(=<_e|N>S-JE*E zipk8{9tR)xrk%U=&jk*Q2&pqB$_pz6YY(+N*sz%p+;R>8s?3Q0 z@Om6>W!s$PMKA1DO&dlXzSnvc!kC%vTX#!b@J)qA5@C_1i@Jol_ej$H*;GKGF!RbVBLi~0V7g#EVCv~@ysD= zSueX2Uuav`{DDib2d*RLDD%#5JU>%Zm%ep5?&zsPlOY}vX1vu|8kSo51e8kstfmMqI^|fs%?=ZKvK>B(PZG1cSNnG`5=Ux5QIp2sn4w!`#a-p)QGPe%& zXg8u2--4hn!UPGK_Qvi@{r5fSg{|Lvlqz@CH*KyP1$dIsKeI^s6Tb%NHz#lMO)51zRFmzkfuF>z@uv{uW4ddNmA-pmZ!r#@q+~0 zm_EJC{@EfOm|~5jB$;iSUwt~H(_AQI)Ep$1IY7@T9=#(}ud}0zrHzjxSyQF4fqCZk zc=D=Ddtcsgi$wa(;oUJ`ig7%8<1Blc3d?%Hj+9Z?!;WOEnaTZ>m~ok`j{8|_d(EJN z`y(G1DsMlZW_{76XHGHUaLZ0n?4mux$$Tzgw+9>Bl@`h0FE-lJ28^)9i4P)W4M=zr zU)IhNa2e-QzS)earqbX`YdTg&IfUOVmRH`UZi!tAk5X&oK5FLdKXk5l39n*v1XG+c z6Cyx&!;eE3uj&qku=s^UU%V!IKUi&}R4gr&Z~IXwSvBGfv)e}mHDH{|t-m;)JXC?S z)%7#bDBC7y061VHiTo55GAwFd>|CWDtQ)sIwnN)`=yBmGH!hi8X2C ztFrHr?4ZtGBlP@b%NfzXE4p5F`&B!-l5eA{8eP-?OE(A<#>e~no`ukK#uOlwN0LD- zwcd7P3Y5)MDn$@9L~azMbtl+#AERPs+L`P@n%CLcJV+AZTu(aKd=a#lzuSD)NTeL_ 
z-tnGQ_Vax3eGyH0o;>0B(VsJ<+~1Da`gyvXr|Pv}7wX6O3ENGu8i*G@#ZQdWimr4P z4JFgr=Rrc6*F#@M@IR+#AzV zcrR^UAC11DkU4A1;xCFEpH{|Wcd0zPamYn^V7%i;A8_xX7fSL+y1%z#YZn*C-{Osl zAJah}ag|>beZ(Dg4pGoVxRNtWBy|Vb(Qs~T6)#C=Gmxm_rjzx0? zqlkI3^LP#O1eeCQ5~xSk9%N|u*lN9H@3JD`245zKjeYyDcvcv(*wXW;9_hsg4<^VLm%aUd47t9QLTPWVQ|U ze$$moY$gqjyRIB=eB8_UclaDJYzf8FVu|z1tzzGJ?fHI0x?kEro4+64=!-(zt4-x- zQh%WdrujqepF;X4#eWr5ZGsH(OMb#oB-uKE`pxG&%wpa$N~(yEGf?n@s?yF~+75sk zeG|XQNJ`s@tsS56D?cf{X(_TJd-jizz8wZYp%fjZ^3+!f_L!dqD@*|dI$H|0KnJ>| z4#qq0`(w5Ac!;l==Zd#PWXl8N#1BaL{mU5NDB0e-zizf>-U&sCc9bV4UEE#b=HF}N zRuLs@7WAS{U>9tq^)}d_B<*C3e3U2{L**xf62W1>Y!(kS9kT1VatqnH)!2=ngcPYSY{YVb}R9zxaWoOtJJx0S8xw^9alSJb{nX}QYzoYko zxvOG!WOW4dG1pgvQ{SMZ=rAZ3d5c<~Lw+^pmOaMcAdOuUp@$R2W-cmLQzII$u=iF_ zCXeb^52f%y{F95tA!n=jMzyuiPWYhNTI?4aSW;8w|d6#>BQ^=yk-iJgHUuBST=)o8RlLGL+O)@m0e|>_` z4F7-2>?ZoAW%UOO00d!F{uBKl1M4RIrm^!kd;tCW|6=pp3~+O>|4)FrV0_?T0siZl ze>2L>apFHwf-vULy%?Rw-^0aC=*_f v{|!Dw8=YwI|4{wSI5(yAcbsXo0r_8qrK^d9_Tc~kJoHx<-N9LY3lQ*s+yJXU literal 0 HcmV?d00001 diff --git a/tests/test_minimal_case.py b/tests/test_minimal_case.py index c5a368e..a2f91ff 100644 --- a/tests/test_minimal_case.py +++ b/tests/test_minimal_case.py @@ -53,7 +53,8 @@ def test_load_class_code_fractions(gcms, checked_load_class_code_fractions): def test_load_calibrations( - gcms, checked_load_calibrations, checked_is_calibrations_deriv + gcms, + checked_load_calibrations, ): files_info = gcms.create_files_info() calib_to_check, is_calib_deriv_to_check = gcms.load_calibrations() @@ -67,4 +68,183 @@ def test_load_calibrations( to_check, checked, check_exact=False, atol=1e-5, rtol=1e-5 ) - assert is_calib_deriv_to_check == checked_is_calibrations_deriv + assert is_calib_deriv_to_check == False + + +def test_list_of_all_compounds(gcms, checked_list_of_all_compounds): + to_check = gcms.create_list_of_all_compounds() + assert to_check.sort() == checked_list_of_all_compounds.sort() + + +# def test_list_of_all_deriv_compounds(gcms, checked_list_of_all_deriv_compounds): +# 
to_check = gcms.create_list_of_all_deriv_compounds() +# assert to_check.sort() == checked_list_of_all_deriv_compounds.sort() + + +@pytest.mark.slow +def test_create_compounds_properties(gcms, checked_compounds_properties): + to_check = gcms.create_compounds_properties() + assert_frame_equal( + to_check.sort_index(), + checked_compounds_properties.sort_index(), + check_exact=False, + check_dtype=False, + atol=1e-5, + rtol=1e-5, + ) + + +# @pytest.mark.slow +# def test_create_deriv_compounds_properties(gcms, checked_deriv_compounds_properties): +# to_check = gcms.create_deriv_compounds_properties() +# assert_frame_equal( +# to_check.sort_index(), +# checked_deriv_compounds_properties.sort_index(), +# check_exact=False, +# atol=1e-3, +# rtol=1e-3, +# ) + + +def test_load_compounds_properties(gcms, checked_compounds_properties): + to_check = gcms.load_compounds_properties() + assert_frame_equal( + to_check.sort_index(), + checked_compounds_properties.sort_index(), + check_exact=False, + atol=1e-3, + rtol=1e-3, + ) + + +# def test_load_deriv_compounds_properties(gcms, checked_deriv_compounds_properties): +# to_check = gcms.load_deriv_compounds_properties() +# assert_frame_equal( +# to_check.sort_index(), +# checked_deriv_compounds_properties.sort_index(), +# check_exact=False, +# atol=1e-3, +# rtol=1e-3, +# ) + + +def test_create_samples_info(gcms, checked_samples_info, checked_samples_info_std): + to_check, to_check_std = gcms.create_samples_info() + assert_frame_equal( + to_check, checked_samples_info, check_exact=False, atol=1e-5, rtol=1e-5 + ) + assert_frame_equal( + to_check_std, checked_samples_info_std, check_exact=False, atol=1e-5, rtol=1e-5 + ) + + +@pytest.mark.parametrize( + "parameter", + [ + "height", + "area", + "area_if_undiluted", + "conc_vial_mg_L", + "conc_vial_if_undiluted_mg_L", + "fraction_of_sample_fr", + "fraction_of_feedstock_fr", + ], +) +def test_files_param_reports(gcms, checked_files_param_reports, parameter): + to_check = 
gcms.create_files_param_report(param=parameter) + checked_report = checked_files_param_reports[parameter] + assert_frame_equal( + to_check, checked_report, check_exact=False, atol=1e-5, rtol=1e-5 + ) + + +def test_files_param_reports_exception(gcms): + with pytest.raises(ValueError): + gcms.create_files_param_report(param="wrong_parameter") + + +@pytest.mark.parametrize( + "parameter", + [ + "height", + "area", + "area_if_undiluted", + "conc_vial_mg_L", + "conc_vial_if_undiluted_mg_L", + "fraction_of_sample_fr", + "fraction_of_feedstock_fr", + ], +) +def test_files_param_aggrreps(gcms, checked_files_param_aggrreps, parameter): + to_check = gcms.create_files_param_aggrrep(param=parameter) + checked_report = checked_files_param_aggrreps[parameter] + assert_frame_equal( + to_check, checked_report, check_exact=False, atol=1e-5, rtol=1e-5 + ) + + +def test_files_param_aggreps_exception(gcms): + with pytest.raises(ValueError): + gcms.create_files_param_aggrrep(param="wrong_parameter") + + +@pytest.mark.parametrize( + "parameter", + [ + "height", + "area", + "area_if_undiluted", + "conc_vial_mg_L", + "conc_vial_if_undiluted_mg_L", + "fraction_of_sample_fr", + "fraction_of_feedstock_fr", + ], +) +def test_samples_param_reports( + gcms, checked_samples_param_reports, checked_samples_param_reports_std, parameter +): + to_check, to_check_std = gcms.create_samples_param_report(param=parameter) + checked_report = checked_samples_param_reports[parameter] + checked_report_std = checked_samples_param_reports_std[parameter] + assert_frame_equal( + to_check, checked_report, check_exact=False, atol=1e-5, rtol=1e-5 + ) + assert_frame_equal( + to_check_std, checked_report_std, check_exact=False, atol=1e-5, rtol=1e-5 + ) + + +def test_samples_param_reports_exception(gcms): + with pytest.raises(ValueError): + gcms.create_samples_param_report(param="wrong_parameter") + + +@pytest.mark.parametrize( + "parameter", + [ + "height", + "area", + "area_if_undiluted", + "conc_vial_mg_L", + 
"conc_vial_if_undiluted_mg_L", + "fraction_of_sample_fr", + "fraction_of_feedstock_fr", + ], +) +def test_samples_param_aggrreps( + gcms, checked_samples_param_aggrreps, checked_samples_param_aggrreps_std, parameter +): + to_check, to_check_std = gcms.create_samples_param_aggrrep(param=parameter) + checked_report = checked_samples_param_aggrreps[parameter] + checked_report_std = checked_samples_param_aggrreps_std[parameter] + assert_frame_equal( + to_check, checked_report, check_exact=False, atol=1e-5, rtol=1e-5 + ) + assert_frame_equal( + to_check_std, checked_report_std, check_exact=False, atol=1e-5, rtol=1e-5 + ) + + +def test_samples_param_aggrreps_exception(gcms): + with pytest.raises(ValueError): + gcms.create_samples_param_aggrrep(param="wrong_parameter") diff --git a/tests/test_name_to_properties.py b/tests/test_name_to_properties.py index d61445c..44c654f 100644 --- a/tests/test_name_to_properties.py +++ b/tests/test_name_to_properties.py @@ -21,7 +21,9 @@ def test_name_to_properties_wrong_input_df_empty( @pytest.mark.parametrize("compound_name", [" ", None, False, np.nan]) def test_name_to_properties_wrong_input_df_not_empty( - compound_name, dicts_classifications_codes_fractions, checked_compounds_properties + compound_name, + dicts_classifications_codes_fractions, + checked_n2p_compounds_properties, ): dict_class_to_code, dict_class_to_mass_fraction = ( dicts_classifications_codes_fractions @@ -30,11 +32,11 @@ def test_name_to_properties_wrong_input_df_not_empty( compound_name, dict_class_to_code, dict_class_to_mass_fraction, - checked_compounds_properties, + checked_n2p_compounds_properties, ) assert_frame_equal( to_check, - checked_compounds_properties, + checked_n2p_compounds_properties, check_exact=False, atol=1e-3, rtol=1e-3, @@ -63,7 +65,7 @@ def test_name_to_properties_name_not_on_pubchem_df_empty( def test_name_to_properties_name_not_on_pubchem_df_not_empty( dicts_classifications_codes_fractions, - checked_compounds_properties, + 
checked_n2p_compounds_properties, ): dict_class_to_code, dict_class_to_mass_fraction = ( dicts_classifications_codes_fractions @@ -72,12 +74,14 @@ def test_name_to_properties_name_not_on_pubchem_df_not_empty( "name_not_on_pcp", dict_class_to_code, dict_class_to_mass_fraction, - checked_compounds_properties, + checked_n2p_compounds_properties, + ) + checked_n2p_compounds_properties.loc["name_not_on_pcp", "iupac_name"] = ( + "unidentified" ) - checked_compounds_properties.loc["name_not_on_pcp", "iupac_name"] = "unidentified" assert_frame_equal( to_check, - checked_compounds_properties, + checked_n2p_compounds_properties, check_exact=False, atol=1e-5, rtol=1e-5, @@ -100,7 +104,7 @@ def test_name_to_properties_name_not_on_pubchem_df_not_empty( ], ) def test_name_to_properties_single_compounds( - compound, dicts_classifications_codes_fractions, checked_compounds_properties + compound, dicts_classifications_codes_fractions, checked_n2p_compounds_properties ): dict_class_to_code, dict_class_to_mass_fraction = ( dicts_classifications_codes_fractions @@ -111,7 +115,7 @@ def test_name_to_properties_single_compounds( ) to_check = to_check.loc[[compound], :] to_check = to_check.loc[:, (to_check != 0).any(axis=0)] - checked = checked_compounds_properties.loc[[compound], :] + checked = checked_n2p_compounds_properties.loc[[compound], :] checked = checked.loc[:, (checked != 0).any(axis=0)] assert_frame_equal( to_check, @@ -123,7 +127,7 @@ def test_name_to_properties_single_compounds( def test_name_to_properties_all_compounds( - dicts_classifications_codes_fractions, checked_compounds_properties + dicts_classifications_codes_fractions, checked_n2p_compounds_properties ): dict_class_to_code, dict_class_to_mass_fraction = ( dicts_classifications_codes_fractions @@ -151,7 +155,7 @@ def test_name_to_properties_all_compounds( to_check = name_to_properties( compound, dict_class_to_code, dict_class_to_mass_fraction, to_check ) - checked = checked_compounds_properties + checked = 
checked_n2p_compounds_properties assert_frame_equal( to_check, checked, From f14c6f7d3aae05db08e776a2dcc01e5d058be48b Mon Sep 17 00:00:00 2001 From: mpecchi Date: Fri, 29 Mar 2024 10:19:25 -0400 Subject: [PATCH 7/7] fixed RCS example based on new version --- RCSdata/RCS_gcms_data_analysis.py | 302 ++++++++++++++++++++---------- 1 file changed, 199 insertions(+), 103 deletions(-) diff --git a/RCSdata/RCS_gcms_data_analysis.py b/RCSdata/RCS_gcms_data_analysis.py index 8425655..76333a4 100644 --- a/RCSdata/RCS_gcms_data_analysis.py +++ b/RCSdata/RCS_gcms_data_analysis.py @@ -5,6 +5,7 @@ @author Matteo Pecchi (mp933@cornell.edu). """ + # ============================================================================= # # necessary packages, install them using conda (not pip) # ============================================================================= @@ -17,35 +18,41 @@ from rdkit import Chem from rdkit.Chem import DataStructs from rdkit.Chem.AllChem import GetMorganFingerprintAsBitVect -from gcms_data_analysis import Project, figure_create, figure_save +from gcms_data_analysis import Project +from gcms_data_analysis.plotting import plot_ave_std, MyFigure + +folder_path = plib.Path(plib.Path(__file__).parent, "data") + -folder_path = plib.Path(plib.Path(__file__).parent, 'data') -#%% +# %% def get_calibration_error(name0, name1, calibration, xrange=[10, 200], steps=100): - cols_cal_area = [c for c in list(calibration) if 'Area' in c] - cols_cal_ppms = [c for c in list(calibration) if 'PPM' in c] - calibration[cols_cal_area + cols_cal_ppms] = \ - calibration[cols_cal_area + cols_cal_ppms].apply(pd.to_numeric, - errors='coerce') + cols_cal_area = [c for c in list(calibration) if "Area" in c] + cols_cal_ppms = [c for c in list(calibration) if "PPM" in c] + calibration[cols_cal_area + cols_cal_ppms] = calibration[ + cols_cal_area + cols_cal_ppms + ].apply(pd.to_numeric, errors="coerce") cal_areas0 = calibration.loc[name0, cols_cal_area].to_numpy(dtype=float) cal_ppms0 = 
calibration.loc[name0, cols_cal_ppms].to_numpy(dtype=float) # linear fit of calibration curve (exclude nan), get ppm from area - fit0 = np.polyfit(cal_areas0[~np.isnan(cal_areas0)], - cal_ppms0[~np.isnan(cal_ppms0)], 1) + fit0 = np.polyfit( + cal_areas0[~np.isnan(cal_areas0)], cal_ppms0[~np.isnan(cal_ppms0)], 1 + ) cal_areas1 = calibration.loc[name1, cols_cal_area].to_numpy(dtype=float) cal_ppms1 = calibration.loc[name1, cols_cal_ppms].to_numpy(dtype=float) # linear fit of calibration curve (exclude nan), get ppm from area - fit1 = np.polyfit(cal_areas1[~np.isnan(cal_areas1)], - cal_ppms1[~np.isnan(cal_ppms1)], 1) + fit1 = np.polyfit( + cal_areas1[~np.isnan(cal_areas1)], cal_ppms1[~np.isnan(cal_ppms1)], 1 + ) x = np.arange(xrange[0], xrange[1], steps) line0 = np.poly1d(fit0)(x) line1 = np.poly1d(fit1)(x) - mse = np.mean((line0 - line1)**2) - mse = np.average(abs(line0-line1)/line1)*100 + mse = np.mean((line0 - line1) ** 2) + mse = np.average(abs(line0 - line1) / line1) * 100 return mse -#%% + +# %% Project.set_folder_path(folder_path) # Set the base folder path for the project's data files @@ -65,7 +72,7 @@ def get_calibration_error(name0, name1, calibration, xrange=[10, 200], steps=100 # Load calibration data for standard and derivatized samples, and determine if they are derivatized calibrations, is_calibr_deriv = gcms.load_calibrations() -c1, c2 = calibrations['calibration88'], calibrations['deriv_calibration11'] +c1, c2 = calibrations["calibration88"], calibrations["deriv_calibration11"] # Generate a comprehensive list of all compounds found across samples list_of_all_compounds = gcms.create_list_of_all_compounds() @@ -87,7 +94,7 @@ def get_calibration_error(name0, name1, calibration, xrange=[10, 200], steps=100 files, is_files_deriv = gcms.apply_calibration_to_files() # Extract specific files for detailed analysis or further operations -f11, f22, f33 = files['A_1'], files['Ader_1'], files['B_1'] +f11, f22, f33 = files["A_1"], files["Ader_1"], files["B_1"] # 
Add statistical information to the files_info DataFrame, such as mean, median, and standard deviation for each file files_info = gcms.add_stats_to_files_info() @@ -97,105 +104,194 @@ def get_calibration_error(name0, name1, calibration, xrange=[10, 200], steps=100 # Create samples and their standard deviations from the files, storing the results in dictionaries samples, samples_std = gcms.create_samples_from_files() -s1, s2, s3 = samples['A'], samples['Ader'], samples['B'] -sd1, sd2, sd3 = samples_std['A'], samples_std['Ader'], samples_std['B'] - -# Add statistical information to the samples_info DataFrame, enhancing the initial analysis with statistical data -samples_info = gcms.add_stats_to_samples_info() +s1, s2, s3 = samples["A"], samples["Ader"], samples["B"] +sd1, sd2, sd3 = samples_std["A"], samples_std["Ader"], samples_std["B"] # Generate reports for specific parameters (e.g., concentration, mass fraction) for files and samples -rep_files_conc = gcms.create_files_param_report(param='conc_vial_mg_L') -rep_files_fr = gcms.create_files_param_report(param='fraction_of_sample_fr') -rep_samples_conc, rep_samples_conc_std = gcms.create_samples_param_report(param='conc_vial_mg_L') -rep_samples_fr, rep_samples_fr_std = gcms.create_samples_param_report(param='fraction_of_sample_fr') +rep_files_conc = gcms.create_files_param_report(param="conc_vial_mg_L") +rep_files_fr = gcms.create_files_param_report(param="fraction_of_sample_fr") +rep_samples_conc, rep_samples_conc_std = gcms.create_samples_param_report( + param="conc_vial_mg_L" +) +rep_samples_fr, rep_samples_fr_std = gcms.create_samples_param_report( + param="fraction_of_sample_fr" +) # Generate aggregated reports based on functional groups for files and samples, for specific parameters -agg_files_conc = gcms.create_files_param_aggrrep(param='conc_vial_mg_L') -agg_files_fr = gcms.create_files_param_aggrrep(param='fraction_of_sample_fr') -agg_samples_conc, agg_samples_conc_std = 
gcms.create_samples_param_aggrrep(param='conc_vial_mg_L') -agg_samples_fr, agg_samples_fr_std = gcms.create_samples_param_aggrrep(param='fraction_of_sample_fr') +agg_files_conc = gcms.create_files_param_aggrrep(param="conc_vial_mg_L") +agg_files_fr = gcms.create_files_param_aggrrep(param="fraction_of_sample_fr") +agg_samples_conc, agg_samples_conc_std = gcms.create_samples_param_aggrrep( + param="conc_vial_mg_L" +) +agg_samples_fr, agg_samples_fr_std = gcms.create_samples_param_aggrrep( + param="fraction_of_sample_fr" +) # %% Plotting results based on the generated reports, allowing for visual comparison of average values and standard deviations # Plot results for individual files or samples based -gcms.plot_ave_std(param='fraction_of_sample_fr', min_y_thresh=0.05, files_or_samples='files', - legend_location='outside', xlab_rot=30, filename='sample_fraction_files', +mf = plot_ave_std( + gcms, + width=8, + height=4.5, + param="fraction_of_sample_fr", + min_y_thresh=0.05, + files_or_samples="files", + legend_location="best", + x_label_rotation=30, + filename="sample_fraction_files", # only_samples_to_plot=['A_1', 'A_2', 'Ader_1', 'B_2'], - y_lim=[0, .3], annotate_lttrs='a' - ) -gcms.plot_ave_std(param='fraction_of_sample_fr', min_y_thresh=0.05, files_or_samples='samples', - legend_location='outside', xlab_rot=0, filename='sample_fraction_samples', + y_lim=[0, 0.4], + annotate_lttrs="a", + annotate_lttrs_xy=(-0.08, -0.08), +) +# %% +mf = plot_ave_std( + gcms, + width=7, + height=4.5, + param="fraction_of_sample_fr", + min_y_thresh=0.05, + files_or_samples="samples", + legend_location="outside", + filename="sample_fraction_samples", # only_samples_to_plot=['A_1', 'A_2', 'Ader_1', 'B_2'], - y_lim=[0, .3], annotate_lttrs='b' - ) -#%% + y_lim=[0, 0.3], + annotate_lttrs="b", + annotate_lttrs_xy=(-0.2, -0.05), +) +# %% # plot results bases on aggreport -gcms.plot_ave_std(param='fraction_of_sample_fr', aggr=True, files_or_samples='files', - 
filename='sample_fraction_aggr_files', xlab_rot=30, annotate_lttrs='c', - min_y_thresh=0.01, #yt_sum=True, - y_lim=[0, 1], color_palette='Set2') -gcms.plot_ave_std(param='fraction_of_sample_fr', aggr=True, files_or_samples='samples', - filename='sample_fraction_aggr_samples', annotate_lttrs='d', - min_y_thresh=0.01, #yt_sum=True, - y_lim=[0, 1], color_palette='Set2') -#%% -gcms.plot_ave_std(param='fraction_of_sample_fr', min_y_thresh=0.01, - legend_location='outside', only_samples_to_plot=['A', 'Ader', 'B'], - y_lim=[0, 0.3] - ) +mf = plot_ave_std( + gcms, + width=7, + height=4.5, + param="fraction_of_sample_fr", + aggr=True, + files_or_samples="files", + filename="sample_fraction_aggr_files", + x_label_rotation=30, + annotate_lttrs="c", + min_y_thresh=0.01, # yt_sum=True, + y_lim=[0, 1], + color_palette="Set2", + annotate_lttrs_xy=(-0.08, -0.08), +) +# %% +mf = plot_ave_std( + gcms, + width=4.5, + height=4.5, + param="fraction_of_sample_fr", + aggr=True, + files_or_samples="samples", + filename="sample_fraction_aggr_samples", + annotate_lttrs="d", + min_y_thresh=0.01, # yt_sum=True, + y_lim=[0, 1], + color_palette="Set2", + annotate_lttrs_xy=(-0.15, -0.05), +) +# %% +mf = plot_ave_std( + gcms, + width=8, + height=4.5, + param="fraction_of_sample_fr", + min_y_thresh=0.01, + legend_location="outside", + only_samples_to_plot=["A", "Ader", "B"], + y_lim=[0, 0.3], +) # %% plot results bases on aggreport -gcms.plot_ave_std(param='fraction_of_sample_fr', aggr=True, min_y_thresh=0.01, - y_lim=[0, .5], color_palette='Set2') +mf = plot_ave_std( + gcms, + width=4.5, + height=4.5, + param="fraction_of_sample_fr", + aggr=True, + min_y_thresh=0.01, + y_lim=[0, 0.5], + color_palette="Set2", +) -#%% +# %% run_tanimoto_analysis = True if run_tanimoto_analysis: in_path = folder_path - out_path_cal = plib.Path(folder_path, 'output_tanimoto') + out_path_cal = plib.Path(folder_path, "output_tanimoto") out_path_cal.mkdir(parents=True, exist_ok=True) - calibration = 
pd.read_excel(plib.Path(in_path, 'calibration88.xlsx'), - engine='openpyxl', index_col='Name') + calibration = pd.read_excel( + plib.Path(in_path, "calibration88.xlsx"), engine="openpyxl", index_col="Name" + ) combs = combinations(calibration.index.tolist(), 2) - tanimoto_error = pd.DataFrame(columns=['CalErr', 'DistMW', 'TanimS'], index=range(3915)) + tanimoto_error = pd.DataFrame( + columns=["CalErr", "DistMW", "TanimS"], index=range(3915) + ) for c, (name0, name1) in enumerate(combs): - tanimoto_error.loc[c, 'CalErr'] = get_calibration_error(name0, name1, calibration) - tanimoto_error.loc[c, 'DistMW'] = abs(calibration.loc[name0,'MW'] - calibration.loc[name1,'MW']) + tanimoto_error.loc[c, "CalErr"] = get_calibration_error( + name0, name1, calibration + ) + tanimoto_error.loc[c, "DistMW"] = abs( + calibration.loc[name0, "MW"] - calibration.loc[name1, "MW"] + ) try: - smis = [calibration.loc[name0, 'canonical_smiles'], - calibration.loc[name1, 'canonical_smiles']] + smis = [ + calibration.loc[name0, "canonical_smiles"], + calibration.loc[name1, "canonical_smiles"], + ] mols = [Chem.MolFromSmiles(smi) for smi in smis] fps = [GetMorganFingerprintAsBitVect(mol, 2, nBits=1024) for mol in mols] # perform Tanimoto similarity - tanimoto_error.loc[c, 'TanimS'] = DataStructs.TanimotoSimilarity(fps[0], fps[1]) + tanimoto_error.loc[c, "TanimS"] = DataStructs.TanimotoSimilarity( + fps[0], fps[1] + ) except TypeError: - tanimoto_error.loc[c, 'TanimS'] = np.nan - tanimoto_error.to_excel(plib.Path(out_path_cal, 'tanimoto_error.xlsx')) - fig, ax, axt, fig_par = figure_create(rows=1, cols=1, plot_type=0, hgt_mltp=1.2, - paper_col=1.4) - - aa = ax[0].scatter(tanimoto_error['TanimS'].values, tanimoto_error['CalErr'].values, - c=tanimoto_error['DistMW'].values) - ax[0].set_yscale('log') - plt.colorbar(aa, label=r'$\Delta$MW [atomic mass unit]') - plt.hlines(y=100, xmin=0, xmax=1, color='grey', linestyle='dotted') - plt.vlines(x=.4, ymin=0, ymax=100, color='grey', linestyle='dashed') 
- ax[0].annotate('default\nsetting', ha='left', va='bottom', - xycoords='axes fraction', - xy=(0.3, .01)) - ax[0].annotate('Error = 100%', ha='left', va='bottom', - xycoords='axes fraction', - xy=(0.8, .6)) - figure_save('tanimoto_error', out_path_cal, fig, ax, axt, fig_par, - x_lab='Tanimoto Similarity [-]', x_lim=[0, 1], y_lab='Average error [%]', - legend=None, tight_layout=True) + tanimoto_error.loc[c, "TanimS"] = np.nan + tanimoto_error.to_excel(plib.Path(out_path_cal, "tanimoto_error.xlsx")) + myfig = MyFigure( + rows=1, + cols=1, + width=7, + height=6, + x_lab="Tanimoto Similarity [-]", + x_lim=[0, 1], + y_lab="Average error [%]", + ) + # fig, ax, axt, fig_par = figure_create(rows=1, cols=1, plot_type=0, hgt_mltp=1.2, + # paper_col=1.4) + + aa = myfig.axs[0].scatter( + tanimoto_error["TanimS"].values, + tanimoto_error["CalErr"].values, + c=tanimoto_error["DistMW"].values, + ) + myfig.axs[0].set_yscale("log") + plt.colorbar(aa, label=r"$\Delta$MW [atomic mass unit]") + plt.hlines(y=100, xmin=0, xmax=1, color="grey", linestyle="dotted") + plt.vlines(x=0.4, ymin=0, ymax=100, color="grey", linestyle="dashed") + myfig.axs[0].annotate( + "default\nsetting", + ha="left", + va="bottom", + xycoords="axes fraction", + xy=(0.3, 0.01), + ) + myfig.axs[0].annotate( + "Error = 100%", ha="left", va="bottom", xycoords="axes fraction", xy=(0.8, 0.6) + ) + myfig.save_figure(filename="tanimoto_error", out_path=out_path_cal) + # figure_save('tanimoto_error', out_path_cal, fig, ax, axt, fig_par, + # x_lab='Tanimoto Similarity [-]', x_lim=[0, 1], y_lab='Average error [%]', + # legend=None, tight_layout=True) # create and export the similarity table for tetradecanoic acid - cpmnds = gcms.compounds_properties.set_index('iupac_name') - cpmnds = cpmnds[~cpmnds.index.duplicated(keep='first')].copy() + cpmnds = gcms.compounds_properties.set_index("iupac_name") + cpmnds = cpmnds[~cpmnds.index.duplicated(keep="first")].copy() iupac = cpmnds.index[0] - mws = [cpmnds.loc[iupac, 
'molecular_weight']] - smis = [cpmnds.loc[iupac, 'canonical_smiles']] + mws = [cpmnds.loc[iupac, "molecular_weight"]] + smis = [cpmnds.loc[iupac, "canonical_smiles"]] names_cal = [iupac] # then add all properties for all calibrated compounds # if the sample was not derivatized (default) @@ -203,28 +299,28 @@ def get_calibration_error(name0, name1, calibration, xrange=[10, 200], steps=100 for c in cpmnds.index.tolist()[1:6]: names_cal.append(c) # print(df_comps.index) - smis.append(cpmnds.loc[c, 'canonical_smiles']) - mws.append(cpmnds.loc[c, 'molecular_weight']) + smis.append(cpmnds.loc[c, "canonical_smiles"]) + mws.append(cpmnds.loc[c, "molecular_weight"]) # calculate the delta mw with all calib compounds - delta_mw = np.abs(np.asarray(mws)[0] - - np.asarray(mws)[1:]) + delta_mw = np.abs(np.asarray(mws)[0] - np.asarray(mws)[1:]) # get mols and fingerprints from rdkit for each comp mols = [Chem.MolFromSmiles(smi) for smi in smis] - fps = [GetMorganFingerprintAsBitVect(ml, 2, nBits=1024) - for ml in mols] + fps = [GetMorganFingerprintAsBitVect(ml, 2, nBits=1024) for ml in mols] # perform Tanimoto similarity betwenn the first and all # other compounds s = DataStructs.BulkTanimotoSimilarity(fps[0], fps[1:]) # create a df with results - df_sim = pd.DataFrame(data={'name': names_cal[1:], - 'smiles': smis[1:], 'Similarity': s, 'delta_mw': delta_mw}) + df_sim = pd.DataFrame( + data={ + "name": names_cal[1:], + "smiles": smis[1:], + "Similarity": s, + "delta_mw": delta_mw, + } + ) # put the index title as the comp - df_sim.set_index('name', inplace=True) + df_sim.set_index("name", inplace=True) df_sim.index.name = iupac # sort values based on similarity and delta mw - df_sim = df_sim.sort_values(['Similarity', 'delta_mw'], - ascending=[False, True]) - df_sim.to_excel(plib.Path(out_path_cal, 'similarity_table_tetradecanoic.xlsx')) - -# %% - + df_sim = df_sim.sort_values(["Similarity", "delta_mw"], ascending=[False, True]) + df_sim.to_excel(plib.Path(out_path_cal, 
"similarity_table_tetradecanoic.xlsx"))