Skip to content

Commit

Permalink
minor plotting updates
Browse files Browse the repository at this point in the history
  • Loading branch information
mpecchi committed May 13, 2024
1 parent 98f5947 commit 69c78d6
Showing 1 changed file with 176 additions and 2 deletions.
178 changes: 176 additions & 2 deletions src/gcms_data_analysis/gcms.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@
import pubchempy as pcp
import ele
import pandas as pd
import matplotlib.patches as mpatches
from matplotlib.axes import Axes
from rdkit import Chem
from rdkit.Chem import DataStructs
from rdkit.Chem.AllChem import ( # pylint: disable=no-name-in-module
GetMorganFingerprintAsBitVect,
)
from gcms_data_analysis.fragmenter import Fragmenter
from myfigure.myfigure import MyFigure, colors, hatches


class Project:
Expand Down Expand Up @@ -833,15 +836,18 @@ def create_files_param_aggrrep(self, param="conc_vial_mg_L"):
# fg = functional groups, mf = mass fraction
filenames = self.files_info.index.tolist()
_all_comps = self.files_reports[param].index.tolist()
_all_comps = [comp for comp in _all_comps if comp != "unidentified"]
_all_comps = [comp for comp in _all_comps] # if comp != "unidentified"]
fg_mf_labs = [
c for c in comps_df.columns if c.startswith("fg_mf_") if c != "fg_mf_total"
]
fg_labs = [c[6:] for c in fg_mf_labs]

fg_mf_all = pd.DataFrame(index=_all_comps, columns=fg_mf_labs)
for idx in fg_mf_all.index.tolist():
fg_mf_all.loc[idx, fg_mf_labs] = comps_df.loc[idx, fg_mf_labs]
if idx == "unidentified":
fg_mf_all.loc[idx, :] = 0
else:
fg_mf_all.loc[idx, fg_mf_labs] = comps_df.loc[idx, fg_mf_labs]
# create the aggregated dataframes and compute aggregated results
aggrrep = pd.DataFrame(columns=filenames, index=fg_labs, dtype="float")
aggrrep.fillna(0, inplace=True)
Expand Down Expand Up @@ -964,6 +970,148 @@ def save_files_samples_reports(self):
)
)

def plot_report(
    self,
    filename: str = "",
    report_or_aggrrep: Literal["report", "aggrrep"] = "report",
    files_or_samples: Literal["files", "samples"] = "samples",
    param: str = "conc_vial_mg_L",
    names_to_keep: list[str] | None = None,
    labels: list[str] | None = None,
    show_total_in_twinx: bool = False,
    y_axis_min_threshold: float | None = None,
    item_to_color_to_hatch: pd.DataFrame | None = None,
    alternative_colors: list[tuple] | None = None,
    yt_sum_label: str = "total\n(right axis)",
    remove_insignificant_values: bool = False,
    **kwargs,
) -> MyFigure:
    """Plot a report (or aggregated report) as a grouped bar chart and save it.

    The selected report is transposed, so each row of the plotted table
    (one file or one sample) becomes a group on the x axis and each column
    becomes one bar series. Missing reports are created on demand through
    the matching ``create_*_param_report`` / ``create_*_param_aggrrep``
    method. The figure is written under ``<self.out_path>/plots``.

    :param filename: prefix of the output file name; the values of
        ``report_or_aggrrep``, ``files_or_samples`` and ``param`` are appended.
    :param report_or_aggrrep: ``"report"`` plots the per-item reports; any
        other value plots the aggregated reports.
    :param files_or_samples: ``"files"`` plots single files (no standard
        deviation), ``"samples"`` plots sample averages with their
        standard deviations as error bars.
    :param param: the reported parameter; must be in ``self.acceptable_params``.
    :param names_to_keep: row labels (files or samples) to keep, in order.
    :param labels: replacement row labels; must match the number of rows
        left after ``names_to_keep`` is applied.
    :param show_total_in_twinx: if True, the row-wise sum is drawn as a
        scatter series on the twin y axis.
    :param y_axis_min_threshold: if given, only columns with at least one
        value above the threshold are plotted.
    :param item_to_color_to_hatch: table indexed by column name with a
        ``"clr"`` and a ``"htch"`` column, used to pick color and hatch
        of each bar series.
    :param alternative_colors: colors used instead of the default palette
        when ``item_to_color_to_hatch`` is not given.
    :param yt_sum_label: legend label of the total plotted on the twin axis.
    :param remove_insignificant_values: if True and standard deviations are
        available, values whose magnitude does not exceed their standard
        deviation are blanked out (values with a NaN deviation are kept).
    :param kwargs: forwarded to ``MyFigure``; they override the defaults
        assembled in this method.
    :raises ValueError: if ``param`` is not acceptable or
        ``files_or_samples`` is neither ``"files"`` nor ``"samples"``.
    :return: the saved ``MyFigure`` instance.
    """
    if param not in self.acceptable_params:
        raise ValueError(f"{param = } is not an acceptable param")
    # Fail early with a clear message: without this check an unexpected
    # value would leave df_ave/df_std unassigned and crash further down.
    if files_or_samples not in ("files", "samples"):
        raise ValueError(f"{files_or_samples = } must be 'files' or 'samples'")
    use_samples = files_or_samples == "samples"

    out_path = plib.Path(self.out_path, "plots")
    out_path.mkdir(parents=True, exist_ok=True)

    # Select (and lazily create) the table to plot. Only sample-level
    # tables come with a standard deviation; files get an empty frame.
    if report_or_aggrrep == "report":  # then use compounds reports
        if use_samples:
            if param not in self.samples_reports:
                self.create_samples_param_report(param)
            df_ave = self.samples_reports[param].T
            df_std = self.samples_reports_std[param].T
        else:
            if param not in self.files_reports:
                self.create_files_param_report(param)
            df_ave = self.files_reports[param].T
            df_std = pd.DataFrame()
    else:  # use aggregated reports
        if use_samples:
            if param not in self.samples_aggrreps:
                self.create_samples_param_aggrrep(param)
            df_ave = self.samples_aggrreps[param].T
            df_std = self.samples_aggrreps_std[param].T
        else:
            if param not in self.files_aggrreps:
                self.create_files_param_aggrrep(param)
            df_ave = self.files_aggrreps[param].T
            df_std = pd.DataFrame()

    if names_to_keep is not None:
        df_ave = df_ave.loc[names_to_keep, :].copy()
        if use_samples:
            df_std = df_std.loc[names_to_keep, :].copy()

    if labels is not None:
        df_ave.index = labels
        if use_samples:
            df_std.index = labels

    if y_axis_min_threshold is not None:
        # keep a column if any of its rows exceeds the threshold
        df_ave = df_ave.loc[:, (df_ave > y_axis_min_threshold).any(axis=0)].copy()
        if use_samples:
            df_std = df_std.loc[:, df_ave.columns].copy()

    if item_to_color_to_hatch is not None:  # specific color and hatch per item
        plot_colors = [
            item_to_color_to_hatch.loc[item, "clr"] for item in df_ave.columns
        ]
        plot_hatches = [
            item_to_color_to_hatch.loc[item, "htch"] for item in df_ave.columns
        ]
    else:  # no specific colors and hatches specified
        plot_colors = alternative_colors if alternative_colors is not None else colors
        plot_hatches = hatches

    # error bars are only drawn when at least one deviation is defined
    std_available = not (df_std.empty or df_std.isna().all().all())

    if remove_insignificant_values and std_available:
        mask = (df_ave.abs() > df_std.abs()) | df_std.isna()
        df_ave = df_ave[mask]
        df_std = df_std[mask]

    default_kwargs = {
        "filename": filename + report_or_aggrrep + files_or_samples + param,
        "out_path": out_path,
        "height": 4,
        "width": 4,
        "grid": self.plot_grid,
        "text_font": self.plot_font,
        "y_lab": self.param_to_axis_label[param],
        "yt_lab": self.param_to_axis_label[param],
        "twinx": bool(show_total_in_twinx),
        "auto_apply_hatches_to_bars": False,
    }
    # caller-supplied kwargs take precedence over the defaults
    kwargs = {**default_kwargs, **kwargs}
    myfig = MyFigure(rows=1, cols=1, **kwargs)
    df_ave.plot(
        ax=myfig.axs[0],
        kind="bar",
        width=0.9,
        edgecolor="k",
        legend=False,
        capsize=3,
        color=plot_colors,
        yerr=df_std if std_available else None,
    )

    # hatches are applied here because MyFigure's automatic hatching is
    # switched off through "auto_apply_hatches_to_bars" above
    apply_hatches_to_ax(myfig.axs[0], plot_hatches)

    if show_total_in_twinx:
        totals = df_ave.sum(axis=1).values
        myfig.axts[0].scatter(
            df_ave.index,
            totals,
            color="k",
            linestyle="None",
            edgecolor="k",
            facecolor="grey",
            s=100,
            label=yt_sum_label,
            alpha=0.5,
        )
        if std_available:
            myfig.axts[0].errorbar(
                df_ave.index,
                totals,
                df_std.sum(axis=1).values,
                capsize=3,
                linestyle="None",
                color="grey",
                ecolor="k",
                label="_nolegend_",
            )

    myfig.save_figure()
    return myfig


def create_tanimoto_similarity_dict(
comp_smiles: str, calib_smiless: list[str]
Expand Down Expand Up @@ -1202,3 +1350,29 @@ def name_to_properties(
df["iupac_name"] != "unidentified"
].fillna(0)
return _order_columns_in_compounds_properties(df)


def apply_hatches_to_ax(ax: Axes, hatches_list: list[str]) -> None:
    """
    Apply hatch patterns to the bars of a grouped bar plot drawn on ``ax``.

    Bars are taken from ``ax.patches`` and are assumed to be ordered series
    by series (all bars of the first series, then all bars of the second,
    and so on), with one bar per major x tick in each series. Every bar of
    a series receives the same hatch and a black edge, which keeps series
    distinguishable in black and white printouts.

    If there are more series than hatch patterns, the patterns are reused
    cyclically. The function does nothing when the axis holds no bars, has
    no major x ticks, or when ``hatches_list`` is empty.

    :param ax: the axis holding the bar plot.
    :param hatches_list: hatch patterns, one per bar series, in series order.
    """
    # keep only rectangular patches, i.e. the bars
    bars = [b for b in ax.patches if isinstance(b, mpatches.Rectangle)]
    # number of groups on the x axis = number of bars in each series
    num_groups = len(ax.get_xticks(minor=False))
    # nothing to hatch, or nothing to hatch with: avoid dividing by zero
    if not bars or not hatches_list or num_groups == 0:
        return
    num_hatches = len(hatches_list)
    for position, bar in enumerate(bars):
        # consecutive runs of num_groups bars belong to the same series
        series = position // num_groups
        bar.set_hatch(hatches_list[series % num_hatches])
        bar.set_edgecolor("k")

0 comments on commit 69c78d6

Please sign in to comment.