Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Separated plotting functions, fixed bugs, added lower level testing #2

Merged
merged 7 commits into from
Mar 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
302 changes: 199 additions & 103 deletions RCSdata/RCS_gcms_data_analysis.py

Large diffs are not rendered by default.

Binary file modified example/data/classifications_codes_fractions.xlsx
Binary file not shown.
71 changes: 59 additions & 12 deletions example/example_gcms_data_analysis.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,24 @@
# Import necessary libraries
import pathlib as plib # Used for handling file and directory paths
from gcms_data_analysis import (
Project,
) # Import the Project class from the gcms_data_analysis package
from gcms_data_analysis import Project
from gcms_data_analysis.plotting import plot_ave_std

# Define the folder path where your data is located. Change this path to where you've stored your data files.
# folder_path = plib.Path(plib.Path(__file__).parent, "example\data")
folder_path = plib.Path(
r"C:\Users\mp933\OneDrive - Cornell University\Python\gcms_data_analysis\example\data"
)

# folder_path: plib.Path = plib.Path(
# r"C:\Users\mp933\OneDrive - Cornell University\Python\GCMS\NNDNDD"
# )
# Set global configurations for the Project class.
# These configurations affect all instances of the class.
Project.set_folder_path(
folder_path
) # Set the base folder path for the project's data files
Project.set_plot_grid(False) # Disable grid lines in plots for a cleaner look
Project.set_plot_font("Sans") # Set the font style for plots to 'Sans'

Project.set_auto_save_to_excel(False)
# Initialize a Project instance to manage and analyze GCMS data
gcms = Project()

Expand All @@ -41,8 +42,8 @@
list_of_all_deriv_compounds = gcms.create_list_of_all_deriv_compounds()

# Load properties for standard and derivatized compounds from provided files
compounds_properties = gcms.load_compounds_properties()
deriv_compounds_properties = gcms.load_deriv_compounds_properties()
compounds_properties = gcms.create_compounds_properties()
deriv_compounds_properties = gcms.create_deriv_compounds_properties()

# Flag indicating whether new compounds have been added, triggering a need to regenerate properties data
new_files_with_new_compounds_added = False
Expand All @@ -56,7 +57,7 @@
# Extract specific files for detailed analysis or further operations
f11, f22, f33 = files["A_1"], files["Ader_1"], files["B_1"]

# Add statistical information to the files_info DataFrame, such as mean, median, and standard deviation for each file
# # Add statistical information to the files_info DataFrame, such as mean, median, and standard deviation for each file
files_info = gcms.add_stats_to_files_info()

# Create a samples_info DataFrame without applying calibration data, for initial analysis
Expand Down Expand Up @@ -90,15 +91,17 @@
# Plotting results based on the generated reports, allowing for visual comparison of average values and standard deviations
# Plot results for individual files or samples based

gcms.plot_ave_std(
plot_ave_std(
gcms,
param="fraction_of_sample_fr",
min_y_thresh=0,
files_or_samples="files",
legend_location="outside",
only_samples_to_plot=["A_1", "A_2", "Ader_1", "Ader_2"], # y_lim=[0, 5000]
)
# plot results bases on aggreport
gcms.plot_ave_std(
plot_ave_std(
gcms,
param="fraction_of_sample_fr",
aggr=True,
files_or_samples="files",
Expand All @@ -107,17 +110,61 @@
color_palette="Set2",
)

gcms.plot_ave_std(
plot_ave_std(
gcms,
param="fraction_of_sample_fr",
min_y_thresh=0,
legend_location="outside",
only_samples_to_plot=["A", "Ader"], # y_lim=[0, 5000]
)
# plot results bases on aggreport
gcms.plot_ave_std(
plot_ave_std(
gcms,
param="fraction_of_sample_fr",
aggr=True,
min_y_thresh=0.01,
y_lim=[0, 0.5],
color_palette="Set2",
)

# %%
# import pickle

# folder_path: plib.Path = plib.Path(r"C:\Users\mp933\Desktop\New folder")
# pickle_path: plib.Path = plib.Path(folder_path, "pickle_object.pkl")
# with open(pickle_path, "wb") as output_file:
# pickle.dump(gcms, output_file)
# %%
# import pickle
# import pathlib as plib # Used for handling file and directory paths
# from gcms_data_analysis import (
# Project,
# ) # Import the Project class from the gcms_data_analysis package

# folder_path: plib.Path = plib.Path(r"C:\Users\mp933\Desktop\New folder")
# pickle_path: plib.Path = plib.Path(folder_path, "pickle_object.pkl")
# with open(pickle_path, "rb") as input_file:
# gcms: Project = pickle.load(input_file)
# from gcms_data_analysis.plotting import plot_pave_std

# # %%
# myfig = plot_pave_std(
# gcms,
# files_or_samples="files",
# width=12,
# height=5,
# legend_location="outside",
# y_lim=[0, 100],
# )
# # %%
# myfig = plot_pave_std(
# gcms,
# files_or_samples="samples",
# width=6,
# height=6,
# legend_location="best",
# y_lim=[0, 100],
# min_y_thresh=10,
# )

# # %%
220 changes: 220 additions & 0 deletions example/example_minimal_case.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
# %% Import necessary libraries
import pathlib as plib # Used for handling file and directory paths
from gcms_data_analysis import Project
from gcms_data_analysis.plotting import plot_ave_std

# Define the folder path where your data is located. Change this path to where you've stored your data files.
# folder_path = plib.Path(plib.Path(__file__).parent, "example\data")
folder_path = plib.Path(
r"C:\Users\mp933\OneDrive - Cornell University\Python\gcms_data_analysis\tests\data_minimal_case"
)
# folder_path: plib.Path = plib.Path(
# r"C:\Users\mp933\OneDrive - Cornell University\Python\GCMS\NNDNDD"
# )
# Set global configurations for the Project class.
# These configurations affect all instances of the class.
Project.set_folder_path(
folder_path
) # Set the base folder path for the project's data files
Project.set_plot_grid(False) # Disable grid lines in plots for a cleaner look
Project.set_plot_font("Sans") # Set the font style for plots to 'Sans'
Project.set_auto_save_to_excel(False)
# Initialize a Project instance to manage and analyze GCMS data
gcms = Project()

# Load metadata from a user-provided 'files_info.xlsx' file, or generate it from .txt GC-MS files if not provided
files_info = gcms.load_files_info()
# Load individual GCMS .txt files as pandas DataFrames
files = gcms.load_all_files()
files = gcms.add_iupac_to_files()
list_of_all_compounds = gcms.create_list_of_all_compounds()
files, is_files_deriv = gcms.apply_calibration_to_files()
samples_info, samples_info_std = gcms.create_samples_info()
samples, samples_std = gcms.create_samples_from_files()

params = [
"height",
"area",
"area_if_undiluted",
"conc_vial_mg_L",
"conc_vial_if_undiluted_mg_L",
"fraction_of_sample_fr",
"fraction_of_feedstock_fr",
]
for param in params:
_ = gcms.create_files_param_report(param)
_ = gcms.create_files_param_aggrrep(param)

_, _ = gcms.create_samples_param_report(param)
_, _ = gcms.create_samples_param_aggrrep(param)

# %%
for param in params:
print(f"'{param}': ")
print_checked_df_to_script_text_with_arrays(gcms.files_reports[param])
# %%

for param in params:
print(f"'{param}': ")
print_checked_df_to_script_text_with_arrays(gcms.files_aggrreps[param])
# %%
for param in params:
print(f"'{param}': ")
print_checked_df_to_script_text_with_arrays(gcms.samples_reports[param])
# %%
for param in params:
print(f"'{param}': ")
print_checked_df_to_script_text_with_arrays(gcms.samples_reports_std[param])
# %%

for param in params:
print(f"'{param}': ")
print_checked_df_to_script_text_with_arrays(gcms.samples_aggrreps[param])
# %%

for param in params:
print(f"'{param}': ")
print_checked_df_to_script_text_with_arrays(gcms.samples_aggrreps_std[param])
# %%


# Load classification codes and mass fractions for functional groups from a provided file
class_code_frac = gcms.load_class_code_frac()

# Load calibration data for standard and derivatized samples, and determine if they are derivatized
calibrations, is_calibr_deriv = gcms.load_calibrations()
# c1, c2 = calibrations["calibration"], calibrations["deriv_calibration"]

# Generate a comprehensive list of all compounds found across samples
list_of_all_compounds = gcms.create_list_of_all_compounds()

# Similarly, create a list of all derivatized compounds found across samples
list_of_all_deriv_compounds = gcms.create_list_of_all_deriv_compounds()

# Load properties for standard and derivatized compounds from provided files
compounds_properties = gcms.create_compounds_properties()
deriv_compounds_properties = gcms.create_deriv_compounds_properties()

# Flag indicating whether new compounds have been added, triggering a need to regenerate properties data
new_files_with_new_compounds_added = False
if new_files_with_new_compounds_added:
compounds_properties = gcms.create_compounds_properties()
deriv_compounds_properties = gcms.create_deriv_compounds_properties()

# Apply calibration data to all loaded files, adjusting compound concentrations based on calibration curves
files, is_files_deriv = gcms.apply_calibration_to_files()

# Extract specific files for detailed analysis or further operations
f11, f22, f33 = files["A_1"], files["Ader_1"], files["B_1"]

# # Add statistical information to the files_info DataFrame, such as mean, median, and standard deviation for each file
files_info = gcms.add_stats_to_files_info()

# Create a samples_info DataFrame without applying calibration data, for initial analysis
samples_info_0 = gcms.create_samples_info()

# Create samples and their standard deviations from the files, storing the results in dictionaries
samples, samples_std = gcms.create_samples_from_files()
s1, s2, s3 = samples["A"], samples["Ader"], samples["B"]
sd1, sd2, sd3 = samples_std["A"], samples_std["Ader"], samples_std["B"]

# Generate reports for specific parameters (e.g., concentration, mass fraction) for files and samples
rep_files_conc = gcms.create_files_param_report(param="conc_vial_mg_L")
rep_files_fr = gcms.create_files_param_report(param="fraction_of_sample_fr")
rep_samples_conc, rep_samples_conc_std = gcms.create_samples_param_report(
param="conc_vial_mg_L"
)
rep_samples_fr, rep_samples_fr_std = gcms.create_samples_param_report(
param="fraction_of_sample_fr"
)

# Generate aggregated reports based on functional groups for files and samples, for specific parameters
agg_files_conc = gcms.create_files_param_aggrrep(param="conc_vial_mg_L")
agg_files_fr = gcms.create_files_param_aggrrep(param="fraction_of_sample_fr")
agg_samples_conc, agg_samples_conc_std = gcms.create_samples_param_aggrrep(
param="conc_vial_mg_L"
)
agg_samples_fr, agg_samples_fr_std = gcms.create_samples_param_aggrrep(
param="fraction_of_sample_fr"
)

# Plotting results based on the generated reports, allowing for visual comparison of average values and standard deviations
# Plot results for individual files or samples based

plot_ave_std(
gcms,
param="fraction_of_sample_fr",
min_y_thresh=0,
files_or_samples="files",
legend_location="outside",
only_samples_to_plot=["A_1", "A_2", "Ader_1", "Ader_2"], # y_lim=[0, 5000]
)
# plot results bases on aggreport
plot_ave_std(
gcms,
param="fraction_of_sample_fr",
aggr=True,
files_or_samples="files",
min_y_thresh=0.01,
y_lim=[0, 0.5],
color_palette="Set2",
)

plot_ave_std(
gcms,
param="fraction_of_sample_fr",
min_y_thresh=0,
legend_location="outside",
only_samples_to_plot=["A", "Ader"], # y_lim=[0, 5000]
)
# plot results bases on aggreport
plot_ave_std(
gcms,
param="fraction_of_sample_fr",
aggr=True,
min_y_thresh=0.01,
y_lim=[0, 0.5],
color_palette="Set2",
)

# %%
# import pickle

# folder_path: plib.Path = plib.Path(r"C:\Users\mp933\Desktop\New folder")
# pickle_path: plib.Path = plib.Path(folder_path, "pickle_object.pkl")
# with open(pickle_path, "wb") as output_file:
# pickle.dump(gcms, output_file)
# %%
# import pickle
# import pathlib as plib # Used for handling file and directory paths
# from gcms_data_analysis import (
# Project,
# ) # Import the Project class from the gcms_data_analysis package

# folder_path: plib.Path = plib.Path(r"C:\Users\mp933\Desktop\New folder")
# pickle_path: plib.Path = plib.Path(folder_path, "pickle_object.pkl")
# with open(pickle_path, "rb") as input_file:
# gcms: Project = pickle.load(input_file)
# from gcms_data_analysis.plotting import plot_pave_std

# # %%
# myfig = plot_pave_std(
# gcms,
# files_or_samples="files",
# width=12,
# height=5,
# legend_location="outside",
# y_lim=[0, 100],
# )
# # %%
# myfig = plot_pave_std(
# gcms,
# files_or_samples="samples",
# width=6,
# height=6,
# legend_location="best",
# y_lim=[0, 100],
# min_y_thresh=10,
# )

# # %%
Binary file not shown.
Binary file not shown.
Loading
Loading