Debug missing replicate from tracer plot

quanted · May 23, 2024 · 763d41f
1 parent b30081a
commit 763d41f
Show file tree

Hide file tree

Showing 2 changed files with 36 additions and 86 deletions.
diff --git a/app/data_handler/utilities.py b/app/data_handler/utilities.py
@@ -7,22 +7,25 @@
 logger = logging.getLogger("nta_app.ms2")
 logger.setLevel(logging.INFO)
 
-DSSTOX_API = os.environ.get('UBERTOOL_REST_SERVER')
+DSSTOX_API = os.environ.get("UBERTOOL_REST_SERVER")
+
 
 def connect_to_mongoDB(address):
     mongo = pymongo.MongoClient(host=address)
-    mongo_db = mongo['nta_ms2_runs']
+    mongo_db = mongo["nta_ms2_runs"]
     mongo.nta_ms2_runs.Collection.create_index([("date", pymongo.DESCENDING)], expireAfterSeconds=86400)
     # ALL entries into mongo.nta_runs must have datetime.utcnow() timestamp, which is used to delete the record after 86400
     # seconds, 24 hours.
     return mongo_db
 
-def connect_to_mongo_gridfs(address, path = None):
+
+def connect_to_mongo_gridfs(address, path=None):
     db = pymongo.MongoClient(host=address).nta_ms2_storage
     print("Connecting to mongodb at {}".format(address))
     fs = gridfs.GridFS(db)
     return fs
 
+
 def get_mongoDB(address):
     mongo = pymongo.MongoClient(host=address).nta_ms2_storage
-    return mongo
+    return mongo
diff --git a/app/ms1/WebApp_plotter.py b/app/ms1/WebApp_plotter.py
@@ -147,11 +147,7 @@ def make_seq_line(
             pool_indices = locs.index[locs.str.startswith("Pooled")]
             # set marker colors for plot
             mark_colors = [
-                c_aes[1]
-                if i in mb_indices
-                else c_aes[2]
-                if i in pool_indices
-                else c_aes[0]
+                c_aes[1] if i in mb_indices else c_aes[2] if i in pool_indices else c_aes[0]
                 for i in range(0, len(locs))
             ]
         # in case there is an error above... this can probably be removed
@@ -358,9 +354,7 @@ def make_seq_line(
                 else:
                     try:
                         # plot once to get lines with no markers
-                        ax[row_index, col_index].plot(
-                            x_values, y_values, color=c_aes[0]
-                        )
+                        ax[row_index, col_index].plot(x_values, y_values, color=c_aes[0])
                         # need to iterate through each color
                         for x, y, col in zip(x_values, y_values, mark_colors):
                             ax[row_index, col_index].plot(
@@ -372,9 +366,7 @@ def make_seq_line(
                                 markeredgecolor=col,
                                 markerfacecolor=col,
                             )
-                        ax[row_index, col_index].set_title(
-                            chem, fontsize=18, fontweight=600
-                        )
+                        ax[row_index, col_index].set_title(chem, fontsize=18, fontweight=600)
                         col_index += 1
                     except:
                         row_index += 1
@@ -520,9 +512,7 @@ def make_seq_scatter(
         """
 
         listOfPNGs = []
-        debug_list = (
-            []
-        )  # List of lists/dataframes/etc to export out of function for debugging purposes
+        debug_list = []  # List of lists/dataframes/etc to export out of function for debugging purposes
 
         # Debug_list
         debug_list.append("Beginning of make_seq_scatter: df_in columns")
@@ -546,9 +536,7 @@ def make_seq_scatter(
 
             # If there is no sequence file, create a dummy sequence dataframe containing the sample names straight from the input data file
             headers = parse_headers(df_in)
-            abundance = [
-                item for sublist in headers for item in sublist if len(sublist) > 1
-            ]
+            abundance = [item for sublist in headers for item in sublist if len(sublist) > 1]
 
             # Debug_list
             debug_list.append("Samples from parse_headers")
@@ -569,9 +557,7 @@ def make_seq_scatter(
             abundance = [
                 entry
                 for entry in abundance
-                if not any(
-                    entry.startswith(prefix) for prefix in column_prefixes_to_remove
-                )
+                if not any(entry.startswith(prefix) for prefix in column_prefixes_to_remove)
             ]
 
             df_loc_seq = pd.DataFrame()
@@ -623,9 +609,7 @@ def make_seq_scatter(
         # AC Loop through sample group column and get indices of samples for each sample group
         indices_list = []
         for i in range(len(sample_group_unique)):
-            temp_indices = df_loc_seq.index[
-                df_loc_seq.iloc[:, 1] == sample_group_unique[i]
-            ].tolist()
+            temp_indices = df_loc_seq.index[df_loc_seq.iloc[:, 1] == sample_group_unique[i]].tolist()
             indices_list.append(temp_indices)
 
         ################################################
@@ -635,22 +619,19 @@ def make_seq_scatter(
         # start by getting df with chemical names and abundance at each location in sequential order
         if order_samples:
             col_names = [x for x in df_loc_seq.iloc[:, 0]]
-            col_names.insert(
-                0, "Chemical_Name"
-            )  # AC 1/4/2024 Add in chemical name column to dataframe
+            col_names.insert(0, "Chemical_Name")  # AC 1/4/2024 Add in chemical name column to dataframe
             # col_names.insert(0, 'Chemical_Name')
             df = df_in[col_names].copy()
         else:
             # Sort dataframe columns alphabetically prior to parsing headers
             df_in = df_in.reindex(sorted(df_in.columns), axis=1)
+            df_in = df_in[
+                ["Feature_ID"] + [col for col in df_in.columns if col != "Feature_ID"]
+            ]  # Move the Feature_ID column to the front of the dataframe; if a sample replicate is the first column when parsing headers, that replicate is lost from its group
 
             headers = parse_headers(df_in)
-            abundance = [
-                item for sublist in headers for item in sublist if len(sublist) > 1
-            ]
-            abundance.insert(
-                0, "Chemical_Name"
-            )  # AC 1/4/2024 Add in chemical name column to dataframe
+            abundance = [item for sublist in headers for item in sublist if len(sublist) > 1]
+            abundance.insert(0, "Chemical_Name")  # AC 1/4/2024 Add in chemical name column to dataframe
             # abundance.remove('Detection_Count(all_samples)')
             # abundance.remove('Detection_Count(all_samples)(%)')
             # 5/21/2024 AC: In certain cases if the samples have multiple layers of repetition to their naming,
@@ -668,9 +649,7 @@ def make_seq_scatter(
             abundance = [
                 entry
                 for entry in abundance
-                if not any(
-                    entry.startswith(prefix) for prefix in column_prefixes_to_remove
-                )
+                if not any(entry.startswith(prefix) for prefix in column_prefixes_to_remove)
             ]
 
             df = df_in[abundance].copy()
@@ -794,9 +773,7 @@ def make_seq_scatter(
                 # legend innards
                 # AC Loop through legend label generation
                 legend_x_coord = []  # List of x-coordinates for sample group in legend
-                character_increment = (
-                    0.018  # How much to increment x-coordinate per character
-                )
+                character_increment = 0.018  # How much to increment x-coordinate per character
 
                 for b in range(len(sample_group_unique)):
                     # Get x coordinate of sample group legend text based on number of characters
@@ -981,9 +958,7 @@ def make_seq_scatter(
                                 if y_fit[0] < 0:
                                     y_fit = y_fit[1:]
                                     x_fit = x_fit[1:]
-                                ax[row_index].plot(
-                                    x_fit, y_fit, color=c_aes[b], lw=3, zorder=100
-                                )
+                                ax[row_index].plot(x_fit, y_fit, color=c_aes[b], lw=3, zorder=100)
 
                     # # add a quadratic fits to plot
                     # if fit == True:
@@ -1040,9 +1015,7 @@ def make_seq_scatter(
                                 zorder=100,
                             )
 
-                        ax[row_index, col_index].set_title(
-                            chem, fontsize=18, fontweight=600
-                        )
+                        ax[row_index, col_index].set_title(chem, fontsize=18, fontweight=600)
 
                         # add a quadratic fits to plot
                         if fit == True:
@@ -1063,9 +1036,7 @@ def make_seq_scatter(
                                     if y_fit[0] < 0:
                                         y_fit = y_fit[1:]
                                         x_fit = x_fit[1:]
-                                    ax[row_index, col_index].plot(
-                                        x_fit, y_fit, color=c_aes[b], lw=3, zorder=100
-                                    )
+                                    ax[row_index, col_index].plot(x_fit, y_fit, color=c_aes[b], lw=3, zorder=100)
                         # # add a quadratic fits to plot
                         # if fit == True:
                         #     if len(x_values_sample) > 2:
@@ -1126,9 +1097,7 @@ def make_seq_scatter(
                                 zorder=100,
                             )
 
-                        ax[row_index, column_index].set_title(
-                            chem, fontsize=18, fontweight=600
-                        )
+                        ax[row_index, column_index].set_title(chem, fontsize=18, fontweight=600)
 
                         # add a quadratic fits to plot
                         if fit == True:
@@ -1149,9 +1118,7 @@ def make_seq_scatter(
                                     if y_fit[0] < 0:
                                         y_fit = y_fit[1:]
                                         x_fit = x_fit[1:]
-                                    ax[row_index, column_index].plot(
-                                        x_fit, y_fit, color=c_aes[b], lw=3, zorder=100
-                                    )
+                                    ax[row_index, column_index].plot(x_fit, y_fit, color=c_aes[b], lw=3, zorder=100)
                         # # add a quadratic fits to plot
                         # if fit == True:
                         #     if len(x_values_sample) > 2:
@@ -1476,9 +1443,7 @@ def make_loc_plot(
                 dark_mode=dark_mode,
             )
         else:
-            figs_axes = make_subplots(
-                chem_names, same_frame=same_frame, dark_mode=dark_mode
-            )
+            figs_axes = make_subplots(chem_names, same_frame=same_frame, dark_mode=dark_mode)
 
         # if plot_type='loc', we should set our df to df_loc... this wasn't done earlier because of
         # the y_max and y_min variables for y_fixed since df_loc has lists of entries
@@ -1548,9 +1513,7 @@ def make_loc_plot(
                 0,
                 0,
             )  # indices for which subplot to put a chemical in
-            while (
-                chem_index - 16 * sublist_index
-            ) < ax_count:  # and (chem_index < n_chems):
+            while (chem_index - 16 * sublist_index) < ax_count:  # and (chem_index < n_chems):
                 # for loop to set up x and y values for each plot in a figure
                 x_values, y_values = [], []
                 for i, v in enumerate(df.iloc[chem_index, 1:]):
@@ -1581,9 +1544,7 @@ def make_loc_plot(
                             ha="right",
                         )
                         ax.set_xlim(0, tick_pos[-1] + 0.75)
-                        ax.set_title(
-                            chemical_names.iloc[chem_index], fontsize=18, fontweight=600
-                        )
+                        ax.set_title(chemical_names.iloc[chem_index], fontsize=18, fontweight=600)
                         i += 1  # iterate to next location for plotting this chemical
 
                 # now deal with 2 and 3 chemicals
@@ -1609,9 +1570,7 @@ def make_loc_plot(
                             ha="right",
                         )
                         ax[row_index].set_xlim(0, tick_pos[-1] + 0.75)
-                        ax[row_index].set_title(
-                            chemical_names.iloc[chem_index], fontsize=18, fontweight=600
-                        )
+                        ax[row_index].set_title(chemical_names.iloc[chem_index], fontsize=18, fontweight=600)
                         # set up the shared x-axis
                         if row_index == nrows - 1:
                             ax[row_index].set_xticks(
@@ -1660,9 +1619,7 @@ def make_loc_plot(
                             ha="right",
                         )
                         ax[row_index, col_index].set_xlim(0, tick_pos[-1] + 0.75)
-                        ax[row_index, col_index].set_title(
-                            chemical_names.iloc[chem_index], fontsize=18, fontweight=600
-                        )
+                        ax[row_index, col_index].set_title(chemical_names.iloc[chem_index], fontsize=18, fontweight=600)
                         i += 1  # iterate to next location for plotting this chemical
 
                     # set x_ticks -- need to make sure the axis is so labels go to the highest row of plots
@@ -1819,9 +1776,7 @@ def make_subplots(
     # ensure each list within chem_names is the right length
     for c_list in chem_names:
         if len(c_list) > 16:
-            raise Exception(
-                "You have a list within chem_names whose length is longer than 16"
-            )
+            raise Exception("You have a list within chem_names whose length is longer than 16")
 
     # rcParams must be set before any plt objects are created!
     # Now set parameters that are needed for dark_mode=True
@@ -1983,20 +1938,15 @@ def make_subplots(
                 # pick the tick locations and labels
                 ax.set_yticks(
                     ticks=[10**x for x in np.linspace(y_min_pow, y_max_pow, n)],
-                    labels=[
-                        f"$10^{{{int(x)}}}$"
-                        for x in np.linspace(y_min_pow, y_max_pow, n)
-                    ],
+                    labels=[f"$10^{{{int(x)}}}$" for x in np.linspace(y_min_pow, y_max_pow, n)],
                 )
         else:
             # generate the subtitle for the plot to state which chemicals are being plotted
             subtitle_f = subtitle_i + n_chems
             subtitle = f"Chemicals {subtitle_i+1}-{subtitle_f}"
             subtitle_i = subtitle_f
             # get shape of axis object
-            axe = (
-                ax.ravel()
-            )  # have to unpack gridspec object (from subplots() function)
+            axe = ax.ravel()  # have to unpack gridspec object (from subplots() function)
             gs = axe[0].get_gridspec()
             shape = (gs.nrows, gs.ncols)
 
@@ -2020,10 +1970,7 @@ def make_subplots(
                     # pick the tick locations and labels
                     axe[j].set_yticks(
                         ticks=[10**x for x in np.linspace(y_min_pow, y_max_pow, n)],
-                        labels=[
-                            f"$10^{{{int(x)}}}$"
-                            for x in np.linspace(y_min_pow, y_max_pow, n)
-                        ],
+                        labels=[f"$10^{{{int(x)}}}$" for x in np.linspace(y_min_pow, y_max_pow, n)],
                     )
 
         # a few more fig aesthetics, append our list of tupples to return, then iterate to next figure