Skip to content

Commit

Permalink
Debug missing replicate from tracer plot
Browse files Browse the repository at this point in the history
  • Loading branch information
Alex Chao authored and alexchao32 committed May 23, 2024
1 parent b30081a commit 763d41f
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 86 deletions.
11 changes: 7 additions & 4 deletions app/data_handler/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,25 @@
# Logger registered under the name "nta_app.ms2"; its threshold is set to INFO.
logger = logging.getLogger("nta_app.ms2")
logger.setLevel(logging.INFO)

# Value of the UBERTOOL_REST_SERVER environment variable (None when it is unset).
DSSTOX_API = os.getenv("UBERTOOL_REST_SERVER")


def connect_to_mongoDB(address):
    """Open a client at ``address`` and return its ``nta_ms2_runs`` database.

    Before returning, a descending index on the ``date`` field is requested
    with ``expireAfterSeconds=86400`` (24 hours), so that documents carrying
    a ``datetime.utcnow()`` timestamp in ``date`` are removed once they are
    older than that.

    NOTE(review): ``.Collection`` is plain attribute access on the database,
    so the index is created on a collection literally named "Collection" --
    confirm that this is the collection the run records are written to.

    Args:
        address: Host passed to ``pymongo.MongoClient``.

    Returns:
        The ``nta_ms2_runs`` database object of the new client.
    """
    client = pymongo.MongoClient(host=address)
    runs_db = client["nta_ms2_runs"]
    index_keys = [("date", pymongo.DESCENDING)]
    # TTL index: every stored entry needs a ``date`` timestamp for expiry to apply.
    client.nta_ms2_runs.Collection.create_index(index_keys, expireAfterSeconds=86400)
    return runs_db

def connect_to_mongo_gridfs(address, path=None):
    """Return a GridFS handle over the ``nta_ms2_storage`` database at ``address``.

    Args:
        address: Host passed to ``pymongo.MongoClient``.
        path: Not used by this function.

    Returns:
        A ``gridfs.GridFS`` instance wrapping the ``nta_ms2_storage`` database.
    """
    client = pymongo.MongoClient(host=address)
    storage_db = client.nta_ms2_storage
    # Announce the target address on stdout before wrapping the database.
    print("Connecting to mongodb at {}".format(address))
    return gridfs.GridFS(storage_db)


def get_mongoDB(address):
    """Return the ``nta_ms2_storage`` database from a new client at ``address``.

    Args:
        address: Host passed to ``pymongo.MongoClient``.

    Returns:
        The ``nta_ms2_storage`` database object.
    """
    client = pymongo.MongoClient(host=address)
    return client.nta_ms2_storage
111 changes: 29 additions & 82 deletions app/ms1/WebApp_plotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,7 @@ def make_seq_line(
pool_indices = locs.index[locs.str.startswith("Pooled")]
# set marker colors for plot
mark_colors = [
c_aes[1]
if i in mb_indices
else c_aes[2]
if i in pool_indices
else c_aes[0]
c_aes[1] if i in mb_indices else c_aes[2] if i in pool_indices else c_aes[0]
for i in range(0, len(locs))
]
# in case there is an error above... this can probably be removed
Expand Down Expand Up @@ -358,9 +354,7 @@ def make_seq_line(
else:
try:
# plot once to get lines with no markers
ax[row_index, col_index].plot(
x_values, y_values, color=c_aes[0]
)
ax[row_index, col_index].plot(x_values, y_values, color=c_aes[0])
# need to iterate through each color
for x, y, col in zip(x_values, y_values, mark_colors):
ax[row_index, col_index].plot(
Expand All @@ -372,9 +366,7 @@ def make_seq_line(
markeredgecolor=col,
markerfacecolor=col,
)
ax[row_index, col_index].set_title(
chem, fontsize=18, fontweight=600
)
ax[row_index, col_index].set_title(chem, fontsize=18, fontweight=600)
col_index += 1
except:
row_index += 1
Expand Down Expand Up @@ -520,9 +512,7 @@ def make_seq_scatter(
"""

listOfPNGs = []
debug_list = (
[]
) # List of lists/dataframes/etc to export out of function for debugging purposes
debug_list = [] # List of lists/dataframes/etc to export out of function for debugging purposes

# Debug_list
debug_list.append("Beginning of make_seq_scatter: df_in columns")
Expand All @@ -546,9 +536,7 @@ def make_seq_scatter(

# If there is no sequence file, create a dummy sequence dataframe containing the sample names straight from the input data file
headers = parse_headers(df_in)
abundance = [
item for sublist in headers for item in sublist if len(sublist) > 1
]
abundance = [item for sublist in headers for item in sublist if len(sublist) > 1]

# Debug_list
debug_list.append("Samples from parse_headers")
Expand All @@ -569,9 +557,7 @@ def make_seq_scatter(
abundance = [
entry
for entry in abundance
if not any(
entry.startswith(prefix) for prefix in column_prefixes_to_remove
)
if not any(entry.startswith(prefix) for prefix in column_prefixes_to_remove)
]

df_loc_seq = pd.DataFrame()
Expand Down Expand Up @@ -623,9 +609,7 @@ def make_seq_scatter(
# AC Loop through sample group column and get indices of samples for each sample group
indices_list = []
for i in range(len(sample_group_unique)):
temp_indices = df_loc_seq.index[
df_loc_seq.iloc[:, 1] == sample_group_unique[i]
].tolist()
temp_indices = df_loc_seq.index[df_loc_seq.iloc[:, 1] == sample_group_unique[i]].tolist()
indices_list.append(temp_indices)

################################################
Expand All @@ -635,22 +619,19 @@ def make_seq_scatter(
# start by getting df with chemical names and abundance at each location in sequential order
if order_samples:
col_names = [x for x in df_loc_seq.iloc[:, 0]]
col_names.insert(
0, "Chemical_Name"
) # AC 1/4/2024 Add in chemical name column to dataframe
col_names.insert(0, "Chemical_Name") # AC 1/4/2024 Add in chemical name column to dataframe
# col_names.insert(0, 'Chemical_Name')
df = df_in[col_names].copy()
else:
# Sort dataframe columns alphabetically prior to parsing headers
df_in = df_in.reindex(sorted(df_in.columns), axis=1)
df_in = df_in[
["Feature_ID"] + [col for col in df_in.columns if col != "Feature_ID"]
] # Move the Feature_ID column to the front of the dataframe; if a sample replicate is the first column when parsing headers, that replicate is lost from its group

headers = parse_headers(df_in)
abundance = [
item for sublist in headers for item in sublist if len(sublist) > 1
]
abundance.insert(
0, "Chemical_Name"
) # AC 1/4/2024 Add in chemical name column to dataframe
abundance = [item for sublist in headers for item in sublist if len(sublist) > 1]
abundance.insert(0, "Chemical_Name") # AC 1/4/2024 Add in chemical name column to dataframe
# abundance.remove('Detection_Count(all_samples)')
# abundance.remove('Detection_Count(all_samples)(%)')
# 5/21/2024 AC: In certain cases if the samples have multiple layers of repetition to their naming,
Expand All @@ -668,9 +649,7 @@ def make_seq_scatter(
abundance = [
entry
for entry in abundance
if not any(
entry.startswith(prefix) for prefix in column_prefixes_to_remove
)
if not any(entry.startswith(prefix) for prefix in column_prefixes_to_remove)
]

df = df_in[abundance].copy()
Expand Down Expand Up @@ -794,9 +773,7 @@ def make_seq_scatter(
# legend innards
# AC Loop through legend label generation
legend_x_coord = [] # List of x-coordinates for sample group in legend
character_increment = (
0.018 # How much to increment x-coordinate per character
)
character_increment = 0.018 # How much to increment x-coordinate per character

for b in range(len(sample_group_unique)):
# Get x coordinate of sample group legend text based on number of characters
Expand Down Expand Up @@ -981,9 +958,7 @@ def make_seq_scatter(
if y_fit[0] < 0:
y_fit = y_fit[1:]
x_fit = x_fit[1:]
ax[row_index].plot(
x_fit, y_fit, color=c_aes[b], lw=3, zorder=100
)
ax[row_index].plot(x_fit, y_fit, color=c_aes[b], lw=3, zorder=100)

# # add a quadratic fits to plot
# if fit == True:
Expand Down Expand Up @@ -1040,9 +1015,7 @@ def make_seq_scatter(
zorder=100,
)

ax[row_index, col_index].set_title(
chem, fontsize=18, fontweight=600
)
ax[row_index, col_index].set_title(chem, fontsize=18, fontweight=600)

# add a quadratic fits to plot
if fit == True:
Expand All @@ -1063,9 +1036,7 @@ def make_seq_scatter(
if y_fit[0] < 0:
y_fit = y_fit[1:]
x_fit = x_fit[1:]
ax[row_index, col_index].plot(
x_fit, y_fit, color=c_aes[b], lw=3, zorder=100
)
ax[row_index, col_index].plot(x_fit, y_fit, color=c_aes[b], lw=3, zorder=100)
# # add a quadratic fits to plot
# if fit == True:
# if len(x_values_sample) > 2:
Expand Down Expand Up @@ -1126,9 +1097,7 @@ def make_seq_scatter(
zorder=100,
)

ax[row_index, column_index].set_title(
chem, fontsize=18, fontweight=600
)
ax[row_index, column_index].set_title(chem, fontsize=18, fontweight=600)

# add a quadratic fits to plot
if fit == True:
Expand All @@ -1149,9 +1118,7 @@ def make_seq_scatter(
if y_fit[0] < 0:
y_fit = y_fit[1:]
x_fit = x_fit[1:]
ax[row_index, column_index].plot(
x_fit, y_fit, color=c_aes[b], lw=3, zorder=100
)
ax[row_index, column_index].plot(x_fit, y_fit, color=c_aes[b], lw=3, zorder=100)
# # add a quadratic fits to plot
# if fit == True:
# if len(x_values_sample) > 2:
Expand Down Expand Up @@ -1476,9 +1443,7 @@ def make_loc_plot(
dark_mode=dark_mode,
)
else:
figs_axes = make_subplots(
chem_names, same_frame=same_frame, dark_mode=dark_mode
)
figs_axes = make_subplots(chem_names, same_frame=same_frame, dark_mode=dark_mode)

# if plot_type='loc', we should set our df to df_loc... this wasn't done earlier because of
# the y_max and y_min variables for y_fixed since df_loc has lists of entries
Expand Down Expand Up @@ -1548,9 +1513,7 @@ def make_loc_plot(
0,
0,
) # indices for which subplot to put a chemical in
while (
chem_index - 16 * sublist_index
) < ax_count: # and (chem_index < n_chems):
while (chem_index - 16 * sublist_index) < ax_count: # and (chem_index < n_chems):
# for loop to set up x and y values for each plot in a figure
x_values, y_values = [], []
for i, v in enumerate(df.iloc[chem_index, 1:]):
Expand Down Expand Up @@ -1581,9 +1544,7 @@ def make_loc_plot(
ha="right",
)
ax.set_xlim(0, tick_pos[-1] + 0.75)
ax.set_title(
chemical_names.iloc[chem_index], fontsize=18, fontweight=600
)
ax.set_title(chemical_names.iloc[chem_index], fontsize=18, fontweight=600)
i += 1 # iterate to next location for plotting this chemical

# now deal with 2 and 3 chemicals
Expand All @@ -1609,9 +1570,7 @@ def make_loc_plot(
ha="right",
)
ax[row_index].set_xlim(0, tick_pos[-1] + 0.75)
ax[row_index].set_title(
chemical_names.iloc[chem_index], fontsize=18, fontweight=600
)
ax[row_index].set_title(chemical_names.iloc[chem_index], fontsize=18, fontweight=600)
# set up the shared x-axis
if row_index == nrows - 1:
ax[row_index].set_xticks(
Expand Down Expand Up @@ -1660,9 +1619,7 @@ def make_loc_plot(
ha="right",
)
ax[row_index, col_index].set_xlim(0, tick_pos[-1] + 0.75)
ax[row_index, col_index].set_title(
chemical_names.iloc[chem_index], fontsize=18, fontweight=600
)
ax[row_index, col_index].set_title(chemical_names.iloc[chem_index], fontsize=18, fontweight=600)
i += 1 # iterate to next location for plotting this chemical

# set x_ticks -- need to make sure the axis is so labels go to the highest row of plots
Expand Down Expand Up @@ -1819,9 +1776,7 @@ def make_subplots(
# ensure each list within chem_names is the right length
for c_list in chem_names:
if len(c_list) > 16:
raise Exception(
"You have a list within chem_names whose length is longer than 16"
)
raise Exception("You have a list within chem_names whose length is longer than 16")

# rcParams must be set before any plt objects are created!
# Now set parameters that are needed for dark_mode=True
Expand Down Expand Up @@ -1983,20 +1938,15 @@ def make_subplots(
# pick the tick locations and labels
ax.set_yticks(
ticks=[10**x for x in np.linspace(y_min_pow, y_max_pow, n)],
labels=[
f"$10^{{{int(x)}}}$"
for x in np.linspace(y_min_pow, y_max_pow, n)
],
labels=[f"$10^{{{int(x)}}}$" for x in np.linspace(y_min_pow, y_max_pow, n)],
)
else:
# generate the subtitle for the plot to state which chemicals are being plotted
subtitle_f = subtitle_i + n_chems
subtitle = f"Chemicals {subtitle_i+1}-{subtitle_f}"
subtitle_i = subtitle_f
# get shape of axis object
axe = (
ax.ravel()
) # have to unpack gridspec object (from subplots() function)
axe = ax.ravel() # have to unpack gridspec object (from subplots() function)
gs = axe[0].get_gridspec()
shape = (gs.nrows, gs.ncols)

Expand All @@ -2020,10 +1970,7 @@ def make_subplots(
# pick the tick locations and labels
axe[j].set_yticks(
ticks=[10**x for x in np.linspace(y_min_pow, y_max_pow, n)],
labels=[
f"$10^{{{int(x)}}}$"
for x in np.linspace(y_min_pow, y_max_pow, n)
],
labels=[f"$10^{{{int(x)}}}$" for x in np.linspace(y_min_pow, y_max_pow, n)],
)

# a few more fig aesthetics, append our list of tuples to return, then iterate to next figure
Expand Down

0 comments on commit 763d41f

Please sign in to comment.