In [None]:
import pandas as pd

# Read columns A:AJ of the "all" sheet from the proteomics workbook.
file_path = "input_proteomics.xlsx"
all_data = pd.read_excel(file_path, sheet_name="all", usecols="A:AJ")

# Keep every row whose Genus is not the literal string "unassigned".
filtered_data = all_data.loc[all_data["Genus"].ne("unassigned")]

# Per-genus totals of each spectral-count column. Each total lives in its own
# single-column frame so it can carry its own percentage column below.
by_genus = filtered_data.groupby("Genus")
group_sums1 = by_genus[["spec_counts_1"]].sum()
group_sums2 = by_genus[["spec_counts_2"]].sum()

# Share of each genus in the column total, expressed in percent.
group_sums1["Relative_Abundance1"] = (
    group_sums1["spec_counts_1"] / group_sums1["spec_counts_1"].sum()
) * 100
group_sums2["Relative_Abundance2"] = (
    group_sums2["spec_counts_2"] / group_sums2["spec_counts_2"].sum()
) * 100

# Put the two percentage columns side by side, matched on the Genus index
# (inner match: only genera present in both frames are kept).
combined = group_sums1[["Relative_Abundance1"]].join(
    group_sums2[["Relative_Abundance2"]],
    how="inner",
)

# Row-wise mean of the two percentage columns.
combined["Average_Relative_Abundance"] = combined[
    ["Relative_Abundance1", "Relative_Abundance2"]
].mean(axis=1)

# Rank genera from the highest to the lowest average percentage.
sorted_combined = combined.sort_values("Average_Relative_Abundance", ascending=False)

# Write the ranked table (Genus index included) to CSV.
output_path = "taxonomic_groups_abundance.csv"
sorted_combined.to_csv(output_path)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import ConnectionPatch
import numpy as np

# Read both sheets of the proteomics workbook: the full table ("all", columns
# A:AJ) and the enzyme selection ("selection", columns A:B).
file_path = "input_proteomics.xlsx"
all_data = pd.read_excel(file_path, sheet_name="all", usecols="A:AJ")
selection_data = pd.read_excel(file_path, sheet_name="selection", usecols="A:B")

# Rows assigned to "Candidatus Accumulibacter"; copied so that the new "Sum"
# column below is not written into a slice of all_data.
is_accumulibacter = all_data["Genus"].eq("Candidatus Accumulibacter")
accumulibacter_data = all_data.loc[is_accumulibacter].copy()

# Per-row sum of the two spectral-count columns, then the grand total.
count_columns = ["spec_counts_1", "spec_counts_2"]
accumulibacter_data["Sum"] = accumulibacter_data[count_columns].sum(axis=1)
total_abundance_accumulibacter = accumulibacter_data["Sum"].sum()

# Restrict to the rows whose "index" value also appears in the selection sheet
# (default inner merge), and total their counts the same way.
glycolytic_data = accumulibacter_data.merge(selection_data, on="index").copy()
glycolytic_data["Sum"] = glycolytic_data[count_columns].sum(axis=1)
total_abundance_glycolytic = glycolytic_data["Sum"].sum()

# Fractions shown in the first pie chart.
glycolytic_fraction = total_abundance_glycolytic / total_abundance_accumulibacter
other_fraction = 1 - glycolytic_fraction

# rxnID values that define each named slice of the second pie chart.
enzyme_groups = {
    "only NTS": ["RbuK", "RibE", "RibI", "SBPase", "TKT", "RbuCO"],
    "Aldolases": ["ALD"],
    "only EMP": ["GAPDH", "PGK"],
}
group_colors = ['#199c19', '#02f8fc', '#020afc', '#f7b259']

# One summed value per named group, in dictionary order.
group_data = [
    glycolytic_data.loc[glycolytic_data["rxnID"].isin(rxn_ids), "Sum"].sum()
    for rxn_ids in enzyme_groups.values()
]

# Whatever is not covered by the named groups becomes the final slice,
# labelled "EMP and NTS".
remaining_sum = total_abundance_glycolytic - sum(group_data)
group_data.append(remaining_sum)
group_labels = [*enzyme_groups, "EMP and NTS"]

# === Plotting ===
fig = plt.figure(figsize=(12, 5))

# Radius of the left pie; reused below to place the connector start points on
# its rim.
r = 0.7

# --- Left pie: glycolytic enzymes vs. all other proteins ---
ax1 = fig.add_subplot(1, 2, 1)
wedges1, texts1, autotexts1 = ax1.pie(
    [glycolytic_fraction, other_fraction],
    labels=["glycolytic enzymes", "other proteins"],
    colors=["#f51406", "#e6e3e3"],
    autopct='%1.0f%%',
    radius=r,
    pctdistance=0.8,
)
ax1.set_title("fraction of the proteome corresponding to glycolytic enzymes", fontsize=12)

# Category labels at 14 pt, percentage labels at 11 pt.
for artists, size in ((texts1, 14), (autotexts1, 11)):
    for artist in artists:
        artist.set_fontsize(size)

# --- Right pie: breakdown of the glycolytic enzymes by group ---
ax2 = fig.add_subplot(1, 2, 2)
wedges2, texts2, autotexts2 = ax2.pie(
    group_data,
    labels=["" for _ in group_labels],  # slice labels are shown in the legend instead
    colors=group_colors,
    autopct='%1.0f%%',
    pctdistance=0.8,
)
ax2.set_title("distribution of the glycolytic enzymes", fontsize=12)

# Percentage labels at 11 pt.
for artist in autotexts2:
    artist.set_fontsize(11)

# Legend placed to the right of the second pie, without a frame.
ax2.legend(
    wedges2,
    group_labels,
    title="",
    loc="center left",
    fontsize=12,
    bbox_to_anchor=(1, 0.5),
    frameon=False,
)

# === Dashed connectors from the first (red) slice to the right pie ===
theta1, theta2 = wedges1[0].theta1, wedges1[0].theta2

# Rim points of the red slice at its start and end angles (angles in degrees).
x1_start, y1_start = r * np.cos(np.deg2rad(theta1)), r * np.sin(np.deg2rad(theta1))
x1_end, y1_end = r * np.cos(np.deg2rad(theta2)), r * np.sin(np.deg2rad(theta2))

# Both connectors share the same line styling; they end at the fixed data
# coordinates (-1, -1) and (-1, 1) of the right axes.
connector_style = dict(color="#f51406", linestyle="--", linewidth=1.5)
con1 = ConnectionPatch(
    xyA=(x1_start, y1_start), coordsA=ax1.transData,
    xyB=(-1.0, -1.0), coordsB=ax2.transData,
    **connector_style,
)
con2 = ConnectionPatch(
    xyA=(x1_end, y1_end), coordsA=ax1.transData,
    xyB=(-1.0, 1.0), coordsB=ax2.transData,
    **connector_style,
)

for connector in (con1, con2):
    fig.add_artist(connector)

plt.tight_layout()
plt.savefig("glycolytic_pie_charts_final.tiff", dpi=100)
plt.show()

In [None]:
import pandas as pd

# Read both sheets of the proteomics workbook: the full table ("all", columns
# A:AJ) and the enzyme selection ("selection", columns A:B).
file_path = "input_proteomics.xlsx"
all_data = pd.read_excel(file_path, sheet_name="all", usecols="A:AJ")
selection_data = pd.read_excel(file_path, sheet_name="selection", usecols="A:B")

# Rows assigned to the genus "Candidatus Accumulibacter".
candidatus_data = all_data.loc[all_data["Genus"].eq("Candidatus Accumulibacter")]

# Keep only the rows whose "index" value also appears in the selection sheet.
merged_data = candidatus_data.merge(selection_data, on="index", how="inner")

# Sum both spectral-count columns per rxnID; rxnID is turned back into a
# regular column afterwards.
aggregated_data = (
    merged_data
    .groupby("rxnID")[["spec_counts_1", "spec_counts_2"]]
    .sum()
    .reset_index()
)

# Column totals over ALL Candidatus Accumulibacter rows (not just the selected
# ones); these are the denominators of the ratios below.
total_spec_counts1 = candidatus_data["spec_counts_1"].sum()
total_spec_counts2 = candidatus_data["spec_counts_2"].sum()

# Fraction of the genus total that each rxnID accounts for, per count column.
aggregated_data["ratio1"] = aggregated_data["spec_counts_1"].div(total_spec_counts1)
aggregated_data["ratio2"] = aggregated_data["spec_counts_2"].div(total_spec_counts2)

# Scaling constant applied to the mean ratio. The original notebook describes
# it as a conversion to molar concentration; the meaning of the individual
# factors is not documented in this file — TODO confirm.
conversion_factor = (2e6 / 1e-15) * (5.5e-3) * 526 * (1 / 6.022e23)

# Mean of the two ratios, then scaled by the constant above.
aggregated_data["average_ratio"] = aggregated_data[["ratio1", "ratio2"]].mean(axis=1)
aggregated_data["Abundance"] = aggregated_data["average_ratio"] * conversion_factor

# Export rxnID (renamed to "Protein") and Abundance, without the row index.
output_path = "glycolytic_protein_abundance.csv"
output_data = aggregated_data[["rxnID", "Abundance"]].rename(columns={"rxnID": "Protein"})
output_data.to_csv(output_path, index=False)

# Build the micromolar table from an independent copy. A plain assignment
# would only create a second name for `aggregated_data`, so the in-place
# scaling below would silently multiply its "Abundance" column by 1e6 for
# every later cell that reads it.
aggregated_data2 = aggregated_data.copy()
aggregated_data2["Abundance"] = aggregated_data2["Abundance"] * 1e6

# Export rxnID (renamed to "Protein") and the scaled values under a
# micromolar column header, without the row index.
output_data = aggregated_data2[["rxnID", "Abundance"]].rename(
    columns={"rxnID": "Protein", "Abundance": "Cytoplasmic concentration (µM)"}
)
output_path = "glycolytic_protein_micromolar_concentrations.csv"
output_data.to_csv(output_path, index=False)

In [None]:
# Rescale the "Abundance" column of aggregated_data so that it sums to 100.
# Relies on `aggregated_data` as left behind by the preceding cell.
total_abundance = aggregated_data["Abundance"].sum()
aggregated_data["Normalized Abundance"] = (
    aggregated_data["Abundance"].div(total_abundance).mul(100)
)

# Export rxnID (renamed to "Protein") with the normalized values, without the
# row index.
output_path = "normalized_protein_abundance_100_scale.csv"
normalized_output = (
    aggregated_data[["rxnID", "Normalized Abundance"]]
    .rename(columns={"rxnID": "Protein"})
)
normalized_output.to_csv(output_path, index=False)

print(f"CSV file '{output_path}' has been successfully generated!")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the bioreactor table; the first column is used as the x-axis and every
# remaining column is plotted as its own series.
file_path = "input_data_bioreactor.csv"
df = pd.read_csv(file_path)

time = df.iloc[:, 0]
metabolite_names = df.columns[1:]

# Marker shape, fill colour and marker area per known column name.
styles = {
    "acetate":   {"marker": "^", "color": "#b1acb0", "size": 80},
    "phosphate": {"marker": "s", "color": "#fa0f08", "size": 80},
    "glycogen":  {"marker": "o", "color": "#2a8c0f", "size": 80},
    "PHB":       {"marker": "D", "color": "#0b5b9e", "size": 80},
    "PHV":       {"marker": "s", "color": "#e7e510", "size": 80},
}
# Used for any column that has no entry in `styles`.
fallback_style = {"marker": "o", "color": "#000000", "size": 60}

fig, ax = plt.subplots(figsize=(8, 6))

# One scatter series per column, each with a black marker outline.
for metabolite in metabolite_names:
    style = styles.get(metabolite, fallback_style)
    ax.scatter(
        time,
        df[metabolite],
        s=style["size"],
        color=style["color"],
        marker=style["marker"],
        edgecolors="black",
        label=metabolite,
    )

# Axis titles and tick-label sizes.
ax.set_xlabel("time (min)", fontsize=16)
ax.set_ylabel("concentration (mC(P)mol/g$^{CDW}$)", fontsize=16)
ax.tick_params(axis='both', labelsize=14)

# Frameless legend at a hand-tuned position inside the axes.
ax.legend(loc='upper right', bbox_to_anchor=(0.95, 0.8), fontsize=12, frameon=False)

# Hide the top and right borders of the plot area.
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)

plt.tight_layout()
# Write the figure to disk; dpi sets the output resolution.
plt.savefig("anaerobic_phase_bioreactor.tiff", format='tiff', dpi=150)
plt.show()

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import linregress

# Hard-coded series for the linear fits: for each name, 'independent' holds the
# x-values and 'dependent' the matching y-values.
datasets = {
    'acetate': {
        'independent': np.array([0, 0.167, 0.333, 0.5]),
        'dependent': np.array([0.70745, 0.36072, 0.08824, 0.0]),
    },
    'phosphate': {
        'independent': np.array([-0.033, 0, 0.167, 0.333, 0.5, 1.0, 1.5, 2.0]),
        'dependent': np.array([0.002, 0.283, 0.672, 1.046, 1.176, 1.275, 1.270, 1.305]),
    },
    'glycogen': {
        'independent': np.array([-0.033, 0, 0.167, 1.5]),
        'dependent': np.array([1.169, 0.718, 0.582, 0.435]),
    },
    'HB': {
        'independent': np.array([-0.033, 0.167, 0.333, 0.5]),
        'dependent': np.array([0.034, 0.294, 0.456, 0.486]),
    },
    'HV': {
        'independent': np.array([-0.033, 0.0, 0.167, 0.333, 0.5, 1.0]),
        'dependent': np.array([0.013, 0.013, 0.027, 0.034, 0.040, 0.055]),
    },
}

# Ordinary least-squares line per series; only the slope and its standard
# error are carried forward.
regression_results = []
for name, data in datasets.items():
    fit = linregress(data['independent'], data['dependent'])
    regression_results.append({
        'Dataset': name,
        'Slope': fit.slope,
        'StdErr': fit.stderr,
    })

results_df = pd.DataFrame(regression_results).set_index('Dataset')
print("Regression results:\n", results_df, "\n")

# Rates that take part in the reconciliation (phosphate is fitted above but
# not used from here on).
measured = ['acetate', 'glycogen', 'HB', 'HV']

m = results_df['Slope']
se = results_df['StdErr']

# Weights of the linear combination that defines the derived CO2 rate.
# NOTE(review): acetate is weighted 8 here while the carbon row of E below
# uses 2 for acetate — confirm which convention is intended.
coeffs = {
    'acetate':  8,
    'glycogen': 6,
    'HB':      -4,
    'HV':      -5,
}

# Derived CO2 rate: absolute value of the weighted sum of the four slopes.
qCO2 = abs(sum(weight * m[name] for name, weight in coeffs.items()))

# Its standard deviation by first-order propagation of the slope standard
# errors, treating the four slopes as uncorrelated.
variance_CO2 = sum((coeffs[name]**2) * (se[name]**2) for name in coeffs)
sdCO2 = np.sqrt(variance_CO2)

# --- Reconciliation ---
variables = measured + ['CO2']  # five unknowns, in column order of E

# Balance rows over the five unknowns (labelled C and e- in the original).
E = np.array([
    [ 2,  6,  4,  5,  1],   # C
    [ 8, 24, 18, 24,  0]    # e-
])

# One identity row per unknown ties it to its own measured/derived value.
I5 = np.eye(len(variables))

# Stacked design matrix: 2 balance rows on top of 5 identity rows -> 7x5.
C = np.concatenate((E, I5), axis=0)

# Right-hand side: zeros for the two balances, then the four slopes and qCO2.
q_meas = np.array([*m[measured], qCO2])
b = np.concatenate((np.zeros(2), q_meas))

# Diagonal covariance of the 7 rows. The two balance rows get a very small
# variance (1e-9) so that they are weighted far more heavily than the rates.
vars_balance = [1e-9, 1e-9]
vars_rates = [se[rate]**2 for rate in measured]
vars_CO2 = [sdCO2**2]
S = np.diag(vars_balance + vars_rates + vars_CO2)

# Weighted least squares: q_rec = (C' S^-1 C)^-1 C' S^-1 b.
Sinv = np.linalg.inv(S)
CTSIC = C.T @ Sinv @ C
CTSIC_inv = np.linalg.inv(CTSIC)
q_rec = CTSIC_inv @ (C.T @ Sinv @ b)

# Covariance of the estimate by linear propagation through J, where
# q_rec = J b; the reported SDs are the square roots of its diagonal.
J = CTSIC_inv @ C.T @ Sinv
S_beta = J @ S @ J.T
sd_rec = np.sqrt(np.diag(S_beta))

# Tabulate, print and save (six decimals in the CSV).
output = pd.DataFrame(
    {'Reconciled_mean': q_rec, 'Reconciled_SD': sd_rec},
    index=variables,
)

print("Reconciled rates + SDs:\n", output)
output.to_csv('reconciled_reactor_rates.csv', float_format='%.6f')

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress
import csv
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import pandas as pd

# Load the first sheet of the labeling workbook.
excel_file = "input physiologic data during labeling.xlsx"
df = pd.read_excel(excel_file)

# First 15 rows of columns 3, 4 and 5 (0-based positions) as plain numpy
# arrays, plus a single scalar from row 0, column 6.
# NOTE(review): the column meanings (time in hours, acetate, phosphate,
# biomass concentration) are taken from the variable names — confirm against
# the workbook layout.
first_rows = df.iloc[0:15]
time_hours = np.array(first_rows.iloc[:, 3].values)
acetate = np.array(first_rows.iloc[:, 4].values)
phosphate = np.array(first_rows.iloc[:, 5].values)
biomass_concentration = float(df.iloc[0, 6])
time_minutes = time_hours * 60

# Regression window, in minutes.
min_time_minutes = 0    # Start
max_time_minutes = 20   # End

# Snap both window bounds to the nearest available time point.
closest_min_idx = np.argmin(np.abs(time_minutes - min_time_minutes))
closest_max_idx = np.argmin(np.abs(time_minutes - max_time_minutes))

# If both bounds snapped to the same point (or ended up reversed), push the
# upper bound one position past the lower one so the window aims for 2 points.
if closest_max_idx <= closest_min_idx:
    closest_max_idx = closest_min_idx + 1
    print("Warning: Adjusted window to include at least 2 points")

# Cut the same inclusive index window out of all three series and make sure
# each piece is a flat 1-D numpy array.
window = slice(closest_min_idx, closest_max_idx + 1)
time_subset = np.asarray(time_hours[window]).reshape(-1)
acetate_subset = np.asarray(acetate[window]).reshape(-1)
phosphate_subset = np.asarray(phosphate[window]).reshape(-1)

print(f"Selected time window: {time_subset[0]*60:.2f} to {time_subset[-1]*60:.2f} minutes")
print(f"Number of points in subset: {len(time_subset)}")

# Thin guard around scipy's linregress: flattens both inputs and refuses to
# fit when fewer than two x-values are supplied.
def safe_linregress(x, y):
    """Fit a straight line to ``x`` and ``y`` after flattening both to 1-D.

    Args:
        x: Array-like of x-values; converted with ``np.asarray`` and flattened.
        y: Array-like of y-values; converted with ``np.asarray`` and flattened.

    Returns:
        The result of ``scipy.stats.linregress`` for the flattened inputs.

    Raises:
        ValueError: If ``x`` contains fewer than two values.
    """
    flat_x, flat_y = (np.asarray(values).reshape(-1) for values in (x, y))
    if len(flat_x) >= 2:
        return linregress(flat_x, flat_y)
    raise ValueError("Need at least 2 points for regression")

# Slope of acetate over the selected window (first field of the regression
# result), divided by the biomass concentration to give the specific rate.
slope_ac = safe_linregress(time_subset, acetate_subset)[0]
qAc = slope_ac / biomass_concentration

# Same calculation for phosphate.
slope_pi = safe_linregress(time_subset, phosphate_subset)[0]
qPi = slope_pi / biomass_concentration

# Write both specific rates to a two-column CSV with a header row.
output_filename = "specific_rates.csv"
with open(output_filename, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerows([
        ["Variable", "Value"],
        ["qAc", qAc],
        ["qPi", qPi],
    ])

# x-values in minutes for the full series and for the regression window.
time_minutes = time_hours * 60
time_subset_minutes = time_subset * 60

# Shared plot styling.
fig, ax = plt.subplots(figsize=(8, 6))
acetate_color = "#717070"
phosphate_color = "#fa0f08"
marker_size = 80

# Full time courses: acetate as triangles, phosphate as squares.
ax.scatter(time_minutes, acetate, label='acetate', marker='^', s=marker_size,
           facecolors=acetate_color, edgecolors="black")
ax.scatter(time_minutes, phosphate, label='phosphate', marker='s', s=marker_size,
           facecolors=phosphate_color, edgecolors="black")

# Axis titles, tick-label sizes and fixed axis ranges (the y-range leaves a
# margin of 2 above the largest plotted value).
ax.set_xlabel("time (min)", fontsize=16)
ax.set_ylabel("concentration (mmol/L)", fontsize=16)
ax.tick_params(axis='both', labelsize=16)

ax.set_xlim(0, 140)
ax.set_ylim(0, max(max(acetate), max(phosphate)) + 2)

# Hide the top and right borders of the plot area.
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)

ax.legend(fontsize=12, frameon=False)

# Annotation box with both specific rates, placed at hand-picked data
# coordinates.
textstr = f"q$_{{Ac}}$ = {qAc:.2f} mmol/g$^{{CDW}}$/h\nq$_{{Pi}}$ = {qPi:.2f} mmol/g$^{{CDW}}$/h "
text_x, text_y = 85, 5
ax.text(
    text_x,
    text_y,
    textstr,
    fontsize=12,
    bbox=dict(facecolor='#FFFFFF', edgecolor='white', boxstyle='round,pad=0.5'),
)

# Echo the same rates to the cell output.
print('The specific rates during the labeling experiments were:')
print(f"q_Ac = {qAc:.2f} mmol/gCDW/h")
print(f"q_Pi = {qPi:.2f} mmol/gCDW/h")


# Inset axes showing only the regression window. Size is given relative to
# the anchor box; the anchor box itself is in axes-fraction coordinates.
inset_x, inset_y = "80%", "80%"
inset_x_pos, inset_y_pos = 0.65, 0.4
ax_inset = inset_axes(
    ax,
    width=inset_x,
    height=inset_y,
    bbox_to_anchor=(inset_x_pos, inset_y_pos, 0.3, 0.3),
    bbox_transform=ax.transAxes,
)
ax_inset.scatter(time_subset_minutes, acetate_subset, marker='^', s=marker_size,
                 facecolors=acetate_color, edgecolors="black")
ax_inset.scatter(time_subset_minutes, phosphate_subset, marker='s', s=marker_size,
                 facecolors=phosphate_color, edgecolors="black")

# Inset ranges follow the windowed data, with a margin of 1 on the upper ends.
ax_inset.set_xlim(min(time_subset_minutes), max(time_subset_minutes)+1)
ax_inset.set_ylim(
    min(min(acetate_subset), min(phosphate_subset)),
    max(max(acetate_subset), max(phosphate_subset))+1,
)
ax_inset.tick_params(axis='both', labelsize=12)

# Write the figure to disk and display it.
plt.savefig("Acetate_Phosphate_during_labeling.tiff", format='tiff', dpi=150)
plt.show()