In [None]:
from equilibrator_api import Q_ 
import equilibrator_custom_functions as eq 
import numpy as np 
import math
import matplotlib.pyplot as plt 

In [None]:
import warnings

# Physiological conditions passed to the ECM analysis. Each quantity is
# converted to its string form, so the dictionary holds plain strings only.
cellular_conditions = dict(
    p_h=str(Q_("7.4")),
    p_mg=str(Q_(3)),
    ionic_strength=str(Q_('250 mM')),
    temperature=str(Q_(20 + 273.15, "K")),  # 20 degC written in kelvin
    kcat_source="fwd",
)

# Compound settings with the default eQuilibrator bounds, built from the
# "input_metabolite_ranges_default" input.
# NOTE(review): the exact effect of custom_bounds=True is defined in
# equilibrator_custom_functions and is not visible here -- confirm.
cs_default_bounds = eq.obtain_compound_settings(
    "input_metabolite_ranges_default",
    custom_bounds=True,
)

# From here on every warning category is suppressed (applies after the call above).
warnings.filterwarnings('ignore')

In [None]:
import os
import warnings
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FormatStrFormatter

# --- Scan configuration -----------------------------------------------------
# q_Ac grid in mmol/gCDW/h: `points` evenly spaced values between the limits.
lower_qAc = 0.005
upper_qAc = 2
points = 20

Volume_per_CDW = Q_(1.9, 'ul/mg').to('L/g')
q_Ac_values = np.linspace(lower_qAc, upper_qAc, points)

# Filled during the scan:
#   crit_q_Ac_values[protein]['counts'] -> first scanned q_Ac at which the
#       predicted concentration reaches or exceeds the experimental one
#   scatter_data[protein] -> parallel lists of scanned q_Ac and predictions
crit_q_Ac_values = {}
scatter_data = {}

# --- Experimental protein data ----------------------------------------------
# 'Abundance' and 'Protein' are copied into the column names used below.
df_counts = pd.read_csv('glycolytic_protein_abundance.csv').assign(
    experimental_concentration=lambda frame: frame['Abundance'],
    Reaction=lambda frame: frame['Protein'],
)

warnings.filterwarnings("ignore", category=RuntimeWarning)

# Model reaction ids whose predicted concentrations are summed under one name
# before the comparison with the experimental table.
pooled_reaction_names = {
    'TktA': 'TKT', 'TktB': 'TKT',
    'SBPald': 'ALD', 'FBPald': 'ALD'
}

# --- Scan -------------------------------------------------------------------
for q_Ac in q_Ac_values:
    try:
        # mmol/gCDW/h -> mol/g/h, divided by the volume per biomass, then M/s.
        specific_rate = Q_(q_Ac / 1000, 'mol/g/hour')
        absolute_flux = (specific_rate / Volume_per_CDW).to('M/s')

        # Run the ECM model and exponentiate the returned values to get one
        # concentration per reaction id.
        model = eq.ECM_analysis("file_for_ECM", cs_default_bounds, cellular_conditions, absolute_flux)
        lnC_ECM = model.ecf.optimize_ecm()[1]
        enzyme_levels = np.exp(model.ecf.ECF(lnC_ECM).value)
        enz_conc = dict(zip(model.reaction_ids, list(enzyme_levels)))

        # Predicted table: rename pooled ids, then sum per protein name.
        df_predicted = (
            pd.DataFrame(enz_conc.items(), columns=['Protein', 'Concentration'])
            .assign(Protein=lambda frame: frame['Protein'].replace(pooled_reaction_names))
            .groupby('Protein', as_index=False)['Concentration']
            .sum()
        )

        # Keep only proteins present in both tables.
        comparison_df = df_counts.merge(
            df_predicted, left_on='Reaction', right_on='Protein', how='inner'
        )
        comparison_df['Difference'] = (
            comparison_df['experimental_concentration'] - comparison_df['Concentration']
        ).abs()

        for _, row in comparison_df.iterrows():
            protein = row['Reaction']
            exp_abundance = row['experimental_concentration']
            pred_abundance = row['Concentration']

            # Record the critical q_Ac only the first time the threshold is met.
            protein_entry = crit_q_Ac_values.setdefault(protein, {})
            if 'counts' not in protein_entry and pred_abundance >= exp_abundance:
                protein_entry['counts'] = q_Ac

            # Append this scan point to the protein's trace.
            trace = scatter_data.setdefault(protein, {'q_Ac': [], 'predicted': []})
            trace['q_Ac'].append(q_Ac)
            trace['predicted'].append(pred_abundance)

    except Exception as err:
        # Best effort: report the failing grid point and move on to the next.
        print(f"⚠️ Skipping q_Ac = {q_Ac} due to error: {err}")

# --- Summary ----------------------------------------------------------------
non_empty_q_Ac = {
    name: entry['counts']
    for name, entry in crit_q_Ac_values.items()
    if 'counts' in entry
}

if not non_empty_q_Ac:
    raise ValueError("No valid 'counts' values found in crit_q_Ac_values.")

# Smallest critical q_Ac across proteins, and the first protein that has it.
chosen_q_Ac = min(non_empty_q_Ac.values())
highlighted_reaction = next(
    name for name, value in non_empty_q_Ac.items() if value == chosen_q_Ac
)

# Save critical q_Ac values (one row per protein).
crit_q_Ac_df = pd.DataFrame.from_dict(crit_q_Ac_values, orient='index')
crit_q_Ac_df.to_csv('critical_q_Ac_values.csv')


In [None]:
import pandas as pd

# Reaction with the smallest critical q_Ac recorded in crit_q_Ac_df.
min_count_reaction = crit_q_Ac_df['counts'].idxmin()

# The limiting reaction leads the panel list. dict.fromkeys removes a repeated
# name while keeping the order, so the limiting reaction is not drawn in two
# panels when it is also one of the fixed entries.
selected_reactions = list(dict.fromkeys(
    [min_count_reaction, 'TKT', 'ALD', 'SBPase', 'RibE', 'RibI', 'RbuK', 'RbuCO']
))
# Only reactions recorded during the q_Ac scan can be plotted; they fill the
# grid consecutively so no blank framed panel is left in the middle.
plotted_reactions = [name for name in selected_reactions if name in scatter_data]

fig, axes = plt.subplots(nrows=2, ncols=4, figsize=(14, 6))
axes = axes.flatten()

df = pd.read_csv('reconciled_reactor_rates.csv', index_col=0)

# Reconciled mean and SD for acetate; the magnitude of the mean is used as the
# experimental q_Ac and mean +/- SD as the shaded band.
acetate_rec_mean = df.loc['acetate', 'Reconciled_mean']
acetate_rec_SD = df.loc['acetate', 'Reconciled_SD']
experimental_qAc = abs(acetate_rec_mean)
lower_limit = experimental_qAc - acetate_rec_SD
upper_limit = experimental_qAc + acetate_rec_SD

experimental_qAc_color = "#05a70c"
theoretical_qAc_color = "#0408f8"

for ax, protein in zip(axes, plotted_reactions):
    data = scatter_data[protein]
    ax.scatter(data['q_Ac'], data['predicted'], label='protein concentration required to sustain $q_{Ac}$', color='black')
    exp_conc_counts = df_counts.loc[df_counts['Reaction'] == protein, 'experimental_concentration']
    if not exp_conc_counts.empty:
        ax.axhline(y=exp_conc_counts.iloc[0], color='red', linestyle='--', label='observed protein concentration')
    ax.axvline(x=chosen_q_Ac, color=theoretical_qAc_color, linestyle='--', label='supportable $q_{Ac}$')
    ax.axvspan(lower_limit, upper_limit, color=experimental_qAc_color, alpha=0.3, label='$q_{Ac}$ bioreactor')
    ax.set_title(f'{protein}')
    ax.yaxis.set_major_formatter(FormatStrFormatter('%.1e'))
    ax.grid()

# Remove grid slots that received no reaction (duplicates or missing data).
for unused_ax in axes[len(plotted_reactions):]:
    fig.delaxes(unused_ax)

fig.text(0.5, 0.02, '$q_{Ac}$ (mmol/g$^{CDW}$/h)', ha='center', fontsize=14)
fig.text(0.02, 0.5, 'protein concentration (M)', va='center', rotation='vertical', fontsize=14)

# All panels share the same legend entries, so the first panel supplies them.
handles, labels = axes[0].get_legend_handles_labels()
fig.legend(handles, labels, loc='lower center', bbox_to_anchor=(0.5, -0.1), ncol=4, fontsize=12)

plt.subplots_adjust(bottom=0.2)  # Increase bottom margin
plt.tight_layout(rect=[0.03, 0.1, 1, 1])  # Adjust bottom margin further

plt.savefig('selected_reactions_plots.tiff', dpi=150, bbox_inches='tight')
plt.show()
plt.close()

In [None]:
# One panel per protein recorded in scatter_data, on a 4-column grid.
all_proteins = list(scatter_data)
num_proteins = len(all_proteins)
cols = 4
rows = (num_proteins + cols - 1) // cols  # ceiling of num_proteins / cols

fig, axes = plt.subplots(nrows=rows, ncols=cols, figsize=(14, 3 * rows))
axes = axes.flatten()

for ax, protein in zip(axes, all_proteins):
    trace = scatter_data[protein]
    ax.scatter(
        trace['q_Ac'],
        trace['predicted'],
        label='protein concentration required to sustain $q_{Ac}$',
        color='black',
    )

    # Horizontal line at the first experimental value listed for this protein.
    observed = df_counts.loc[df_counts['Reaction'] == protein, 'experimental_concentration']
    if len(observed) > 0:
        ax.axhline(
            y=observed.iloc[0],
            color='red',
            linestyle='--',
            label='observed protein concentration',
        )

    # Vertical markers: chosen q_Ac and the lower_limit..upper_limit band.
    ax.axvline(x=chosen_q_Ac, color=theoretical_qAc_color, linestyle='--', label='supportable $q_{Ac}$')
    ax.axvspan(lower_limit, upper_limit, color=experimental_qAc_color, alpha=0.3, label='$q_{Ac}$ bioreactor')

    ax.set_title(f'{protein}')
    ax.yaxis.set_major_formatter(FormatStrFormatter('%.1e'))
    ax.grid()

# Drop the grid slots that are left over after the last protein.
for spare_ax in axes[num_proteins:]:
    fig.delaxes(spare_ax)

# Shared axis captions for the whole figure.
fig.text(0.5, 0.02, '$q_{Ac}$ (mmol/g$^{CDW}$/h)', ha='center', fontsize=14)
fig.text(0.02, 0.5, 'protein concentration (M)', va='center', rotation='vertical', fontsize=14)

# Legend entries are taken from the first panel.
handles, labels = axes[0].get_legend_handles_labels()
fig.legend(handles, labels, loc='lower center', bbox_to_anchor=(0.5, -0.02), ncol=4, fontsize=12)

plt.subplots_adjust(bottom=0.1)
plt.tight_layout(rect=[0.03, 0.03, 1, 1])

plt.savefig('all_proteins_plots.tiff', dpi=150, bbox_inches='tight')
plt.show()
plt.close()


In [None]:
import os
import warnings
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FormatStrFormatter

# Constants
Volume_per_CDW = Q_(1.9, 'ul/mg').to('L/g')

# q_Ac to evaluate (mmol/gCDW/h): the experimental value computed earlier.
user_qAc = experimental_qAc

# mmol/gCDW/h -> mol/g/h, divided by the volume per biomass, then M/s.
absolute_flux = (Q_(user_qAc / 1000, 'mol/g/hour') / Volume_per_CDW).to('M/s')

# Experimental table, with 'Abundance' and 'Protein' copied into the column
# names used for the comparison.
df_counts = pd.read_csv('glycolytic_protein_abundance.csv').assign(
    experimental_concentration=lambda frame: frame['Abundance'],
    Reaction=lambda frame: frame['Protein'],
)

warnings.filterwarnings("ignore", category=RuntimeWarning)

# Single ECM run at the chosen q_Ac; exponentiating the returned values gives
# one concentration per reaction id.
model = eq.ECM_analysis("file_for_ECM", cs_default_bounds, cellular_conditions, absolute_flux)
lnC_ECM = model.ecf.optimize_ecm()[1]
enzyme_levels = np.exp(model.ecf.ECF(lnC_ECM).value)
enz_conc = dict(zip(model.reaction_ids, list(enzyme_levels)))

# Predicted table: the listed reaction ids are renamed to a shared name and
# their concentrations summed.
pooled_reaction_names = {
    'TktA': 'TKT', 'TktB': 'TKT',
    'SBPald': 'ALD', 'FBPald': 'ALD'
}
df_predicted = (
    pd.DataFrame(enz_conc.items(), columns=['Protein', 'Concentration'])
    .assign(Protein=lambda frame: frame['Protein'].replace(pooled_reaction_names))
    .groupby('Protein', as_index=False)['Concentration']
    .sum()
)

# Proteins present in both tables, plus the absolute gap between the two values.
comparison_df = df_counts.merge(
    df_predicted, left_on='Reaction', right_on='Protein', how='inner'
)
comparison_df['Difference'] = (
    comparison_df['experimental_concentration'] - comparison_df['Concentration']
).abs()

# NOTE: this rebinds scatter_data. The q_Ac scan stored per-protein lists under
# 'q_Ac'/'predicted'; from here on each protein maps to one experimental and one
# predicted value, so the q_Ac-scan plotting cells cannot be re-run afterwards
# without re-running the scan.
scatter_data = {
    row['Reaction']: {
        'experimental': row['experimental_concentration'],
        'predicted': row['Concentration'],
    }
    for _, row in comparison_df.iterrows()
}

# Save results
comparison_df.to_csv('single_qAc_comparison.csv', index=False)


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import pearsonr
from scipy.stats import linregress

# Per-protein values from scatter_data, scaled by 1e6 (M -> µM).
proteins = list(scatter_data)
exp_counts = [entry['experimental'] * 1e6 for entry in scatter_data.values()]
predicted_values = [entry['predicted'] * 1e6 for entry in scatter_data.values()]

# Reorder all three lists by experimental abundance, largest first.
sorted_indices = np.argsort(exp_counts)[::-1]
proteins = [proteins[k] for k in sorted_indices]
exp_counts = [exp_counts[k] for k in sorted_indices]
predicted_values = [predicted_values[k] for k in sorted_indices]

# Pearson correlation on the untransformed µM values.
r, p_pearson = pearsonr(exp_counts, predicted_values)
print(f"Pearson correlation coefficient: {r:.2f}, p-value: {p_pearson:.2e}")

# Linear regression of log10(observed) on log10(required).
# The fitted line (x_vals, y_vals) is computed but not drawn in this cell.
log_exp = np.log10(exp_counts)
log_pred = np.log10(predicted_values)
slope, intercept, r_value, p_value, std_err = linregress(log_pred, log_exp)
x_vals = np.linspace(min(log_pred), max(log_pred), 100)
y_vals = slope * x_vals + intercept

fig, ax = plt.subplots(figsize=(8, 6))

# Proteins whose number is placed up-left of the marker instead of down-right.
reverse_label_proteins = {"PGK", "PGM"}

# Each point is tagged with its rank; the rank -> protein key goes in a side box.
labels = []
for number, (x, y, protein) in enumerate(zip(predicted_values, exp_counts, proteins), start=1):
    ha, va = ('right', 'bottom') if protein in reverse_label_proteins else ('left', 'top')

    ax.text(x, y, f'{number}', fontsize=10, color='black', ha=ha, va=va)
    ax.scatter(x, y, color='#05a70c', alpha=0.7)
    labels.append(f"{number}: {protein}")

# Identity line spanning the full range of both axes.
max_val = max(max(exp_counts), max(predicted_values))
min_val = min(min(exp_counts), min(predicted_values))
ax.plot([min_val, max_val], [min_val, max_val], color='#b6b6b4', linestyle='--')

# Correlation statistics inside the axes.
textbox_content = f"r$^{{Pearson}}$ = {r:.2f}, p = {p_pearson:.2e}"
ax.text(
    0.5, 0.15, textbox_content,
    transform=ax.transAxes,
    fontsize=12,
    verticalalignment='top',
    bbox=dict(facecolor='white', alpha=0.8, edgecolor='white'),
)

# Rank -> protein key to the right of the axes.
legend_text = "\n".join(labels)
fig.text(
    0.85, 0.5, legend_text,
    fontsize=10, va='center', ha='left',
    bbox=dict(facecolor='white', alpha=0.8, edgecolor='black'),
)

# Axis labels, log scaling and tick size.
ax.set_ylabel('observed protein concentration (µM)', fontsize=16)
ax.set_xlabel('required protein concentration (µM)', fontsize=16)
ax.set_title('')
ax.set_xscale('log')
ax.set_yscale('log')
ax.tick_params(axis='both', labelsize=16)

# Leave the right 15% of the figure for the key.
plt.tight_layout(rect=[0, 0, 0.85, 1])

plt.savefig('scatter_plot_correlation_experimental_qAc.tiff', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Over-capacity as a log10 ratio of observed to required concentration.

import numpy as np
import matplotlib.pyplot as plt

exp_arr = np.array(exp_counts)
pred_arr = np.array(predicted_values)

# log10(observed) - log10(required); positive means more enzyme than required.
delta = np.log10(exp_arr) - np.log10(pred_arr)

# Largest ratio first.
idx_sorted = np.argsort(delta)[::-1]
delta_sorted = delta[idx_sorted]
proteins_sorted = [proteins[k] for k in idx_sorted]

bar_positions = range(len(delta_sorted))

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(bar_positions, delta_sorted, alpha=0.8)

# One tick per bar, labelled with the protein name.
ax.set_xticks(bar_positions)
ax.set_xticklabels(proteins_sorted, rotation=90, fontsize=10)

ax.set_ylabel(
    r'$\log_{10}\!\left(\frac{\mathrm{obs}}{\mathrm{req}}\right)$',
    fontsize=14
)

# Reference line where observed equals required.
ax.axhline(0, color='gray', linestyle='--', linewidth=1)

ax.set_title('Enzyme Over-capacity (log₁₀ fold-change)', fontsize=16)
plt.tight_layout()
plt.show()


In [None]:
# Absolute over-capacity: observed minus required concentration, in µM.

import numpy as np
import matplotlib.pyplot as plt

exp_arr = np.array(exp_counts)         # observed, µM
pred_arr = np.array(predicted_values)  # required, µM

abs_over = exp_arr - pred_arr

# Largest surplus first.
idx_sorted = np.argsort(abs_over)[::-1]
abs_over_sorted = abs_over[idx_sorted]
proteins_sorted = [proteins[k] for k in idx_sorted]

bar_positions = range(len(abs_over_sorted))

fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(bar_positions, abs_over_sorted, alpha=0.8)

# One tick per bar, labelled with the protein name.
ax.set_xticks(bar_positions)
ax.set_xticklabels(proteins_sorted, rotation=90, fontsize=10)

ax.set_ylabel('Observed – Required Enzyme Concentration (µM)', fontsize=14)
ax.set_title('Absolute Enzyme Over-capacity (µM)', fontsize=16)

# Reference line where observed equals required.
ax.axhline(0, color='gray', linestyle='--', linewidth=1)

plt.tight_layout()
# Saving is disabled; uncomment to write the figure to disk.
#plt.savefig('enzyme_absolute_overcapacity_barplot.tiff', dpi=150)
plt.show()
