In [None]:
pip install cobra[array]

In [1]:
import pandas as pd
import cobra
from cobra import flux_analysis
from cobra.flux_analysis import flux_variability_analysis
from itertools import chain

# Setting uptake rates and desired objective function

In [3]:
# Load your model
model = cobra.io.read_sbml_model("C:/Users/Saathvik/Documents/MATLAB/Project_Liver/data/GSMMs/Liver_FASTCORE.xml")  # Replace with your actual model file

# Iterate through all exchange reactions in the model
for reaction in model.exchanges:
    # Check if the reaction contains a metabolite with carbon and is not CO2 or H2CO3
    for metabolite in reaction.metabolites:
        if (
            metabolite.formula is not None and  # Ensure the formula exists
            "C" in metabolite.formula and  # Check if it contains carbon
            reaction.id not in ["EX_co2_e", "EX_h2co3_e"]  # Exclude CO2 and H2CO3 exchanges
        ):
            reaction.lower_bound = -10
            break  # Exit the loop after modifying the reaction

# Set O2 exchange reaction lower bound to -10 explicitly
model.reactions.get_by_id("EX_o2_e").lower_bound = -10
model.objective = "BIOMASS_reaction"

# Save the modified model
cobra.io.sbml.write_sbml_model(model,"C:/Users/Saathvik/Documents/MATLAB/Project_Liver/data/GSMMs/Liver_FASTCORE_fva.xml")  # Save the updated model


# Gene deletion to mimic drug activity, followed by FVA

In [None]:
model=cobra.io.read_sbml_model("C:/Users/Saathvik/Documents/MATLAB/Project_Liver/data/GSMMs/Liver_FASTCORE_fva.xml")
data=pd.read_csv("C:/Users/Saathvik/Documents/MATLAB/Project_Liver/data/DrugBank/drug_gene_targets_meta_1.csv")
a=None
for i in range(0,len(data['target_gene_name'])):
    targets=data['target_gene_name'][i].split(',')
    temp_model=model
    print(targets)
    for j in range(0,len(targets)):
        target_gene=model.genes.get_by_id(targets[j].lstrip())
        controlled_reactions = [rxn for rxn in model.reactions 
                       if target_gene in rxn.genes]
        
        for reaction in controlled_reactions:
            
            reaction.lower_bound = 0  # Set new lower bound
            reaction.upper_bound = 0  # Set new upper bound


        '''gene_function = getattr(model.genes, targets[j].lstrip())
        result=gene_function
        from cobra.manipulation import knock_out_model_genes


        affected_reactions = knock_out_model_genes(model, ["your_gene_id"])
'''
        #print(result)
    fva_result=flux_variability_analysis(temp_model,processes=8)
    # Assuming your DataFrame is called 'df'
    path=f'C:/Users/Saathvik/Documents/MATLAB/Project_Liver/data/FVA_Bounds/fva_bounds_{data['dg_id'][i]}.csv'
    fva_result.to_csv(path, index=False)


In [2]:


model=cobra.io.read_sbml_model("C:/Users/Saathvik/Documents/MATLAB/Project_Liver/data/GSMMs/Liver_FASTCORE_fva.xml")

# 1. Save ALL reactions
all_reactions = [(rxn.id, rxn.name) for rxn in model.reactions]
pd.DataFrame(all_reactions).to_csv("C:/Users/Saathvik/Documents/MATLAB/Project_Liver/data/GSMMs/all_reactions.csv", index=False, header=False)

# 2. Save EXCHANGE reactions
exchange_reactions = [(rxn.id, rxn.name) for rxn in model.exchanges]
pd.DataFrame(exchange_reactions).to_csv("C:/Users/Saathvik/Documents/MATLAB/Project_Liver/data/GSMMs/exchange_reactions.csv", index=False, header=False)


# Normalization

## Normalization technique 1

across individual features

In [5]:
import os
import pandas as pd
import numpy as np

# Configuration
input_folder = 'C:/Users/Saathvik/Documents/MATLAB/Project_Liver/data/FVA_Bounds/'  # Update this path
output_folder = 'C:/Users/Saathvik/Documents/MATLAB/Project_Liver/data/FVA_Bounds_norm1/'

csv_files = [os.path.join(input_folder, f) 
            for f in os.listdir(input_folder) 
            if f.endswith('.csv')]
dtt=pd.read_csv(csv_files[0])
min_df=pd.DataFrame(np.zeros((len(dtt['minimum']),len(csv_files))))
max_df=pd.DataFrame(np.zeros((len(dtt['maximum']),len(csv_files))))

for i in range(0,len(csv_files)):
  temp_df=pd.read_csv(csv_files[i])
  min_df[i]=temp_df['minimum']
  max_df[i]=temp_df['maximum']

means_df = pd.DataFrame(0, index=dtt.index, columns=dtt.columns)
std_df= pd.DataFrame(0, index=dtt.index, columns=dtt.columns)

for i in range(0,len(dtt['minimum'])):
  means_df['minimum'][i]=np.mean(min_df.iloc[i])
  std_df['minimum'][i]=np.std(min_df.iloc[i])
  means_df['maximum'][i]=np.mean(max_df.iloc[i])
  std_df['maximum'][i]=np.std(max_df.iloc[i])




for i in csv_files:
    clean_df=pd.DataFrame(0, index=dtt.index, columns=dtt.columns)
    old_df=pd.read_csv(i)
    temp_df=old_df.sub(means_df)
    clean_df=temp_df.div(std_df)
    clean_df.fillna(0, inplace=True)
    clean_df.to_csv(os.path.join(output_folder,os.path.basename(i)),index=False, header=False)




You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  means_df['minimum'][i]=np.mean(min_df.iloc[i])
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataF

## Normalization technique 2

across drugs

In [6]:
import os
import pandas as pd
import numpy as np

# Configuration
input_folder = 'C:/Users/Saathvik/Documents/MATLAB/Project_Liver/data/FVA_Bounds/'  # Update this path
output_folder = 'C:/Users/Saathvik/Documents/MATLAB/Project_Liver/data/FVA_Bounds_norm2/'


# Get CSV files list
csv_files = [os.path.join(input_folder, f) 
            for f in os.listdir(input_folder) 
            if f.endswith('.csv')]

# Phase 1: Data Collection and Statistics Calculation
all_lower = []
all_upper = []

print("Starting data collection phase...")
for file in csv_files:
    try:
        # Read CSV with numeric conversion
        df = pd.read_csv(file, header=None, dtype=str)
        
        # Convert columns to numeric
        df[0] = pd.to_numeric(df[0], errors='coerce')  # Lower bound
        df[1] = pd.to_numeric(df[1], errors='coerce')  # Upper bound
        
        # Clean data
        clean_df = df.dropna()
        
        if not clean_df.empty:
            all_lower.extend(clean_df[0].tolist())
            all_upper.extend(clean_df[1].tolist())
            print(f"Collected data from {os.path.basename(file)}: {len(clean_df)} valid rows")
        else:
            print(f"Warning: {os.path.basename(file)} contained no valid numeric data")
            
    except Exception as e:
        print(f"Error reading {os.path.basename(file)}: {str(e)}")

# Handle empty data scenarios
if not all_lower or not all_upper:
    raise ValueError("Insufficient valid data for calculation - check input files")

# Calculate statistics
stats = {
    'lower': {
        'mean': np.mean(all_lower),
        'std': np.std(all_lower) if np.std(all_lower) != 0 else 1.0
    },
    'upper': {
        'mean': np.mean(all_upper),
        'std': np.std(all_upper) if np.std(all_upper) != 0 else 1.0
    }
}

print("\nGlobal Statistics:")
print(f"Lower Bounds - Mean: {stats['lower']['mean']:.4f}, Std: {stats['lower']['std']:.4f}")
print(f"Upper Bounds - Mean: {stats['upper']['mean']:.4f}, Std: {stats['upper']['std']:.4f}\n")

# Phase 2: Data Processing
print("Starting standardization phase...")
processed_files = 0
for file in csv_files:
    try:
        # Read and clean data
        df = pd.read_csv(file, header=None, dtype=str)
        df[0] = pd.to_numeric(df[0], errors='coerce')
        df[1] = pd.to_numeric(df[1], errors='coerce')
        clean_df = df.dropna()
        
        if clean_df.empty:
            print(f"Skipping {os.path.basename(file)}: No valid data after cleaning")
            continue
            
        # Standardization with safe division
        clean_df[0] = (clean_df[0] - stats['lower']['mean']) / stats['lower']['std']
        clean_df[1] = (clean_df[1] - stats['upper']['mean']) / stats['upper']['std']
        
        # Save results
        output_path = os.path.join(output_folder, os.path.basename(file))
        clean_df.to_csv(output_path, index=False, header=False)
        processed_files += 1
        print(f"Successfully processed {os.path.basename(file)}")
        
    except Exception as e:
        print(f"Failed to process {os.path.basename(file)}: {str(e)}")

# Final report
print("\nProcessing Complete")
print(f"Total files attempted: {len(csv_files)}")
print(f"Successfully processed: {processed_files}")
print(f"Failed files: {len(csv_files) - processed_files}")
print(f"Output folder: {output_folder}")



Starting data collection phase...
Collected data from fva_bounds_DB00175.csv: 2366 valid rows
Collected data from fva_bounds_DB00182.csv: 2366 valid rows
Collected data from fva_bounds_DB00184.csv: 2366 valid rows
Collected data from fva_bounds_DB00197.csv: 2366 valid rows
Collected data from fva_bounds_DB00198.csv: 2366 valid rows
Collected data from fva_bounds_DB00201.csv: 2366 valid rows
Collected data from fva_bounds_DB00205.csv: 2366 valid rows
Collected data from fva_bounds_DB00210.csv: 2366 valid rows
Collected data from fva_bounds_DB00227.csv: 2366 valid rows
Collected data from fva_bounds_DB00233.csv: 2366 valid rows
Collected data from fva_bounds_DB00242.csv: 2366 valid rows
Collected data from fva_bounds_DB00244.csv: 2366 valid rows
Collected data from fva_bounds_DB00277.csv: 2366 valid rows
Collected data from fva_bounds_DB00313.csv: 2366 valid rows
Collected data from fva_bounds_DB00316.csv: 2366 valid rows
Collected data from fva_bounds_DB00323.csv: 2366 valid rows
Collec

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_df[0] = (clean_df[0] - stats['lower']['mean']) / stats['lower']['std']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_df[1] = (clean_df[1] - stats['upper']['mean']) / stats['upper']['std']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_df[0] = (clean_df[0] - stats['lower']['mean']

Successfully processed fva_bounds_DB00439.csv
Successfully processed fva_bounds_DB00461.csv
Successfully processed fva_bounds_DB00465.csv
Successfully processed fva_bounds_DB00482.csv
Successfully processed fva_bounds_DB00494.csv
Successfully processed fva_bounds_DB00533.csv
Successfully processed fva_bounds_DB00548.csv
Successfully processed fva_bounds_DB00550.csv
Successfully processed fva_bounds_DB00563.csv
Successfully processed fva_bounds_DB00573.csv
Successfully processed fva_bounds_DB00575.csv
Successfully processed fva_bounds_DB00580.csv
Successfully processed fva_bounds_DB00630.csv
Successfully processed fva_bounds_DB00631.csv
Successfully processed fva_bounds_DB00641.csv
Successfully processed fva_bounds_DB00651.csv
Successfully processed fva_bounds_DB00710.csv
Successfully processed fva_bounds_DB00712.csv
Successfully processed fva_bounds_DB00721.csv
Successfully processed fva_bounds_DB00735.csv
Successfully processed fva_bounds_DB00749.csv
Successfully processed fva_bounds_

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_df[0] = (clean_df[0] - stats['lower']['mean']) / stats['lower']['std']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_df[1] = (clean_df[1] - stats['upper']['mean']) / stats['upper']['std']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_df[0] = (clean_df[0] - stats['lower']['mean']

Successfully processed fva_bounds_DB00780.csv
Successfully processed fva_bounds_DB00784.csv
Successfully processed fva_bounds_DB00788.csv
Successfully processed fva_bounds_DB00806.csv
Successfully processed fva_bounds_DB00822.csv
Successfully processed fva_bounds_DB00861.csv
Successfully processed fva_bounds_DB00870.csv
Successfully processed fva_bounds_DB00884.csv
Successfully processed fva_bounds_DB00909.csv
Successfully processed fva_bounds_DB00920.csv
Successfully processed fva_bounds_DB00924.csv
Successfully processed fva_bounds_DB00936.csv
Successfully processed fva_bounds_DB00945.csv
Successfully processed fva_bounds_DB00963.csv
Successfully processed fva_bounds_DB00973.csv
Successfully processed fva_bounds_DB00991.csv
Successfully processed fva_bounds_DB00993.csv
Successfully processed fva_bounds_DB01005.csv
Successfully processed fva_bounds_DB01009.csv
Successfully processed fva_bounds_DB01011.csv
Successfully processed fva_bounds_DB01016.csv
Successfully processed fva_bounds_

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_df[0] = (clean_df[0] - stats['lower']['mean']) / stats['lower']['std']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_df[1] = (clean_df[1] - stats['upper']['mean']) / stats['upper']['std']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_df[0] = (clean_df[0] - stats['lower']['mean']

Successfully processed fva_bounds_DB01073.csv
Successfully processed fva_bounds_DB01075.csv
Successfully processed fva_bounds_DB01080.csv
Successfully processed fva_bounds_DB01083.csv
Successfully processed fva_bounds_DB01091.csv
Successfully processed fva_bounds_DB01098.csv
Successfully processed fva_bounds_DB01110.csv
Successfully processed fva_bounds_DB01132.csv
Successfully processed fva_bounds_DB01136.csv
Successfully processed fva_bounds_DB01151.csv
Successfully processed fva_bounds_DB01168.csv
Successfully processed fva_bounds_DB01213.csv
Successfully processed fva_bounds_DB01216.csv
Successfully processed fva_bounds_DB01221.csv
Successfully processed fva_bounds_DB01247.csv
Successfully processed fva_bounds_DB01275.csv
Successfully processed fva_bounds_DB01280.csv
Successfully processed fva_bounds_DB01399.csv
Successfully processed fva_bounds_DB01577.csv
Successfully processed fva_bounds_DB04854.csv
Successfully processed fva_bounds_DB06736.csv
Successfully processed fva_bounds_

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_df[0] = (clean_df[0] - stats['lower']['mean']) / stats['lower']['std']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_df[1] = (clean_df[1] - stats['upper']['mean']) / stats['upper']['std']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_df[0] = (clean_df[0] - stats['lower']['mean']

Successfully processed fva_bounds_DB09096.csv
Successfully processed fva_bounds_DB09526.csv
Successfully processed fva_bounds_DB13867.csv
Successfully processed fva_bounds_DB14511.csv

Processing Complete
Total files attempted: 97
Successfully processed: 97
Failed files: 0
Output folder: C:/Users/Saathvik/Documents/MATLAB/Project_Liver/data/FVA_Bounds_norm2/


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_df[0] = (clean_df[0] - stats['lower']['mean']) / stats['lower']['std']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_df[1] = (clean_df[1] - stats['upper']['mean']) / stats['upper']['std']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_df[0] = (clean_df[0] - stats['lower']['mean']

## Normalization Technique 3

using scikit-learn scaler only on the training and test subsets, just before training 

In [7]:
import os
import pandas as pd
import numpy as np

# Configuration
input_folder = 'C:/Users/Saathvik/Documents/MATLAB/Project_Liver/data/FVA_Bounds/'  # Update this path
output_folder = 'C:/Users/Saathvik/Documents/MATLAB/Project_Liver/data/FVA_Bounds_norm3/'

csv_files = [os.path.join(input_folder, f) 
            for f in os.listdir(input_folder) 
            if f.endswith('.csv')]
for i in csv_files:
    old_df=pd.read_csv(i)
    old_df.to_csv(os.path.join(output_folder,os.path.basename(i)),index=False, header=False)