# check environment

In [1]:
import subprocess

# Command to list conda environments
conda_env_command = ["conda", "env", "list"]

# Command to print current directory
pwd_command = ["pwd"]

# Execute the commands
try:
    # Run the command to list conda environments
    subprocess.run(conda_env_command, check=True)

    # Run the command to print current directory
    subprocess.run(pwd_command, check=True)
except subprocess.CalledProcessError as e:
    print("An error occurred while executing the command:", e)


# conda environments:
#
base                     /opt/conda
jupyterlab               /opt/conda/envs/jupyterlab
pytorch                  /opt/conda/envs/pytorch
tensorflow               /opt/conda/envs/tensorflow
tvnEnv0005_cobra      *  /opt/conda/envs/tvnEnv0005_cobra

/home/jupyter/google_cloud/fba/formatotroph


# import data from google sheet

In [1]:
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import build
import pandas as pd

# Configuration variables
SPREADSHEET_url = 'https://docs.google.com/spreadsheets/d/1LuksiakZgUk0-WDZ-GSwJFKvNZS7_Kv425EI-KMUfuI/edit?usp=sharing'
SERVICE_ACCOUNT_FILE = '/home/jupyter/google_cloud/fba/google_sheet/thuanguyen2-230332373cd1.json'
SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly']

# Extract the SPREADSHEET_ID from the URL
SPREADSHEET_ID = SPREADSHEET_url.split('/')[5]

def get_sheet_names(service, spreadsheet_id):
    sheet_metadata = service.spreadsheets().get(spreadsheetId=spreadsheet_id).execute()
    sheets = sheet_metadata.get('sheets', '')
    return [sheet.get("properties", {}).get("title", "") for sheet in sheets]

def read_sheet_to_dataframe(service, spreadsheet_id, sheet_name):
    range_name = f'{sheet_name}'
    result = service.spreadsheets().values().get(spreadsheetId=spreadsheet_id, range=range_name).execute()
    values = result.get('values', [])
    if values and len(values) > 1:
        return pd.DataFrame(values[1:], columns=values[0])
    else:
        print(f'No data found in sheet: {sheet_name}')
        return pd.DataFrame()

def main():
    creds = Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
    service = build('sheets', 'v4', credentials=creds)
    sheet_names = get_sheet_names(service, SPREADSHEET_ID)
    for sheet_name in sheet_names:
        df = read_sheet_to_dataframe(service, SPREADSHEET_ID, sheet_name)
        # Dynamically assign DataFrame to a variable named after the sheet
        globals()[sheet_name.replace(" ", "_")] = df
        print(f'Sheet "{sheet_name}" has been read into a DataFrame and assigned to a variable with a similar name.')

# Execute the script
if __name__ == '__main__':
    main()


Sheet "stoimax" has been read into a DataFrame and assigned to a variable with a similar name.
Sheet "metabolite" has been read into a DataFrame and assigned to a variable with a similar name.
Sheet "reaction" has been read into a DataFrame and assigned to a variable with a similar name.


# import metabolic model

In [6]:
import cobra

# The ID of the iJO1366 model in the BiGG database
model_id = "iJO1366"

try:
    # Attempt to load the model from the BiGG database
    model = cobra.io.load_model(model_id, cache=True)
    print(f"Model {model_id} successfully loaded.")
except Exception as e:
    print(f"An error occurred while loading the model: {e}")

# You can now work with the `model` object for your analyses


Model iJO1366 successfully loaded.


In [7]:
model

0,1
Name,iJO1366
Memory address,7f559a636510
Number of metabolites,1805
Number of reactions,2583
Number of genes,1367
Number of groups,37
Objective expression,1.0*BIOMASS_Ec_iJO1366_core_53p95M - 1.0*BIOMASS_Ec_iJO1366_core_53p95M_reverse_5c8b1
Compartments,"cytosol, extracellular space, periplasm"


# check metabolites in model

In [11]:
import pandas as pd
import cobra

# Get the list of metabolite IDs from the model for comparison
metabolite_ids_model = [met.id for met in model.metabolites]

# Create a new column in the metabolite_matrix DataFrame to store the status of each metabolite
metabolite['Status'] = metabolite['id'].apply(lambda x: 'Present' if x in metabolite_ids_model else 'Not Present')

# Specify the file path where you want to save the status list
file_path = '/home/jupyter/google_cloud/fba/formatotroph/check_metabolite/metabolite_status.csv'

# Save the DataFrame with metabolite statuses to a CSV file
metabolite.to_csv(file_path, index=False)

print(f"Metabolite status list has been saved to {file_path}")


Metabolite status list has been saved to /home/jupyter/google_cloud/fba/formatotroph/check_metabolite/metabolite_status.csv


In [13]:
metabolite

Unnamed: 0,id,name,Status
0,for_c,formate,Present
1,thf_c,"5,6,7,8-Tetrahydrofolate",Present
2,10fthf_c,10-Formyltetrahydrofolate,Present
3,methf_c,"5,10-Methenyltetrahydrofolate",Present
4,mlthf_c,"5,10-Methylenetetrahydrofolate",Present
5,gly_c,Glycine,Present
6,ser__L_c,Serine,Present
7,pyr_c,Pyruvate,Present
8,nadh_c,Nicotinamide adenine dinucleotide,Present
9,nad_c,Nicotinamide adenine dinucleotide - reduced,Present


# check reaction in model

In [20]:
import pandas as pd
import cobra

# Assuming 'model' and 'stoimax' are already loaded

def create_reaction_string(metabolites, coefficients):
    reactants = []
    products = []
    for met_id, coefficient in zip(metabolites, coefficients):
        if coefficient < 0:  # Reactant
            reactants.append(f"{-coefficient} {met_id}")
        elif coefficient > 0:  # Product
            products.append(f"{coefficient} {met_id}")
    # Assuming all reactions are reversible for simplicity
    return " + ".join(reactants) + " <--> " + " + ".join(products)

def check_reaction_in_model(reaction_str, model):
    for reaction in model.reactions:
        model_reaction_str = reaction.build_reaction_string(use_metabolite_names=False)
        # Check both the original and reverse reaction strings
        if model_reaction_str == reaction_str:
            return True
        # Additional logic for reversing the reaction string could be implemented here
    return False

# List of metabolite IDs from the first column
metabolite_ids = stoimax.iloc[:, 0].tolist()

# Initialize a list to store reaction presence information
reaction_presence = []

# Iterate over each reaction (column) in the stoimax DataFrame, excluding the first column
for reaction_id in stoimax.columns[1:]:
    coefficients = stoimax[reaction_id].apply(pd.to_numeric, errors='coerce').tolist()  # Convert coefficients to numeric
    reaction_str = create_reaction_string(metabolite_ids, coefficients)
    is_present = check_reaction_in_model(reaction_str, model)
    reaction_presence.append((reaction_id, reaction_str, is_present))

# Convert the list to a DataFrame
reaction_presence_df = pd.DataFrame(reaction_presence, columns=['Reaction ID', 'Reaction String', 'Is Present in Model'])

# Display or save the DataFrame
print(reaction_presence_df)
reaction_presence_df.to_csv('/home/jupyter/google_cloud/fba/formatotroph/check_reaction', index=False)


  Reaction ID                                    Reaction String  \
0   MeFtfL_ex  1 for_c + 1 thf_c + 1 atp_c <--> 1 10fthf_c + ...   
1    MeFch_ex        1 10fthf_c + 1 h_c <--> 1 methf_c + 1 h2o_c   
2   MeMdtA_ex    1 methf_c + 1 nadph_c <--> 1 mlthf_c + 1 nadp_c   
3    EcGcv_en  1 mlthf_c + 1 nadh_c + 1 co2_c + 1 nh4_c <--> ...   
4   EcGlyA_en  1 mlthf_c + 1 gly_c + 1 h2o_c <--> 1 thf_c + 1...   
5   MeSdaA_ex                  1 ser__L_c <--> 1 pyr_c + 1 nh4_c   
6    PdFdh_ex          1 for_c + 1 nad_c <--> 1 nadh_c + 1 co2_c   

   Is Present in Model  
0                False  
1                False  
2                False  
3                False  
4                False  
5                False  
6                False  


IsADirectoryError: [Errno 21] Is a directory: '/home/jupyter/google_cloud/fba/formatotroph/check_reaction'