# Creating metabolic presence/absence matrices from AGORA2 .json files
10-29-25 Casey Martin + Laurie Lyon

In [19]:
import json
from pathlib import Path

In [38]:
# Path to directory with unzipped AGORA2 models.
# This is a relative path, so it is resolved against the kernel's current working directory.
MODEL_DIR = "../data/models/agora201_refseq216_genus_1/a4ce3649-c8b8-4893-8bd2-4886aa1f40e8/data/"
# Specific .json model (a file inside MODEL_DIR) used to test getting annotations
TEST_FILE = "Bacteroides.json"

In [39]:
# Join with pathlib so the result does not depend on MODEL_DIR ending in a path
# separator, and decode explicitly as UTF-8 instead of the platform's locale
# encoding (which is not UTF-8 on every system).
with open(Path(MODEL_DIR) / TEST_FILE, encoding="utf-8") as f:
    data = json.load(f)

In [40]:
# COUNT ALL ANNOTATION TYPES FOR EACH REACTION
def get_annotation_types(gemm_json):
    """Tally how many reactions carry each annotation type.

    Args:
        gemm_json: Parsed model JSON; must have a "reactions" list of dicts.

    Returns:
        dict mapping each annotation key (e.g. "sbo", "ec-code") to the number
        of reactions whose "annotation" entry contains that key. A reaction
        counts once per key, however many values are stored under it. Keys
        appear in first-seen order.
    """
    tally = {}
    for rxn in gemm_json["reactions"]:
        # Reactions without an "annotation" entry contribute nothing.
        if "annotation" in rxn:
            for annotation_type in rxn["annotation"].keys():
                if annotation_type in tally:
                    tally[annotation_type] += 1
                else:
                    tally[annotation_type] = 1
    return tally

# Tally annotation types across every reaction in the loaded test model
annotation_counts = get_annotation_types(data)

# Total reaction count, for comparison against the per-type counts shown below
print(f"# of reactions for {TEST_FILE}:", len(data["reactions"]))
# Bare last expression so the notebook displays the counts dict
annotation_counts

# of reactions for Bacteroides.json: 4477


{'ec-code': 1414,
 'metanetx.reaction': 1430,
 'sbo': 4473,
 'seed.reaction': 2116,
 'kegg.reaction': 291,
 'rhea': 218}

Different types of reaction annotations (from most to least observed): 
 - sbo (Systems Biology Ontology)
 - seed.reaction
 - metanetx.reaction
 - ec-code (Enzyme Commission number system)
 - kegg.reaction
 - rhea

In [None]:
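# Create edges for a given reaction
# Edges are created between all pairs of reactants and products
# This is a convenient lie: a reaction is really a single hyperedge joining
# all of its reactants to all of its products (i.e. the model is a HYPERGRAPH),
# and the pairwise expansion below flattens it into an ordinary graph.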

def create_reaction_edge_list(reaction):
    """Expand one reaction into pairwise (reactant, product, reaction_id) edges.

    Metabolites with a negative stoichiometric coefficient are reactants and
    those with a positive coefficient are products. A coefficient of exactly 0
    means the metabolite does not take part in the reaction, so it is skipped
    rather than being treated as a product.

    Direction follows the sign of the coefficients as written; no other field
    of the reaction (e.g. bounds) is consulted.

    Args:
        reaction: dict with a "metabolites" mapping of metabolite id ->
            coefficient, and an "id" (only read when at least one edge exists).

    Returns:
        list of (reactant_id, product_id, reaction_id) tuples, one per
        reactant/product pair. Empty if the reaction lacks either side.
    """
    reactants = []
    products = []
    for metabolite_id, coefficient in reaction["metabolites"].items():
        if coefficient < 0:
            reactants.append(metabolite_id)
        elif coefficient > 0:
            products.append(metabolite_id)
        # coefficient == 0: not a participant, so it yields no edges

    # Cartesian product of reactants x products, each tagged with the reaction id
    return [
        (reactant, product, reaction["id"])
        for reactant in reactants
        for product in products
    ]

# Do this for all reactions in a GEMM JSON

def create_model_edge_list(gemm_json):
    """Collect the edges of every reaction in a model into one flat list.

    Args:
        gemm_json: Parsed model JSON; must have a "reactions" list.

    Returns:
        list of the edges produced by create_reaction_edge_list for each
        reaction, concatenated in the order the reactions are listed.
    """
    return [
        edge
        for rxn in gemm_json["reactions"]
        for edge in create_reaction_edge_list(rxn)
    ]

# Build the flat edge list for the test model loaded into `data`
edge_list = create_model_edge_list(data)


In [43]:
# Number of edges in the test model's edge list
len(edge_list)

26543

### Need to figure out how to put edge lists into FactorX to create networks 
### Want to be able to create networks between 2 genera (need to find which OTUs from TLC map to which AGORA genus GEMs)
### Downstream of FactorX --> Calculate SCCs, calculate competitive and cooperative indices