**Bacteria network analysis**

In [8]:
import numpy as np
import matplotlib.pyplot as pl
import pandas as pd
import networkx as nx # import the library
import plotly.graph_objects as go
import community as community_louvain

from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Line3DCollection
import matplotlib.pyplot as plt

In [12]:
import os
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import networkx as nx
#import community.community_louvain as community_louvain
import pandas as pd
import os 
import matplotlib.pyplot as plt 
import networkx as nx 
import community.community_louvain as community_louvain

In [13]:
#!/usr/bin/env python3
"""Build microbe–metabolite–pathway networks for bacteria at each time point.

This script:
- builds a NetworkX graph for each enrich_group timepoint
- colors nodes by type (microorganism / metabolite / pathway)
- colors edges by sign of correlation (red = positive, blue = negative, gray = zero)
- saves PNG and PDF images of each network
- computes degree centrality hubs and saves them to CSV
- runs Louvain community detection and saves community assignments to CSV

It assumes that the following pandas DataFrames are already loaded:
- bact_in_time  (with columns: 'Microorganism', 'enrich_group', ...)
- mo_metab_b    (with columns: 'Microorganism', 'Metabolite', 'Correlation', ...)
- metab_path    (with columns: 'Metabolite', 'Pathway', 'Correlation_y', ...)
- path_mo_b     (with columns: 'Microorganism', 'Pathway', 'Correlation', ...)
"""

import os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import networkx as nx
import community.community_louvain as community_louvain


def _add_signed_edges(graph, frame, source_col, target_col, corr_col):
    """Add one edge per row of ``frame`` to ``graph``.

    Each edge joins ``row[source_col]`` and ``row[target_col]`` and carries
    two attributes derived from ``row[corr_col]``:

    - ``weight``: absolute value of the correlation
    - ``sign``: 1 for positive, -1 for negative, 0 otherwise

    If the same pair of nodes appears in several rows, the attributes of the
    last row win (``Graph.add_edge`` overwrites existing edge data).
    """
    for _, row in frame.iterrows():
        corr = float(row[corr_col])
        graph.add_edge(
            row[source_col], row[target_col],
            weight=abs(corr),
            sign=(1 if corr > 0 else -1 if corr < 0 else 0),
        )


def _save_hubs(hubs, node_type, hubs_dir, timepoint):
    """Write a ``{node: degree centrality}`` mapping to CSV, highest first."""
    df = pd.DataFrame(list(hubs.items()),
                      columns=[node_type, "DegreeCentrality"])
    df = df.sort_values(by="DegreeCentrality", ascending=False)
    df.to_csv(
        os.path.join(hubs_dir, f"{node_type}_hubs_time_{timepoint}.csv"),
        index=False
    )


def build_bacteria_networks(bact_in_time, mo_metab_b, metab_path, path_mo_b,
                            output_dir="networks_bacteria", seed=42):
    """Build, draw and export one microbe–metabolite–pathway network per time point.

    For every distinct value of ``bact_in_time["enrich_group"]`` the function:

    1. keeps the microorganisms listed for that time point,
    2. builds an undirected graph from microorganism–metabolite,
       metabolite–pathway and microorganism–pathway rows,
    3. saves a drawing as PNG and PDF in ``output_dir``,
    4. saves degree-centrality tables per node type in ``output_dir/hubs``,
    5. saves Louvain community assignments in ``output_dir/communities``.

    Time points whose graph has no nodes are reported and skipped.

    Parameters
    ----------
    bact_in_time : pandas.DataFrame
        Must contain the columns 'Microorganism' and 'enrich_group'.
    mo_metab_b : pandas.DataFrame
        Must contain 'Microorganism', 'Metabolite' and 'Correlation'.
    metab_path : pandas.DataFrame
        Must contain 'Metabolite' and 'Pathway'; the merged table used for
        the metabolite–pathway edges must expose 'Correlation_y'.
    path_mo_b : pandas.DataFrame
        Must contain 'Microorganism', 'Pathway' and 'Correlation'.
    output_dir : str, optional
        Directory that receives the figures and the 'hubs' and
        'communities' sub-directories. Created if missing.
    seed : int, optional
        Seed used for both the spring layout and the Louvain partition so
        that figures and community CSVs are reproducible across runs.

    Returns
    -------
    None
        All results are written to disk; progress is printed to stdout.
    """
    # Create output directories
    os.makedirs(output_dir, exist_ok=True)
    output_hubs_dir = os.path.join(output_dir, "hubs")
    os.makedirs(output_hubs_dir, exist_ok=True)
    output_communities_dir = os.path.join(output_dir, "communities")
    os.makedirs(output_communities_dir, exist_ok=True)

    # Time points
    timepoints = bact_in_time["enrich_group"].unique()

    # Sets for fast membership tests (built once, reused for every node)
    all_microbes = set(bact_in_time["Microorganism"].unique())
    all_metabolites = set(metab_path["Metabolite"].unique())
    all_pathways = set(metab_path["Pathway"].unique())

    # Visual style per node type: (color, size)
    microbe_style = ("#66c2a5", 800)
    metabolite_style = ("#fc8d62", 600)
    pathway_style = ("#8da0cb", 700)
    other_style = ("lightgray", 400)

    for t in timepoints:
        print(f"\nProcessing timepoint: {t}")
        # Bacteria present at this time
        bacteria_t = bact_in_time[bact_in_time["enrich_group"] == t]
        microbes_t = set(bacteria_t["Microorganism"])

        # Merge using only microbes at this time
        micro_met_t = mo_metab_b.merge(bacteria_t, on="Microorganism", how="inner")
        print(f"  micro-metabolite rows after merge: {len(micro_met_t)}")

        metab_path_t = metab_path.merge(micro_met_t, on="Metabolite", how="inner")
        path_mo_b_t = path_mo_b.merge(bacteria_t, on="Microorganism", how="inner")

        # Build graph and attach sign and weight attributes to edges
        G = nx.Graph()
        _add_signed_edges(G, micro_met_t, "Microorganism", "Metabolite", "Correlation")
        _add_signed_edges(G, metab_path_t, "Metabolite", "Pathway", "Correlation_y")
        _add_signed_edges(G, path_mo_b_t, "Microorganism", "Pathway", "Correlation")

        if G.number_of_nodes() == 0:
            print(f"  -> Graph empty for time {t}, skipping save.")
            continue

        # Layout (fixed seed for reproducibility)
        pos = nx.spring_layout(G, seed=seed)

        # Node colors and sizes by type; microbes are tested first so that a
        # name shared with a metabolite/pathway is drawn as a microbe.
        node_colors = []
        node_sizes = []
        for n in G.nodes():
            if n in microbes_t:
                color, size = microbe_style
            elif n in all_metabolites:
                color, size = metabolite_style
            elif n in all_pathways:
                color, size = pathway_style
            else:
                color, size = other_style
            node_colors.append(color)
            node_sizes.append(size)

        # Edge colors and widths based on sign and weight
        edge_colors = []
        edge_widths = []
        for _, _, d in G.edges(data=True):
            sign = d.get("sign", 0)
            edge_colors.append("red" if sign > 0 else ("blue" if sign < 0 else "gray"))
            w = d.get("weight", 1.0)
            edge_widths.append(max(0.5, min(5.0, w * 2)))  # clamp widths for visibility

        # Draw figure; the figure is always closed, even if saving fails,
        # so a failing time point does not leak memory in the loop.
        fig, ax = plt.subplots(figsize=(12, 9))
        try:
            ax.set_title(f"Microbe–Metabolite–Pathway Network at Time {t}", fontsize=14)

            nx.draw_networkx_nodes(G, pos, node_color=node_colors,
                                   node_size=node_sizes, ax=ax)
            nx.draw_networkx_edges(G, pos, edge_color=edge_colors,
                                   width=edge_widths, ax=ax)
            nx.draw_networkx_labels(G, pos, font_size=8, ax=ax)

            # Legend
            legend_elements = [
                mpatches.Patch(color=microbe_style[0], label="Microorganisms"),
                mpatches.Patch(color=metabolite_style[0], label="Metabolites"),
                mpatches.Patch(color=pathway_style[0], label="Pathways"),
                mpatches.Patch(color="red", label="Positive correlation"),
                mpatches.Patch(color="blue", label="Negative correlation"),
                mpatches.Patch(color="gray", label="Zero/unknown correlation"),
            ]
            ax.legend(handles=legend_elements, loc="upper right",
                      bbox_to_anchor=(1.15, 1.0))
            ax.axis("off")

            # Save figure (PNG and PDF)
            filename_png = os.path.join(output_dir, f"network_time_{t}.png")
            filename_pdf = os.path.join(output_dir, f"network_time_{t}.pdf")
            fig.savefig(filename_png, dpi=300, bbox_inches="tight")
            fig.savefig(filename_pdf, dpi=300, bbox_inches="tight")
        finally:
            plt.close(fig)
        print(f"  -> Saved: {filename_png} and {filename_pdf}")

        # --- Hub detection (degree centrality) ---
        degree_centrality = nx.degree_centrality(G)

        # A node may appear in more than one table if its name is shared
        # between node types; the three filters are independent.
        microbe_hubs = {
            n: c for n, c in degree_centrality.items() if n in all_microbes
        }
        metabolite_hubs = {
            n: c for n, c in degree_centrality.items() if n in all_metabolites
        }
        pathway_hubs = {
            n: c for n, c in degree_centrality.items() if n in all_pathways
        }

        _save_hubs(microbe_hubs, "Microorganism", output_hubs_dir, t)
        _save_hubs(metabolite_hubs, "Metabolite", output_hubs_dir, t)
        _save_hubs(pathway_hubs, "Pathway", output_hubs_dir, t)

        # --- Community detection (Louvain) ---
        # Louvain is randomized; pin the seed so the CSV is reproducible.
        partition = community_louvain.best_partition(G, random_state=seed)
        community_df = pd.DataFrame(list(partition.items()),
                                    columns=["Node", "Community"])
        community_df.to_csv(
            os.path.join(output_communities_dir,
                         f"communities_time_{t}.csv"),
            index=False
        )


if __name__ == "__main__":
    # Usage reminder printed when this cell/script is run directly.
    # The pieces are joined with newlines; the trailing empty string keeps
    # the blank line that follows the example call.
    print(
        "This script defines the function 'build_bacteria_networks'.",
        "Import it into your analysis notebook and call:",
        "",
        "    build_bacteria_networks(bact_in_time, mo_metab_b, metab_path, path_mo_b)",
        "",
        sep="\n",
    )


This script defines the function 'build_bacteria_networks'.
Import it into your analysis notebook and call:

    build_bacteria_networks(bact_in_time, mo_metab_b, metab_path, path_mo_b)



In [14]:
# Build, draw and export the network for every time point. The four input
# DataFrames must already be defined in the kernel before this cell runs.
build_bacteria_networks(
    bact_in_time,
    mo_metab_b,
    metab_path,
    path_mo_b,
    output_dir="networks_bacteria",
)


Processing timepoint: T0
  micro-metabolite rows after merge: 31
  -> Saved: networks_bacteria/network_time_T0.png and networks_bacteria/network_time_T0.pdf

Processing timepoint: T1
  micro-metabolite rows after merge: 9
  -> Saved: networks_bacteria/network_time_T1.png and networks_bacteria/network_time_T1.pdf

Processing timepoint: T2
  micro-metabolite rows after merge: 0
  -> Saved: networks_bacteria/network_time_T2.png and networks_bacteria/network_time_T2.pdf

Processing timepoint: T3
  micro-metabolite rows after merge: 0
  -> Saved: networks_bacteria/network_time_T3.png and networks_bacteria/network_time_T3.pdf

Processing timepoint: T5
  micro-metabolite rows after merge: 0
  -> Graph empty for time T5, skipping save.
