# In [88]:
#IMPORTS
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import re

#GRAPH OBJECT CREATION FOR SUMMARY STATS
#
# Builds an undirected graph G with one node per ODSI (node_type="ODSI") and
# one node per listed partner (node_type="partner"), joined by an edge for
# every ODSI/partner pairing found in the CSV.

# Read CSV file
data = pd.read_csv(path) #Replace with your own path

# Create an empty graph
G = nx.Graph()

# Captures the text up to the end of the first parentheses-marked abbreviation.
# Compiled once here rather than being looked up for every partner entry.
partner_abbrev_pattern = re.compile(r'(.*?\(.*?\))')

# Iterate through the data and add nodes and edges to the graph
for index, row in data.iterrows():
    odsi = row["ODSI"]
    has_partners = row["Partners"]
    list_of_partners = row["List of Partners"]
    focal_area = row["Focal Area"].lower()  # Get the focal area

    # Add ODSI node to the graph
    G.add_node(odsi, node_type="ODSI", highlight=False, focal_area=focal_area)

    # Blank CSV cells are presumably loaded as non-string missing values
    # (TODO confirm against the data), which would make .lower()/.split() raise.
    # Only rows where both partner columns hold text can contribute partners.
    if not (isinstance(has_partners, str) and isinstance(list_of_partners, str)):
        continue

    if has_partners.lower() == "yes":
        partners = list_of_partners.split(";")
        for partner in partners:
            partner = partner.strip()
            if not partner:
                # A trailing or doubled ";" produces an empty entry; adding it
                # would create a bogus "" partner node and inflate the counts.
                continue
            # Extract the text up to the end of the first parentheses-marked abbreviation
            match = partner_abbrev_pattern.search(partner)
            if match:
                partner = match.group(1)
            # Add partner node to the graph.
            # NOTE(review): re-adding an existing node updates its attributes, so a
            # partner listed by several ODSIs ends up with the focal area of the
            # last row that listed it.
            G.add_node(partner, node_type="partner", highlight=False, focal_area=focal_area)

            # Add edge between ODSI and partner
            G.add_edge(odsi, partner, highlight=False)

import pandas as pd
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt

# NUMERICAL ANALYSIS PROGRAMS

# Partners with multiple ODSIs
#
# For every partner node, count the edges whose other endpoint has the focal
# area 'fisheries extraction', and keep the partners with more than one such
# edge. The result is a DataFrame sorted from most to fewest edges.

partner_edge_count = []

for partner_name, partner_attrs in G.nodes(data=True):
    # Guard clause: only partner nodes are of interest here
    if partner_attrs["node_type"] != "partner":
        continue

    # Each edge is reported as (partner_name, neighbour); test the neighbour's focal area
    matching_edge_total = sum(
        1
        for _, neighbour in G.edges(partner_name)
        if G.nodes[neighbour]["focal_area"] == 'fisheries extraction'
    )

    if matching_edge_total > 1:
        partner_edge_count.append((partner_name, matching_edge_total))

df = pd.DataFrame(
    partner_edge_count,
    columns=["Partner", "Number of Edges"],
).sort_values(by="Number of Edges", ascending=False)

#df.to_csv(path) #Destination path for shared partner edges, change your path

# Summary statistics for number of partners
#
# Collects the edge count of every ODSI node in the chosen focal area, prints
# the mean / median / standard deviation of those counts and plots a histogram.

# Focal area to summarise. It drives both the node filter and the plot title
# so the two cannot drift apart. Replace with your focal area.
target_focal_area = 'biodiversity conservation'

odsi_edge_count = []
for node, attr in G.nodes(data=True):
    if attr["node_type"] == "ODSI" and attr["focal_area"] == target_focal_area:
        num_edges = len(list(G.edges(node)))
        odsi_edge_count.append((node, num_edges))

df = pd.DataFrame(odsi_edge_count, columns=["ODSI", "Partner Count"])

# Calculate mean, median, and standard deviation of number of partners for each ODSI
if df.empty:
    # No ODSI matched the focal area: report NaN instead of computing
    # statistics over an empty column.
    mean_partner_count = median_partner_count = std_partner_count = float("nan")
else:
    mean_partner_count = np.mean(df["Partner Count"])
    median_partner_count = np.median(df["Partner Count"])
    # np.std is called with its default settings (population standard deviation, ddof=0)
    std_partner_count = np.std(df["Partner Count"])

print(f"Mean partner count: {mean_partner_count}")
print(f"Median partner count: {median_partner_count}")
print(f"Standard deviation of partner count: {std_partner_count}")

# Plot a histogram of partner counts for ODSIs using matplotlib
if df.empty:
    # max() of an empty column is not an integer, so the bin range cannot be built
    print(f"No ODSIs found for focal area '{target_focal_area}'; skipping histogram")
else:
    max_partner_count = int(df["Partner Count"].max())
    # One unit-wide bin per integer count from 0 to the maximum, bars centred on the integer
    plt.hist(df["Partner Count"], bins=range(0, max_partner_count + 2, 1), align="left", rwidth=0.8, color='red')
    plt.xlabel("Number of partners")
    plt.ylabel("Frequency")
    plt.title(f"Histogram of partner counts for {target_focal_area.title()} ODSIs")
    plt.show()

