This script summarizes and visualizes the predictive performance of missing-link prediction across the food web database.

Requires the following results folders: 'Results_Food_Webs_0', 'Results_Food_Webs_1', 'Results_Food_Webs_2', 'Results_Food_Webs_3', 'Results_Food_Webs_4', 'Results_Food_Webs_Aggregated'

Note: this was run with Python 3.12.4, numpy 1.26.4, matplotlib 3.8.4, scipy 1.13.1, and pandas 2.2.2.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import csv
import os
import scipy.stats
import pickle
import summarize_results_food_webs
import string
import pandas as pd
# Font size handed to the plotting helper and used for the panel letters in the figures below
FONT_SIZE = 17

import warnings
# Suppress every warning for the rest of the session
warnings.filterwarnings('ignore')

In [2]:
from matplotlib import font_manager

# Register every font file found under the local Helvetica folder, then make it the default family
font_dir = ['../../Helvetica']
for font_path in font_manager.findSystemFonts(font_dir):
    font_manager.fontManager.addfont(font_path)
plt.rcParams['font.family'] = 'Helvetica'

# Maps the short food web name (in processed data) to the long name (in results files).
# The spacing inside the long names (double spaces, trailing spaces) is kept exactly as is.
folder_shorter_names = {
    'Grand Caricaie Clmown1': 'Grand Caricaie  marsh dominated by Cladietum marisci, mown  Clmown1',
    'Grand Caricaie Clmown2': 'Grand Caricaie  marsh dominated by Cladietum marisci, mown  Clmown2',
    'Grand Caricaie ClControl1': 'Grand Caricaie  marsh dominated by Cladietum marisci, not mown  ClControl1',
    'Grand Caricaie ClControl2': 'Grand Caricaie  marsh dominated by Cladietum marisci, not mown  ClControl2',
    'Grand Caricaie Scmown1': 'Grand Caricaie  marsh dominated by Schoenus nigricans, mown  Scmown1 ',
    'Grand Caricaie Scmown2': 'Grand Caricaie  marsh dominated by Schoenus nigricans, mown  Scmown2 ',
    'Grand Caricaie ScControl1': 'Grand Caricaie  marsh dominated by Schoenus nigricans, not mown  ScControl1 ',
    'Grand Caricaie ScControl2': 'Grand Caricaie  marsh dominated by Schoenus nigricans, not mown  ScControl2 ',
}

In [3]:
# Folders used throughout the notebook
Data_Folder = 'Processed_Data_Disaggregated_Lifestage'
Processing_Folder = 'Data_Processing_Code_Disaggregated_Lifestage'
Figure_Folder = 'Figures'
# These results correspond to those on Processed_Data_Disaggregated_Lifestage, Processed_Data_Disaggregated_Lifestage_1,
# Processed_Data_Disaggregated_Lifestage_2, Processed_Data_Disaggregated_Lifestage_3, Processed_Data_Disaggregated_Lifestage_4
Results_Folders = [
    'Results_Food_Webs_0',
    'Results_Food_Webs_1',
    'Results_Food_Webs_2',
    'Results_Food_Webs_3',
    'Results_Food_Webs_4',
]

num_it = 5
num_webs = 290

# Colors passed to the plotting helper
full_color, struc_color, attr_color = 'chocolate', 'olivedrab', 'cadetblue'

# Create the figure output folder on first use
if not os.path.exists(Figure_Folder):
    os.mkdir(Figure_Folder)

# Check we got through all of the results.
# NOTE: ids_to_skip is rebound on every pass, so once the loop ends it holds
# only the value returned for the last folder in Results_Folders.
for res_folder in Results_Folders:
    print(res_folder)
    ids_to_skip = summarize_results_food_webs.check_all_results(Data_Folder, res_folder)

Results_Food_Webs_0
count fully missing auc 0
count missing some auc results 0
count fully missing avp 0
count missing some avp results 0
food web ids to still run: []
Results_Food_Webs_1
count fully missing auc 0
count missing some auc results 0
count fully missing avp 0
count missing some avp results 0
food web ids to still run: []
Results_Food_Webs_2
count fully missing auc 0
count missing some auc results 0
count fully missing avp 0
count missing some avp results 0
food web ids to still run: []
Results_Food_Webs_3
count fully missing auc 0
count missing some auc results 0
count fully missing avp 0
count missing some avp results 0
food web ids to still run: []
Results_Food_Webs_4
count fully missing auc 0
count missing some auc results 0
count fully missing avp 0
count missing some avp results 0
food web ids to still run: []


In [4]:
# Produce the summary for each disaggregated results folder.
# The skip list is obtained for the folder being processed instead of reusing a
# module-level `ids_to_skip`, which after a per-folder check loop only reflects
# the last folder that was checked. This mirrors how the aggregated results are
# handled (check one folder, then summarize that same folder).
# NOTE(review): check_all_results prints its report each time it is called - confirm that is acceptable here.
for res_folder in Results_Folders:
    folder_ids_to_skip = summarize_results_food_webs.check_all_results(Data_Folder, res_folder)
    summarize_results_food_webs.food_web_result_to_file(
        Data_Folder, res_folder, num_it, folder_ids_to_skip, folder_shorter_names
    )

FileNotFoundError: [Errno 2] No such file or directory: 'Results_Food_Webs_0\\stacking_auc_Grand Caricaie ScControl1 _6.csv'

In [None]:
# Overall results over all food webs: one helper call for 'auc'/'ROC' and one for 'avp'/'PR',
# each given its own pair of subplot slots in the 2x2 grid
fig, ha = plt.subplots(2, 2, figsize=(15,10))
num_it = 25

overall_panels = (
    ('auc', 'ROC', [(2,2,1),(2,2,3)]),
    ('avp', 'PR', [(2,2,2),(2,2,4)]),
)
for metric, curve, sp in overall_panels:
    summarize_results_food_webs.food_web_result_plots(
        Data_Folder, Results_Folders, [], 'Overall Average Performance',
        metric, curve, num_it, True, True, sp, False, num_webs,
        folder_shorter_names, FONT_SIZE, full_color, struc_color, attr_color,
    )

# Letter each panel (a, b, c, ...) just outside its top-left corner
for letter, ax in zip(string.ascii_lowercase, ha.flat):
    ax.text(-0.1, 1.1, letter, transform=ax.transAxes, size=FONT_SIZE, fontweight='bold')

plt.tight_layout()
plt.savefig(f'{Figure_Folder}/Food_Web_Database_Overall.pdf',dpi=1000,bbox_inches='tight')
plt.show()

In [None]:
# ROC-AUC results, one panel per ecosystem type
fig, ha = plt.subplots(3, 2, figsize=(15,15))
num_it = 25

with open(os.path.join(Processing_Folder,'fw_metadata.pickle'), 'rb') as handle:
    fw_metadata = pickle.load(handle)

eco_types = ['lakes', 'marine','streams','terrestrial aboveground','terrestrial belowground']
for i, eco_type in enumerate(eco_types, start=1):
    # Skip every food web whose ecosystem type is not the current one;
    # dict.fromkeys drops repeated ids while keeping first-seen order.
    ids_to_skip = list(dict.fromkeys(
        str(fw_metadata[fw]['fw_id'])
        for fw in fw_metadata
        if fw_metadata[fw]['ecosystem.type'] != eco_type
    ))
    num_skip = len(ids_to_skip)
    print(f"Ecosystem Type: {eco_type}")
    # Panel title: ecosystem type plus the number of food webs that are not skipped
    panel_title = f'{eco_type} ({num_webs-num_skip})'.title()
    summarize_results_food_webs.food_web_result_plots(
        Data_Folder, Results_Folders, ids_to_skip, panel_title,
        'auc', 'ROC', num_it, False, True, [(3,2,i)], True, num_webs,
        folder_shorter_names, FONT_SIZE, full_color, struc_color, attr_color,
    )

# Five ecosystem types on a 3x2 grid: hide the unused last panel and letter the rest
ha[-1,-1].axis('off')
for letter, ax in zip(string.ascii_lowercase, ha.flat[0:5]):
    ax.text(-0.1, 1.1, letter, transform=ax.transAxes, size=FONT_SIZE, weight='bold')
plt.tight_layout()
plt.savefig(f'{Figure_Folder}/Food_Web_ROC_AUC_Ecotype.pdf',dpi=1000,bbox_inches='tight')
plt.show()

In [None]:
# PR-AUC results, one panel per ecosystem type
fig, ha = plt.subplots(3, 2, figsize=(15,15))
num_it = 25

with open(os.path.join(Processing_Folder,'fw_metadata.pickle'), 'rb') as handle:
    fw_metadata = pickle.load(handle)

eco_types = ['lakes', 'marine','streams','terrestrial aboveground','terrestrial belowground']
for i, eco_type in enumerate(eco_types, start=1):
    # Skip every food web whose ecosystem type is not the current one;
    # dict.fromkeys drops repeated ids while keeping first-seen order.
    ids_to_skip = list(dict.fromkeys(
        str(fw_metadata[fw]['fw_id'])
        for fw in fw_metadata
        if fw_metadata[fw]['ecosystem.type'] != eco_type
    ))
    num_skip = len(ids_to_skip)
    print(f"Ecosystem Type: {eco_type}")
    # Panel title: ecosystem type plus the number of food webs that are not skipped
    panel_title = f'{eco_type} ({num_webs-num_skip})'.title()
    summarize_results_food_webs.food_web_result_plots(
        Data_Folder, Results_Folders, ids_to_skip, panel_title,
        'avp', 'PR', num_it, False, True, [(3,2,i)], True, num_webs,
        folder_shorter_names, FONT_SIZE, full_color, struc_color, attr_color,
    )

# Five ecosystem types on a 3x2 grid: hide the unused last panel and letter the rest
ha[-1,-1].axis('off')
for letter, ax in zip(string.ascii_lowercase, ha.flat[0:5]):
    ax.text(-0.1, 1.1, letter, transform=ax.transAxes, size=FONT_SIZE, weight='bold')
plt.tight_layout()
plt.savefig(f'{Figure_Folder}/Food_Web_PR_AUC_Ecotype.pdf',dpi=1000,bbox_inches='tight')
plt.show()

In [None]:
# Results for the data aggregated by life stage: run the results check, then the
# result-to-file step, on the single aggregated results folder
num_it, num_webs = 5, 290
Data_Folder_Aggregated = 'Processed_Data_Aggregated_Lifestage'
Results_Folders_Aggregated = ['Results_Food_Webs_Aggregated']

aggregated_results_folder = Results_Folders_Aggregated[0]
ids_to_skip = summarize_results_food_webs.check_all_results(
    Data_Folder_Aggregated, aggregated_results_folder
)
summarize_results_food_webs.food_web_result_to_file(
    Data_Folder_Aggregated, aggregated_results_folder, num_it, ids_to_skip, folder_shorter_names
)

In [None]:
# Results for the 25 food webs that change with aggregation
fig, ha = plt.subplots(2, 2, figsize=(15,10))
to_viz = [
    'Grand Caricaie  marsh dominated by Cladietum marisci, mown  Clmown1',
    'Grand Caricaie  marsh dominated by Cladietum marisci, mown  Clmown2',
    'Grand Caricaie  marsh dominated by Cladietum marisci, not mown  ClControl1',
    'Grand Caricaie  marsh dominated by Cladietum marisci, not mown  ClControl2',
    'Grand Caricaie  marsh dominated by Schoenus nigricans, mown  Scmown1 ',
    'Grand Caricaie  marsh dominated by Schoenus nigricans, mown  Scmown2 ',
    'Grand Caricaie  marsh dominated by Schoenus nigricans, not mown  ScControl1 ',
    'Grand Caricaie  marsh dominated by Schoenus nigricans, not mown  ScControl2 ',
    'Chesapeake Bay',
    'Carpinteria',
    'Hardknott Gill',
    'Mill Stream',
    'Gearagh',
    'Dutch Microfauna food web PlotA',
    'Dutch Microfauna food web PlotB',
    'Dutch Microfauna food web PlotC',
    'Broad Stream',
    'Canton Creek',
    'Dempsters Stream',
    'German Creek',
    'Healy Creek',
    'Kye Burn',
    'Little Kye Burn',
    'Stony Stream',
    'Skipwith Pond',
]

# Look the names up in both id tables. The folder variables are used (rather than
# repeating the folder names) so these lookups always match the data passed to the plots below.
with open(os.path.join(Data_Folder,'fw_ids.pickle'),'rb') as f:
    fw_ids_dis = pickle.load(f)
to_viz_ids_dis = [fw_ids_dis[fw_name] for fw_name in to_viz]
with open(os.path.join(Data_Folder_Aggregated,'fw_ids.pickle'),'rb') as f:
    fw_ids_agg = pickle.load(f)
to_viz_ids_agg = [fw_ids_agg[fw_name] for fw_name in to_viz]
# One skip list is shared by both datasets, so the ids must agree between them
assert to_viz_ids_dis == to_viz_ids_agg, "not same ids"

# Skip every food web id (as a string) that is not one of the selected webs;
# the id range follows num_webs instead of a repeated literal.
keep_ids = set(to_viz_ids_dis)
to_skip = [str(i) for i in range(num_webs) if i not in keep_ids]

print(to_skip)

# One panel per (dataset, metric), all filtered down to the selected webs:
# disaggregated ROC / PR first, then aggregated ROC / PR.
# Each entry: data folder, results folders, title, metric, curve, iteration count, subplot slot.
panels = (
    (Data_Folder, Results_Folders, 'Disaggregated', 'auc', 'ROC', 25, (2,2,1)),
    (Data_Folder, Results_Folders, 'Disaggregated', 'avp', 'PR', 25, (2,2,2)),
    (Data_Folder_Aggregated, Results_Folders_Aggregated, 'Aggregated', 'auc', 'ROC', 5, (2,2,3)),
    (Data_Folder_Aggregated, Results_Folders_Aggregated, 'Aggregated', 'avp', 'PR', 5, (2,2,4)),
)
for data_folder, results_folders, title, metric, curve, n_iter, slot in panels:
    summarize_results_food_webs.food_web_result_plots(
        data_folder, results_folders, to_skip, title,
        metric, curve, n_iter, True, True, [slot], True, num_webs,
        folder_shorter_names, FONT_SIZE, full_color, struc_color, attr_color,
    )

# Letter each panel (a, b, c, ...) just outside its top-left corner
for n, ax in enumerate(ha.flat):
    ax.text(-0.1, 1.1, string.ascii_lowercase[n], transform=ax.transAxes, size=FONT_SIZE, weight='bold')
plt.tight_layout()
plt.savefig(f'{Figure_Folder}/Food_Web_Supplemental_Aggregated.pdf',dpi=1000,bbox_inches='tight')
plt.show()

In [None]:
# Combine the per-folder summary files into one table, then average every column per net_id
summary_df = pd.concat(
    [pd.read_csv(os.path.join(res, f'food_web_lp_res_{res}.csv')) for res in Results_Folders]
)
grouped_df = summary_df.groupby('net_id').mean()

# DataFrame.to_csv does not create missing directories, so make sure the output folder exists first
summary_folder = 'Summarized_Results'
os.makedirs(summary_folder, exist_ok=True)
grouped_df.to_csv(os.path.join(summary_folder, 'food_web_lp_res.csv'))