In [1]:
from pathways.scenarios import run_scenarios, load_scenario_table
from pathways.simulation import load_configuration
from pathways.outputs import save_scenario_result_to_pandas

In [2]:
import pandas as pd
import re
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np

%matplotlib inline

In [3]:
from pathlib import Path
# Directory that holds the configuration file and the scenario tables.
datadir = Path("tests/test_summaries")

# Base configuration passed to every run_scenarios() call in this notebook.
basic_config = load_configuration(datadir.joinpath("config.yml"))

# One scenario table per experiment.
rate_scenario_table = load_scenario_table(
    datadir.joinpath("contamination_rate_estimation.csv")
)
inspection_scenario_table = load_scenario_table(
    datadir.joinpath("inspection_scenarios.csv")
)
consignment_scenario_table = load_scenario_table(
    datadir.joinpath("consignment_scenarios.csv")
)

In [4]:
# Positional split of the rate-estimation scenarios:
# entries 0-3 first, then entries 4-7 (the slice is clipped if fewer exist).
fitted_contamination_rate = rate_scenario_table[:4]
adjusted_contamination_rate = rate_scenario_table[4:8]

In [5]:
# Passed to run_scenarios() as num_consignments and reused later as the
# denominator of the "consignment failure rate" column.
num_consignments = 6082

# Run every scenario in the contamination-rate table with a fixed seed.
fitted_contamination_rate_results = run_scenarios(
    scenario_table=rate_scenario_table,
    config=basic_config,
    num_consignments=num_consignments,
    num_simulations=5,
    seed=42,
    detailed=False,
)

]
20
[1]
[7]
[14]
13
[1]
[0]
[0]
369
[1]
[121]
[242]
36
[1]
[13]
[26]
5
[1]
[1]
[2]
5
[1]
[1]
[2]
1
[1]
[0]
[0]
65
[1]
[8]
[16]
1
[1]
[0]
[0]
1050
[2]
[197]
[394 395]
1
[1]
[0]
[0]
5
[1]
[0]
[0]
42
[1]
[14]
[28]
16
[1]
[6]
[12]
1
[1]
[0]
[0]
5
[1]
[1]
[2]
5
[1]
[1]
[2]
6
[1]
[0]
[0]
21
[1]
[9]
[18]
1
[1]
[0]
[0]
20
[1]
[0]
[0]
4
[1]
[0]
[0]
24
[1]
[0]
[0]
4
[1]
[0]
[0]
7
[1]
[2]
[4]
8
[1]
[2]
[4]
13
[1]
[1]
[2]
12
[1]
[2]
[4]
45
[1]
[3]
[6]
5
[1]
[0]
[0]
2388
[1]
[228]
[456]
68
[1]
[24]
[48]
14
[1]
[1]
[2]
5
[1]
[0]
[0]
15
[1]
[2]
[4]
7
[1]
[1]
[2]
31
[1]
[7]
[14]
115
[1]
[34]
[68]
70
[1]
[7]
[14]
12
[1]
[0]
[0]
3
[1]
[0]
[0]
30
[1]
[8]
[16]
1053
[1]
[165]
[330]
775
[1]
[333]
[666]
29
[1]
[1]
[2]
25
[1]
[11]
[22]
9
[1]
[3]
[6]
13
[1]
[3]
[6]
1
[1]
[0]
[0]
71
[1]
[3]
[6]
21
[1]
[1]
[2]
17
[1]
[5]
[10]
1
[1]
[0]
[0]
6
[1]
[0]
[0]
189
[1]
[76]
[152]
7
[1]
[2]
[4]
18
[1]
[3]
[6]
27
[1]
[3]
[6]
51
[1]
[8]
[16]
6
[1]
[1]
[2]
30
[2]
[0]
[0 1]
60
[1]
[18]
[36]
21
[1]
[5]
[10]
216
[1]
[68]
[136

In [6]:
# Scenario (configuration) fields to carry into the summary table.
fitted_config_columns = [
    "name",
    "consignment name",
    "inspection name",
    "contamination/contamination_rate/parameters",
    "contamination/arrangement",
    "contamination/clustered/distribution",
    "contamination/clustered/max_contaminated_units_per_cluster",
]

# Simulation result fields to carry into the summary table.
fitted_result_columns = [
    "true_contamination_rate",
    "false_neg",
    "intercepted",
    "total_missed_contaminants",
    "total_intercepted_contaminants",
    "avg_boxes_opened_completion",
]

# One row per scenario: configuration columns first, then result columns.
df_fitted = save_scenario_result_to_pandas(
    fitted_contamination_rate_results,
    config_columns=fitted_config_columns,
    result_columns=fitted_result_columns,
)

In [7]:
# Share of all consignments that were intercepted by the inspection.
df_fitted["consignment failure rate"] = df_fitted["intercepted"].div(num_consignments)
df_fitted

Unnamed: 0,name,consignment name,inspection name,contamination/contamination_rate/parameters,contamination/arrangement,contamination/clustered/distribution,contamination/clustered/max_contaminated_units_per_cluster,true_contamination_rate,false_neg,intercepted,total_missed_contaminants,total_intercepted_contaminants,avg_boxes_opened_completion,consignment failure rate
0,contamination rate 1,random mean 0.003 sd 0.034,box hypergeometric 0.1 random,"[0.0078, 2.5796]",random,,,0.002922,96.0,254.0,37998.8,444266.8,13.894443,0.041763
1,contamination rate 2,clustered mean 0.003 sd 0.034,box hypergeometric 0.1 random,"[0.0078, 2.5796]",clustered,random,2.0,0.003002,83.6,257.0,39569.0,591384.6,13.894443,0.042256
2,contamination rate 3,random mean 0.0004 sd 0.003,box hypergeometric 0.1 random,"[0.01777, 44.4089]",random,,,0.000383,181.4,283.2,43835.8,27689.0,13.894443,0.046564
3,contamination rate 4,clustered mean 0.0004 sd 0.003,box hypergeometric 0.1 random,"[0.01777, 44.4089]",clustered,random,2.0,0.000381,180.2,294.2,71576.4,22075.0,13.894443,0.048372
4,contamination rate 3,random mean 0.0018 sd 0.0199,box hypergeometric 0.1 random,"[0.0082, 4.529]",random,,,0.001849,92.0,233.8,46717.0,252766.4,13.894443,0.038441
5,contamination rate 4,clustered mean 0.0018 sd 0.0199,box hypergeometric 0.1 random,"[0.0082, 4.529]",clustered,random,2.0,0.001658,98.8,230.8,48398.0,322053.2,13.894443,0.037948


In [8]:
# Build a presentation table from the fitted contamination-rate results.
column_names = [
    "consignment name",
    "inspection name",
    "beta parameters",
    "contaminant arrangement",
    "cluster distribution",
    "infested boxes per cluster",
    "simulated contamination rate (mean)",
    "consignment failure rate",
]

# Positions 1-7 are the scenario description columns plus the simulated
# contamination rate; position 13 is the consignment failure rate.
# The explicit copy makes this an independent frame, so the edits below
# neither touch df_fitted nor raise SettingWithCopyWarning.
df_contamination_pretty = df_fitted.iloc[:, [1, 2, 3, 4, 5, 6, 7, 13]].copy()
df_contamination_pretty.columns = column_names

# Round the two rate columns for display, addressing them by name rather
# than by position.
rounded_columns = ["simulated contamination rate (mean)", "consignment failure rate"]
df_contamination_pretty[rounded_columns] = df_contamination_pretty[
    rounded_columns
].round(decimals=4)
df_contamination_pretty

Unnamed: 0,consignment name,inspection name,beta parameters,contaminant arrangement,cluster distribution,infested boxes per cluster,simulated contamination rate (mean),consignment failure rate
0,random mean 0.003 sd 0.034,box hypergeometric 0.1 random,"[0.0078, 2.5796]",random,,,0.0029,0.0418
1,clustered mean 0.003 sd 0.034,box hypergeometric 0.1 random,"[0.0078, 2.5796]",clustered,random,2.0,0.003,0.0423
2,random mean 0.0004 sd 0.003,box hypergeometric 0.1 random,"[0.01777, 44.4089]",random,,,0.0004,0.0466
3,clustered mean 0.0004 sd 0.003,box hypergeometric 0.1 random,"[0.01777, 44.4089]",clustered,random,2.0,0.0004,0.0484
4,random mean 0.0018 sd 0.0199,box hypergeometric 0.1 random,"[0.0082, 4.529]",random,,,0.0018,0.0384
5,clustered mean 0.0018 sd 0.0199,box hypergeometric 0.1 random,"[0.0082, 4.529]",clustered,random,2.0,0.0017,0.0379


In [None]:
# Passed to run_scenarios() as num_consignments and reused later as the
# denominator of the "consignment failure rate" column.
num_consignments = 6082

# Run every scenario in the inspection table with a fixed seed.
inspection_scenario_results = run_scenarios(
    scenario_table=inspection_scenario_table,
    config=basic_config,
    num_consignments=num_consignments,
    num_simulations=5,
    seed=42,
    detailed=False,
)

In [None]:
# Inspection settings of each scenario to carry into the summary table.
inspection_config_columns = [
    "name",
    "inspection/unit",
    "inspection/sample_strategy",
    "inspection/proportion/value",
    "inspection/hypergeometric/detection_level",
    "inspection/selection_strategy",
    "inspection/cluster/cluster_selection",
]

# Simulation result fields to carry into the summary table.
inspection_result_columns = [
    "true_contamination_rate",
    "max_missed_contamination_rate",
    "avg_missed_contamination_rate",
    "max_intercepted_contamination_rate",
    "avg_intercepted_contamination_rate",
    "avg_boxes_opened_completion",
    "avg_boxes_opened_detection",
    "avg_items_inspected_completion",
    "avg_items_inspected_detection",
    "false_neg",
    "intercepted",
    "total_missed_contaminants",
    "total_intercepted_contaminants",
]

# One row per scenario: configuration columns first, then result columns.
df_inspections = save_scenario_result_to_pandas(
    inspection_scenario_results,
    config_columns=inspection_config_columns,
    result_columns=inspection_result_columns,
)

In [None]:
# Consignments that carried contamination: missed ones plus intercepted ones.
contaminated_consignments = df_inspections["false_neg"].add(df_inspections["intercepted"])

# Intercepted consignments as a share of all consignments ...
df_inspections["consignment failure rate"] = df_inspections["intercepted"].div(
    num_consignments
)
# ... and as a share of the contaminated consignments only.
df_inspections["inspection success rate"] = df_inspections["intercepted"].div(
    contaminated_consignments
)
df_inspections

In [None]:
# Display names for the formatted inspection table, in final column order.
column_names = [
    # inspection settings
    "name",
    "inspection unit",
    "sample strategy",
    "sample parameter",
    "selection strategy",
    "cluster selection",
    # contamination rates
    "avg contamination rate",
    "max missed contamination rate",
    "avg missed contamination rate",
    "max intercepted contamination rate",
    "avg intercepted contamination rate",
    # inspection effort and outcome counts
    "boxes opened completion",
    "boxes opened detection",
    "items inspected completion",
    "items inspected detection",
    "missed",
    "intercepted",
    "missed contaminants",
    "intercepted contamininants",
    # derived rates
    "consignment failure rate",
    "inspection success rate",
    # combined labels used by the plots
    "sample size method",
    "selection method",
]


In [None]:

# Presentation copy of the inspection results without the detection-level
# column.  The explicit copy keeps the edits below from writing into (or
# warning about) a slice of df_inspections.
df_inspections_pretty = df_inspections.loc[
    :, df_inspections.columns != "inspection/hypergeometric/detection_level"
].copy()

# Row positions whose sample parameter comes from the hypergeometric
# detection level instead of the proportion value.
# NOTE(review): these positions follow the row order of the scenario table;
# confirm them whenever inspection_scenarios.csv changes.
hypergeometric_rows = [0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15]
hypergeometric_parameters = df_inspections[
    "inspection/hypergeometric/detection_level"
].iloc[hypergeometric_rows]

# Merge both parameters into a single column and store it as text.  The
# column is replaced as a whole (not written through .iloc) so the string
# dtype is actually applied.
sample_parameter = df_inspections_pretty["inspection/proportion/value"].copy()
sample_parameter.iloc[hypergeometric_rows] = hypergeometric_parameters.to_numpy()
df_inspections_pretty["inspection/proportion/value"] = sample_parameter.astype(str)

# Combined text labels used for bar colours and tick labels in the plots.
df_inspections_pretty["sample size method"] = df_inspections_pretty[
    ["inspection/sample_strategy", "inspection/proportion/value"]
].agg(" ".join, axis=1)
df_inspections_pretty["selection method"] = df_inspections_pretty[
    [
        "inspection/unit",
        "inspection/selection_strategy",
        "inspection/cluster/cluster_selection",
    ]
].agg(" ".join, axis=1)

df_inspections_pretty.columns = column_names

# Column groups by position in the renamed frame: contamination rates,
# effort/outcome counts, and the two derived rates.
rate_columns = df_inspections_pretty.columns[6:11]
count_columns = df_inspections_pretty.columns[11:19]
derived_rate_columns = df_inspections_pretty.columns[19:21]

# Whole-column replacement so rounding and the integer cast change the
# stored dtype instead of being coerced back by an in-place .iloc write.
df_inspections_pretty[rate_columns] = df_inspections_pretty[rate_columns].round(decimals=4)
df_inspections_pretty[count_columns] = df_inspections_pretty[count_columns].astype(int)
df_inspections_pretty[derived_rate_columns] = df_inspections_pretty[
    derived_rate_columns
].round(decimals=4)

# Show the first 18 scenarios with the combined label columns moved forward.
df_inspections_pretty.iloc[
    :18, [0, 1, 21, 22, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
]

In [None]:
# Bar colour for each sample size method.
colors = {
    "hypergeometric 0.1": "#1f78b4",
    "hypergeometric 0.05": "#a6cee3",
    "proportion 0.02": "#b2df8a",
}

# Legend handles, one per sample size method, in legend order.
patch_1, patch_2, patch_3 = (
    mpatches.Patch(color=colors[method], label=method)
    for method in ("hypergeometric 0.05", "hypergeometric 0.1", "proportion 0.02")
)

In [None]:
def plot_inspection_panels(frame, panels, output_path, legend_handles=None):
    """Draw two side-by-side horizontal bar charts of inspection scenarios.

    Parameters
    ----------
    frame : pandas.DataFrame
        Formatted inspection results; must provide the "name",
        "sample size method" and "selection method" columns plus every
        value column named in ``panels``.
    panels : sequence of (column, title, xlabel, xticks)
        One entry per panel (left, right).  ``xticks`` is a list of tick
        positions or None to keep the automatic ticks.
    output_path : str
        File the finished figure is saved to.
    legend_handles : list, optional
        Legend handles drawn on the right-hand panel when given.
    """
    # Bars are coloured by sample size method via the notebook-level `colors`.
    bar_colors = frame["sample size method"].replace(colors)
    # One tick per bar; derived from the data instead of a hard-coded count.
    positions = np.arange(len(frame))

    fig, axes = plt.subplots(1, 2, figsize=(18, 9), dpi=150)
    # Wide left margin and gap leave room for the long selection-method labels.
    fig.subplots_adjust(wspace=0.65, left=0.22, right=0.95)

    for ax, (column, title, xlabel, xticks) in zip(axes, panels):
        ax.barh(frame["name"], frame[column], color=bar_colors)
        ax.set_title(title, fontsize=24)
        ax.set_xlabel(xlabel, fontsize=20)
        ax.set_yticks(positions)
        ax.set_yticklabels(frame["selection method"], fontsize=20)
        if xticks is not None:
            ax.set_xticks(xticks)
        ax.tick_params(axis="x", labelsize=18)

    # Only the left panel carries the shared y-axis label.
    axes[0].set_ylabel("inspection method", fontsize=20)
    if legend_handles:
        axes[-1].legend(handles=legend_handles, loc="lower right", fontsize=20)

    fig.savefig(output_path)
    plt.show()


plot_inspection_panels(
    df_inspections_pretty,
    panels=[
        ("inspection success rate", "Inspection Success Rate", "rate", None),
        (
            "missed contaminants",
            "Missed Contaminants",
            "contaminated items",
            [0, 100000, 300000, 500000],
        ),
    ],
    output_path="inspection_scenario_plots_1.png",
)

plot_inspection_panels(
    df_inspections_pretty,
    panels=[
        ("boxes opened completion", "Boxes Opened", "boxes", None),
        (
            "items inspected completion",
            "Items Inspected",
            "items",
            [0, 1000, 2000, 3000, 4000],
        ),
    ],
    output_path="inspection_scenario_plots_2.png",
    legend_handles=[patch_1, patch_2, patch_3],
)



In [None]:
# Passed to run_scenarios() as num_consignments and reused later as the
# denominator of the "consignment failure rate" column.
num_consignments = 6082

# Run the consignment scenarios with a fixed seed; the first ten entries of
# the table are skipped.
consignment_scenario_results = run_scenarios(
    scenario_table=consignment_scenario_table[10:],
    config=basic_config,
    num_consignments=num_consignments,
    num_simulations=5,
    seed=42,
    detailed=False,
)

In [None]:
# Consignment and contamination settings to carry into the summary table.
consignment_config_columns = [
    "name",
    "consignment name",
    "consignment/boxes/min",
    "consignment/boxes/max",
    "consignment/items_per_box/default",
    "contamination/contamination_unit",
    "contamination/contamination_rate/distribution",
    "contamination/contamination_rate/parameters",
    "contamination/arrangement",
    "contamination/clustered/distribution",
    "contamination/clustered/max_contaminated_units_per_cluster",
    "contamination/clustered/random/max_cluster_item_width",
]

# Simulation result fields to carry into the summary table.
consignment_result_columns = [
    "true_contamination_rate",
    "max_missed_contamination_rate",
    "avg_missed_contamination_rate",
    "max_intercepted_contamination_rate",
    "avg_intercepted_contamination_rate",
    "avg_boxes_opened_completion",
    "avg_boxes_opened_detection",
    "avg_items_inspected_completion",
    "avg_items_inspected_detection",
    "false_neg",
    "intercepted",
    "total_missed_contaminants",
    "total_intercepted_contaminants",
]

# One row per scenario: configuration columns first, then result columns.
df_consignments = save_scenario_result_to_pandas(
    consignment_scenario_results,
    config_columns=consignment_config_columns,
    result_columns=consignment_result_columns,
)

In [None]:
# Consignments that carried contamination: missed ones plus intercepted ones.
contaminated_consignments = df_consignments["false_neg"].add(
    df_consignments["intercepted"]
)

# Intercepted consignments as a share of all consignments ...
df_consignments["consignment failure rate"] = df_consignments["intercepted"].div(
    num_consignments
)
# ... and as a share of the contaminated consignments only.
df_consignments["inspection success rate"] = df_consignments["intercepted"].div(
    contaminated_consignments
)
df_consignments["contaminated_consignments"] = contaminated_consignments

In [None]:
# Display names for the formatted consignment table, in final column order.
column_names = [
    # consignment and contamination settings
    "name",
    "consignment name",
    "min boxes",
    "max boxes",
    "items per box",
    "contamination unit",
    "contamination distribution",
    "contamination parameters",
    "contaminant arrangement",
    "cluster distribution",
    "max contaminated units per cluster",
    "cluster width",
    # contamination rates
    "avg contamination rate",
    "max missed contamination rate",
    "avg missed contamination rate",
    "max intercepted contamination rate",
    "avg intercepted contamination rate",
    # inspection effort and outcome counts
    "boxes opened completion",
    "boxes opened detection",
    "items inspected completion",
    "items inspected detection",
    "missed",
    "intercepted",
    "missed contaminants",
    "intercepted contamininants",
    # derived columns
    "consignment failure rate",
    "inspection success rate",
    "contaminated_consignments",
]


In [None]:

# Work on a copy: assigning `.columns` on a plain alias would rename the
# columns of df_consignments itself, so re-running the cell that computes the
# rates from "false_neg" / "intercepted" would then fail.
df_consignments_pretty = df_consignments.copy()
df_consignments_pretty.columns = column_names

# Keep the scenario description columns (0-11), the average contamination
# rates (12, 14, 16), boxes opened / items inspected at completion (17, 19),
# missed contaminants (23) and the three derived columns (25-27).
df_consignments_pretty = df_consignments_pretty.iloc[
    :,
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 17, 19, 23, 25, 26, 27],
]

# .loc slices by index label and includes both ends:
# labels 0-3 are the cargo configuration scenarios, labels 4-14 the
# contamination scenarios.
df_cargo_config = df_consignments_pretty.loc[0:3, :]
df_contamination_scenarios = df_consignments_pretty.loc[4:14, :]
#df_contaminant_arrangement = df_consignments_pretty.loc[10:14,:]

In [None]:
# Bar colour for each sample size method.
colors = {
    "hypergeometric 0.1": "#1f78b4",
    "hypergeometric 0.05": "#a6cee3",
    "proportion 0.02": "#b2df8a",
}

# Legend handles, one per sample size method, in legend order.
patch_1, patch_2, patch_3 = (
    mpatches.Patch(color=colors[method], label=method)
    for method in ("hypergeometric 0.1", "hypergeometric 0.05", "proportion 0.02")
)

In [None]:
def _cargo_config_panel(position, column, title, xlabel):
    """Draw one horizontal bar panel of df_cargo_config in the current figure.

    `position` is the subplot code (121 = left, 122 = right), `column` the
    value column to plot; bars are labelled with the consignment names.
    """
    plt.subplot(position)
    plt.barh(df_cargo_config["name"], df_cargo_config[column])
    plt.title(title, fontsize=20)
    plt.xlabel(xlabel, fontsize=14)
    plt.yticks(
        ticks=np.arange(4),
        labels=df_cargo_config["consignment name"],
        fontsize=14,
    )


# Figure 1: inspection success rate and missed contaminants.
plt.figure(figsize=(18, 9), dpi=150)
_cargo_config_panel(121, "inspection success rate", "Inspection Success Rate", "rate")
plt.subplots_adjust(wspace=0.65, left=0.22, right=0.95)
_cargo_config_panel(122, "missed contaminants", "Missed Contaminants", "contaminated items")
plt.subplots_adjust(bottom=0.2)
# plt.savefig("cargo_config_scenario_plots_1.png")
plt.show()

# Figure 2: inspection effort (boxes opened and items inspected).
plt.figure(figsize=(14, 6), dpi=150)
_cargo_config_panel(121, "boxes opened completion", "Boxes Opened", "boxes")
_cargo_config_panel(122, "items inspected completion", "Items Inspected", "items")
#plt.legend(handles=[patch_1,patch_2,patch_3], loc = "upper left", fontsize=14)
plt.subplots_adjust(bottom=0.2)
#plt.savefig("cargo_config_scenario_plots.png")
plt.show()



In [None]:
def plot_contamination_panels(frame, panels, output_path, figsize):
    """Draw two side-by-side horizontal bar charts of contamination scenarios.

    Parameters
    ----------
    frame : pandas.DataFrame
        Scenario results; must provide the "name" and "consignment name"
        columns plus every value column named in ``panels``.
    panels : sequence of (column, title, xlabel)
        One entry per panel (left, right).
    output_path : str
        File the finished figure is saved to.
    figsize : tuple of float
        Figure size in inches.
    """
    # One tick per bar; derived from the data instead of a hard-coded count.
    positions = np.arange(len(frame))

    fig, axes = plt.subplots(1, 2, figsize=figsize, dpi=150)
    # Wide left margin and gap leave room for the consignment-name labels.
    fig.subplots_adjust(wspace=0.65, left=0.22, right=0.95)

    for ax, (column, title, xlabel) in zip(axes, panels):
        ax.barh(frame["name"], frame[column])
        ax.set_title(title, fontsize=20)
        # Bars are horizontal: values run along x, scenarios along y.  The
        # scenario names therefore go on the y ticks and the value label on
        # the x axis (labelling the x ticks with names would put them on the
        # numeric axis).
        ax.set_xlabel(xlabel, fontsize=18)
        ax.set_yticks(positions)
        ax.set_yticklabels(frame["consignment name"], fontsize=14)

    fig.savefig(output_path)
    plt.show()


plot_contamination_panels(
    df_contamination_scenarios,
    panels=[
        ("inspection success rate", "Inspection Success Rate", "rate"),
        ("missed contaminants", "Missed Contaminants", "contaminated items"),
    ],
    output_path="contamination_scenario_plots_1.png",
    figsize=(18, 9),
)

plot_contamination_panels(
    df_contamination_scenarios,
    panels=[
        ("avg missed contamination rate", "Avg Missed Contamination Rate", "rate"),
        (
            "avg intercepted contamination rate",
            "Avg Intercepted Contamination Rate",
            "rate",
        ),
    ],
    output_path="contamination_scenario_plots_2.png",
    figsize=(14, 6),
)



In [None]:
# The contaminant-arrangement scenarios are the rows labelled 10-14 of the
# formatted consignment table.  Defined here because no earlier cell creates
# this frame, so the plots below would otherwise fail with a NameError on a
# fresh kernel.
df_contaminant_arrangement = df_consignments_pretty.loc[10:14, :]


def plot_arrangement_panels(frame, panels, output_path):
    """Draw two side-by-side vertical bar charts of arrangement scenarios.

    Parameters
    ----------
    frame : pandas.DataFrame
        Scenario results; must provide the "name" and "consignment name"
        columns plus every value column named in ``panels``.
    panels : sequence of (column, title, ylabel)
        One entry per panel (left, right).
    output_path : str
        File the finished figure is saved to.
    """
    # One tick per bar; derived from the data instead of a hard-coded count.
    positions = np.arange(len(frame))

    fig, axes = plt.subplots(1, 2, figsize=(14, 6), dpi=150)
    # Extra bottom margin for the vertically rotated consignment names.
    fig.subplots_adjust(bottom=0.4)

    for ax, (column, title, ylabel) in zip(axes, panels):
        ax.bar(frame["name"], frame[column])
        ax.set_title(title, fontsize=20)
        ax.set_ylabel(ylabel, fontsize=18)
        ax.set_xticks(positions)
        ax.set_xticklabels(
            frame["consignment name"], rotation="vertical", fontsize=14
        )

    fig.savefig(output_path)
    plt.show()


plot_arrangement_panels(
    df_contaminant_arrangement,
    panels=[
        ("inspection success rate", "Inspection Success Rate", "rate"),
        ("missed contaminants", "Missed Contaminants", "contaminated items"),
    ],
    output_path="contaminant_arrangement_scenario_plots_1.png",
)

plot_arrangement_panels(
    df_contaminant_arrangement,
    panels=[
        ("avg missed contamination rate", "Avg Missed Contamination Rate", "rate"),
        (
            "avg intercepted contamination rate",
            "Avg Intercepted Contamination Rate",
            "rate",
        ),
    ],
    output_path="contaminant_arrangement_plots_2.png",
)

