In [245]:
import pandas as pd
import numpy as np
import os
import json
import sys

sys.path.append(os.path.dirname(os.getcwd()))
from utils.data_cleanup_and_validation import *
from utils.species_landings import *
from utils.stock_weights import *
from utils.aggregated_tables import *

In [2]:
parent_dir = os.path.dirname(os.getcwd())
input_dir = os.path.join(parent_dir, "input")
output_dir = os.path.join(parent_dir, os.path.join("output", "clean_data"))

In [3]:
asfis = get_asfis_mappings(input_dir, "ASFIS_sp_2024.csv")["ASFIS"]

code_to_scientific = dict(zip(asfis["Alpha3_Code"], asfis["Scientific_Name"]))

fishstat = pd.read_csv(os.path.join(input_dir, "global_capture_production.csv"))

fishstat = format_fishstat(fishstat, code_to_scientific)

years = list(range(1950, 2022))

In [26]:
landings = pd.read_excel(os.path.join(output_dir, "species_landings.xlsx"))

In [44]:
check_dir = os.path.join(os.path.dirname(os.path.dirname(os.getcwd())), os.path.join("Stock-Status-Models", "overview_output_v6"))

check = pd.read_excel(os.path.join(check_dir, "data_w_landings_by_stock.xlsx"))

In [90]:
sa = pd.read_excel(os.path.join(output_dir, "stock_assessments.xlsx"))

In [91]:
sa[(sa["Area"]==34)&(sa["ASFIS Scientific Name"]=="Pagellus spp")]

Unnamed: 0,Area,ISSCAAP Code,ASFIS Name,ASFIS Scientific Name,Location,Tier,Status,Uncertainty
491,34,33.0,Pandoras NEI,Pagellus spp,Area 34,2,O,
492,34,33.0,Pandoras NEI,Pagellus spp,South,2,O,


In [387]:
weights = pd.read_excel(os.path.join(output_dir, "stock_weights.xlsx"))

In [108]:
proxy_species = pd.read_excel(os.path.join(input_dir, "January overview - one table.xlsx"), sheet_name="Stocks with Status and Tier")
proxy_species = proxy_species[["AREA", "ASFIS Scientific Name", "Location", "Area Landings 2021", "Stock Catch Value", "scientific name w value", "Area catch Alternative"]]

mask1 = proxy_species["Area catch Alternative"].notna()
mask2 = proxy_species["Area Landings 2021"].isna()

415 164


In [183]:
proxy_landings = proxy_species.rename(columns={"AREA": "Area"}).drop_duplicates(["Area", "ASFIS Scientific Name", "Location"])

proxy_landings = proxy_landings.dropna(subset=["scientific name w value", "Stock Catch Value"])

check = pd.merge(species_landings, proxy_landings, on=["Area", "ASFIS Scientific Name", "Location"], how="outer", indicator=True)

c = check[check["_merge"]=="right_only"]

1.0061038819760002

In [180]:
for sn, area in zip(c["ASFIS Scientific Name"], c["Area"]):
    if area not in ["Deep Sea", "Salmon", "Sharks", "Tuna"]:
        mask = species_landings["ASFIS Scientific Name"]==sn
        if sum(mask) > 0:
            m = species_landings[mask]
            print(sn, m["Location"].values)

Chaceon affinis ['Brazil, Uruguay']
Lutjanus analis ['USA South Atlantic and Gulf of Mexico' 'Brazil']
Mugil liza ['Venezuela' 'Brazil']
Mycteroperca bonaci ['Brazil']
Paralichthys patagonicus ['AUFZ']
Prionotus punctatus ['Brazil N' 'Brazil S']
Prionotus punctatus ['Brazil N' 'Brazil S']
Squatina guggenheim ['AUFZ, Brazil']
Engraulis encrasicolus ['Division 9.a' 'Subarea 8' 'SPN/ZoneNorthAB' 'SPS/ZSouth (R Congo)'
 'SPS/ZWest (Ghana, Togo, Benin)' 'Adreatic Sea' 'Aegean Sea' 'Black Sea'
 'Eastern Ionian Sea' 'Gulf of Lion'
 'Ligurian Sea and northern Thrrhenian Sea' 'Northern Alboran Sea'
 'Northern Spain' 'Southern Sicily']
Aristaeomorpha foliacea ['Central Med' 'Strait of Sicily' 'Western Med' 'Mozambique 1'
 'Mozambique 2']
Engraulidae ['Area 57' 'India (A&NI)' 'Malaysia (WCP)']
Loliginidae, Ommastrephidae ['Area 51' 'Area 57']
Loliginidae, Ommastrephidae ['Area 51' 'Area 57']
Loliginidae, Ommastrephidae ['Area 51' 'Area 57']
Loliginidae, Ommastrephidae ['Area 51' 'Area 57']
Paphie

In [473]:
stock_landings = pd.read_excel(os.path.join(output_dir, "stock_landings.xlsx"))

In [199]:
stock_landings[stock_landings["Proxy Species"]=="NEI"]

Unnamed: 0,Area,ASFIS Scientific Name,Location,Proxy Species,Stock Landings 2021


In [198]:
stock_landings[stock_landings["Stock Landings 2021"]==0]

Unnamed: 0,Area,ASFIS Scientific Name,Location,Proxy Species,Stock Landings 2021


In [209]:
sofia = pd.read_excel(os.path.join(input_dir, "sofia2024v2Oct31woTunasFinalchcecksMarch2024.xlsx"), sheet_name="sofia2024")

sofia = sofia.rename(columns={"Name": "ASFIS Name", "Species": "ASFIS Scientific Name", "X2021": "Status"})
sofia = sofia[["Area", "ASFIS Scientific Name", "Status"]]
sofia = sofia[sofia["Area"]!="Tuna"]

# Add tunas separately and combine
sofia_tunas = pd.read_excel(os.path.join(input_dir, "sofia2024v2Oct31woTunasFinalchcecksMarch2024.xlsx"), sheet_name="Tunas_HilarioISSF").loc[:14]
sofia_tunas = sofia_tunas.rename(columns={"Name": "ASFIS Name", "Species": "ASFIS Scientific Name", "X2021": "Status"})
sofia_tunas = sofia_tunas[["Area", "ASFIS Scientific Name", "Status"]]
sofia = pd.concat([sofia, sofia_tunas])

# Convert the multiple statuses to individual observations
sofia["Status List"] = sofia["Status"].apply(convert_status_to_list)
sofia = (
    sofia.explode("Status List")
    .drop(columns="Status")
    .rename(columns={"Status List": "Status"})
)
sofia = sofia.dropna(subset="Status")

In [213]:
sofia["Status"].unique()

array(['O', 'F', 'U', '?', 'N', 'O ((possibly Unknown))'], dtype=object)

In [222]:
sofia[sofia["Status"].isin(["O", "F", "U"])]["Area"].value_counts().to_frame().reset_index()

Unnamed: 0,Area,count
0,31,54
1,57,48
2,71,47
3,21,42
4,37,39
5,47,37
6,34,35
7,27,34
8,67,34
9,81,28


In [357]:
location_to_area["Tuna"]

{'Northern Atlantic Ocean stock': [21, 27, 31, 34],
 'Southern Atlantic Ocean Stock': [41, 47, 48],
 'Mediterranean Albacore stock': [37],
 'Eastern Atlantic Ocean and Mediterranean stock': [27, 34, 37, 47],
 'Western Atlantic Ocean stock': [21, 31, 41],
 'Atlantic Ocean': [21, 27, 31, 34, 37, 41, 47, 48],
 'Eastern Atlantic Ocean stock': [27, 34, 47],
 'Indian Ocean': [51, 57, 58],
 'North Pacific Ocean stock': [61, 67],
 'South Pacific Ocean stock': [81, 87, 88],
 'Eastern Pacific': [67, 77, 87],
 'Western and Central Pacific': [71],
 'Pacific': [61, 67, 77],
 'Southern': [47, 51, 57, 58, 71, 81, 87, 88]}

In [352]:
sofia = pd.read_excel(os.path.join(input_dir, "sofia2024v2Oct31woTunasFinalchcecksMarch2024.xlsx"), sheet_name="sofia2024")

sofia["Species"] = sofia["Species"].fillna(sofia["Name"])
sofia = sofia.rename(columns={"Name": "ASFIS Name", "Species": "ASFIS Scientific Name", "X2021": "Status"})
sofia = sofia[["Area", "ASFIS Scientific Name", "Status"]]
sofia = sofia[sofia["Area"]!="Tuna"]

# Convert the multiple statuses to individual observations
sofia["Status List"] = sofia["Status"].apply(convert_status_to_list)
sofia = (
    sofia.explode("Status List")
    .drop(columns="Status")
    .rename(columns={"Status List": "Status"})
)

# Add tunas separately and combine
# Use tuna sheet from updated_assessment_overview since it contains the locations
# These are same stocks listed in Tunas_HilarioISSF in sofia2024v2Oct31woTunasFinalchcecksMarch2024.xlsx 
# (see column U2021)
sofia_tunas = pd.read_excel(os.path.join(input_dir, "updated_assessment_overview.xlsx"), sheet_name="Tuna")
sofia_tunas["Area"] = "Tuna"
# Update missing locations so we can find areas from location to area map
tuna_mask1 = sofia_tunas["ASFIS Scientific Name"] == "Thunnus orientalis"
tuna_mask2 = sofia_tunas["ASFIS Scientific Name"] == "Thunnus maccoyii"
sofia_tunas.loc[tuna_mask1, "Location"] = "Pacific"
sofia_tunas.loc[tuna_mask2, "Location"] = "Southern"
sofia_tunas = sofia_tunas[["Area", "ASFIS Scientific Name", "Location", "Status"]]

sofia = pd.concat([sofia, sofia_tunas]).reset_index(drop=True)

In [343]:
def get_proxy_name(sn, scientific_names):
    if pd.isna(sn):
        return sn
    if sn in scientific_names:
        return sn
    elif "sp." in sn:
        return sn.replace("sp.", "spp")
    elif "Species" in sn and sn.replace("Species", "spp") in scientific_names:
        return sn.replace("Species", "spp")

    if sn.split(" ")[0] + " spp" in scientific_names:
        return sn.split(" ")[0] + " spp"

    return np.nan

In [344]:
mappings = get_asfis_mappings(input_dir, "ASFIS_sp_2024.csv")
scientific_names = mappings["ASFIS Scientific Names"]
sofia["Proxy"] = sofia["ASFIS Scientific Name"].apply(get_proxy_name, args=(scientific_names,))

In [345]:
sofia_tunas

Unnamed: 0,Area,ASFIS Scientific Name,Status
0,Tunas,Thunnus alalunga,"U,U,O"
1,Tunas,Thunnus thynnus,"F,F"
2,Tunas,Thunnus obesus,F
3,Tunas,Katsuwonus pelamis,"U,U"
4,Tunas,Thunnus albacares,F
5,Tunas,Thunnus alalunga,U
6,Tunas,Thunnus obesus,F
7,Tunas,Katsuwonus pelamis,U
8,Tunas,Thunnus albacares,F
9,Tunas,Thunnus alalunga,"U,U"


In [308]:
sofia_indices = {
    "Area 21": (46, 0, 0),
    "Area27": (40, 0, 0),
    "Area 31": (51, 0, 0),
    "Area34": (71, 0, 0),
    "Area37": (60, 0, 0),
    "Area41": (62, 0, 0),
    "Area47": (44, 0, 0),
    "Area51": (52, 0, 0),
    "Area57": (64, 0, 0),
    "Area 61": (46, 0, 0),
    "Area67": (41, 0, 0),
    "Area71": (63, 0, 0),
    "Area77": (33, 0, 0),
    "area81v2": (38, 0, 0),
    "Area87": (31, 0, 0),
    "Tunas_HilarioISSF": (19, 0, 0)
}

sofia_sheets = sofia_indices.keys()
sofia_sheet_to_area = {sheet: "".join([char for char in sheet if char.isdigit()]) for sheet in sofia_sheets}
sofia_sheet_to_area = {sheet: int(area) if area.isdigit() else area for sheet, area in sofia_sheet_to_area.items()}
sofia_sheet_to_area["area81v2"] = 81
sofia_sheet_to_area["Tunas_HilarioISSF"] = "Tuna"

sofia_file_path = os.path.join(input_dir, "sofia2024v2Oct31woTunasFinalchcecksMarch2024.xlsx")
sofia_sbn_dict = read_stock_data(sofia_file_path, sofia_indices)

# Reformat SOFIA status by number
for sheet, df in sofia_sbn_dict.items():
    sofia_sbn_dict[sheet]["Area"] = sofia_sheet_to_area[sheet]
    sofia_sbn_dict[sheet] = df[["Area", "Overfished", "Fully Fished ", "Under fished"]]
    sofia_sbn_dict[sheet] = sofia_sbn_dict[sheet].rename(
        columns={
            "Overfished": "No. of O",
            "Fully Fished ": "No. of MSF",
            "Under fished": "No. of U"
        }
    )
    sofia_sbn_dict[sheet]["No. of Sustainable"] = sofia_sbn_dict[sheet]["No. of U"] + \
                                                sofia_sbn_dict[sheet]["No. of MSF"]
    sofia_sbn_dict[sheet]["No. of Unsustainable"] = sofia_sbn_dict[sheet]["No. of O"]
    sofia_sbn_dict[sheet]["No. of Stocks"] = sofia_sbn_dict[sheet]["No. of Sustainable"] + \
                                        sofia_sbn_dict[sheet]["No. of Unsustainable"]
    
sofia_sbn = pd.DataFrame()

for sheet, df in sofia_sbn_dict.items():
    if sofia_sbn.empty:
        sofia_sbn = df.copy()
    else:
        sofia_sbn = pd.concat([sofia_sbn, df])
        
sofia_sbn = pd.concat([sofia_sbn, 
                    pd.DataFrame({"Area": "Global"}, index=[len(sofia_sbn)])])

cols_to_sum = ["No. of Stocks", "No. of U", "No. of MSF", "No. of O", "No. of Sustainable", "No. of Unsustainable"]
sofia_sbn.loc[sofia_sbn["Area"]=="Global", cols_to_sum] = sofia_sbn[cols_to_sum].sum().values

pct_cols = []
for col in cols_to_sum:
    sofia_sbn[col] = sofia_sbn[col].astype(int)
    if col != "No. of Stocks":
        pct_col = col.replace("No. ", "") + " (%)"
        pct_cols.append(pct_col)
        sofia_sbn[pct_col] = (sofia_sbn[col] / sofia_sbn["No. of Stocks"]) * 100

sbn_col_order = ["Area"] + cols_to_sum + pct_cols
sofia_sbn = sofia_sbn[sbn_col_order]

Stock assessment sheets: 100%|██████████| 16/16 [00:00<00:00, 16.47it/s]


In [277]:
stock_assessments = pd.read_excel(os.path.join(output_dir, "stock_assessments.xlsx"))

In [312]:
with open(os.path.join(input_dir, "location_to_area.json"), "r") as file:
    location_to_area = json.load(file)

In [315]:
location_to_area.get("", {}).get("Northern Atlantic Ocean stock", [])

[]

In [282]:
sl = pd.merge(species_landings, stock_assessments, on=["Area", "ASFIS Scientific Name", "Location"])

Unnamed: 0,Area,ASFIS Scientific Name,Location,1950,1951,1952,1953,1954,1955,1956,...,2018,2019,2020,2021,Location Match,ISSCAAP Code,ASFIS Name,Tier,Status,Uncertainty


In [321]:
location_to_area["Sharks"]

{'Global/All areas': [21,
  27,
  31,
  34,
  37,
  41,
  47,
  48,
  51,
  57,
  58,
  61,
  67,
  71,
  77,
  81,
  87,
  88],
 'Atlantic North': [21, 27],
 'Atlantic South': [41, 47],
 'Atlantic Northeast': [27],
 'Atlantic Northwest': [21],
 'Atlantic Southern': [41, 47],
 'Indian Ocean': [51, 57],
 'Indian Ocean, Eastern': [57],
 'Indian Ocean, Western': [51],
 'Mediterranean and Black Sea': [37],
 'Pacific Central': [71, 77],
 'Pacific North': [61],
 'Pacific South': [81, 87],
 'Pacific Southeast': [87],
 'Pacific Eastern Central': [77],
 'Pacific Southern': [81, 87],
 'Pacific Western Central': [71],
 'Pacific  Western Central': [71]}

In [283]:
aquaculture = pd.read_csv(os.path.join(input_dir,"global_aquaculture_production.csv"))

In [284]:
aquaculture.columns

Index(['Country (Name)', 'ASFIS species (Name)', 'ASFIS species (Code)',
       'ASFIS species (Scientific name)', 'FAO major fishing area (Code)',
       'Environment (Name)', 'Unit (Name)', 'Unit', '[1950]', 'S',
       ...
       '[2018]', 'S.68', '[2019]', 'S.69', '[2020]', 'S.70', '[2021]', 'S.71',
       '[2022]', 'S.72'],
      dtype='object', length=154)

In [305]:
def compare_status_by_number(update, previous):
    comparison = pd.merge(
        update, previous, on="Area", how="left", suffixes=("_update", "_previous")
    )

    new_columns = []

    for col in comparison.columns:
        if col == "Area":
            new_columns.append(("", col))  # Keeping Area as a separate category
        elif col.endswith("_previous"):
            new_columns.append(
                ("Previous SoSI Categories", col.replace("_previous", ""))
            )
        elif col.endswith("_update"):
            new_columns.append(("Updated SoSI Categories", col.replace("_update", "")))
            
    print(comparison.columns)
    print(new_columns)

    comparison.columns = pd.MultiIndex.from_tuples(new_columns)

    return comparison

def compute_status_by_number(data, group):
    grouped = (
        data.groupby(group)
        .agg(
            **{
                "No. of stocks": (group, "size"),
                "No. of U": ("Status", lambda x: (x == "U").sum()),
                "No. of MSF": ("Status", lambda x: (x == "M").sum()),
                "No. of O": ("Status", lambda x: (x == "O").sum()),
                "No. of Sustainable": (
                    "Status",
                    lambda x: ((x == "U") | (x == "M")).sum(),
                ),
                "No. of Unsustainable": ("Status", lambda x: (x == "O").sum()),
                "U (%)": ("Status", lambda x: (x == "U").mean() * 100),
                "MSF (%)": ("Status", lambda x: (x == "M").mean() * 100),
                "O (%)": ("Status", lambda x: (x == "O").mean() * 100),
                "Sustainable (%)": (
                    "Status",
                    lambda x: ((x == "M") | (x == "U")).mean() * 100,
                ),
                "Unsustainable (%)": ("Status", lambda x: (x == "O").mean() * 100),
            }
        )
        .reset_index()
    )

    # Add a final row with total values
    totals = pd.DataFrame(
        {
            group: ["Global"],
            "No. of stocks": [data.shape[0]],
            "No. of U": [(data["Status"] == "U").sum()],
            "No. of MSF": [(data["Status"] == "M").sum()],
            "No. of O": [(data["Status"] == "O").sum()],
            "No. of Sustainable": [data["Status"].isin(["U", "M"]).sum()],
            "No. of Unsustainable": [(data["Status"] == "O").sum()],
            "U (%)": [((data["Status"] == "U").sum() / data.shape[0]) * 100],
            "MSF (%)": [((data["Status"] == "M").sum() / data.shape[0]) * 100],
            "O (%)": [((data["Status"] == "O").sum() / data.shape[0]) * 100],
            "Sustainable (%)": [
                (
                    ((data["Status"] == "M") | (data["Status"] == "U")).sum()
                    / data.shape[0]
                )
                * 100
            ],
            "Unsustainable (%)": [
                ((data["Status"] == "O").sum() / data.shape[0]) * 100
            ],
        }
    )

    return pd.concat([grouped, totals], ignore_index=True)


In [286]:
sbn = compute_status_by_number(stock_assessments, "Area")

In [435]:
def compute_percent_coverage(
    stock_landings,
    fishstat,
    areas,
    assessment="Update",
    key="ASFIS Scientific Name",
    landings_key="Stock Landings 2021",
    tier=None,
    extra_stocks_map={},
    year=2021,
    location_to_area={}
):
    if tier:
        if tier == "Missing":
            dwl = stock_landings[stock_landings["Tier"].isna()]
        else:
            dwl = stock_landings[stock_landings["Tier"] == tier]
    else:
        dwl = stock_landings.copy()

    percent_coverage = {}

    for area in areas:
        coverage = dwl[dwl["Area"]==area][landings_key].sum()
        
        # See if additional unassessed stocks need to be added to coverage
        extra_stocks_added = []
        if assessment in extra_stocks_map and area in extra_stocks_map[assessment]:
            extra_stocks_tiers = extra_stocks_map[assessment][area]
            
            for t, extra_stocks in extra_stocks_tiers.items():
                if not tier or t == tier:
                    extra_stocks_mask = fishstat[key].isin(extra_stocks)
                    area_mask = fishstat["Area"] == area
                    
                    coverage += fishstat[extra_stocks_mask&area_mask][year].sum()
                    
                    extra_stocks_added += list(extra_stocks)
                    
        # Check if tuna landings need to be added back into area        
        for idx, row in dwl[dwl["Area"] == "Tuna"].iterrows():
            # Make sure not to double count stocks
            if row[key] not in extra_stocks_added:
                areas = location_to_area["Tuna"][row["Location"]]

                if area in areas:
                    coverage += fishstat[
                        (fishstat["ASFIS Scientific Name"] == row[key])
                        & (fishstat["Area"] == area)
                    ][year].sum()

        # Add salmon to Area 67
        if area == 67 and "Salmon" in dwl["Area"].unique():
            salmon_coverage = dwl[(dwl["Area"] == "Salmon")][landings_key].sum()
            coverage += salmon_coverage

        # Check if sharks need to be added to area
        for idx, row in dwl[dwl["Area"] == "Sharks"].iterrows():
            # Make sure not to double count stocks
            if row[key] not in extra_stocks_added:
                areas = location_to_area["Sharks"][row["Location"]]

                if area in areas:
                    coverage += fishstat[
                        (fishstat["ASFIS Scientific Name"] == row["ASFIS Scientific Name"])
                        & (fishstat["Area"] == area)
                    ][year].sum()
        
        # Calculate area's total landings
        total_area_mask = fishstat["Area"].isin([48,58,88]) if area == "48,58,88" else fishstat["Area"]==area
        total_landings = fishstat[total_area_mask][year].sum()
        
        # Add to global total
        if "Global" not in percent_coverage:
            percent_coverage["Global"] = {}
            percent_coverage["Global"]["Coverage"] = coverage
            percent_coverage["Global"]["Total Landings"] = total_landings
        else:
            percent_coverage["Global"]["Coverage"] += coverage
            percent_coverage["Global"]["Total Landings"] += total_landings

        percent_coverage[area] = coverage / total_landings * 100

    percent_coverage["Global"] = (
        percent_coverage["Global"]["Coverage"]
        / percent_coverage["Global"]["Total Landings"]
        * 100
    )
    
    return pd.DataFrame(
        percent_coverage.items(), columns=["Area", f"Coverage (%) {assessment}"]
    )

In [366]:
areas = [area for area in stock_landings["Area"].unique() if isinstance(area, int) or area == "48,58,88"]

In [407]:
with open(os.path.join(input_dir, "ISSCAAP_to_NEI.json"), "w") as file:
    json.dump(isscaap_to_nei, file)

In [427]:
area71_extras = pd.read_excel(os.path.join(input_dir, "updated_assessment_overview.xlsx"))
area71_extras = area71_extras.rename(
    columns={"More appropriate ASFIS Scientific Name": "Check",
             "Scientific name ASFIS": "ASFIS Scientific Name"}
)
area71_extras_mask = area71_extras["Check"] == "to ignore"
area71_extras = area71_extras[area71_extras_mask]

area71_tier1_mask = area71_extras["Tier"] == 1
area71_no_tier_mask = area71_extras["Tier"].isna()

extra_stocks_map = {
    "Update": {
        71: {
            "Tier 1": area71_extras[area71_tier1_mask]["ASFIS Scientific Name"].values,
            "Missing": area71_extras[area71_no_tier_mask]["ASFIS Scientific Name"].values,
        }
    }
}

In [476]:
compute_percent_coverage(stock_landings, fishstat, areas, location_to_area=location_to_area, extra_stocks_map=extra_stocks_map)

Unnamed: 0,Area,Coverage (%) Update
0,Global,85.866404
1,21,87.875003
2,27,88.246094
3,31,77.17632
4,34,74.172215
5,37,67.654526
6,41,83.978753
7,47,95.581597
8,51,92.148003
9,57,95.237234


In [444]:
sofia_landings = pd.read_excel(os.path.join(output_dir, "sofia_landings.xlsx"))

In [448]:
sofia_landings.loc[sofia_landings["Area"].isin([48,58,88]), "Area"] = "48,58,88"

In [449]:
compute_percent_coverage(sofia_landings, fishstat, areas, assessment="Previous", landings_key=2021, key="Proxy", location_to_area=location_to_area)

Unnamed: 0,Area,Coverage (%) Previous
0,Global,65.587482
1,21,60.763422
2,27,81.290613
3,31,61.795931
4,34,68.651355
5,37,76.002335
6,41,69.587475
7,47,82.554358
8,51,71.996675
9,57,43.829164


In [461]:
def compute_percent_coverage_tiers(stock_landings, fishstat, areas, extra_stocks_map={}, location_to_area={}):
    pc_tier1 = compute_percent_coverage(stock_landings, fishstat, areas, tier=1, extra_stocks_map=extra_stocks_map, location_to_area=location_to_area)
    pc_tier2 = compute_percent_coverage(stock_landings, fishstat, areas, tier=2, extra_stocks_map=extra_stocks_map, location_to_area=location_to_area)
    pc_tier3 = compute_percent_coverage(stock_landings, fishstat, areas, tier=3, extra_stocks_map=extra_stocks_map, location_to_area=location_to_area)
    pc_no_tier = compute_percent_coverage(stock_landings, fishstat, areas, tier="Missing", extra_stocks_map=extra_stocks_map, location_to_area=location_to_area)

    pc_update = pd.merge(
        pc_tier1, pc_tier2, on="Area", how="inner", suffixes=("_tier1", "_tier2")
    )
    pc_update = pd.merge(
        pc_update, pc_tier3, on="Area", how="inner", suffixes=("", "_tier3")
    )
    pc_update = pd.merge(
        pc_update, pc_no_tier, on="Area", how="inner", suffixes=("", "_no_tier")
    )

    pc_update.rename(
        columns={
            "Coverage (%) Update_tier1": "Tier 1",
            "Coverage (%) Update_tier2": "Tier 2",
            "Coverage (%) Update": "Tier 3",
            "Coverage (%) Update_no_tier": "No Tier",
        },
        inplace=True,
    )

    pc_update["Total"] = (
        pc_update["Tier 1"]
        + pc_update["Tier 2"]
        + pc_update["Tier 3"]
        + pc_update["No Tier"]
    )

    tuples = [("", "Area")] + [
        ("Coverage (%)", col) for col in pc_update.columns if col != "Area"
    ]
    pc_update.columns = pd.MultiIndex.from_tuples(tuples)

    return pc_update

In [475]:
stock_landings[(stock_landings["ISSCAAP Code"]==63)]

Unnamed: 0,Area,ASFIS Scientific Name,Location,Proxy Species,Stock Landings 2021,ISSCAAP Code,ASFIS Name,Tier,Status,Uncertainty
42,21,Halichoerus grypus,Atlantic,,,63.0,Grey seal,1,M,L
92,21,Pagophilus groenlandicus,Atlantic,,81425.0,63.0,Harp seal,1,M,L


In [474]:
stock_landings = pd.merge(stock_landings, stock_assessments, on=["Area", "ASFIS Scientific Name", "Location"])

In [462]:
pc_tiers = compute_percent_coverage_tiers(stock_landings,
                                          fishstat,
                                            areas,
                                            extra_stocks_map=extra_stocks_map,
                                            location_to_area=location_to_area)

In [463]:
pc_tiers

Unnamed: 0_level_0,Unnamed: 1_level_0,Coverage (%),Coverage (%),Coverage (%),Coverage (%),Coverage (%)
Unnamed: 0_level_1,Area,Tier 1,Tier 2,Tier 3,No Tier,Total
0,Global,56.605372,18.13093,8.923414,2.1618,85.821515
1,21,65.787767,22.087236,0.0,0.0,87.875003
2,27,84.280322,3.79234,0.173432,0.0,88.246094
3,31,51.144221,26.0321,0.0,0.0,77.17632
4,34,66.100674,8.071541,0.0,0.0,74.172215
5,37,67.654526,0.0,0.0,0.0,67.654526
6,41,77.826186,6.152567,0.0,0.0,83.978753
7,47,74.64318,4.114494,16.823924,0.0,95.581597
8,51,53.074104,25.151073,13.922825,0.0,92.148003
9,57,55.061923,34.118646,6.056664,0.0,95.237234


In [370]:
fishstat[fishstat["Area"].isin([48,58,88])][2021].sum()

386662.68000000005

In [382]:
def specify_area(row, location_to_area):
    area, loc = row["Area"], row["Area"]
    
    if area in location_to_area:
        return location_to_area[area].get(loc, area)
    elif area == "48,58,88":
        try:
            sp_area_str = area.split(".")[0]
            return int(sp_area_str)
        except ValueError:
            message = f"Could not cast {sp_area_str} to int for location {loc} in area {area}"
            raise ValueError(message)
    else:
        return area

In [383]:
weights["Area Specific"] = weights[["Area", "Location"]].apply(specify_area, args=(location_to_area,), axis=1)

In [386]:
for loc in weights[weights["Area"]=="48,58,88"]["Location"]:
    print(loc, type(loc))

48.3 <class 'str'>
58.5.2 <class 'str'>
48.2 <class 'str'>
48.3 <class 'str'>
48.4 <class 'str'>
58.4.3a <class 'str'>
58.4.4a <class 'str'>
58.4.4b <class 'str'>
58.5.1 <class 'str'>
58.5.2 <class 'str'>
58.6 <class 'str'>
48.1 <class 'str'>
48.2 <class 'str'>
48.4 <class 'str'>
48.6 <class 'str'>
58.4.1 <class 'str'>
58.4.2 <class 'str'>
58.4.3b <class 'str'>
88.1 <class 'str'>
88.2 <class 'str'>
48 <class 'str'>
58.4.1 <class 'str'>
58.4.2 <class 'str'>


In [389]:
weights

Unnamed: 0,Area,ASFIS Scientific Name,Location,Weight 1,Weight 2,Normalized Weight
0,21,Amblyraja radiata,Gulf of Maine,,8.260000e+03,1.0
1,21,Anarhichas lupus,Gulf of Maine / Georges Bank,,1.509000e+03,1.0
2,21,Anguilla rostrata,Maritimes,,5.000000e+01,1.0
3,21,Arctica islandica,Atlantic Coast,,2.113000e+03,1.0
4,21,Brevoortia tyrannus,Atlantic Coast,,1.463344e+06,1.0
...,...,...,...,...,...,...
2600,Tuna,Thunnus obesus,Indian Ocean,,,1.0
2601,Tuna,Thunnus obesus,Western and Central Pacific,,,1.0
2602,Tuna,Thunnus orientalis,Pacific,,,1.0
2603,Tuna,Thunnus thynnus,Eastern Atlantic Ocean and Mediterranean stock,,,1.0


In [372]:
stock_landings[stock_landings["Area"]=="48,58,88"]

Unnamed: 0,Area,ASFIS Scientific Name,Location,Proxy Species,Stock Landings 2021
2430,485888,Champsocephalus gunnari,48.3,,19.48
2431,485888,Champsocephalus gunnari,58.5.2,,193.16
2432,485888,Dissostichus eleginoides,48.2,,203.242222
2433,485888,Dissostichus eleginoides,48.3,,203.242222
2434,485888,Dissostichus eleginoides,48.4,,203.242222
2435,485888,Dissostichus eleginoides,58.4.3a,,1022.442222
2436,485888,Dissostichus eleginoides,58.4.4a,,1022.442222
2437,485888,Dissostichus eleginoides,58.4.4b,,1022.442222
2438,485888,Dissostichus eleginoides,58.5.1,,1022.442222
2439,485888,Dissostichus eleginoides,58.5.2,,1022.442222


In [376]:
fishstat[(fishstat["Area"]==58)&(fishstat["ASFIS Scientific Name"]=="Euphausia superba")]

Unnamed: 0,ISO3,3-alpha code,Area,Unit,1950,S,1951,S.1,1952,S.2,...,S.68,2019,S.69,2020,S.70,2021,S.71,2022,S.72,ASFIS Scientific Name
3698,CHN,KRI,58.0,Tonnes - live weight,0.0,...,0.0,...,0.0,...,...,,0.0,,0.0,,0.0,,0.0,,Euphausia superba
9090,FRA,KRI,58.0,Tonnes - live weight,0.0,...,0.0,...,0.0,...,...,...,0.0,...,0.0,...,0.0,...,0.0,...,Euphausia superba
13645,JPN,KRI,58.0,Tonnes - live weight,0.0,...,0.0,...,0.0,...,...,...,0.0,...,0.0,...,0.0,...,0.0,...,Euphausia superba
14675,KOR,KRI,58.0,Tonnes - live weight,0.0,...,0.0,...,0.0,...,...,,0.0,,0.0,,0.0,...,0.0,...,Euphausia superba
24144,SUN,KRI,58.0,Tonnes - live weight,0.0,...,0.0,...,0.0,...,...,...,0.0,...,0.0,...,0.0,...,0.0,...,Euphausia superba
