# Model data for shark and ray meat landings and trade applied to 2012-2017 data

In [15]:
#!/usr/bin/env python
# coding: utf-8

# # Model for shark and ray meat landings and trade applied to 2014-2019 data

# In[1]:


import os
import pdb

import arviz as az
import matplotlib as mp
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc as pm
import pytensor.tensor as pyt
import rdata as rd
import seaborn as sns
import xarray as xr
import scipy as sp
from matplotlib.gridspec import GridSpec

# Set figure style.
az.style.use("arviz-darkgrid")
bd = os.getcwd() + "/../Data/"

# Helper functions
def indexall(L):
    poo = []
    for p in L:
        if not p in poo:
            poo.append(p)
    Ix = np.array([poo.index(p) for p in L])
    return poo, Ix


# Helper functions
match = lambda a, b: np.array([b.index(x) if x in b else None for x in a])


def unique(series: pd.Series):
    "Helper function to sort and isolate unique values of a Pandas Series"
    return series.sort_values().unique()


## Load landings data

In [16]:
# In[83]:

#'''
#dnam = bd + "/fishorshark/modeldatsharksraysImportNeg1augmented"
dnam = bd + "modeldatsharksrays_land_trade_augmented"
parsed = rd.parser.parse_file(dnam + ".RData")
converted = rd.conversion.convert(parsed)
tmp = converted["modeldat"]
#'''

#'''
# Matrices
speciesCountryIDMap = (
    tmp["speciesCountryIDMap"]
    .rename({"dim_0": "species", "dim_1": "country", "dim_2": "taxon"})
    .transpose("country", "species", "taxon")
    .sortby("country")
)

logProbPrior = (
    tmp["logProbPrior"]
    .rename({"dim_0": "species", "dim_1": "country", "dim_2": "taxon"})
    .transpose("country", "species", "taxon")
    .sortby("country")
)

priorImportance = (
    tmp["priorImportance"]
    .rename({"dim_0": "species", "dim_1": "country"})
    .transpose("country", "species")
    .sortby("country")
)

'''
dnam = bd + "modeldatsharksrays"
parsed = rd.parser.parse_file(dnam + ".RData")
converted = rd.conversion.convert(parsed)
tmp2 = converted["modeldat"]
#'''


OutputSpeciesCountryMapFull = (
    tmp["OutputSpeciesCountryMapFull"]
    .rename({"dim_0": "species", "dim_1": "country"})
    .transpose("country", "species")
    .sortby("country").sel(species=priorImportance.species)
)



OutputSpeciesCountryMap = (
    tmp["OutputSpeciesCountryMap"]
    .rename({"dim_0": "species", "dim_1": "country"})
    .transpose("country", "species")
    .sortby("country").sel(species=priorImportance.species)
)

SpeciesCommodityMap = (
    tmp["speciesCommodities"]
    .rename({"dim_0": "species", "dim_1": "commodity"})
    .sortby("species").sel(species=priorImportance.species)
)

# Grab key for shark/ray
srkey = pd.read_csv(bd + "taxonomy_20240205.csv")
srkey['group'] = srkey.Superorder.replace('Batoidea','rays').replace('Selachimorpha','sharks').to_numpy()


## Ensure available taxon matches for reported species

In [17]:

#"""
# Initialize species to taxon mapping
SpeciesTaxonMAP = speciesCountryIDMap[0].drop_vars('country')

# Match taxa to species level regardless of taxonomic level of aggregation
for t in speciesCountryIDMap.taxon.values:
    # Iterate over possible species for each taxon
    for s in srkey[srkey.isin([t]).any(axis=1)]['species_binomial'].values:
        try:
            SpeciesTaxonMAP.loc[dict(species=s,taxon=t)] = 1
        except:
            pass

# List of rarely caught species 
drop_spp = priorImportance.species.to_numpy()[priorImportance.max(["country"])<=1]
# Number of species remaining with prior importance less than or equal to 1
priorImportance.species.shape[0]-drop_spp.shape[0]
# Make temporary list of all taxon IDs
tmp_taxon_ = speciesCountryIDMap.taxon.to_numpy()
# Index taxons relative to what gets landed
tmp_TaxonIDx = match(tmp["LandingsID"], list(tmp_taxon_))
# Temporary list of all species IDs
tmp_species_ = speciesCountryIDMap.species.to_numpy()
# Boolean of taxons that are to species level in observed landings
tmp_tindx = pd.Series(tmp_taxon_[tmp_TaxonIDx]).str.count(" ").to_numpy() == 0
# Index of species in taxon that are observed as catches
tmp_species_spp_id = match(tmp_taxon_[tmp_TaxonIDx[tmp_tindx == 0]], list(tmp_species_))
# Unique names of species in taxon that are observed as taxon catches but have prior importance <=1
tmp_spp = np.unique(tmp_species_[tmp_species_spp_id[np.log1p(tmp["allCatch"])[tmp_tindx == 0]>0]])
# Species in drop list that are actually observed as in taxon+species (taxon) list
tmp_spp = list(drop_spp[np.array([x in tmp_spp for x in drop_spp])])

# Grab landings data to see which taxons have catch
tmp_landings = tmp["allCatch"]
tmp_taxon = tmp["LandingsID"]
tmp_country = tmp["country"]
# Iterate over landings to ensure species are avaiable for taxon in country
for l,t,c in zip(tmp_landings,tmp_taxon,tmp_country):
    # Look for species landed with impossible priors
    try:
        if (priorImportance.sel(country=c,species=t)==-999)*(l>0):
            priorImportance.loc[dict(country=c,species=t)] = 2
            tmp_spp += [t]
            #print("Changed "+c+" "+t)
    # Look for taxon landed with no possible species 
    except:
        # Possible species for taxon
        tax_spp = (SpeciesTaxonMAP.sel(taxon=t).species[SpeciesTaxonMAP.sel(taxon=t)==1]).values
        # If all species are impossible for taxon
        if (priorImportance.sel(country=c,species=tax_spp).mean()==-999)*(l>0):
            # Assign to possible species in nation
            if OutputSpeciesCountryMapFull.sel(country=c,species=tax_spp).sum()>0:
                axx = tax_spp[OutputSpeciesCountryMapFull.sel(country=c,species=tax_spp).to_numpy()>0]
                xflax = 'ok'
            else:
                # Assign to most likely spp given global max
                axx = tax_spp[(priorImportance.sel(species=tax_spp).max('country')==priorImportance.sel(species=tax_spp).max())]
                xflax = 'not present'
            priorImportance.loc[dict(country=c,species=axx)] = 2
            tmp_spp += list(axx)
            #print("Impossible "+t+" changed "+c+", spp are "+xflax)
            #print(axx)
        
        # If all species for taxon are below data-reduction cutoff
        elif (priorImportance.sel(country=c,species=tax_spp).max()<2)*(l>0):
            # Assign to possible species in nation
            if OutputSpeciesCountryMapFull.sel(country=c,species=tax_spp).sum()>0:
                axx = tax_spp[OutputSpeciesCountryMapFull.sel(country=c,species=tax_spp).to_numpy()>0]
                xflax = 'ok'
            else:
                # Assign to most likely spp given global max
                axx = tax_spp[(priorImportance.sel(species=tax_spp).max('country')==priorImportance.sel(species=tax_spp).max())]
                xflax = 'not present'
            priorImportance.loc[dict(country=c,species=axx)] = 2
            tmp_spp += list(axx)
            #print("Low "+t+" changed "+c+", spp are "+xflax)
            #print(axx)

# Add in species in field data
field_tmp = np.array(['Atlantoraja cyclhophora', 'Bathtoshia centroura',
       'Callorynchus callorynchus',
       'Dasyatis hypostigma', 'Fontitrygon geijskesi',
       'Hypanus bethalutzae', 'Mobula hypostoma', 'Narcine brasiliensis',
       'Pseudobatos horkelii', 'Pteroplatytrygon violacae',
       'Rhinoptera brasilisensis', 'Rioraja agassizi',
       'Scyliorhinus haekelii', 'Squalus albicaudus', 'Squatina occulta',
       'Zapteryx brevirostris'])
for s in field_tmp:
    if s not in tmp_spp:
        tmp_spp+=[s]

# Species to drop = have prior importance <=1 AND are not actually observed as taxon to the species level
drop_spp = drop_spp[np.array([x not in tmp_spp for x in drop_spp])]

#"""
# Drop rare and unreported species
speciesCountryIDMap = speciesCountryIDMap.drop_sel(species=drop_spp)
priorImportance = priorImportance.drop_sel(species=drop_spp)
logProbPrior = logProbPrior.drop_sel(species=drop_spp)
SpeciesCommodityMap = SpeciesCommodityMap.drop_sel(species=drop_spp)
# Add in unreporting countries

# = = = = = = = = = = = = = = After species drop = = = = = = = = = = = = = #

# Vectors
allCatch = tmp["allCatch"]
cindx = allCatch>0
allCatch = allCatch[cindx]
logCatch = np.log1p(tmp["allCatch"])[cindx]
species_ = logProbPrior.species.to_numpy()
country_ = logProbPrior.country.to_numpy()
CountryIDx = match(tmp["country"][cindx], list(country_))
year_ = ["year_" + str(x) for x in np.unique(tmp["year"][cindx]).astype(int)]
YearIDx = match(tmp["year"][cindx], list(np.unique(tmp["year"][cindx]).astype(int)))
taxon_ = logProbPrior.taxon.to_numpy()
TaxonIDx = match(tmp["LandingsID"][cindx], list(taxon_))
speciesCountryMap = speciesCountryIDMap.groupby("species").max("taxon")
TaxonPRIOR = priorImportance.to_numpy()

# Meat species
#meat_mask = SpeciesCommodityMap.sel(commodity='meat',species=species_).to_numpy()
meat_mask = 1*(SpeciesCommodityMap.sel(commodity=('fins'),species=species_)+SpeciesCommodityMap.sel(commodity=('meat'),species=species_)>0).to_numpy()
meat_mask[meat_mask==0] = -999
meat_mask[meat_mask==1] = 0

## Set up split data

In [18]:
# Calculate average total catch for target of softmax
TotalCatch = np.array([allCatch[CountryIDx==i].sum() for i in range(speciesCountryIDMap.shape[0])])/len(year_)
logTotalCatch = np.log(TotalCatch)

In [19]:
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = #

# Make group vector
group_ = srkey.group[match(species_,list(srkey.species_binomial))].to_numpy()
# Make masks for trade sums
shark_mask = 1*(group_=='sharks')
ray_mask = 1*(group_=='rays')

# Grab only aggregated taxa
taxon_shortlist = taxon_[pd.Series(taxon_).str.count(" ").to_numpy() == 0]

# Split data index
tindx = pd.Series(taxon_[TaxonIDx]).str.count(" ").to_numpy() == 0
# Split landings
logReported_species_landings = logCatch[tindx == 0]
logReported_taxon_landings = logCatch[tindx == 1]
# Split country index
country_spp_id = CountryIDx[tindx == 0]
country_tax_id = CountryIDx[tindx == 1]
# Split year index
year_spp_id = YearIDx[tindx == 0]
year_tax_id = YearIDx[tindx == 1]
# Split species index
species_spp_id = match(taxon_[TaxonIDx[tindx == 0]], list(species_))
# Split taxon index
# taxon_tax_id = TaxonIDx[tindx==1]
taxon_tax_id = match(taxon_[TaxonIDx[tindx == 1]], list(taxon_shortlist))



# Make dataframes for later
sdata = pd.DataFrame(
    {
        "logReported_species_landings": logReported_species_landings,
        "species_spp_id": species_spp_id,
        "country_spp_id": country_spp_id,
        "year_spp_id": year_spp_id,
        "country": country_[country_spp_id],
        "year": np.array(year_)[year_spp_id],
        "species": species_[species_spp_id],
    }
)
txdata = pd.DataFrame(
    {
        "logReported_taxon_landings": logReported_taxon_landings,
        "taxon_tax_id": taxon_tax_id,
        "country_tax_id": country_tax_id,
        "year_tax_id": year_tax_id,
        "country": country_[country_tax_id],
        "year": np.array(year_)[year_tax_id],
        "taxon": taxon_shortlist[taxon_tax_id],
    }
)
txdata = txdata.loc[match(txdata.taxon, list(taxon_shortlist)) != None]

## Set up masking

In [20]:
# Initial mask of species matched to possible taxon
InitTaxonMASK = speciesCountryIDMap.copy()
# Create empty mask to store observed taxa as possible
TaxonMASK = InitTaxonMASK*0

In [21]:
# Reported taxa by country
Obs_tax_data = np.exp(txdata.drop(columns=['year','year_tax_id','country_tax_id','taxon_tax_id']
          ).groupby(['country','taxon']).mean()).rename(columns={"logReported_taxon_landings": "Reported_landings"})

In [22]:
# Iteratre over countries
for c in country_:
    try:
        # Iterate over observed taxa and make possible
        taxes = Obs_tax_data.loc[c].index.values
        for t in taxes:
            for s in species_:
                TaxonMASK.loc[dict(country=c,species=s,taxon=t)]=InitTaxonMASK.loc[dict(country=c,species=s,taxon=t)]
    except:
        #print(c)
        pass

In [23]:
# Make priors using relative odds to proportion total landings
SppPRIOR = priorImportance.to_numpy()
SppPRIORadj = SppPRIOR.copy()
# Re-weight to log-odds scale
SppPRIORadj[SppPRIORadj==-2]=-4.5
SppPRIORadj[SppPRIORadj==-1]=-3.5
SppPRIORadj[SppPRIORadj==0]=-2.5
SppPRIORadj[SppPRIORadj==1]=-1
SppPRIORadj[SppPRIORadj==2]=2
SppPRIORadj[SppPRIORadj==3]=5

# Cut down taxon MASK to match taxon_shortlist dimensions
#TaxonMASK_S = TaxonMASK[:, :, match(taxon_shortlist, list(taxon_))]
TaxonMASK_S = TaxonMASK.sel(taxon=taxon_shortlist)
TaxonMASK_Sx = TaxonMASK_S.to_numpy()

# Negative mask for log-odds zeros
negval = -9
TaxonMASK_NEG = TaxonMASK_S.copy().to_numpy()
TaxonMASK_NEG[TaxonMASK_NEG==0] = negval


# Species weight for countries with no aggregations - huge log-odds so p(species ID)=1 where needed
NoTaxaSppWT = np.zeros(SppPRIOR.shape)
NoTaxaSppWT[(TaxonMASK_NEG != negval).sum(1).sum(1) == 0, :] = abs(negval)

## Match landings and trade data

In [24]:
# Make fdata table to merge with trade model
fdata = pd.DataFrame(
    {
        "year": YearIDx + 2012,
        "country_code": country_[CountryIDx],
        "species": taxon_[TaxonIDx],
        "landed_weight": allCatch,
    }
)

# Add shark/ray group for each taxon in landings
tmp_taxon = fdata.species.unique()
tmp_group = []

for tx in tmp_taxon:
    # taxon at species level
    if tx in srkey.species_binomial.to_numpy():
        tmp_group += [srkey.group[srkey.species_binomial==tx].values[0]]
    elif tx in srkey.Genus.to_numpy():
        tmp_group += [srkey.group[srkey.Genus==tx].values[0]]
    elif tx in srkey.Family.to_numpy():
        tmp_group += [srkey.group[srkey.Family==tx].values[0]]
    elif tx in srkey.Order.to_numpy():
        tmp_group += [srkey.group[srkey.Order==tx].values[0]]
    elif tx in ['Sphyrnidae','Selachimorpha']:
        tmp_group += ['sharks']
    elif tx in ['Elasmobranchii']:
        tmp_group += ['elasmos']
    else:
        print(tx)
fdata['group'] = np.array(tmp_group)[match(fdata.species,list(tmp_taxon))]



## CHECK THAT ADDITIONAL ELASMOS ARE OK WITH RE-EXPORT CALCULATIONS. 
## CHECK ALL LANDINGS AND TRADE DATA TO ENSURE SAME YEAR-LEVEL OBSERVATIONS


# ## Import commodity code table

# Import taxonomic match table for BACI commodity codes and species (MASK)
cdata = pd.read_csv(bd + "comm.code.taxon.match.csv")

# ## Load BACI keys

# Import BACI commodity code key
ckey = pd.read_csv(bd + "product_codes_HS12_V202102.csv")

# Import BACI country keys
kdata = pd.read_csv(bd + "country_codes_V202301.csv")
kdata.country_code = kdata.country_code.values.astype(int)

# TWN doesn't have an ISO code
kdata.loc[
    kdata.country_name_full == "Other Asia, not elsewhere specified", "iso_3digit_alpha"
] = "TWN"


# ## Load BACI seafood trade

# Import overall trade from BACI data
odata = pd.read_csv(bd + "baci.seafood_12-19_ij_all.csv")

# Make them numeric
odata.exporter_i = odata.exporter_i.values.astype(int)
odata.importer_j = odata.importer_j.values.astype(int)

# Add country codes

odata["ISOex_i"] = kdata.iso_3digit_alpha.values[
    match(list(odata.exporter_i.values), list(kdata.country_code.values))
]
odata["ISOim_j"] = kdata.iso_3digit_alpha.values[
    match(list(odata.importer_j.values), list(kdata.country_code.values))
]


# ## Load BACI meat trade

# Import BACI data
tdata = pd.read_csv(bd + "baci.elasmo_HS12_2012-2017.csv")
tdata.head()

# Make them numeric
tdata.exporter_i = tdata.exporter_i.values.astype(int)
tdata.importer_j = tdata.importer_j.values.astype(int)

# Temporary change of code for Italy
tdata.loc[tdata.exporter_i == 381, "exporter_i"] = 380
tdata.loc[tdata.importer_j == 381, "importer_j"] = 380

# Add country names for imports/exports
tdata["ISOex_i"] = kdata.iso_3digit_alpha.values[
    match(list(tdata.exporter_i.values), list(kdata.country_code.values))
]
tdata["ISOim_j"] = kdata.iso_3digit_alpha.values[
    match(list(tdata.importer_j.values), list(kdata.country_code.values))
]


# Add commodity group
tdata['group'] = tdata["hscode_k"].replace({30281: "sharks", 30282: "rays", 30381: "sharks", 30382: "rays"})

In [25]:
### Reset biggest_countries to include only those with landings
biggest_countries = country_

# - - - - - - - - - - - Add BACI total seafood trade
total_seafood_trade = (
    odata[
        ((odata.ISOex_i).isin(biggest_countries))
        & ((odata.ISOim_j).isin(biggest_countries))
    ]
    .groupby(["ISOex_i", "ISOim_j"])
    .sum()["ij_total"]
    .reset_index()
    .set_index("ISOex_i")
    .pivot(columns="ISOim_j")
    .droplevel(0, axis="columns")
    .fillna(0.0)
)

## Deal with re-exports

In [26]:

# # REEXPORTS - NEED TO CHECK THAT RAYS ARE COOL HERE TOO
#
# Currently removes trade that has no possible catch.
#
# NB:
#
# 1. Assumes catches in year t are traded in year t

# Pre-removals copy
tdata_copy = tdata.copy()

# Empty list of identified re-exports
tmp = []
# Unique country list
tmp_c = np.unique(np.array(list(tdata.ISOex_i)+list(tdata.ISOim_j)))
# Unique commodity codes
tmp_u = tdata.group.unique()
# Landings per country per year per commodity
tmp_l = fdata.groupby(["country_code","year","group"]).sum().reset_index().sort_values("landed_weight", ascending=False)

# ====================== Remove re-exports from trade =========================== #
# Iterate over years
for y in tdata.year_t.unique():
    # Grab values for year y 
    trad_ = tdata[tdata.year_t==y].groupby(["ISOex_i",'year_t','hscode_k']).sum().reset_index().sort_values("estimated_live_weight", ascending=False)
    # Grab total trade
    trad = trad_.groupby(["ISOex_i"]).sum().reset_index().sort_values("estimated_live_weight", ascending=False)
    # Grab group trade
    trad_s = trad[trad.group=='sharks']
    trad_r = trad[trad.group=='rays']
    # Grab total landings for year y
    land = tmp_l[tmp_l.year==y]
    # Grab possible group landings for year y
    land_s = land[land.group=='sharks']
    land_r = land[land.group=='rays']
    land_e = land[land.group=='elasmos']
    # Iteratre over countries
    for e in tmp_c:
        if e in land.country_code.unique() and e in trad.ISOex_i.unique():
            # Grab values for exporter e in year y
            tx = trad[trad.ISOex_i==e]
            tx_s = trad_s[trad_s.ISOex_i==e]
            tx_r = trad_r[trad_r.ISOex_i==e]
            # Grab landings
            lx = land[land.country_code==e]
            lx_s = land_s[land_s.country_code==e]
            lx_r = land_r[land_r.country_code==e]
            lx_e = land_e[land_e.country_code==e]

            # If no catches to support trade, make trade zero to remove re-exports
            # Do this first because no catches of any kind trump group specifics
            # justified because the project is about assigning catches within the trade to specific nations
            if sum(lx.landed_weight)==0 and sum(tx.estimated_live_weight)>0:
                tmp += [sum(tx.estimated_live_weight)]
                tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y)] = 0

            # If no catches (group+elasmos) to support shark trade, make trade zero to remove re-exports: 
            if sum(lx_s.landed_weight+lx_e.landed_weight)==0 and sum(tx_s.estimated_live_weight)>0:
                tmp += [sum(tx_s.estimated_live_weight)]
                tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y) & (tdata.group=='sharks')] = 0
            # If no catches (group+elasmos) to support ray trade, make trade zero to remove re-exports:
            if sum(lx_r.landed_weight+lx_e.landed_weight)==0 and sum(tx_r.estimated_live_weight)>0:
                tmp += [sum(tx_r.estimated_live_weight)]
                tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y) & (tdata.group=='rays')] = 0
            # If trade more than catches, make proportional within allowable commodity codes
            elif sum(lx.landed_weight)<sum(tx.estimated_live_weight):
                tmp += [sum(tx.estimated_live_weight)]
                # Grab proportion
                rrx = sum(lx.landed_weight)/sum(tx.estimated_live_weight)
                # Re-scale trade to proportion of total landings
                tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y) & (tdata.group=='sharks')] = tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y) & (tdata.group=='sharks')]*rrx
                tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y) & (tdata.group=='rays')] = tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y) & (tdata.group=='rays')]*rrx
        else:
            tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y)] = 0
            print(e,y)        

# ## Restrict landings, seafood trade and meat trade to biggest countries

# Original
tmp_x = (tdata_copy.estimated_live_weight)
# Updated
tmp_y = (tdata.estimated_live_weight)
# Countries with trade reduced
iredux = (tdata_copy.estimated_live_weight-tdata.estimated_live_weight)>0
# Grap re-exports data
ReExports = pd.DataFrame(zip(tmp_x[iredux],tmp_y[iredux],tdata.ISOex_i[iredux].to_numpy()),columns=['Original','Reduced','exporter'])
ReExports['Net_diff'] = ReExports.Original-ReExports.Reduced
ReExports['Exporter'] = kdata.country_name_abbreviation[[list(kdata.iso_3digit_alpha).index(x) for x in ReExports.exporter]].to_numpy()

# Table of re-exporting countries
tmp = ReExports.groupby(['Exporter']).sum().sort_values(by='Net_diff',ascending=False).drop(columns='exporter')
tmp.to_csv('ReExport_totals.csv')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y)] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y)] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y)] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide

ABW 2012
AFG 2012
AGO 2012
AIA 2012
ALB 2012
AND 2012
ARE 2012
ARM 2012
ASM 2012
ATF 2012
AUT 2012
AZE 2012
BDI 2012
BEN 2012
BFA 2012
BGR 2012
BHR 2012
BHS 2012
BIH 2012
BLM 2012
BLR 2012
BMU 2012
BOL 2012
BRB 2012
BRN 2012
BTN 2012
BWA 2012
CAF 2012
CHE 2012
CIV 2012
CMR 2012
COD 2012
COG 2012
CPV 2012
CUB 2012
CUW 2012
CXR 2012
CYM 2012
CYP 2012
CZE 2012
DJI 2012
DOM 2012
DZA 2012
EGY 2012
EST 2012
FIN 2012
FJI 2012
FLK 2012
FSM 2012
GAB 2012
GEO 2012
GIB 2012
GIN 2012
GMB 2012
GNQ 2012
GRD 2012
GRL 2012
GTM 2012
GUM 2012
GUY 2012
HND 2012
HRV 2012
HUN 2012
IOT 2012
IRN 2012
IRQ 2012
ISL 2012
ISR 2012
JOR 2012
KAZ 2012
KEN 2012
KHM 2012
KIR 2012
KNA 2012
KWT 2012
LAO 2012
LBN 2012
LBR 2012
LBY 2012
LCA 2012
LSO 2012
LTU 2012
LUX 2012
LVA 2012
MAC 2012
MDA 2012
MDG 2012
MDV 2012
MHL 2012
MKD 2012
MLI 2012
MLT 2012
MMR 2012
MNE 2012
MNG 2012
MNP 2012
MUS 2012
MWI 2012
NCL 2012
NGA 2012
NIC 2012
PLW 2012
PNG 2012
POL 2012
PRK 2012
PRY 2012
PYF 2012
QAT 2012
ROU 2012
RWA 2012
SAU 2012
S

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y)] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y)] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y)] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide

LUX 2013
LVA 2013
MAC 2013
MDA 2013
MDG 2013
MDV 2013
MHL 2013
MKD 2013
MLI 2013
MLT 2013
MMR 2013
MNE 2013
MNG 2013
MNP 2013
MUS 2013
MWI 2013
NCL 2013
NGA 2013
NIC 2013
PLW 2013
PNG 2013
POL 2013
PRK 2013
PRY 2013
PYF 2013
QAT 2013
ROU 2013
RWA 2013
SAU 2013
SLB 2013
SLE 2013
SLV 2013
SOM 2013
SPM 2013
SRB 2013
SSD 2013
STP 2013
SUR 2013
SVK 2013
SVN 2013
SWE 2013
SWZ 2013
SXM 2013
SYC 2013
TGO 2013
TKL 2013
TKM 2013
TLS 2013
TON 2013
TUR 2013
UGA 2013
UKR 2013
UZB 2013
VCT 2013
VGB 2013
WLF 2013
ZMB 2013
ZWE 2013
ABW 2014
AFG 2014
AIA 2014
ALB 2014
AND 2014
ARM 2014
ASM 2014
ATF 2014
AUS 2014
AUT 2014
AZE 2014
BDI 2014
BEN 2014
BFA 2014
BGR 2014
BHR 2014
BHS 2014
BIH 2014
BLM 2014
BLR 2014
BMU 2014
BOL 2014
BRB 2014
BRN 2014
BTN 2014
BWA 2014
CAF 2014
CHE 2014
CIV 2014
CMR 2014
COD 2014
COG 2014
COL 2014
CPV 2014
CUB 2014
CUW 2014
CXR 2014
CYM 2014
CYP 2014
CZE 2014
DJI 2014
DOM 2014
DZA 2014
EGY 2014
EST 2014
FIN 2014
FJI 2014
FLK 2014
FSM 2014
GAB 2014
GEO 2014
GIB 2014
GIN 2014
G

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y)] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y)] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y)] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide

ATF 2015
AUT 2015
AZE 2015
BDI 2015
BEN 2015
BFA 2015
BGR 2015
BHR 2015
BHS 2015
BIH 2015
BLM 2015
BLR 2015
BMU 2015
BOL 2015
BRB 2015
BRN 2015
BTN 2015
BWA 2015
CAF 2015
CHE 2015
CIV 2015
CMR 2015
COD 2015
COG 2015
CPV 2015
CUB 2015
CUW 2015
CXR 2015
CYM 2015
CYP 2015
CZE 2015
DJI 2015
DOM 2015
DZA 2015
EGY 2015
EST 2015
FIN 2015
FJI 2015
FLK 2015
FSM 2015
GAB 2015
GEO 2015
GIB 2015
GIN 2015
GMB 2015
GNQ 2015
GRD 2015
GRL 2015
GTM 2015
GUM 2015
GUY 2015
HND 2015
HRV 2015
HUN 2015
IOT 2015
IRQ 2015
ISL 2015
ISR 2015
JOR 2015
KAZ 2015
KEN 2015
KHM 2015
KIR 2015
KNA 2015
KWT 2015
LAO 2015
LBN 2015
LBR 2015
LBY 2015
LCA 2015
LSO 2015
LTU 2015
LUX 2015
LVA 2015
MAC 2015
MDA 2015
MDV 2015
MHL 2015
MKD 2015
MLI 2015
MLT 2015
MMR 2015
MNE 2015
MNG 2015
MNP 2015
MUS 2015
MWI 2015
NCL 2015
NIC 2015
PLW 2015
PNG 2015
POL 2015
PRK 2015
PRY 2015
PYF 2015
QAT 2015
ROU 2015
RWA 2015
SAU 2015
SLB 2015
SLV 2015
SOM 2015
SPM 2015
SRB 2015
SSD 2015
STP 2015
SUR 2015
SVK 2015
SVN 2015
SWZ 2015
SXM 2015
S

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y)] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y)] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y)] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide

LSO 2016
LTU 2016
LUX 2016
LVA 2016
MAC 2016
MDA 2016
MDG 2016
MDV 2016
MHL 2016
MKD 2016
MLI 2016
MLT 2016
MMR 2016
MNE 2016
MNG 2016
MNP 2016
MUS 2016
MWI 2016
NCL 2016
NGA 2016
NIC 2016
PLW 2016
PNG 2016
POL 2016
PRK 2016
PRY 2016
PYF 2016
QAT 2016
ROU 2016
RWA 2016
SAU 2016
SLB 2016
SLV 2016
SOM 2016
SPM 2016
SRB 2016
SSD 2016
STP 2016
SUR 2016
SVK 2016
SVN 2016
SWE 2016
SWZ 2016
SXM 2016
SYC 2016
TGO 2016
TKL 2016
TKM 2016
TLS 2016
TON 2016
TUR 2016
UGA 2016
UKR 2016
UZB 2016
VCT 2016
VGB 2016
WLF 2016
ZMB 2016
ZWE 2016
ABW 2017
AFG 2017
AIA 2017
ALB 2017
AND 2017
ARM 2017
ASM 2017
ATF 2017
AUT 2017
AZE 2017
BDI 2017
BEN 2017
BFA 2017
BGR 2017
BHR 2017
BHS 2017
BIH 2017
BLM 2017
BLR 2017
BMU 2017
BOL 2017
BRB 2017
BRN 2017
BTN 2017
BWA 2017
CAF 2017
CHE 2017
CIV 2017
CMR 2017
COD 2017
COG 2017
COL 2017
CPV 2017
CUB 2017
CUW 2017
CXR 2017
CYM 2017
CYP 2017
CZE 2017
DJI 2017
DOM 2017
DZA 2017
EGY 2017
EST 2017
FIN 2017
FJI 2017
FLK 2017
FSM 2017
GAB 2017
GEO 2017
GIB 2017
GIN 2017
G

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y) & (tdata.group=='sharks')] = tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y) & (tdata.group=='sharks')]*rrx
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y) & (tdata.group=='rays')] = tdata.estimated_live_weight[(tdata.ISOex_i==e) & (tdata.year_t==y) & (tdata.group=='rays')]*rrx
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a

In [27]:
# Remove zeros from trade
tdata = tdata[tdata.estimated_live_weight!=0]

## Setup landings data

In [28]:
# Split out shark and ray trade
tdata_cut = tdata[
    ((tdata.ISOex_i).isin(biggest_countries))
    & ((tdata.ISOim_j).isin(biggest_countries))
]

# dropping missing values
tdata_cut = tdata_cut.drop("Unnamed: 0", axis="columns").dropna().reset_index(drop=True)
# Calculate mean after sum
tdata_cut['yrcnt'] = np.ones(len(tdata_cut["hscode_k"]))
# Summarize data by importer export commodity
tdata_cut = tdata_cut.groupby(["ISOex_i", "ISOim_j", "hscode_k"]).sum().reset_index()
# Get average per year
tdata_cut['estimated_live_weight'] = tdata_cut['estimated_live_weight']/tdata_cut['yrcnt']

# we just care about whether it's a shark or ray, not if it's also fresh or frozen
tdata_cut["fish_type"] = tdata_cut["hscode_k"].replace(
    {30281: "sharks", 30282: "rays", 30381: "sharks", 30382: "rays"}
)
tdata_cut = tdata_cut.sort_values(["ISOex_i", "ISOim_j", "fish_type"])

# Shark data
tdata_cut_sharks = (
    tdata_cut[tdata_cut.fish_type == "sharks"]
    .groupby(["ISOex_i", "ISOim_j"])
    .sum()
    .reset_index()
)

# Ray data
tdata_cut_rays = (
    tdata_cut[tdata_cut.fish_type == "rays"]
    .groupby(["ISOex_i", "ISOim_j"])
    .sum()
    .reset_index()
)

In [29]:
## Load unreliability score

unreliability_score = pd.read_csv(
    bd + "reporter_reliability_HS12_V202102.csv",
    usecols=["c", "q_unreliability_i", "q_unreliability_j"],
)

# ITA changed code -- because, why not?
unreliability_score.loc[unreliability_score.c == 381, "c"] = 380

# grab ISO codes from odata
unreliability_score = unreliability_score.rename(
    columns={"q_unreliability_i": "exporter", "q_unreliability_j": "importer"}
).merge(odata, left_on="c", right_on="exporter_i")
unreliability_score = (
    unreliability_score[
        ((unreliability_score.ISOex_i).isin(biggest_countries))
        & ((unreliability_score.ISOim_j).isin(biggest_countries))
    ][["ISOex_i", "exporter", "importer"]]
    .groupby("ISOex_i")
    .first()
)

# Check for missing unreliability scores
misx = biggest_countries[
    np.array([x not in unreliability_score.index for x in biggest_countries])
]

# Fill missing unreliablity scores with maximum unreliability value
tmp_maxval = max(unreliability_score.exporter)
if len(misx) > 0:
    for i in misx:
        unreliability_score = pd.concat(
            [
                pd.DataFrame(index=[i], columns=unreliability_score.columns),
                unreliability_score,
            ]
        )
unreliability_score = unreliability_score.sort_index().fillna(tmp_maxval)
unreliability_score = unreliability_score.reindex(
    sorted(unreliability_score.columns), axis=1
)

  unreliability_score = pd.concat(
  unreliability_score = pd.concat(
  unreliability_score = pd.concat(
  unreliability_score = pd.concat(
  unreliability_score = pd.concat(


## Setup taxon masking

In [30]:
# Count taxon aggregations
ntax_country = (
    txdata.groupby(by=(["country", "taxon"]))
    .sum()
    .reset_index()
    .groupby("country")
    .count()
    .taxon
)

# Add Belize
ntax_country["BLZ"] = 0
# Re-order to match country_
ntax_country = ntax_country[country_]

# Taxon by country groupings
taxindx1 = (ntax_country <= 1).to_numpy()
taxindx2 = (ntax_country == 2).to_numpy()
taxindx3 = (ntax_country >= 3).to_numpy()

# Create 3 dimensional mask
TaxonMASK_t1 = TaxonMASK_Sx.copy()
TaxonMASK_t2 = TaxonMASK_Sx.copy()
TaxonMASK_t3 = TaxonMASK_Sx.copy()

# Deactivate countries without <=1, 2, or >=3 taxon groups reported
TaxonMASK_t1[taxindx1 == False, ...] = 0
TaxonMASK_t2[taxindx2 == False, ...] = 0
TaxonMASK_t3[taxindx3 == False, ...] = 0

# Make sure Elasmos bin is positive in countries with no aggregations
NoTaxAgg = (NoTaxaSppWT.sum(1)==0)*1
TaxonMASK_Sx[NoTaxAgg!=1,:,list(taxon_shortlist).index('Elasmobranchii')] = 1

## Define `dims` and `coords`

In [31]:
# some countries can be missing from importers or exporters
# indexing needs to take that into account
# if we used `factorize`, that would be ignored

country_to_idx_map = {country: index for index, country in enumerate(biggest_countries)}
shark_exporter_idx = tdata_cut_sharks["ISOex_i"].map(country_to_idx_map).to_numpy()
shark_importer_idx = tdata_cut_sharks["ISOim_j"].map(country_to_idx_map).to_numpy()
ray_exporter_idx = tdata_cut_rays["ISOex_i"].map(country_to_idx_map).to_numpy()
ray_importer_idx = tdata_cut_rays["ISOim_j"].map(country_to_idx_map).to_numpy()

# You have to be careful when creating shark_trade_matrix:
shark_trade_matrix = (
    tdata_cut_sharks[["ISOex_i", "ISOim_j", "estimated_live_weight"]]
    .set_index("ISOex_i")
    .pivot(columns="ISOim_j")
    .droplevel(0, axis="columns")
)

# Add missing exporters and importers
missing_col = []
for p in country_:
    if not p in shark_trade_matrix.columns.values:
        missing_col.append(p)
missing_col = np.array(missing_col)
shark_trade_matrix[missing_col] = np.NaN
shark_trade_matrix = shark_trade_matrix[country_]
missing_row = shark_trade_matrix.columns.difference(shark_trade_matrix.index)
shark_trade_matrix = shark_trade_matrix.T
shark_trade_matrix[missing_row] = np.NaN
shark_trade_matrix = shark_trade_matrix[country_]
shark_trade_matrix = shark_trade_matrix.T.sort_index().fillna(0)
shark_trade_mask = shark_trade_matrix.to_numpy()
shark_trade_mask[shark_trade_mask>0] = 1
# Add domestic consumption
np.fill_diagonal(shark_trade_mask,1)

# You have to be careful when creating ray_trade_matrix:
ray_trade_matrix = (
    tdata_cut_rays[["ISOex_i", "ISOim_j", "estimated_live_weight"]]
    .set_index("ISOex_i")
    .pivot(columns="ISOim_j")
    .droplevel(0, axis="columns")
)

# Add missing exporters and importers
#missing_col = ray_trade_matrix.index.difference(ray_trade_matrix.columns)
missing_col = []
for p in country_:
    if not p in ray_trade_matrix.columns.values:
        missing_col.append(p)
missing_col = np.array(missing_col)
ray_trade_matrix[missing_col] = np.NaN
ray_trade_matrix = ray_trade_matrix[country_]
missing_row = ray_trade_matrix.columns.difference(ray_trade_matrix.index)
ray_trade_matrix = ray_trade_matrix.T
ray_trade_matrix[missing_row] = np.NaN
ray_trade_matrix = ray_trade_matrix[country_]
ray_trade_matrix = ray_trade_matrix.T.sort_index().fillna(0)
ray_trade_mask = ray_trade_matrix.to_numpy()
ray_trade_mask[ray_trade_mask>0] = 1
# Add domestic consumption
np.fill_diagonal(ray_trade_mask,1)

# Species mask for possible trade (including domestic)
trade_mask = ray_trade_mask[:,None,:]*((group_=='rays')[None,:,None])+shark_trade_mask[:,None,:]*((group_=='sharks')[None,:,None])
trade_mask[trade_mask==0] = -999
trade_mask[trade_mask>0] = 0
# Remove species not used for meat
#trade_mask = trade_mask+meat_mask[None,:,None]
trade_mask[trade_mask<0] = -999

# Mask for trade softmax to zero out species with all -999
NoSPP_Mask = (((trade_mask==-999).sum(2)!=len(country_))*1)

# Mask for blue shark relative odds importer preferences
BSmask = np.zeros(shape=trade_mask[0].shape)
BSmask[list(species_).index('Prionace glauca')] = -999

# Better country labels
biggest_countries_long = kdata.country_name_abbreviation[
    [list(kdata.iso_3digit_alpha).index(x) for x in biggest_countries]
].to_numpy()

# Create matching tensor for priors
SppPRIORadj_idx = SppPRIORadj.copy()
# List of unique prior values
priors_ = list(np.sort(np.unique(SppPRIORadj)))
# Replace prior values with index to OddsCAT
for i in range(len(SppPRIORadj_idx)):
    SppPRIORadj_idx[i] = match(SppPRIORadj_idx[i],priors_)

COORDS = {
    "exporter": biggest_countries,
    "importer": biggest_countries,
    "shark_obs_idx": tdata_cut_sharks.index,
    "ray_obs_idx": tdata_cut_rays.index,
    "direction": ["exports", "imports"],
    "quantity": ["weight", "value"],
    "species": species_,
    "landing_country": country_,
    "taxon": taxon_shortlist,
    "year":year_,
    "OddsCAT":np.unique(SppPRIORadj).astype(str)
}
print("Data loaded!")

Data loaded!
