1. Load the official country-specific AWARE 2.0 characterization factors (CFs)

In [1]:
import csv
import pandas as pd
# Path to the CSV file holding the country-level AWARE 2.0 CFs.
fp = "aware_cf_2.0.csv"

# The csv module expects files opened with newline="" so that line breaks
# embedded in quoted fields are handled by the parser rather than being
# translated by the file object.
with open(fp, newline="") as f:
    reader = csv.reader(f)
    # All rows (header row included) as lists of strings.
    data = list(reader)

2. Build a dictionary of CFs

In [2]:
# Names of the CF categories handled by this notebook; their position in the
# list determines which CSV column is read for each of them further down.
categories = [
    f"Agg_CF_{water_use}_yearly"
    for water_use in ("irri", "non_irri", "unspecified")
]

# Will map each category name to its list of CF entries.
cfs = dict()

In [3]:
# Every biosphere water flow that receives a CF, as
# (flow name, flow categories, sign applied to the CF value).
# Flows in the "water" compartments get the negated value (sign -1); flows in
# ("natural resource", "in water") get the value unchanged (sign +1).
WATER_FLOWS = [
    ("Water", ("water",), -1),
    ("Water", ("water", "surface water"), -1),
    ("Water", ("water", "ground-"), -1),
    ("Water, cooling, unspecified natural origin", ("natural resource", "in water"), 1),
    ("Water, lake", ("natural resource", "in water"), 1),
    ("Water, river", ("natural resource", "in water"), 1),
    ("Water, turbine use, unspecified natural origin", ("natural resource", "in water"), 1),
    ("Water, unspecified natural origin", ("natural resource", "in water"), 1),
    ("Water, well, in ground", ("natural resource", "in water"), 1),
]

for c, category in enumerate(categories):
    cfs[category] = []
    # data[0] is the header row; the CF of the c-th category sits in column c + 2
    # and the location code in column 1.
    for row in data[1:]:
        val = row[c + 2]

        # Empty cell: no CF for this location and category.
        if val == '':
            continue

        # csv.reader yields strings, so the zero test has to be done on the
        # parsed number (comparing the raw string with 0 never matches).
        val = float(val)
        if val == 0:
            continue

        for flow_name, flow_categories, sign in WATER_FLOWS:
            # A fresh supplier/consumer dict is built for every entry because
            # later cells mutate these dicts individually.
            cfs[category].append(
                {
                    "supplier": {
                        "name": flow_name,
                        "categories": flow_categories,
                        "matrix": "biosphere",
                    },
                    "consumer": {
                        "location": row[1],
                        "matrix": "technosphere",
                    },
                    "value": val * sign,
                }
            )


3. Add weights (region-specific annual consumption) to each CF

In [4]:
# Enable pandas' "future.no_silent_downcasting" option before `replace` is used below.
pd.set_option('future.no_silent_downcasting', True)

# CF category prefix -> worksheet holding the matching consumption figures.
worksheets = dict(
    Agg_CF_irri="pHWC_agri",
    Agg_CF_non_irri="pHWC_nonagri",
    Agg_CF_unspecified="pHWC_all",
)

# Category suffix -> worksheet column to use as the weight.
column_names = dict(yearly="Annual")

for category, cf in cfs.items():
    # First worksheet whose prefix matches the category name.
    matching_sheets = [
        sheet for prefix, sheet in worksheets.items() if category.startswith(prefix)
    ]
    sheet_name = matching_sheets[0]

    # The last "_"-separated token of the category selects the column.
    period = category.split("_")[-1]
    col_name = column_names[period]

    # Read columns 5..18 only (the first five columns are ignored), index by
    # location short name, turn "NotDefined" cells into 0.0 and keep one column.
    # NOTE(review): with pandas' default NA handling a short name spelled "NA"
    # would be read as NaN and never match a location - confirm against the workbook.
    weight = (
        pd.read_excel(
            "AWARE20_Countries_and_Regions.xlsx",
            sheet_name=sheet_name,
            usecols=list(range(5, 19)),
        )
        .set_index("ecoinvent_shortname")
        .replace("NotDefined", 0.0)
        .loc[:, col_name]
    )

    # location short name -> weight
    water_use_dict = weight.to_dict()

    # Locations absent from the worksheet get a weight of 0.
    for c in cf:
        c["weight"] = water_use_dict.get(c["consumer"]["location"], 0)


4. Load uncertainty data (see uncertainty.ipynb)

In [5]:
import ast

# Load the uncertainty distributions (one row per country and water-use type).
# NOTE(review): with pandas' default NA handling a country code spelled "NA"
# would be read as NaN and never match a CF location - confirm against the CSV.
df_uncertainty = pd.read_csv("distribution_summary_per_country_per_type.csv")

# Output structure: country -> water-use type -> {"distribution", "parameters"}
country_distributions = {}

# Parameter columns that may be filled in for continuous distributions.
CONTINUOUS_PARAMETERS = ["shape_a", "shape_b", "loc", "scale", "minimum", "maximum"]

for _, row in df_uncertainty.iterrows():
    country = row["country"]
    water_type = row["type"]
    dist = row["distribution"]

    # Gather the parameters depending on the distribution type.
    if dist == "uniform":
        params = {
            "minimum": row["minimum"],
            "maximum": row["maximum"],
        }
    elif dist == "discrete_empirical":
        # "values" and "weights" hold list literals stored as text.
        # ast.literal_eval parses them without executing arbitrary code
        # (it raises ValueError/SyntaxError on anything that is not a literal).
        params = {
            "values": [round(v, 3) for v in ast.literal_eval(row["values"])],
            "weights": [round(w, 3) for w in ast.literal_eval(row["weights"])],
        }
    else:
        # Continuous distributions: keep only the parameters that are filled in.
        params = {
            p: row[p] for p in CONTINUOUS_PARAMETERS if not pd.isna(row[p])
        }

    entry = {
        "distribution": dist,
        "parameters": params,
    }

    # Store under country -> type -> entry, creating the country level on first use.
    country_distributions.setdefault(country, {})[water_type] = entry


In [6]:
import copy
import numpy as np
from pprint import pprint

# CF category -> water-use type key used in `country_distributions`.
mapping = {
    "Agg_CF_irri_yearly": "agri",
    "Agg_CF_non_irri_yearly": "nonagri",
    "Agg_CF_unspecified_yearly": "unspecified",
}

# Attach an uncertainty distribution to every CF entry for which one exists.
for category, cf in cfs.items():
    for c in cf:
        location = c["consumer"]["location"]
        if location not in country_distributions:
            continue

        country = location
        water_type = mapping[category]
        if water_type not in country_distributions[country]:
            continue

        uncertainty = country_distributions[country][water_type]

        if uncertainty["distribution"] == "discrete_empirical":
            # Compare the weighted mean of the distribution with the CF value
            # and leave the entry without uncertainty when the mean falls
            # below 0.2x or above 1.2x the absolute CF value.
            # NOTE(review): the band is asymmetric (0.2 / 1.2) - confirm it is intended.
            vals = np.array(uncertainty["parameters"]["values"])
            weights = np.array(uncertainty["parameters"]["weights"])
            average = np.average(vals, weights=weights)
            reference = abs(c["value"])
            if abs(average) < (reference * 0.2) or abs(average) > (reference * 1.2):
                continue

        # Each entry gets its own copy so later edits do not leak between entries.
        c["uncertainty"] = copy.deepcopy(uncertainty)

5. Save each category as a separate json file

In [7]:
# Write one JSON method file per CF category.
import json

for category in categories:
    # e.g. "Agg_CF_irri_yearly" -> "Country_irri_yearly"
    method_name = category.replace('_CF', '').replace('Agg', 'Country')

    with open(f"/Users/romain/GitHub/edges/edges/data/AWARE 2.0_{method_name}.json", "w") as f:
        print(f"Saving {method_name}")
        method = {
            "name": method_name,
            "unit": "m3 deprived water-eq.",
            "version": "2.0",
            "exchanges": cfs[category],
        }
        json.dump(method, f, indent=3)

Saving Country_irri_yearly
Saving Country_non_irri_yearly
Saving Country_unspecified_yearly


6. Build a method file that combines agricultural, non-agricultural and unspecified water use

In [8]:
# Short aliases for the three per-category CF lists (same list objects as in `cfs`).
yearly_irri, yearly_non_irri, yearly_unspecified = [
    cfs[key]
    for key in (
        'Agg_CF_irri_yearly',
        'Agg_CF_non_irri_yearly',
        'Agg_CF_unspecified_yearly',
    )
]

In [9]:
import copy

# Tolerance band for discrete empirical distributions: the distribution is
# only (re)attached when its weighted mean lies within 0.9x-1.1x of the
# absolute CF value.
_DISCRETE_LOWER_RATIO = 0.9
_DISCRETE_UPPER_RATIO = 1.1

# CPC classification codes written on the consumer side of the entries.
_CPC_IRRI = ['01']
_CPC_NON_IRRI = ['02', '03', '04', '1', '2', '3', '4', '5', '6', '7', '8', '9']


def _attach_uncertainty(cf, water_type):
    """Attach the matching uncertainty distribution to one CF entry.

    Looks up `country_distributions[location][water_type]` for the entry's
    consumer location. Nothing happens when no distribution is defined, or
    when a discrete empirical distribution has a weighted mean outside the
    tolerance band around the entry's own value; in both cases any
    uncertainty already present on the entry is left untouched.

    :param cf: CF entry (dict with "consumer", "value", ...), modified in place.
    :param water_type: key of the water-use type in `country_distributions`.
    """
    country = cf["consumer"]["location"]
    uncertainty = country_distributions.get(country, {}).get(water_type)
    if uncertainty is None:
        return

    if uncertainty["distribution"] == "discrete_empirical":
        vals = np.array(uncertainty["parameters"]["values"])
        weights = np.array(uncertainty["parameters"]["weights"])
        average = np.average(vals, weights=weights)
        # The comparison must use the entry being processed (`cf`), not a
        # variable left over from a previous cell.
        reference = abs(cf["value"])
        if (
            abs(average) < reference * _DISCRETE_LOWER_RATIO
            or abs(average) > reference * _DISCRETE_UPPER_RATIO
        ):
            return

    # Own copy per entry: the "negative" flag set below is entry-specific.
    cf["uncertainty"] = copy.deepcopy(uncertainty)


# (entries, category key in `mapping`, CPC codes or None when no
# classification is written on the consumer).
_METHOD_PARTS = (
    (yearly_irri, "Agg_CF_irri_yearly", _CPC_IRRI),
    (yearly_non_irri, "Agg_CF_non_irri_yearly", _CPC_NON_IRRI),
    (yearly_unspecified, "Agg_CF_unspecified_yearly", None),
)

for entries, category_key, cpc_codes in _METHOD_PARTS:
    for cf in entries:
        if cpc_codes is not None:
            # Fresh list per entry so entries never share a mutable object.
            cf["consumer"]["classifications"] = {"CPC": list(cpc_codes)}

        _attach_uncertainty(cf, mapping[category_key])

        # Record the sign of the CF next to its distribution (1 if negative, else 0).
        if "uncertainty" in cf:
            cf["uncertainty"]["negative"] = int(float(cf["value"]) < 0)

In [10]:
# Save the irrigation, non-irrigation and unspecified CF lists, concatenated
# in that order, as a single combined JSON method file.
import json


with open("/Users/romain/GitHub/edges/edges/data/AWARE 2.0_Country_all_yearly.json", "w") as f:
    combined_method = {
        "name": "AWARE 2.0_Country_all_yearly",
        "unit": "m3 deprived water-eq.",
        "version": "2.0",
        "exchanges": yearly_irri + yearly_non_irri + yearly_unspecified,
    }
    json.dump(combined_method, f, indent=3)