In [1]:
import csv
import pandas as pd
fp = "aware_cf_2.0.csv"

In [2]:
with open(fp) as f:
    reader = csv.reader(f)
    data = [row for row in reader]

In [3]:
cfs = {}

In [4]:
categories = data[0][2:]

In [5]:
for c, category in enumerate(categories):
    cfs[category] = []
    for row in data[1:]:
        val = row[c+2]
        
        if val == 0 or val=='':
            continue

        val = float(val)
            
        cfs[category].extend([
            {
                "supplier": {
                    "name": "Water", "categories": ('water',), "matrix": "biosphere"
                }, "consumer": {
                    "location": row[1], "matrix": "technosphere"
                }, "value": val * -1
            },
            {
                "supplier": {
                    "name": "Water", "categories":	('water', 'surface water'), "matrix": "biosphere"
                }, "consumer": {
                    "location": row[1], "matrix": "technosphere"
                }, "value": val * -1
            },
            {
                "supplier": {
                    "name": "Water", "categories":	('water', 'ground-'), "matrix": "biosphere"
                }, "consumer": {
                    "location": row[1], "matrix": "technosphere"
                }, "value": val * -1
            },
            {
                "supplier": {
                    "name": "Water, cooling, unspecified natural origin", "categories": ('natural resource', 'in water'), "matrix": "biosphere"
                }, "consumer": {
                    "location": row[1], "matrix": "technosphere"
                }, "value": val
            },
            {
                "supplier": {
                    "name": "Water, lake", "categories":	('natural resource', 'in water'), "matrix": "biosphere"
                }, "consumer": {
                    "location": row[1], "matrix": "technosphere"
                }, "value": val
            },
            {
                "supplier": {
                    "name": "Water, river", "categories":	('natural resource', 'in water'), "matrix": "biosphere"
                }, "consumer": {
                    "location": row[1], "matrix": "technosphere"
                }, "value": val
            },
            {
                "supplier": {
                    "name": "Water, turbine use, unspecified natural origin", "categories": 	('natural resource', 'in water'), "matrix": "biosphere"
                }, "consumer": {
                    "location": row[1], "matrix": "technosphere"
                }, "value": val
            },
            {
                "supplier": {
                    "name": "Water, unspecified natural origin", "categories": 	('natural resource', 'in water'), "matrix": "biosphere"
                }, "consumer": {
                    "location": row[1], "matrix": "technosphere"
                }, "value": val
            },
            {
                "supplier": {
                    "name": "Water, well, in ground", "categories": ('natural resource', 'in water'), "matrix": "biosphere"
                }, "consumer": {
                    "location": row[1], "matrix": "technosphere"
                }, "value": val
            },
        ])


In [6]:
pd.set_option('future.no_silent_downcasting', True)
worksheets = {
    "Agg_CF_irri": "pHWC_agri",
    "Agg_CF_non_irri": "pHWC_nonagri",
    "Agg_CF_unspecified": "pHWC_all",
}

column_names = {
    "yearly":"Annual",
    "jan":"Jan",
    "feb":"Feb",
    "mar":"Mar",
    "apr":"Apr",
    "may":"May",
    "jun":"Jun",
    "jul":"Jul",
    "aug":"Aug",
    "sep":"Sep",
    "oct":"Oct",
    "nov":"Nov",
    "dec":"Dec",
}

for category, cf in cfs.items():

    sheet_name = [v for k, v in worksheets.items() if category.startswith(k)][0]

    # read Excel, but ignore 5 first columns
    weight = pd.read_excel(
        "AWARE20_Countries_and_Regions.xlsx",
        sheet_name=sheet_name,
        usecols=[i for i in range(5, 19)],
    )

    weight.set_index("ecoinvent_shortname", inplace=True)

    weight = weight.replace("NotDefined", 0.0)

    col_name = column_names[category.split("_")[-1]]
    weight = weight.loc[:, col_name]

    # turn dataframe to dictionary, using index as keys
    water_use_dict = weight.to_dict()

    for c in cf:
        c["weight"] = water_use_dict.get(c["consumer"]["location"], 0)


In [7]:
# save each category as a separate json file
import json

for category in categories:
    with open(f"/Users/romain/GitHub/edges/edges/data/AWARE 2.0_{category.replace('_CF', '').replace('Agg', 'Country')}.json", "w") as f:
        print(f"Saving {category.replace('_CF', '').replace('Agg', 'Country')}")
        json.dump(cfs[category], f)

Saving Country_irri_yearly
Saving Country_non_irri_yearly
Saving Country_unspecified_yearly
Saving Country_irri_jan
Saving Country_irri_feb
Saving Country_irri_mar
Saving Country_irri_apr
Saving Country_irri_may
Saving Country_irri_jun
Saving Country_irri_jul
Saving Country_irri_aug
Saving Country_irri_sep
Saving Country_irri_oct
Saving Country_irri_nov
Saving Country_irri_dec
Saving Country_non_irri_jan
Saving Country_non_irri_feb
Saving Country_non_irri_mar
Saving Country_non_irri_apr
Saving Country_non_irri_may
Saving Country_non_irri_jun
Saving Country_non_irri_jul
Saving Country_non_irri_aug
Saving Country_non_irri_sep
Saving Country_non_irri_oct
Saving Country_non_irri_nov
Saving Country_non_irri_dec
Saving Country_unspecified_jan
Saving Country_unspecified_feb
Saving Country_unspecified_mar
Saving Country_unspecified_apr
Saving Country_unspecified_may
Saving Country_unspecified_jun
Saving Country_unspecified_jul
Saving Country_unspecified_aug
Saving Country_unspecified_sep
Savin

In [8]:
# load uncertainty distributions
df_uncertainty = pd.read_csv("distribution_summary_per_country_per_type.csv")

# Initialize the output dictionary
country_distributions = {}

for _, row in df_uncertainty.iterrows():
    country = row["country"]
    water_type = row["type"]
    dist = row["distribution"]

    # Gather the parameters depending on distribution type
    if dist == "uniform":
        params = {
            "minimum": row["minimum"],
            "maximum": row["maximum"]
        }
    elif dist == "discrete_empirical":
        params = {
        "values": [round(v, 3) for v in eval(row["values"])],
        "weights": [round(w, 3) for w in eval(row["weights"])]
    }
    else:
        # Continuous distributions
        params={}
        for p in ["shape_a", "shape_b", "loc", "scale", "minimum", "maximum"]:
            if not pd.isna(row[p]):
                params[p] = row[p]

    # Create inner dict
    entry = {
        "distribution": dist,
        "parameters": params
    }

    # Store under country → type → entry
    if country not in country_distributions:
        country_distributions[country] = {}
    country_distributions[country][water_type] = entry


In [10]:
yearly_irri = cfs['Agg_CF_irri_yearly']
yearly_non_irri = cfs['Agg_CF_non_irri_yearly']
yearly_unspecified = cfs['Agg_CF_unspecified_yearly']

In [13]:
country_distributions

{'DK': {'unspecified': {'distribution': 'uniform',
   'parameters': {'minimum': 1.22, 'maximum': 4.21}},
  'agri': {'distribution': 'uniform',
   'parameters': {'minimum': 1.48, 'maximum': 5.69}},
  'nonagri': {'distribution': 'uniform',
   'parameters': {'minimum': 0.918, 'maximum': 3.66}}},
 'RNA': {'unspecified': {'distribution': 'gamma',
   'parameters': {'shape_a': 0.4569288011616458,
    'loc': 0.2479999999999999,
    'scale': 57.02527580088008,
    'minimum': 0.1,
    'maximum': 100.0}},
  'agri': {'distribution': 'gamma',
   'parameters': {'shape_a': 0.8036948661610195,
    'loc': 0.2409999999999999,
    'scale': 22.108330565140815,
    'minimum': 0.1,
    'maximum': 100.0}},
  'nonagri': {'distribution': 'lognorm',
   'parameters': {'shape_a': 1.723174634394438,
    'loc': 0.274618403281136,
    'scale': 1.9676645045265435,
    'minimum': 0.1,
    'maximum': 100.0}}},
 'UN-AMERICAS': {'unspecified': {'distribution': 'lognorm',
   'parameters': {'shape_a': 2.101654126198027,
  

In [14]:
import copy

for cf in yearly_irri:
    cf["consumer"]["classifications"] = {
        "CPC": [
            '01',
        ]
    }
    if cf["consumer"]["location"] in country_distributions:
        country = cf["consumer"]["location"]
        if "agri" in country_distributions[country]:
            cf["uncertainty"] = copy.deepcopy(country_distributions[country]["agri"])

for cf in yearly_irri:
    if "uncertainty" in cf:
        cf["uncertainty"]["negative"] = int(float(cf["value"]) < 0)
        

for cf in yearly_non_irri:
    cf["consumer"]["classifications"] = {
        "CPC": [
            '02',
            '03',
            '04',
            '1',
            '2',
            '3',
            '4',
            '5',
            '6',
            '7',
            '8',
            '9',
        ]
    }
    if cf["consumer"]["location"] in country_distributions:
        country = cf["consumer"]["location"]
        if "nonagri" in country_distributions[country]:
            cf["uncertainty"] = copy.deepcopy(country_distributions[country]["nonagri"])
        
for cf in yearly_non_irri:
    if "uncertainty" in cf:
        cf["uncertainty"]["negative"] = int(float(cf["value"]) < 0)

for cf in yearly_unspecified:
    if cf["consumer"]["location"] in country_distributions:
        country = cf["consumer"]["location"]
        if "unspecified" in country_distributions[country]:
            cf["uncertainty"] = copy.deepcopy(country_distributions[country]["unspecified"])

for cf in yearly_unspecified:
    if "uncertainty" in cf:
        cf["uncertainty"]["negative"] = int(float(cf["value"]) < 0)

In [15]:
# save each category as a separate json file
import json


with open(f"/Users/romain/GitHub/edges/edges/data/AWARE 2.0_Country_all_yearly.json", "w") as f:
    json.dump(yearly_irri + yearly_non_irri + yearly_unspecified, f, indent=3)

In [16]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import beta
import scipy

# Parameters from your data
shape = 0.3366060121498899
loc = 4.45
scale = 1062623251.2184404

# Define the distribution
dist = scipy.stats.beta(a=shape1, b=shape2, loc=loc, scale=scale)


# Create x range (adjust upper limit based on shape and scale)
x = np.linspace(loc, dist.ppf(0.99), 500)  # up to 99th percentile

# Plot
plt.figure(figsize=(8, 4))
plt.plot(x, dist.pdf(x), 'r-', lw=2, label='Gamma PDF')
plt.title('Gamma Distribution')
plt.xlabel('x')
plt.ylabel('Probability Density')
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()


NameError: name 'shape1' is not defined