In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Load the per-match comparison table. This cell relies on the columns
# `relative_difference`, `copy_in` and `copy_from` being present.
df = pd.read_csv("matches_differences.csv")

# Magnitude of the relative difference, ignoring its sign.
df["abs_relative_difference"] = df["relative_difference"].abs()

# Label of the form "<copy_in> -> <copy_from>", built column-wise instead of
# through a row-by-row `apply`, which is the slow path in pandas.
df["copy_in_from"] = df["copy_in"].astype(str) + " -> " + df["copy_from"].astype(str)
df

In [None]:
import seaborn as sns

# `sns.set()` is only an alias of `set_theme()`, which is the documented
# entry point for applying the default seaborn theme.
sns.set_theme()

# Histogram of the signed relative differences lying strictly between -0.5
# and 0.5; values outside that open interval are left out of the plot.
# Filtering with a boolean mask keeps the data as a named Series, so the
# x-axis is labelled with the column name.
relative_difference = df["relative_difference"]
in_central_range = (relative_difference > -0.5) & (relative_difference < 0.5)
sns.histplot(relative_difference[in_central_range])

In [None]:
# Empirical cumulative distribution of `abs_relative_difference`, with one
# curve per `copy_in_from` label; the x-axis is limited to [0, 1].
fig, ax = plt.subplots()
sns.ecdfplot(data=df, x="abs_relative_difference", hue="copy_in_from", ax=ax)
ax.set_xlim(left=0, right=1)

In [None]:
# Relative differences ordered from the largest magnitude to the smallest,
# each value keeping its original sign.
df["relative_difference"].sort_values(key=abs, ascending=False)

In [None]:
import numpy as np

# Edges of the magnitude bins used to summarise `abs_relative_difference`.
steps = [0, 0.01, 0.1, 1, 10, np.inf]

# For every `copy_in_from` label, count how many matches fall into each bin.
for context_str in df["copy_in_from"].unique():
    sub_df = df[df["copy_in_from"] == context_str]
    abs_diff = sub_df["abs_relative_difference"]
    print(context_str)
    for low, high in zip(steps, steps[1:]):
        # Half-open bins [low, high): with strict bounds on both sides, values
        # lying exactly on an edge (notably exact matches at 0) were counted
        # in no bin at all. NaN and infinite values still fall outside every
        # bin.
        in_bin = (abs_diff >= low) & (abs_diff < high)
        print(f"{low} <= abs_relative_difference < {high} : {in_bin.sum()}")
    print("\n")

--------------
### Inspecting a match with a high relative difference to understand where it comes from

In [None]:
# Matches whose relative difference exceeds 1 in magnitude, largest first.
is_high = df["abs_relative_difference"] > 1
high_rel_diff = df[is_high].sort_values(by="abs_relative_difference", ascending=False)
high_rel_diff

In [None]:
import ast

import brightway2 as bw

bw.projects.set_current("EF calculation")

# Pick one row of `high_rel_diff` (the fourth one) as the match to inspect.
match_with_high_difference = high_rel_diff.iloc[3]

# The `original` and `copy` columns are assumed to hold activity keys written
# as plain Python literals (e.g. a tuple of strings) — TODO confirm against
# the script that produced the CSV. They are parsed with `ast.literal_eval`
# rather than `eval`, so that text coming from the CSV file can never be
# executed as code.
original = bw.get_activity(ast.literal_eval(match_with_high_difference["original"]))
copy = bw.get_activity(ast.literal_eval(match_with_high_difference["copy"]))
copy["name"]

Computing inventories for original and copy

In [None]:
# Impact assessment method applied to both activities.
method = ("IPCC 2013", "climate change", "GWP 100a")


def _solved_lca(activity):
    """Build an LCA for one unit of `activity` and run its `lci` and `lcia` steps."""
    lca = bw.LCA(demand={activity: 1}, method=method)
    lca.lci()
    lca.lcia()
    return lca


original_lca = _solved_lca(original)
copy_lca = _solved_lca(copy)

In [None]:
# Per-flow totals for the original activity: the "Amount" column of the LCA
# dataframe is summed over all rows sharing the same "Flow" value. The very
# large cutoff is presumably there to keep every row — TODO confirm.
original_rows = original_lca.to_dataframe(cutoff=1_000_000_000)
original_inventory = original_rows.groupby("Flow")["Amount"].sum()
original_inventory

In [None]:
# Per-flow totals for the copied activity: the "Amount" column of the LCA
# dataframe is summed over all rows sharing the same "Flow" value. The very
# large cutoff is presumably there to keep every row — TODO confirm.
copy_rows = copy_lca.to_dataframe(cutoff=1_000_000_000)
copy_inventory = copy_rows.groupby("Flow")["Amount"].sum()
copy_inventory

In [None]:
# Map the first field of every loaded method entry (the flow key) to its
# second field (the factor); any further fields are ignored.
characterization_factors = {
    flow_key: cf_value for flow_key, cf_value, *_ in bw.Method(method).load()
}

Comparing the original and the copy inventories flow by flow

In [None]:
from tqdm.notebook import tqdm

# One record per substance name, collected in a list and converted into a
# table once the loop is over.
comparison_rows = []
substances = set()
for key, factor in tqdm(characterization_factors.items()):
    flow = bw.get_activity(key)
    flow_name = flow["name"]

    # The same substance appears once per compartment; only its first
    # occurrence is kept, since GWP does not depend on the compartment.
    if flow_name in substances:
        continue
    substances.add(flow_name)

    # Flows missing from an inventory count as 0.
    original_impact = original_inventory.get(flow_name, 0)
    copy_impact = copy_inventory.get(flow_name, 0)

    comparison_rows.append(
        dict(
            flow=flow_name,
            CF=factor,
            original_impact=original_impact,
            copy_impact=copy_impact,
            # Dividing by the factor turns the inventory value back into an
            # amount of the substance.
            original_amount=original_impact / factor,
            copy_amount=copy_impact / factor,
        )
    )

flows_comparison = pd.DataFrame(comparison_rows)

In [None]:
# Signed gap per flow (original minus copy) and the same gap expressed as a
# fraction of the original value; rows are ordered by increasing gap.
flows_comparison = flows_comparison.assign(
    difference=lambda table: table["original_impact"] - table["copy_impact"],
    relative_difference=lambda table: table["difference"] / table["original_impact"],
).sort_values(by="difference")
flows_comparison