In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [2]:
# Load the food-production dataset from the Dataset folder (path is relative to the notebook).
food_csv = "Dataset/Food_Production.csv"
law = pd.read_csv(food_csv)

In [3]:
# Preview the first ten rows of the freshly loaded frame.
law.head(n = 10)

Unnamed: 0,Food product,Land use change,Animal Feed,Farm,Processing,Transport,Packging,Retail,Total_emissions,Eutrophying emissions per 1000kcal (gPO₄eq per 1000kcal),...,Freshwater withdrawals per 100g protein (liters per 100g protein),Freshwater withdrawals per kilogram (liters per kilogram),Greenhouse gas emissions per 1000kcal (kgCO₂eq per 1000kcal),Greenhouse gas emissions per 100g protein (kgCO₂eq per 100g protein),Land use per 1000kcal (m² per 1000kcal),Land use per kilogram (m² per kilogram),Land use per 100g protein (m² per 100g protein),Scarcity-weighted water use per kilogram (liters per kilogram),Scarcity-weighted water use per 100g protein (liters per 100g protein),Scarcity-weighted water use per 1000kcal (liters per 1000 kilocalories)
0,Wheat & Rye (Bread),0.1,0.0,0.8,0.2,0.1,0.1,0.1,1.4,,...,,,,,,,,,,
1,Maize (Meal),0.3,0.0,0.5,0.1,0.1,0.1,0.0,1.1,,...,,,,,,,,,,
2,Barley (Beer),0.0,0.0,0.2,0.1,0.0,0.5,0.3,1.1,,...,,,,,,,,,,
3,Oatmeal,0.0,0.0,1.4,0.0,0.1,0.1,0.0,1.6,4.281357,...,371.076923,482.4,0.945482,1.907692,2.897446,7.6,5.846154,18786.2,14450.92308,7162.104461
4,Rice,0.0,0.0,3.6,0.1,0.1,0.1,0.1,4.0,9.514379,...,3166.760563,2248.4,1.207271,6.267606,0.759631,2.8,3.943662,49576.3,69825.77465,13449.89148
5,Potatoes,0.0,0.0,0.2,0.0,0.1,0.0,0.0,0.3,4.754098,...,347.647059,59.1,0.628415,2.705882,1.202186,0.88,5.176471,2754.2,16201.17647,3762.568306
6,Cassava,0.6,0.0,0.2,0.0,0.1,0.0,0.0,0.9,0.708419,...,,0.0,1.355236,14.666667,1.858316,1.81,20.111111,0.0,,
7,Cane Sugar,1.2,0.0,0.5,0.0,0.8,0.1,0.0,2.6,4.820513,...,,620.1,0.911681,,0.581197,2.04,,16438.6,,4683.361823
8,Beet Sugar,0.0,0.0,0.5,0.2,0.6,0.1,0.0,1.4,1.541311,...,,217.7,0.51567,,0.521368,1.83,,9493.3,,2704.643875
9,Other Pulses,0.0,0.0,1.1,0.0,0.1,0.4,0.0,1.6,5.008798,...,203.503036,435.7,0.524927,0.836058,4.565982,15.57,7.272303,22477.4,10498.55208,


In [4]:
# (rows, columns) of the loaded frame.
law.shape

(43, 23)

In [5]:
# Count the missing values in every column before any cleaning.
law.isnull().sum()

Food product                                                                0
Land use change                                                             0
Animal Feed                                                                 0
Farm                                                                        0
Processing                                                                  0
Transport                                                                   0
Packging                                                                    0
Retail                                                                      0
Total_emissions                                                             0
Eutrophying emissions per 1000kcal (gPO₄eq per 1000kcal)                   10
Eutrophying emissions per kilogram (gPO₄eq per kilogram)                    5
Eutrophying emissions per 100g protein (gPO₄eq per 100 grams protein)      16
Freshwater withdrawals per 1000kcal (liters per 1000kcal)       

Cleaning Dataset
- changing long column names
- filling null values 
- replacing spaces in column names with underscore (_)
- replacing longer names with shorter ones for easier analysis

In [21]:
# Fix the misspelled "Packging" column.
# Both spellings are mapped so this cell works whether it runs before or after
# the column names are lower-cased: rename() ignores keys that are not present,
# and the lower-case target is left unchanged by the later normalisation step.
# The result is re-bound instead of using inplace so the cell is safe to re-run.
law = law.rename(columns = {"Packging": "packaging", "packging": "packaging"})

Dealing with null values: use KNN imputation, so that missing entries are estimated from similar rows rather than filled with a single flat value.

In [7]:
from sklearn.impute import KNNImputer

In [8]:
# KNN imputation works on numbers only, so isolate the float64/int64 columns.
numeric_dtypes = ["float64", "int64"]
numeric_law = law.select_dtypes(include = numeric_dtypes)

In [9]:
# Estimate each missing value from the 5 nearest rows (n_neighbors can be tuned, e.g., 3–7).
# NOTE(review): the columns are not rescaled before imputation, so columns with
# large magnitudes presumably dominate the neighbour distance — consider scaling; confirm.
imputer = KNNImputer(n_neighbors = 5)
imputer.fit(numeric_law)
numeric_imputed = imputer.transform(numeric_law)

In [10]:
# Wrap the imputed array in a DataFrame with the original column names.
# The original index is reused as well: the later column assignment back into
# `law` aligns on index labels, so a fresh 0..n-1 index would introduce NaNs
# whenever `law` does not carry the default RangeIndex (e.g. after filtering).
numeric_imputed_law = pd.DataFrame(
    numeric_imputed,
    columns = numeric_law.columns,
    index = numeric_law.index,
)

In [11]:
# Overwrite the numeric columns of the original frame with their imputed versions.
imputed_cols = numeric_law.columns
law[imputed_cols] = numeric_imputed_law

In [12]:
# Re-count missing values to confirm the imputation left no gaps.
law.isnull().sum()

Food product                                                               0
Land use change                                                            0
Animal Feed                                                                0
Farm                                                                       0
Processing                                                                 0
Transport                                                                  0
Packging                                                                   0
Retail                                                                     0
Total_emissions                                                            0
Eutrophying emissions per 1000kcal (gPO₄eq per 1000kcal)                   0
Eutrophying emissions per kilogram (gPO₄eq per kilogram)                   0
Eutrophying emissions per 100g protein (gPO₄eq per 100 grams protein)      0
Freshwater withdrawals per 1000kcal (liters per 1000kcal)                  0

In [13]:
# Normalise the headers step by step:
#   1. trim surrounding whitespace and lower-case,
#   2. turn spaces into underscores,
#   3. drop every character that is neither a word character nor whitespace.
normalised_cols = law.columns.str.strip().str.lower()
normalised_cols = normalised_cols.str.replace(" ", "_")
normalised_cols = normalised_cols.str.replace(r"[^\w\s]", "", regex = True)
law.columns = normalised_cols

In [20]:
# Preview the first ten rows after cleaning.
law.head(n = 10)

Unnamed: 0,food_product,land_use_change,animal_feed,farm,processing,transport,packaging,retail,total_emissions,eutrophying_per_1000kcal,...,water_per_100g_protein,water_per_kg,GHG_per_1000kcal,GHG_per_100g_protein,land_use_per_1000kcal,land_use_per_kg,land_use_per_100g_protein,scarcity_water_per_kg,scarcity_water_per_100g_protein,scarcity_water_per_1000kcal
0,Wheat & Rye (Bread),0.1,0.0,0.8,0.2,0.1,0.1,0.1,1.4,11.797335,...,1662.177198,367.88,3.001609,7.496959,3.136403,5.614,9.569704,17283.34,59530.957318,15235.807563
1,Maize (Meal),0.3,0.0,0.5,0.1,0.1,0.1,0.0,1.1,11.725664,...,2018.79656,267.9,3.351203,11.792653,3.13393,2.674,15.257124,12795.82,71384.860265,23750.56314
2,Barley (Beer),0.0,0.0,0.2,0.1,0.0,0.5,0.3,1.1,4.128663,...,2018.79656,158.86,1.254337,11.792653,2.396098,1.534,15.257124,6692.4,71384.860265,23750.56314
3,Oatmeal,0.0,0.0,1.4,0.0,0.1,0.1,0.0,1.6,4.281357,...,371.076923,482.4,0.945482,1.907692,2.897446,7.6,5.846154,18786.2,14450.92308,7162.104461
4,Rice,0.0,0.0,3.6,0.1,0.1,0.1,0.1,4.0,9.514379,...,3166.760563,2248.4,1.207271,6.267606,0.759631,2.8,3.943662,49576.3,69825.77465,13449.89148
5,Potatoes,0.0,0.0,0.2,0.0,0.1,0.0,0.0,0.3,4.754098,...,347.647059,59.1,0.628415,2.705882,1.202186,0.88,5.176471,2754.2,16201.17647,3762.568306
6,Cassava,0.6,0.0,0.2,0.0,0.1,0.0,0.0,0.9,0.708419,...,409.248856,0.0,1.355236,14.666667,1.858316,1.81,20.111111,0.0,8087.620337,1347.863535
7,Cane Sugar,1.2,0.0,0.5,0.0,0.8,0.1,0.0,2.6,4.820513,...,330.953646,620.1,0.911681,2.618427,0.581197,2.04,5.836381,16438.6,12383.949651,4683.361823
8,Beet Sugar,0.0,0.0,0.5,0.2,0.6,0.1,0.0,1.4,1.541311,...,440.781883,217.7,0.51567,21.97013,0.521368,1.83,31.29038,9493.3,9321.332348,2704.643875
9,Other Pulses,0.0,0.0,1.1,0.0,0.1,0.4,0.0,1.6,5.008798,...,203.503036,435.7,0.524927,0.836058,4.565982,15.57,7.272303,22477.4,10498.55208,6639.112895


In [15]:
# Short aliases for the long normalised headers, grouped by metric family.
ghg_names = {
    "greenhouse_gas_emissions_per_1000kcal_kgco₂eq_per_1000kcal": "GHG_per_1000kcal",
    "greenhouse_gas_emissions_per_100g_protein_kgco₂eq_per_100g_protein": "GHG_per_100g_protein",
}
land_names = {
    "land_use_per_1000kcal_m²_per_1000kcal": "land_use_per_1000kcal",
    "land_use_per_kilogram_m²_per_kilogram": "land_use_per_kg",
    "land_use_per_100g_protein_m²_per_100g_protein": "land_use_per_100g_protein",
}
water_names = {
    "freshwater_withdrawals_per_100g_protein_liters_per_100g_protein": "water_per_100g_protein",
    "freshwater_withdrawals_per_kilogram_liters_per_kilogram": "water_per_kg",
    "freshwater_withdrawals_per_1000kcal_liters_per_1000kcal": "water_per_1000kcal",
}
scarcity_names = {
    "scarcityweighted_water_use_per_kilogram_liters_per_kilogram": "scarcity_water_per_kg",
    "scarcityweighted_water_use_per_100g_protein_liters_per_100g_protein": "scarcity_water_per_100g_protein",
    "scarcityweighted_water_use_per_1000kcal_liters_per_1000_kilocalories": "scarcity_water_per_1000kcal",
}
eutrophying_names = {
    "eutrophying_emissions_per_1000kcal_gpo₄eq_per_1000kcal": "eutrophying_per_1000kcal",
    "eutrophying_emissions_per_kilogram_gpo₄eq_per_kilogram": "eutrophying_per_kg",
    "eutrophying_emissions_per_100g_protein_gpo₄eq_per_100_grams_protein": "eutrophying_per_100g_protein",
}

# Merge the groups into the single mapping used for renaming (insertion order kept).
rename_map = {
    **ghg_names,
    **land_names,
    **water_names,
    **scarcity_names,
    **eutrophying_names,
}



In [16]:
# Swap the verbose headers for the short aliases defined in rename_map.
law = law.rename(rename_map, axis = "columns")

In [22]:
# Preview the first ten rows with the shortened column names.
law.head(n = 10)

Unnamed: 0,food_product,land_use_change,animal_feed,farm,processing,transport,packaging,retail,total_emissions,eutrophying_per_1000kcal,...,water_per_100g_protein,water_per_kg,GHG_per_1000kcal,GHG_per_100g_protein,land_use_per_1000kcal,land_use_per_kg,land_use_per_100g_protein,scarcity_water_per_kg,scarcity_water_per_100g_protein,scarcity_water_per_1000kcal
0,Wheat & Rye (Bread),0.1,0.0,0.8,0.2,0.1,0.1,0.1,1.4,11.797335,...,1662.177198,367.88,3.001609,7.496959,3.136403,5.614,9.569704,17283.34,59530.957318,15235.807563
1,Maize (Meal),0.3,0.0,0.5,0.1,0.1,0.1,0.0,1.1,11.725664,...,2018.79656,267.9,3.351203,11.792653,3.13393,2.674,15.257124,12795.82,71384.860265,23750.56314
2,Barley (Beer),0.0,0.0,0.2,0.1,0.0,0.5,0.3,1.1,4.128663,...,2018.79656,158.86,1.254337,11.792653,2.396098,1.534,15.257124,6692.4,71384.860265,23750.56314
3,Oatmeal,0.0,0.0,1.4,0.0,0.1,0.1,0.0,1.6,4.281357,...,371.076923,482.4,0.945482,1.907692,2.897446,7.6,5.846154,18786.2,14450.92308,7162.104461
4,Rice,0.0,0.0,3.6,0.1,0.1,0.1,0.1,4.0,9.514379,...,3166.760563,2248.4,1.207271,6.267606,0.759631,2.8,3.943662,49576.3,69825.77465,13449.89148
5,Potatoes,0.0,0.0,0.2,0.0,0.1,0.0,0.0,0.3,4.754098,...,347.647059,59.1,0.628415,2.705882,1.202186,0.88,5.176471,2754.2,16201.17647,3762.568306
6,Cassava,0.6,0.0,0.2,0.0,0.1,0.0,0.0,0.9,0.708419,...,409.248856,0.0,1.355236,14.666667,1.858316,1.81,20.111111,0.0,8087.620337,1347.863535
7,Cane Sugar,1.2,0.0,0.5,0.0,0.8,0.1,0.0,2.6,4.820513,...,330.953646,620.1,0.911681,2.618427,0.581197,2.04,5.836381,16438.6,12383.949651,4683.361823
8,Beet Sugar,0.0,0.0,0.5,0.2,0.6,0.1,0.0,1.4,1.541311,...,440.781883,217.7,0.51567,21.97013,0.521368,1.83,31.29038,9493.3,9321.332348,2704.643875
9,Other Pulses,0.0,0.0,1.1,0.0,0.1,0.4,0.0,1.6,5.008798,...,203.503036,435.7,0.524927,0.836058,4.565982,15.57,7.272303,22477.4,10498.55208,6639.112895


Q1
- Which foods have the highest greenhouse gas emissions per 100g of protein?

In [22]:
# Five rows with the largest GHG emissions per 100g of protein, highest first.
top_ghg = (
    law
    .sort_values("GHG_per_100g_protein", ascending = False)
    .head(5)
)

In [23]:
# Round to two decimals so the bar labels stay readable.
top_ghg = top_ghg.assign(
    GHG_per_100g_protein = top_ghg["GHG_per_100g_protein"].round(2)
)

In [25]:
# Bar chart of the five foods in top_ghg, coloured and labelled by their GHG value.
ghg_col = "GHG_per_100g_protein"
ghg_label = "GHG per 100g protein (kgCO2eq)"

fig = px.bar(
    data_frame = top_ghg,
    x = "food_product",
    y = ghg_col,
    text = ghg_col,  # print the value on each bar
    color = ghg_col,
    color_continuous_scale = "Reds",
    labels = {ghg_col: ghg_label, "food_product": "Food Product"},
    title = "Top 5 Foods by GHG Emissions per 100g Protein",
)

# The values are printed on the bars, so the y tick labels and the colour bar are hidden.
fig.update_layout(
    template = "plotly_white",
    width = 700,
    height = 500,
    coloraxis_showscale = False,
    xaxis_title = "Food Product",
    yaxis_title = ghg_label,
    yaxis = {"showticklabels": False},
)
fig.show()

Q2
- How does land use differ between plant-based and animal-based foods?

In [81]:
# Classify products by the short list of animal-derived foods and treat
# everything else as plant-based. Enumerating plants instead silently labelled
# every unlisted crop (e.g. Cane Sugar, Beet Sugar, Coffee, Olive Oil) as
# "Animal", which distorted the plant-vs-animal comparison.
# NOTE(review): the names are assumed to match law["food_product"] exactly —
# verify against law["food_product"].unique().
animal_foods = [
    "Beef (beef herd)",
    "Beef (dairy herd)",
    "Lamb & Mutton",
    "Pig Meat",
    "Poultry Meat",
    "Milk",
    "Cheese",
    "Eggs",
    "Fish (farmed)",
    "Shrimps (farmed)",
]
is_animal = law["food_product"].isin(animal_foods)

# Kept for backward compatibility: every product not flagged as animal-derived.
plant_foods = law.loc[~is_animal, "food_product"].tolist()
law["food_type"] = is_animal.map({True: "Animal", False: "Plant"})

In [82]:
# Violin plot comparing the distribution of land use per kg between the two food types.
fig = px.violin(
    data_frame = law,
    x = "food_type",
    y = "land_use_per_kg",
    color = "food_type",
    title = "Land Use per kg: Plant vs Animal Foods",
    labels = {"land_use_per_kg": "Land Use per kg (m²)", "food_type": "Food Type"}
)

fig.update_layout(
    template = "plotly_white",
    legend_title_text = "",
    width = 700,
    height = 500,
    yaxis = dict(
        showticklabels = True,
        title = "Land Use per kg (m²)"
    )

)
# Render explicitly, like every other figure cell in this notebook, instead of
# relying on update_layout's return value being the cell's last expression.
fig.show()

Q3
- Which 5 foods use the most freshwater per kilogram?

In [29]:
# Five rows with the largest freshwater withdrawals per kilogram, highest first.
top_water = (
    law
    .sort_values("water_per_kg", ascending = False)
    .head(5)
)

In [31]:
# Bar chart of the five foods in top_water.
# The title and the water_per_kg label previously still carried the GHG wording
# copied from the Q1 chart; they now describe the water metric that is plotted.
fig = px.bar(
    top_water,
    x = "food_product",
    y = "water_per_kg",
    color = "water_per_kg",
    color_continuous_scale = "Blues",
    title = "Top 5 Foods by Freshwater Withdrawals per kg",
    labels = {"water_per_kg": "Water Use per kg (liters)", "food_product": "Food Product"},
    text = "water_per_kg"
)

# The values are printed on the bars, so the y tick labels and the colour bar are hidden.
fig.update_layout(
    xaxis_title = "Food Product",
    yaxis_title = "Water Use per kg (liters)",
    yaxis = dict(
        showticklabels = False
    ),
    template = "plotly_white",
    width = 700,
    height = 500,
    coloraxis_showscale = False

)
fig.show()

Q4
- Which foods have the lowest overall environmental impact (measured here by total emissions)?

In [77]:
# Five rows with the smallest total emissions, lowest first.
lowest_impact = (
    law
    .sort_values("total_emissions")
    .head(5)
)

In [79]:
# Bar chart of the five foods in lowest_impact.
# The total_emissions label previously read "GHG per 100g protein (kgCO2eq)"
# (copied from the Q1 chart) and the axis title was misspelled; both now name
# the plotted column.
# NOTE(review): the unit of total_emissions is not visible here — add it to the label once confirmed.
fig = px.bar(
    lowest_impact,
    x = "food_product",
    y = "total_emissions",
    color = "total_emissions",
    color_continuous_scale = "Greens",
    title = "Foods with Lowest Total Emissions",
    labels = {"total_emissions": "Total Emissions", "food_product": "Food Product"},
    text = "total_emissions"
)

# The values are printed on the bars, so the y tick labels and the colour bar are hidden.
fig.update_layout(
    xaxis_title = "Food Product",
    yaxis_title = "Total Emissions",
    yaxis = dict(
        showticklabels = False
    ),
    template = "plotly_white",
    width = 700,
    height = 500,
    coloraxis_showscale = False

)
fig.show()

Q5
- What are the top five foods with the highest combined emissions from the farm and feed stages?

In [23]:
# Derived columns: summed emissions for two pairs of supply-chain stages.
packaging_plus_transport = law["packaging"].add(law["transport"])
farm_plus_feed = law["farm"].add(law["animal_feed"])

law["pack_transport"] = packaging_plus_transport
law["farm_feed"] = farm_plus_feed

In [24]:
# Five rows with the largest combined farm + animal-feed emissions, highest first.
top5_farm_feed = (
    law
    .sort_values("farm_feed", ascending = False)
    .head(5)
)

In [26]:
# Round to two decimals so the bar labels stay readable.
top5_farm_feed = top5_farm_feed.assign(
    farm_feed = top5_farm_feed["farm_feed"].round(2)
)

In [64]:
# Fixed bar colour (hex) for each food expected in the farm + feed chart.
farm_feed_foods = [
    "Beef (beef herd)",
    "Lamb & Mutton",
    "Beef (dairy herd)",
    "Cheese",
    "Shrimps (farmed)",
]
farm_feed_palette = ["#3E1E68", "#5D2F77", "#E45A92", "#F564A9", "#FFACAC"]
custom_colors = dict(zip(farm_feed_foods, farm_feed_palette))

In [84]:
# Bar chart of the five foods in top5_farm_feed, one fixed colour per food.
farm_feed_labels = {"farm_feed": "Farm Feed", "food_product": "Food Product"}

fig1 = px.bar(
    data_frame = top5_farm_feed,
    x = "food_product",
    y = "farm_feed",
    text = "farm_feed",  # print the value on each bar
    color = "food_product",
    color_discrete_map = custom_colors,
    labels = farm_feed_labels,
    title = "Top 5 Foods by Farm Feed",
)

# Order the bars from the largest total to the smallest.
fig1.update_xaxes(categoryorder = "total descending")

# The values are printed on the bars, so the y tick labels are hidden.
fig1.update_layout(
    template = "plotly_white",
    yaxis = {"showticklabels": False},
    width = 800,
    height = 500,
)
fig1.show()


Q6
- What are the top five foods with the highest combined emissions from the packaging and transport stages?

In [32]:
# Five rows with the largest combined packaging + transport emissions, highest first.
top5_pack_transport = (
    law
    .sort_values("pack_transport", ascending = False)
    .head(5)
)

In [36]:
# Round to two decimals so the bar labels stay readable.
top5_pack_transport = top5_pack_transport.assign(
    pack_transport = top5_pack_transport["pack_transport"].round(2)
)

In [62]:
# Fixed bar colour (hex) for each food expected in the packaging + transport chart.
# Note: this rebinds the custom_colors name used by the earlier farm + feed chart.
custom_colors = dict([
    ("Coffee", "#264653"),
    ("Olive Oil", "#5A827E"),
    ("Soybean Oil", "#84AE92"),
    ("Palm Oil", "#B9D4AA"),
    ("Sunflower Oil", "#FAFFCA"),
])

In [63]:
# Bar chart of the five foods in top5_pack_transport, one fixed colour per food.
pack_transport_labels = {
    "pack_transport": "Packaging and Transport",
    "food_product": "Food Product",
}

fig2 = px.bar(
    data_frame = top5_pack_transport,
    x = "food_product",
    y = "pack_transport",
    text = "pack_transport",  # print the value on each bar
    color = "food_product",
    color_discrete_map = custom_colors,
    labels = pack_transport_labels,
    title = "Top 5 Foods by Packaging and Transport",
)

# Order the bars from the largest total to the smallest.
fig2.update_xaxes(categoryorder = "total descending")

# The values are printed on the bars, so the y tick labels are hidden.
fig2.update_layout(
    template = "plotly_white",
    yaxis = {"showticklabels": False},
    width = 700,
    height = 500,
)
fig2.show()

Q7
- Which five foods provide protein with the lowest greenhouse gas emissions?

In [74]:
# Five foods with the smallest GHG emissions per 100g of protein, keeping only
# the name and the two per-protein metrics used in the scatter plot.
protein_cols = ["food_product", "GHG_per_100g_protein", "water_per_100g_protein"]
best_protein = law.sort_values("GHG_per_100g_protein")[protein_cols].head(5)

In [85]:
# Scatter of GHG vs. water use per 100g of protein for the foods in best_protein.
# Every marker gets the same size value (20); each point is labelled with its food name.
marker_sizes = [20 for _ in range(len(best_protein))]
protein_axis_labels = {
    "GHG_per_100g_protein": "GHG per 100g protein (kgCO2eq)",
    "water_per_100g_protein": "Water per 100g protein (liters)",
}

fig = px.scatter(
    data_frame = best_protein,
    x = "GHG_per_100g_protein",
    y = "water_per_100g_protein",
    color = "food_product",
    text = "food_product",
    size = marker_sizes,
    labels = protein_axis_labels,
    title = "Best Protein with Lowest Emissions & Water Use",
)

fig.update_layout(
    template = "plotly_white",
    legend_title_text = "Food Product",
    width = 800,
    height = 500,
    yaxis = {"showticklabels": True},
)
fig.show()