In [114]:
import pandas as pd
import plotly.express as px
import kaleido

# raise pandas' row-display cap so frames of up to 500 rows are printed in full
pd.options.display.max_rows = 500

In [115]:
# read the results table; the relative path is resolved against the current working directory
df = pd.read_csv(filepath_or_buffer='Results_21Mar2022.csv')

In [116]:
# preview the first five rows of the raw table
df.head(n=5)

Unnamed: 0,mc_run_id,grouping,mean_ghgs,mean_land,mean_watscar,mean_eut,mean_ghgs_ch4,mean_ghgs_n2o,mean_bio,mean_watuse,...,sd_eut,sd_ghgs_ch4,sd_ghgs_n2o,sd_bio,sd_watuse,sd_acid,n_participants,sex,diet_group,age_group
0,1,fish_female_20-29,5.421821,8.612772,14165.8624,23.945372,1.164643,0.343663,476.270413,679.923479,...,8.167013,0.632547,0.146827,283.883428,249.472563,8.231176,1329,female,fish,20-29
1,1,fish_female_30-39,5.436673,8.680248,15222.77063,24.305262,1.154118,0.326868,467.923201,709.343785,...,8.467858,0.601941,0.130298,270.247108,277.927269,7.947014,2024,female,fish,30-39
2,1,fish_female_40-49,5.289333,8.467723,15818.80068,24.191105,1.108404,0.310324,438.659832,707.925368,...,8.107006,0.572941,0.119872,252.415063,275.760515,8.103589,1689,female,fish,40-49
3,1,fish_female_50-59,5.16902,8.219199,16962.06009,24.748154,1.032501,0.29813,395.787622,720.825105,...,9.514028,0.509689,0.104731,209.566722,310.422445,8.712428,946,female,fish,50-59
4,1,fish_female_60-69,5.380268,8.570718,18212.66232,25.55943,1.098828,0.310735,421.176867,749.392826,...,9.258068,0.609398,0.113401,264.599257,315.92257,8.616601,492,female,fish,60-69


In [117]:
# Collapse the table to one row per (diet_group, age_group, sex) by averaging
# every measure over all rows that share those keys (i.e. over the Monte Carlo runs).
# The averaging is done before standardising: only the average across runs is of
# interest here, not the run-to-run variability.
group_keys = ['diet_group', 'age_group', 'sex']
measure_cols = [
    'mean_ghgs',
    'mean_land',
    'mean_watscar',
    'mean_eut',
    'mean_ghgs_ch4',
    'mean_ghgs_n2o',
    'mean_bio',
    'mean_watuse',
    'mean_acid',
]
# list-valued spec -> the result has two-level columns: (measure, 'mean')
agg_df = df.groupby(group_keys).agg({col: ['mean'] for col in measure_cols})

In [118]:
measures = [
    'mean_ghgs',
    'mean_land',
    'mean_watscar',
    'mean_eut',
    'mean_ghgs_ch4',
    'mean_ghgs_n2o',
    'mean_bio',
    'mean_watuse',
    'mean_acid',
]

# Add a "z_<measure>" column for every measure: (value - mean) / std.
# The mean and std are taken over the aggregated group rows in agg_df,
# not over the raw rows of df (that alternative was tried and discarded).
for measure in measures:
    group_values = agg_df[measure]
    centred = group_values - group_values.mean()
    agg_df[f"z_{measure}"] = centred / group_values.std()

In [119]:
# Collect the Z-score columns by name (top-level label starting with "z_").
# Selecting by name instead of by position keeps this cell correct if measures
# are added or removed, or if the column layout of agg_df changes.
cols = [name for name in agg_df.columns.get_level_values(0) if name.startswith('z_')]

# work on a copy so agg_df itself is left untouched by this cell
total_impact_df = agg_df.copy()

# total impact of a group = row-wise sum of its Z-scores across all measures
# NOTE(review): mean_ghgs_ch4 / mean_ghgs_n2o look like components of mean_ghgs;
# if so they are counted twice in this sum -- confirm this is intended.
total_impact_df['total_impact'] = total_impact_df[cols].sum(axis=1, numeric_only=True)

# turn the groupby keys (diet_group, age_group, sex) back into ordinary columns
total_impact_df = total_impact_df.reset_index()
# drop the second column level created by the list-valued agg spec, leaving flat names
total_impact_df.columns = total_impact_df.columns.droplevel(1)



In [120]:
# preview the first five rows, including the z_* columns and total_impact
total_impact_df.head(n=5)

Unnamed: 0,diet_group,age_group,sex,mean_ghgs,mean_land,mean_watscar,mean_eut,mean_ghgs_ch4,mean_ghgs_n2o,mean_bio,...,z_mean_ghgs,z_mean_land,z_mean_watscar,z_mean_eut,z_mean_ghgs_ch4,z_mean_ghgs_n2o,z_mean_bio,z_mean_watuse,z_mean_acid,total_impact
0,fish,20-29,female,4.902947,6.489031,17131.963352,20.985464,0.842864,0.322486,248.135212,...,-0.45224,-0.619829,-0.495698,-0.3667,-0.410964,-0.561071,-0.29587,0.107994,-0.433417,-3.527796
1,fish,20-29,male,5.659689,7.381889,19671.024654,24.162964,0.925972,0.366329,273.612309,...,-0.1829,-0.485563,0.559703,-0.04606,-0.295207,-0.346789,-0.012048,0.670751,-0.133587,-0.2717
2,fish,30-39,female,4.942664,6.515886,17652.62854,20.948442,0.8365,0.321971,249.827684,...,-0.438104,-0.61579,-0.279275,-0.370436,-0.419827,-0.56359,-0.277015,0.110164,-0.389292,-3.243166
3,fish,30-39,male,5.327206,6.977106,18953.095158,22.644064,0.854537,0.343782,262.360838,...,-0.301238,-0.546433,0.261285,-0.199332,-0.394705,-0.456986,-0.137393,0.436964,-0.25765,-1.595487
4,fish,40-49,female,4.92704,6.540682,17729.964756,20.713798,0.841106,0.320413,252.846922,...,-0.443665,-0.612062,-0.247129,-0.394114,-0.413412,-0.571204,-0.24338,0.048542,-0.370041,-3.246464


In [121]:
# Build the treemap input: the three grouping columns plus the total impact,
# and a non-negative copy of the total impact.
# Summed Z-scores can be negative, but a treemap area cannot, so the absolute
# value is stored separately and used for the rectangle size.
treemap_df = (
    total_impact_df[['diet_group', 'age_group', 'sex', 'total_impact']]
    .assign(total_impact_abs=lambda frame: frame['total_impact'].abs())
)

In [122]:
# Swap the raw diet-group and sex codes for reader-friendly labels.
# Values that are not keys of a mapping are left unchanged.
display_labels = {
    'diet_group': {
        'meat100': 'High Meat',
        'meat': 'Medium Meat',
        'meat50': 'Low Meat',
        'vegan': 'Vegan',
        'veggie': 'Vegetarian',
        'fish': 'Fish',
    },
    'sex': {'male': 'Male', 'female': 'Female'},
}
for column, mapping in display_labels.items():
    treemap_df[column] = treemap_df[column].replace(mapping)

In [123]:
# show the treemap input frame; display.max_rows was raised to 500 in the first cell,
# so up to 500 rows are printed without truncation
treemap_df

Unnamed: 0,diet_group,age_group,sex,total_impact,total_impact_abs
0,Fish,20-29,Female,-3.527796,3.527796
1,Fish,20-29,Male,-0.2717,0.2717
2,Fish,30-39,Female,-3.243166,3.243166
3,Fish,30-39,Male,-1.595487,1.595487
4,Fish,40-49,Female,-3.246464,3.246464
5,Fish,40-49,Male,-2.383545,2.383545
6,Fish,50-59,Female,-2.729887,2.729887
7,Fish,50-59,Male,-2.412383,2.412383
8,Fish,60-69,Female,-1.873912,1.873912
9,Fish,60-69,Male,-2.016391,2.016391


In [None]:
# creates the treemap
# hierarchy: diet group -> age group -> sex
# area: total impact (absolute value), larger rectangles mean more difference (positive or negative) from mean
# colour: total impact, dark green less impact than mean, dark red more impact than mean
fig = px.treemap(
    data_frame=treemap_df,
    path=[px.Constant('All Groups'), 'diet_group', 'age_group', 'sex'],
    values='total_impact_abs',
    color='total_impact',
    color_continuous_scale=['darkgreen', 'white', '#bb0000'],
    # Pin the white centre of the diverging scale to zero. Without this the scale
    # spans min..max of the data, so white sits at the middle of that range and
    # green/red no longer mean "below/above the mean".
    color_continuous_midpoint=0,
)

fig.update_layout(
    title="Environmental Impact by Diet Group",
    title_font_size=24,
    coloraxis_colorbar=dict(  # styling for the legend scale bar
        title="Difference From Mean Environmental Impact",
        title_font_size=16,
        tickfont_size=12,
        orientation="h",  # horizontal bar, centred underneath the plot
        xanchor="center",
        x=0.5,
        y=-0.3,
        title_side="bottom"
    ),
    width=1500,
    height=685
)

# show group and total impact on hovering over rectangles (groupings)
# note: for non-leaf rectangles plotly express colours by the value-weighted
# average of the children, so %{color} is that average rather than a sum
fig.update_traces(hovertemplate='Group = %{label}<br>Total Impact = %{color}<extra></extra>')

fig.show()

# save an interactive, standalone copy next to the notebook
fig.write_html("treemap_total_impact.html")

In [None]:
import pandas as pd
import plotly.express as px

# Read the results table (relative path, resolved against the working directory)
df = pd.read_csv(filepath_or_buffer='Results_21Mar2022.csv')

measures = [
    'mean_ghgs',
    'mean_land',
    'mean_watscar',
    'mean_eut',
    'mean_ghgs_ch4',
    'mean_ghgs_n2o',
    'mean_bio',
    'mean_watuse',
    'mean_acid',
]

# One row per (diet_group, age_group, sex): average each measure over the group's rows.
# The list-valued spec gives two-level columns of the form (measure, 'mean').
agg_df = df.groupby(['diet_group', 'age_group', 'sex']).agg(
    {measure: ['mean'] for measure in measures}
)

# Standardise each measure across the aggregated groups: (value - mean) / std
for measure in measures:
    group_values = agg_df[measure]
    centred = group_values - group_values.mean()
    agg_df[f"z_{measure}"] = centred / group_values.std()

# Flatten agg_df: grouping keys become ordinary columns and the second column
# level is dropped. NOTE: this rebinds agg_df to the flattened frame.
agg_df = agg_df.reset_index()
agg_df.columns = agg_df.columns.droplevel(1)

# Long format: one row per (diet_group, age_group, sex, measure) with its Z-score
z_columns = ["z_" + measure for measure in measures]
long_df = pd.melt(
    agg_df,
    id_vars=['diet_group', 'age_group', 'sex'],
    value_vars=z_columns,
    var_name='measure',
    value_name='z_score',
)

# Swap raw codes for reader-friendly labels; values without a mapping stay unchanged
display_labels = {
    'diet_group': {
        'meat100': 'High Meat',
        'meat': 'Medium Meat',
        'meat50': 'Low Meat',
        'vegan': 'Vegan',
        'veggie': 'Vegetarian',
        'fish': 'Fish',
    },
    'sex': {'male': 'Male', 'female': 'Female'},
}
for column, mapping in display_labels.items():
    long_df[column] = long_df[column].replace(mapping)

# Turn column names into display names: remove 'z_', underscores -> spaces, title case
measure_names = long_df['measure'].str.replace('z_', '')
measure_names = measure_names.str.replace('_', ' ')
long_df['measure'] = measure_names.str.title()

# Treemap areas cannot be negative, so keep the magnitude of each Z-score separately
long_df['z_score_abs'] = long_df['z_score'].abs()

# Create the treemap
# hierarchy: diet group -> age group -> sex -> measure
# area: |Z-score|; colour: signed Z-score on a green / white / red diverging scale
fig = px.treemap(
    data_frame=long_df,
    path=[px.Constant('All Groups'), 'diet_group', 'age_group', 'sex', 'measure'],  # 'measure' is the leaf level
    values='z_score_abs',  # rectangle size: magnitude of the Z-score
    color='z_score',  # rectangle colour: signed Z-score
    color_continuous_scale=['darkgreen', 'white', '#bb0000'],  # custom diverging scale
    # Pin white to Z = 0. Without this the scale spans min..max of the data and
    # white ends up at the middle of that range instead of at the mean.
    color_continuous_midpoint=0,
)

# Update layout
fig.update_layout(
    title="Environmental Impact by Measure and Diet Group",
    title_font_size=24,
    coloraxis_colorbar=dict(
        title="Z-Score",
        title_font_size=16,
        tickfont_size=12,
        orientation="h",  # horizontal bar, centred underneath the plot
        xanchor="center",
        x=0.5,
        y=-0.05,
        title_side="bottom"
    ),
    width=2000,
    height=2000
)

# Hover text. %{label} is the hovered rectangle's own label and %{parent} is the
# label of the rectangle that contains it, so on the leaf (measure) rectangles
# the measure is %{label} and the enclosing group is %{parent}. The original
# template had the two swapped.
# For non-leaf rectangles %{color} is the value-weighted average of the children.
fig.update_traces(hovertemplate='Group = %{parent}<br>Measure = %{label}<br>Z-Score = %{color}<extra></extra>')

# Show the plot
fig.show()

# Save an interactive, standalone copy next to the notebook
fig.write_html("treemap_individual_measures.html")


FileNotFoundError: [Errno 2] No such file or directory: 'Results_21Mar2022.csv'