# Recycling habits in European households: expectation vs. reality

## 0. Preamble and datasets

In [1]:
# If needed, first install Plotly in Anaconda, by opening the Anaconda prompt and executing the following line of code:
# conda install -c plotly plotly
# (see: https://anaconda.org/plotly/plotly)

# Import packages
import pandas as pd
import plotly.express as px
import numpy as np

# Import datasets
# The two Excel files are available at:
# https://github.com/marie-lefevre/Environment-Graphs/tree/master/map-europe-waste-recycling
# DATA_DIR is the only machine-specific setting: point it at the folder holding the downloaded files.
DATA_DIR = r'C:\Users\marie\Desktop'
file1 = pd.read_excel(DATA_DIR + '\\file1_excel.xlsx')
file2 = pd.read_excel(DATA_DIR + '\\file2_excel.xlsx')


## 1. Data analysis

In [6]:
# Build df1 from file1: remove row 70, keep the 2017 rows, index by country_code,
# then discard the columns "date", "date_original" and "country_name_raw".
# NOTE(review): the row label 70 is hard-coded (described as the trailing NA row of the sheet);
# confirm it still points at that row whenever the source file is refreshed.
df1 = (
    file1
    .drop(70)
    .loc[lambda frame: frame['date'] == 2017]
    .set_index('country_code')
    .drop(["date", 'date_original', "country_name_raw"], axis=1)
)

# Build df2 from file2 in a single chain
df2 = (
    file2
    # Discard "code_raw" and the "survey_perc" column shipped with the file
    .drop(["code_raw", 'survey_perc'], axis=1)
    # Recompute the percentage of respondents as survey_abs / pop_tot
    .assign(survey_perc=lambda frame: frame["survey_abs"] / frame["pop_tot"])
    # Use "country_code" as the index
    .set_index('country_code')
    # "pop_tot" and "survey_abs" were only needed for the ratio above
    .drop(["pop_tot", 'survey_abs'], axis=1)
)

# Inner-join df1 and df2 on country_code, move the index back into a regular column,
# and add the difference recycling_rate - survey_perc
dfm = (
    df1
    .merge(df2, on='country_code', how='inner')
    .reset_index()
    .assign(diff_recycling_survey=lambda merged: merged["recycling_rate"] - merged["survey_perc"])
)

# Turn both rate columns into whole-percent text labels (e.g. 0.4931 -> "49%").
# "diff_recycling_survey" is not touched here, so it keeps the unrounded difference.
def _to_percent_labels(rates):
    """Round each rate to 2 decimals, then render it as a whole-number percentage string."""
    two_decimals = [round(rate, 2) for rate in rates]
    return ["{0:.0f}%".format(rate * 100) for rate in two_decimals]

# "survey_perc" is the respondent rate, "recycling_rate" is the recycling rate
for rate_column in ("survey_perc", "recycling_rate"):
    dfm[rate_column] = pd.Series(_to_percent_labels(dfm[rate_column]), index=dfm.index)

# Create categories comparing each country's self-reported recycling behavior with its actual recycling rate
gap = dfm['diff_recycling_survey']
# Boundary between "similar" and "self-reported is higher": half of the smallest difference.
# Series.min() skips NaN; the builtin min() returns NaN when the first row is NaN,
# which would leave every non-positive difference unclassified.
similar_floor = gap.min() / 2
# NOTE(review): any positive difference is labelled "actual is higher", while negative
# differences down to similar_floor are labelled "similar" - confirm this asymmetry is intended.
conditions = [
    gap > 0,
    (gap <= 0) & (gap >= similar_floor),
    gap < similar_floor,
]
choices = ['Actual recycling rate is higher than self-reported rate',
           'Actual recycling rate and self-reported rate are similar',
           'Self-reported recycling rate is higher than actual rate']
# Rows matching none of the conditions (e.g. a NaN difference) get the label 'null'
dfm["cat_recycling_perception"] = np.select(conditions, choices, default='null')

# Rename columns by name rather than by position, so a change in column order upstream
# cannot silently attach a label to the wrong column.
# "country_name_clean" and "diff_recycling_survey" keep their names.
dfm = dfm.rename(columns={
    "country_code": "Country code",
    "recycling_rate": "Actual recycling rate",
    "survey_perc": "Self-reported recycling rate",
    "cat_recycling_perception": "Perception of own recycling behavior",
})

# Display final dataframe
dfm


Unnamed: 0,Country code,Actual recycling rate,country_name_clean,Self-reported recycling rate,diff_recycling_survey,Perception of own recycling behavior
0,AUT,58%,Austria,49%,0.089747,Actual recycling rate is higher than self-repo...
1,BEL,54%,Belgium,72%,-0.177,Actual recycling rate and self-reported rate a...
2,BGR,35%,Bulgaria,17%,0.17722,Actual recycling rate is higher than self-repo...
3,HRV,24%,Croatia,44%,-0.195294,Actual recycling rate and self-reported rate a...
4,CYP,16%,Cyprus,58%,-0.420838,Self-reported recycling rate is higher than ac...
5,CZE,38%,Czechia,67%,-0.292294,Actual recycling rate and self-reported rate a...
6,DNK,46%,Denmark,69%,-0.232615,Actual recycling rate and self-reported rate a...
7,EST,28%,Estonia,51%,-0.232413,Actual recycling rate and self-reported rate a...
8,UE28,46%,European Union (EU-28),65%,-0.192093,Actual recycling rate and self-reported rate a...
9,FIN,41%,Finland,77%,-0.362277,Self-reported recycling rate is higher than ac...


## 2. Visualization

In [7]:
# Build the figure (interactive map using Plotly)

# Pin every category to its colour explicitly. A bare colour sequence is handed out in the
# order the categories first appear in the data, so a different row order (or an extra
# 'null' category) would silently change what each colour means.
category_colors = {
    'Actual recycling rate is higher than self-reported rate': "#00CC93",
    'Actual recycling rate and self-reported rate are similar': "#636EFA",
    'Self-reported recycling rate is higher than actual rate': "#EF553B",
}

fig = px.choropleth(dfm,
                    locations='Country code',
                    color='Perception of own recycling behavior',
                    color_discrete_map=category_colors,
                    scope="europe",
                    hover_name="country_name_clean",
                    hover_data=["Self-reported recycling rate", "Actual recycling rate"],
                    projection="natural earth"
                    )

# Subtitle and footnotes: (vertical position in paper coordinates, text).
# All of them share the same left offset and have no arrow.
notes = [
    (1.07, "Scale based on the comparison between actual recycling rate* and self-reported recycling rate**"),
    (0.09, "*Percentage of municipal waste being recycled and composted"),
    (0.05, "**Percentage of respondents reporting separating most of their waste for recycling in the past 6 months"),
]

fig.update_layout(
    title_text = 'Which countries are most consistent in their recycling behavior? (EU-28, 2017)',
    annotations=[
        dict(
            x=0.08,
            y=note_y,
            showarrow=False,
            text=note_text,
            xref="paper",
            yref="paper"
        )
        for note_y, note_text in notes
    ],
    autosize=True,
    margin={"r":0,"t":80,"l":0,"b":0}
)

fig.show()
