In [2]:
import pandas as pd
import streamlit as st
import altair as alt
import matplotlib.pyplot as plt

#Load the country-level dataset from a CSV file located relative to the notebook's working directory
df_countries = pd.read_csv('countries.csv')

#Preview the first five rows to check that the columns were parsed as expected
df_countries.head()

Unnamed: 0,Country,Region,Population (millions),HDI,GDP per Capita,Cropland Footprint,Grazing Footprint,Forest Footprint,Carbon Footprint,Fish Footprint,...,Cropland,Grazing Land,Forest Land,Fishing Water,Urban Land,Total Biocapacity,Biocapacity Deficit or Reserve,Earths Required,Countries Required,Data Quality
0,Afghanistan,Middle East/Central Asia,29.82,0.46,$614.66,0.3,0.2,0.08,0.18,0.0,...,0.24,0.2,0.02,0.0,0.04,0.5,-0.3,0.46,1.6,6
1,Albania,Northern/Eastern Europe,3.16,0.73,"$4,534.37",0.78,0.22,0.25,0.87,0.02,...,0.55,0.21,0.29,0.07,0.06,1.18,-1.03,1.27,1.87,6
2,Algeria,Africa,38.48,0.73,"$5,430.57",0.6,0.16,0.17,1.14,0.01,...,0.24,0.27,0.03,0.01,0.03,0.59,-1.53,1.22,3.61,5
3,Angola,Africa,20.82,0.52,"$4,665.91",0.33,0.15,0.12,0.2,0.09,...,0.2,1.42,0.64,0.26,0.04,2.55,1.61,0.54,0.37,6
4,Antigua and Barbuda,Latin America,0.09,0.78,"$13,205.10",,,,,,...,,,,,,0.94,-4.44,3.11,5.7,2


In [3]:
#Cleaning 'GDP per Capita' - strip the '$' and ',' characters and convert the string values to float.
#regex=False makes both replacements literal: read as a regular expression, '$' is an end-of-string
#anchor, and the default value of `regex` differs between pandas versions (True before 2.0, False from 2.0).
#The dtype guard keeps this cell re-runnable: once the column is numeric the `.str` accessor would raise.
if not pd.api.types.is_numeric_dtype(df_countries['GDP per Capita']):
    df_countries['GDP per Capita'] = (
        df_countries['GDP per Capita']
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
        .astype('float')
    )

In [4]:
#Some countries have no per-component footprint data. So that they can still be visualized,
#give them a 'No Footprint Component Data' column holding their Total Ecological Footprint.
#Rows that do have component data are not assigned a value in the new column.
missing_footprint_components = df_countries['Cropland Footprint'].isnull()
df_countries.loc[missing_footprint_components, 'No Footprint Component Data'] = df_countries['Total Ecological Footprint']

In [5]:
#Some countries have no per-component biocapacity data. So that they can still be visualized,
#give them a 'No Biocapacity Component Data' column holding their Total Biocapacity.
#(The source column must be 'Total Biocapacity', not 'Total Ecological Footprint', because this
#placeholder is stacked alongside the biocapacity components.)
df_countries.loc[df_countries['Cropland'].isnull(), 'No Biocapacity Component Data'] = df_countries['Total Biocapacity']
#New column now contains the Total Biocapacity values for only the countries where 'Cropland' is null

In [6]:
#Summarize the columns, non-null counts and dtypes of the per-capita dataframe after the steps above
df_countries.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 188 entries, 0 to 187
Data columns (total 23 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   Country                         188 non-null    object 
 1   Region                          188 non-null    object 
 2   Population (millions)           188 non-null    float64
 3   HDI                             172 non-null    float64
 4   GDP per Capita                  173 non-null    float64
 5   Cropland Footprint              173 non-null    float64
 6   Grazing Footprint               173 non-null    float64
 7   Forest Footprint                173 non-null    float64
 8   Carbon Footprint                173 non-null    float64
 9   Fish Footprint                  173 non-null    float64
 10  Total Ecological Footprint      188 non-null    float64
 11  Cropland                        173 non-null    float64
 12  Grazing Land                    173 

In [7]:
#Stacked bar chart of the per-capita ecological footprint: one bar per country, one colour per
#footprint component. transform_fold reshapes the listed columns into key/value pairs
#('Footprint' holds the column name, the second field holds its value).
per_capita_footprint_fields = [
    'Cropland Footprint',
    'Grazing Footprint',
    'Forest Footprint',
    'Carbon Footprint',
    'Fish Footprint',
    'No Footprint Component Data',
]
(
    alt.Chart(df_countries)
    .transform_fold(
        per_capita_footprint_fields,
        as_=['Footprint', 'Total Ecological Footprint (per capita)'],
    )
    .mark_bar()
    .encode(
        x=alt.X('Country:N', sort='-y'),  #order countries by descending y value
        y='Total Ecological Footprint (per capita):Q',
        color='Footprint:N',
    )
    .interactive()
)


2021-04-29 22:29:19.711 INFO    numexpr.utils: NumExpr defaulting to 4 threads.


In [8]:
#Stacked bar chart of the per-capita biocapacity: one bar per country, one colour per
#biocapacity component. transform_fold reshapes the listed columns into key/value pairs
#('Biocapacity' holds the column name, the second field holds its value).
per_capita_biocapacity_fields = [
    'Cropland',
    'Grazing Land',
    'Forest Land',
    'Fishing Water',
    'Urban Land',
    'No Biocapacity Component Data',
]
(
    alt.Chart(df_countries)
    .transform_fold(
        per_capita_biocapacity_fields,
        as_=['Biocapacity', 'Total Biocapacity (per capita)'],
    )
    .mark_bar()
    .encode(
        x=alt.X('Country:N', sort='-y'),  #order countries by descending y value
        y='Total Biocapacity (per capita):Q',
        color='Biocapacity:N',
    )
)

Small but wealthy countries like Luxembourg and Qatar have a large Total Ecological Footprint. What does this indicate? It indicates that the Total Ecological Footprint column (and, by extension, the Total Biocapacity column) is actually measured PER CAPITA. An additional transformation must be performed on the data to get the actual Total Ecological Footprint and Total Biocapacity: multiplying the current per-capita values by each country's population (given in millions), which I'll do below.

In [9]:
#Creating a copy of df_countries, which will hold data representing the total (not per-capita) footprint/biocapacity
df_countries_totals = df_countries.copy()

#Every per-capita footprint AND biocapacity column has to be scaled by population.
#The columns are listed explicitly instead of sliced by label: a slice starting at 'Cropland'
#skips the footprint columns (they come before 'Cropland' in the frame), which would leave the
#"total" footprint values still expressed per capita.
per_capita_columns = [
    'Cropland Footprint', 'Grazing Footprint', 'Forest Footprint', 'Carbon Footprint', 'Fish Footprint',
    'Total Ecological Footprint',
    'Cropland', 'Grazing Land', 'Forest Land', 'Fishing Water', 'Urban Land',
    'Total Biocapacity',
]
#Row-wise multiplication (axis=0): per-capita value * population in millions = value in millions
df_countries_totals[per_capita_columns] = df_countries[per_capita_columns].multiply(
    df_countries['Population (millions)'], axis=0
)

#Transforming dataset in order to accommodate countries without footprint/biocapacity components for the purpose of visualization.
#These placeholder columns are refreshed here so that they hold the scaled totals rather than the copied per-capita values.
df_countries_totals.loc[df_countries_totals['Cropland Footprint'].isnull(), 'No Footprint Component Data'] = df_countries_totals['Total Ecological Footprint']
df_countries_totals.loc[df_countries_totals['Cropland'].isnull(), 'No Biocapacity Component Data'] = df_countries_totals['Total Biocapacity']

In [10]:
#Stacked bar chart of the ecological footprint held in df_countries_totals: one bar per country,
#one colour per footprint component (columns folded into 'Footprint' / 'Total Ecological Footprint').
total_footprint_chart = (
    alt.Chart(df_countries_totals)
    .transform_fold(
        [
            'Cropland Footprint',
            'Grazing Footprint',
            'Forest Footprint',
            'Carbon Footprint',
            'Fish Footprint',
            'No Footprint Component Data',
        ],
        as_=['Footprint', 'Total Ecological Footprint'],
    )
    .mark_bar()
    .encode(
        x=alt.X('Country:N', sort='-y'),  #order countries by descending y value
        y=alt.Y('Total Ecological Footprint:Q', title='Total Ecological Footprint (in millions of gha)'),
        color='Footprint:N',
    )
)
total_footprint_chart

In [11]:
#Stacked bar chart of the biocapacity held in df_countries_totals: one bar per country,
#one colour per biocapacity component (columns folded into 'Biocapacity' / 'Total Biocapacity').
total_biocapacity_chart = (
    alt.Chart(df_countries_totals)
    .transform_fold(
        [
            'Cropland',
            'Grazing Land',
            'Forest Land',
            'Fishing Water',
            'Urban Land',
            'No Biocapacity Component Data',
        ],
        as_=['Biocapacity', 'Total Biocapacity'],
    )
    .mark_bar()
    .encode(
        x=alt.X('Country:N', sort='-y'),  #order countries by descending y value
        y=alt.Y('Total Biocapacity:Q', title='Total Biocapacity (in millions of gha)'),
        color='Biocapacity:N',
    )
)
total_biocapacity_chart

In [12]:
#Re-inspect the per-capita dataframe (columns, non-null counts, dtypes)
#NOTE(review): presumably a check that building df_countries_totals left df_countries unchanged - confirm
df_countries.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 188 entries, 0 to 187
Data columns (total 23 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   Country                         188 non-null    object 
 1   Region                          188 non-null    object 
 2   Population (millions)           188 non-null    float64
 3   HDI                             172 non-null    float64
 4   GDP per Capita                  173 non-null    float64
 5   Cropland Footprint              173 non-null    float64
 6   Grazing Footprint               173 non-null    float64
 7   Forest Footprint                173 non-null    float64
 8   Carbon Footprint                173 non-null    float64
 9   Fish Footprint                  173 non-null    float64
 10  Total Ecological Footprint      188 non-null    float64
 11  Cropland                        173 non-null    float64
 12  Grazing Land                    173 

In [13]:
#Inspect the totals dataframe (columns, non-null counts, dtypes) for comparison with df_countries
df_countries_totals.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 188 entries, 0 to 187
Data columns (total 23 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   Country                         188 non-null    object 
 1   Region                          188 non-null    object 
 2   Population (millions)           188 non-null    float64
 3   HDI                             172 non-null    float64
 4   GDP per Capita                  173 non-null    float64
 5   Cropland Footprint              173 non-null    float64
 6   Grazing Footprint               173 non-null    float64
 7   Forest Footprint                173 non-null    float64
 8   Carbon Footprint                173 non-null    float64
 9   Fish Footprint                  173 non-null    float64
 10  Total Ecological Footprint      188 non-null    float64
 11  Cropland                        173 non-null    float64
 12  Grazing Land                    173 

In [1]:
#Write dataframes to CSV in order to use in streamlit app
#NOTE(review): both writes below are commented out, so this cell currently creates no files.
#If they are re-enabled, run the cell after the cells that define df_countries and df_countries_totals.
#to_csv writes the row index by default - consider index=False if the app should not see an extra
#unnamed index column when reading the files back (TODO confirm what the streamlit app expects).
# df_countries.to_csv('countries_per_capita.csv')
# df_countries_totals.to_csv('countries_total.csv')

NameError: name 'df_countries' is not defined