In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found under the Kaggle input directory,
# then print them one per line.
input_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_path in input_paths:
    print(input_path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Global CO2 Emissions
This project aims to visualize trends in total annual carbon dioxide emissions worldwide using `Altair`. This will be done on a global scale and also by continent/region. In addition, we will pick some countries (member countries of the G20) and see how much each contributes to CO2 emissions.

## Data Import and Manipulation
Key aspects of this section:
* The feature `Entity` is comprised of the following:
    1. Country Names - e.g. United States, Canada, France, etc.
    2. Region/Continents
        - Africa
        - EU-28 - refers to the members of the European Union
        - Europe (other) - refers to Europe excluding EU nations
        - Asia and Pacific (other) - refers to Asian countries excluding China and India
        - Middle East - refers to Middle Eastern countries
        - Americas (other) - not including United States
    3. International transport - refers to aviation and shipping that contributes to global CO2 emissions
    4. Statistical differences - refers to the difference between global emissions and the sum of all national emissions plus bunker fuels.
*  There are missing values in the `Code` column, and since we are not going to use this column for visualization, we will drop it.
* Adding new columns to the dataframe: 
    1. `Annual_Emissions` - derived from column `Annual CO₂ emissions (tonnes )` divided by 1 Billion. This will give us a smaller range to work on. Note that the unit for this column is `billion tonnes`.
    2. `DT_Year` - derived from `Year` column.
* Removing columns:
    1. `Code` - there are many missing values in this column; this column is not critical to visualization since it only contains codes unique to each country.
    2. `Annual CO₂ emissions (tonnes )` - this has a very wide range (between 0 - 36 Billion) and is not ideal in visualization as it affects the scaling. 


In [None]:
import pandas as pd
import altair as alt

In [None]:
# Location of the raw emissions table and the name of its raw measurement column.
CO2_CSV_PATH = '/kaggle/input/co2-ghg-emissionsdata/co2_emission.csv'
RAW_EMISSIONS_COL = 'Annual CO₂ emissions (tonnes )'
TONNES_PER_BILLION = 1_000_000_000

# Build the working frame in a single chain (no in-place mutation):
#   * `Year` is parsed as a datetime; the charts encode it as a temporal field.
#   * `Annual_Emissions` is the raw tonnes value divided by one billion.
#   * `DT_Year` is the plain calendar year; the year sliders select on this column.
#   * The raw emissions column and `Code` are dropped once the derived columns exist.
df = (
    pd.read_csv(CO2_CSV_PATH, parse_dates=['Year'])
    .assign(
        Annual_Emissions=lambda frame: frame[RAW_EMISSIONS_COL] / TONNES_PER_BILLION,
        DT_Year=lambda frame: frame['Year'].dt.year,
    )
    .drop(columns=[RAW_EMISSIONS_COL, 'Code'])
)
df.head()

### Create a separate dataframe for continents/regions
I have included China, the United States, and International transport in this data because they account for a significant amount of CO2 emissions.

In [None]:
# Slice the emissions table into one frame per region/entity of interest.
def _entity_rows(name):
    """Return the rows of ``df`` whose ``Entity`` column equals ``name``."""
    return df.loc[df['Entity'].eq(name)]


africa = _entity_rows('Africa')
americas = _entity_rows('Americas (other)')
apac = _entity_rows('Asia and Pacific (other)')
euro = _entity_rows('EU-28')
europe = _entity_rows('Europe (other)')
mideast = _entity_rows('Middle East')
usa = _entity_rows('United States')
china = _entity_rows('China')
transport = _entity_rows('International transport')
world = _entity_rows('World')

# Stack the slices into a single frame; the list order fixes the row order of the result.
conts = [africa, americas, apac, euro, europe, mideast, usa, china, transport, world]
continents = pd.concat(conts)

# Show which entities ended up in the combined frame.
print(continents['Entity'].unique())

### Create separate dataframe for G20 countries

In [None]:
# Entities kept for the G20 charts.
g20_entities = [
    'Argentina', 'Australia', 'Brazil', 'Canada', 'Saudi Arabia',
    'China', 'France', 'Germany', 'India', 'United States',
    'Indonesia', 'Italy', 'Japan', 'Mexico', 'Russia',
    'South Africa', 'South Korea', 'Turkey', 'United Kingdom', 'Spain',
]

# Keep only the rows whose Entity is one of the names above.
g20 = df[df['Entity'].isin(g20_entities)]

# Show which of the requested entities are actually present in the data.
print(g20['Entity'].unique())

## Data Visualization

### By Region
The area graph below shows the trend of CO2 emissions over time.
* From 1830, CO2 emissions in the United States and the EU were increasing. This period was part of the First Industrial Revolution, when most production processes were transitioning to machine operation. Since then, an upward trend has been seen in carbon emissions around the world.
* From 1984, there is a significant reduction in CO2 emissions across Europe. A similar decline in emissions was seen in the United States from 2006.
* From 1950, China's CO2 emissions have been on the rise, and it is currently the number one contributor in the world.
* International transport also contributes to the level of emissions each year. With the current coronavirus pandemic affecting most travel around the world, it will be interesting to see how this affects CO2 emissions.

With the downward trend seen in the European Union and the United States, it is important to know what mitigation measures and policies they put in place to achieve this.

You can hover over the graph to select the continent/region you want to check.

In [None]:
# Selection driven by mouse hover; it controls the colour condition below.
multi = alt.selection_multi(on='mouseover')

# Area chart of annual emissions over time, one area per Entity.
# Marks inside the selection keep their Entity colour; all others are drawn light gray.
alt.Chart(continents).mark_area(opacity=0.5, line=True).encode(
        # Explicit axis title, matching the G20 trend chart.
        x = alt.X('year(Year):T', title='Year'),
        y = alt.Y('Annual_Emissions', title='Annual Emissions (in billion tonnes)'),
        color = alt.condition(multi,'Entity', alt.value('lightgray'), title='Region'),
        tooltip = [alt.Tooltip('Year:T', format='%Y'),
                   'Entity', 'Annual_Emissions']
).properties(
        width = 600,
        height = 400,
        title='Total Annual CO2 emissions, by region'
).add_selection(
    # Attached with add_selection, the same way the G20 trend chart attaches its selection.
    multi
)

### By region: Interactive Bar Graph

Here is another illustration of how each continent/region contributes to the worldwide level of CO2 emissions. The `World` bar shows the total annual CO2 emissions. You can use the slider to select a year and display the corresponding CO2 emissions. 

In [None]:
# Year slider
# Bounds are read from the data instead of being hard-coded, so the slider always
# matches the years present in `continents`. int() turns the numpy scalars into
# plain Python ints before they are handed to Altair.
first_year = int(continents['DT_Year'].min())
last_year = int(continents['DT_Year'].max())
year_slider = alt.binding_range(min=first_year, max=last_year, step=1)
# The slider selects on DT_Year and starts on the most recent year.
slider = alt.selection_single(bind=year_slider, fields=['DT_Year'], name='Select', init={'DT_Year': last_year})

# Single selection
# Drives the colour condition: the selected bar keeps its Entity colour, the rest turn light gray.
click = alt.selection_single()

# Main Chart
# One bar per Entity for the year currently chosen on the slider.
alt.Chart(continents).mark_bar().encode(
        x = alt.X('Entity', title='Country/Region'),
        y = alt.Y('Annual_Emissions', title='Annual Emissions (in billion tonnes)'),
        color = alt.condition(click, 'Entity', alt.value('lightgray'),title='Country/Region'),
        tooltip= ['Annual_Emissions']
).properties(
    width=600,
    height=400,
    title='Total Annual CO2 emissions, by region'
).add_selection(
    # Both selections are registered the same way, instead of mixing
    # properties(selection=...) with add_selection.
    slider,
    click
).transform_filter(
    # Keep only the rows matching the slider's year.
    slider
)

### G20 Countries
Here is an interactive chart to visualize how G20 member countries affect CO2 emission levels in the world. As in the previous graph, you can choose the year using the slider.

In [None]:
# G20 plot

# Year slider
# Bounds are read from the data instead of being hard-coded, so the slider always
# matches the years present in `g20`. int() turns the numpy scalars into plain
# Python ints before they are handed to Altair.
first_year = int(g20['DT_Year'].min())
last_year = int(g20['DT_Year'].max())
year_slider = alt.binding_range(min=first_year, max=last_year, step=1)
# The slider selects on DT_Year and starts on the most recent year.
slider = alt.selection_single(bind=year_slider, fields=['DT_Year'], name='Select', init={'DT_Year': last_year})

# Single selection
# Drives the colour condition: the selected bar keeps its Entity colour, the rest turn light gray.
click = alt.selection_single()

# Main Chart
# One bar per G20 entity for the year currently chosen on the slider.
alt.Chart(g20).mark_bar().encode(
        x = alt.X('Entity', title='Countries'),
        y = alt.Y('Annual_Emissions', title='Annual Emissions (in billion tonnes)'),
        color = alt.condition(click, 'Entity', alt.value('lightgray'), title='Countries'),
        tooltip= ['Annual_Emissions']
).properties(
    width=600,
    height=400,
    title='Total Annual CO2 emissions, by G20 countries'
).add_selection(
    # Both selections are registered the same way, instead of mixing
    # properties(selection=...) with add_selection.
    slider,
    click
).transform_filter(
    # Keep only the rows matching the slider's year.
    slider
)

### G20 Countries: Trend
You can check the CO2 trend for each G20 country using the dropdown menu.

In [None]:
# Dropdown options are taken from the `g20` frame itself rather than a second
# hand-typed copy of the country list, so the menu cannot drift from the data.
# They are sorted alphabetically; None stays as the first option, as before.
g20_country_options = sorted(g20['Entity'].unique().tolist())
country_dr = alt.binding_select(options=[None] + g20_country_options)
# Selection on Entity bound to the dropdown; a click on the chart clears it.
country_sl = alt.selection_single(fields=['Entity'], bind=country_dr, name="Country", clear='click')


# Area chart of annual emissions over time, coloured by Entity and filtered
# by the dropdown selection.
alt.Chart(g20).mark_area(opacity=0.5, line=True).encode(
        x = alt.X('year(Year):T', title='Year'),
        y = alt.Y('Annual_Emissions:Q', title='Annual Emissions (in billion tonnes)'),
        color = ('Entity'),
        tooltip = [alt.Tooltip('Year:T', format='%Y'),
                   'Entity', 'Annual_Emissions']
).properties(
        width = 600,
        height = 400,
        title = 'Annual CO2 Emissions, by G20 countries'
).add_selection(
    country_sl
).transform_filter(
    country_sl
)