# CO2 Dashboard

In [4]:
# Data processing packages
import numpy as np
import pandas as pd

# Dashboard packages
import panel as pn
pn.extension('tabulator')

# plot packages
import hvplot.pandas

from bokeh.server.server import Server
from bokeh.application import Application
from bokeh.application.handlers.function import FunctionHandler
from co2_dashboard import create_dashboard

ModuleNotFoundError: No module named 'co2_dashboard'

In [None]:
# Load the dataset
df = pd.read_csv('owid-co2-data.csv')

In [None]:
# Display the first five rows of the dataset
df.head()

Unnamed: 0,country,year,iso_code,population,gdp,cement_co2,cement_co2_per_capita,co2,co2_growth_abs,co2_growth_prct,...,share_global_other_co2,share_of_temperature_change_from_ghg,temperature_change_from_ch4,temperature_change_from_co2,temperature_change_from_ghg,temperature_change_from_n2o,total_ghg,total_ghg_excluding_lucf,trade_co2,trade_co2_share
0,Afghanistan,1850,AFG,3752993.0,,,,,,,...,,,,,,,,,,
1,Afghanistan,1851,AFG,3767956.0,,,,,,,...,,0.165,0.0,0.0,0.0,0.0,,,,
2,Afghanistan,1852,AFG,3783940.0,,,,,,,...,,0.164,0.0,0.0,0.0,0.0,,,,
3,Afghanistan,1853,AFG,3800954.0,,,,,,,...,,0.164,0.0,0.0,0.0,0.0,,,,
4,Afghanistan,1854,AFG,3818038.0,,,,,,,...,,0.163,0.0,0.0,0.0,0.0,,,,


# (1) Data Cleaning

- Data cleaning and processing  is the process of preparing data for analysis by removing or correcting errors, handling missing values, This function is used to clean the data by removing any rows that have missing values in either column.

# (2) Exploratory Data Analysis

- For this I will use numpy to find missing values and duplicate entries before starting dashboard prep.

- I will also try to understand the datatypes being used in each column using pandas describe() function.

# (3) Engineering and Visulisation

- In the engineering and visualization phase of the project, the focus is on making the data interactive and creating engaging visualisations for the dashboard. This involves using programming libraries such as ipywidgets to create widgets and sliders that allow users to interact with the data.








In [None]:
# Find the datatypes
df.describe()

Unnamed: 0,year,population,gdp,cement_co2,cement_co2_per_capita,co2,co2_growth_abs,co2_growth_prct,co2_including_luc,co2_including_luc_growth_abs,...,share_global_other_co2,share_of_temperature_change_from_ghg,temperature_change_from_ch4,temperature_change_from_co2,temperature_change_from_ghg,temperature_change_from_n2o,total_ghg,total_ghg_excluding_lucf,trade_co2,trade_co2_share
count,48058.0,39495.0,14564.0,23764.0,22017.0,30308.0,28157.0,25136.0,23320.0,23030.0,...,2593.0,41724.0,37620.0,41724.0,41724.0,37620.0,6354.0,6354.0,4398.0,4397.0
mean,1926.842565,60832230.0,267758600000.0,9.1094,0.066798,391.272161,5.86831,20.055703,534.225708,7.445587,...,19.19987,2.190116,0.002956,0.006886,0.010069,0.000497,790.430981,759.384745,-7.157874,20.36801
std,59.414846,328586700.0,2103151000000.0,66.463548,0.125367,1855.824655,59.845871,687.366449,2164.644277,97.593192,...,33.432368,9.012474,0.016068,0.039698,0.057196,0.002943,3610.53425,3531.641287,269.15622,52.720717
min,1750.0,222.0,49980000.0,0.0,0.0,0.0,-2032.366,-100.0,-187.847,-2334.695,...,0.0,-0.899,-0.001,-0.0,-0.001,0.0,-186.66,0.01,-2367.758,-99.795
25%,1883.0,346437.5,7516679000.0,0.0,0.0,0.183,0.0,-0.7335,5.95075,-0.75275,...,0.249,0.003,0.0,0.0,0.0,0.0,8.45,7.0425,-3.02475,-6.287
50%,1930.0,2456362.0,25979990000.0,0.029,0.008,3.856,0.026,3.7495,27.777,0.071,...,1.431,0.071,0.0,0.0,0.0,0.0,38.285,30.83,1.4785,8.741
75%,1976.0,10080380.0,113294200000.0,0.834,0.093,47.27725,0.873,10.5935,124.37375,2.5685,...,19.405,0.329,0.001,0.001,0.001,0.0,153.595,131.5125,9.124,32.512
max,2022.0,7975105000.0,113630200000000.0,1692.404,2.574,37149.785,1813.064,102318.508,41637.617,2340.859,...,100.0,100.0,0.415,1.113,1.611,0.083,49880.602,48089.621,2187.777,576.482


In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48058 entries, 0 to 48057
Data columns (total 79 columns):
 #   Column                                     Non-Null Count  Dtype  
---  ------                                     --------------  -----  
 0   country                                    48058 non-null  object 
 1   year                                       48058 non-null  int64  
 2   iso_code                                   39717 non-null  object 
 3   population                                 39495 non-null  float64
 4   gdp                                        14564 non-null  float64
 5   cement_co2                                 23764 non-null  float64
 6   cement_co2_per_capita                      22017 non-null  float64
 7   co2                                        30308 non-null  float64
 8   co2_growth_abs                             28157 non-null  float64
 9   co2_growth_prct                            25136 non-null  float64
 10  co2_including_luc     

In [None]:
column_entries = df['country'].unique()
print(column_entries)

['Afghanistan' 'Africa' 'Africa (GCP)' 'Albania' 'Algeria' 'Andorra'
 'Angola' 'Anguilla' 'Antarctica' 'Antigua and Barbuda' 'Argentina'
 'Armenia' 'Aruba' 'Asia' 'Asia (GCP)' 'Asia (excl. China and India)'
 'Australia' 'Austria' 'Azerbaijan' 'Bahamas' 'Bahrain' 'Bangladesh'
 'Barbados' 'Belarus' 'Belgium' 'Belize' 'Benin' 'Bermuda' 'Bhutan'
 'Bolivia' 'Bonaire Sint Eustatius and Saba' 'Bosnia and Herzegovina'
 'Botswana' 'Brazil' 'British Virgin Islands' 'Brunei' 'Bulgaria'
 'Burkina Faso' 'Burundi' 'Cambodia' 'Cameroon' 'Canada' 'Cape Verde'
 'Central African Republic' 'Central America (GCP)' 'Chad' 'Chile' 'China'
 'Christmas Island' 'Colombia' 'Comoros' 'Congo' 'Cook Islands'
 'Costa Rica' "Cote d'Ivoire" 'Croatia' 'Cuba' 'Curacao' 'Cyprus'
 'Czechia' 'Democratic Republic of Congo' 'Denmark' 'Djibouti' 'Dominica'
 'Dominican Republic' 'East Timor' 'Ecuador' 'Egypt' 'El Salvador'
 'Equatorial Guinea' 'Eritrea' 'Estonia' 'Eswatini' 'Ethiopia' 'Europe'
 'Europe (GCP)' 'Europe (excl. E

# What does this tell us?

- There are no missing values

- The majority of data types are floats

- There are continent names in the 'country' column, many of which are the same with but with minor differences or specific exclusions.

In [None]:
# Select a Series of True/False values indicating whether~
# each row's 'country' value is equal to 'World'.
df[df['country'] == 'World']

Unnamed: 0,country,year,iso_code,population,gdp,cement_co2,cement_co2_per_capita,co2,co2_growth_abs,co2_growth_prct,...,share_global_other_co2,share_of_temperature_change_from_ghg,temperature_change_from_ch4,temperature_change_from_co2,temperature_change_from_ghg,temperature_change_from_n2o,total_ghg,total_ghg_excluding_lucf,trade_co2,trade_co2_share
47266,World,1750,,7.456641e+08,,,,9.306,,,...,,,,,,,,,,
47267,World,1751,,,,,,9.407,0.101,1.088,...,,,,,,,,,,
47268,World,1752,,,,,,9.505,0.098,1.041,...,,,,,,,,,,
47269,World,1753,,,,,,9.610,0.105,1.108,...,,,,,,,,,,
47270,World,1754,,,,,,9.734,0.123,1.281,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47534,World,2018,,7.683790e+09,1.136302e+14,1565.803,0.204,36766.945,741.491,2.058,...,100.0,100.0,0.399,1.058,1.536,0.079,49585.910,48069.809,0.000,0.0
47535,World,2019,,7.764951e+09,,1615.776,0.208,37040.102,273.158,0.743,...,100.0,100.0,0.404,1.076,1.561,0.081,49880.602,48089.621,0.000,0.0
47536,World,2020,,7.840953e+09,,1633.047,0.208,35007.738,-2032.366,-5.487,...,100.0,100.0,0.410,1.094,1.585,0.082,47513.148,46120.922,0.000,0.0
47537,World,2021,,7.909295e+09,,1692.404,0.214,36816.543,1808.806,5.167,...,100.0,100.0,0.415,1.113,1.611,0.083,,,-0.004,-0.0


## Some minor data processing

- To be safe and for good working practices I will fill in missing NaN values,

- In real world scenarios or in a professional environment, this may not be necessary as this will taking more time for the code to run if we know that there are no missing values.

In [None]:
# Fill in the NaN values and create gdp per capita column
df = df.fillna(0)
df['gdp_per_capita'] = np.where(df['population'] != 0, df['gdp']/ df['population'],0)

In [None]:
# Make DataFrame pipeline interactive
idf = df.interactive()

## CO2 Emission Over Time by Continent

- Now its time to create the interactive parts of the dashboard such as:

    - The slider
    - The widgets

    Note: This will be used to create the pipeline.

In [None]:
# Define panel widgets
year_slider = pn.widgets.IntSlider(name='Year Slider', start=1750, end=2020, step=5, value=1850)
year_slider

BokehModel(combine_events=True, render_bundle={'docs_json': {'40e032cf-4884-4cee-ab30-aa7f3a50ba3f': {'version…

In [None]:
# Radio buttons for CO2 measure
yaxis_co2 = pn.widgets.RadioButtonGroup(
    name='Y axis',
    options=['co2', 'co2_per_capita',],
    button_type='success'
)

In [None]:
# continents for which CO2 data will be displayed
continents = ['World', 'Asia', 'Oceania', 'Europe', 'Africa', 'North America', 'South America', 'Antartica']

# Create a pipeline to calculate the average CO2 emissions for each continent and year
co2_pipeline = (
    idf[
        (idf.year <= year_slider) &
        (idf.country.isin(continents))
    ]
    .groupby(['country', 'year'])[yaxis_co2].mean()
    .to_frame()
    .reset_index()
    .sort_values(by='year')
    .reset_index(drop=True)
)

In [None]:
co2_pipeline

BokehModel(combine_events=True, render_bundle={'docs_json': {'63f9d306-0eb1-4d10-9542-e97c293ad344': {'version…

In [None]:
# Create the plot
co2_plot = co2_pipeline.hvplot(x = 'year', by='country', y=yaxis_co2, line_width=2, title='CO2 Emission by Continent')
co2_plot

BokehModel(combine_events=True, render_bundle={'docs_json': {'09d66250-dce1-4a1e-a0c6-784fe553e998': {'version…

## Table - CO2 Emission Over Time by Continent

- The 'co2_table' variable is a table widget that displays the CO2 emissions data for each continent and year. 

- The table is paginated, with 10 rows per page. 

- The table's width is automatically adjusted to fill the available space.

In [None]:
# Create a table widget to display the CO2 emissions data
co2_table = co2_pipeline.pipe(pn.widgets.Tabulator, pagination='remote', page_size=10, sizing_mode='stretch_width')
co2_table # Display the table widget

BokehModel(combine_events=True, render_bundle={'docs_json': {'ddedba8e-2ca3-4147-b554-5444a855e63a': {'version…

## CO2 vs gdp scatterplot

In [None]:
# Create a pipeline to calculate the average CO2 emissions
co2_vs_gdp_scatterplot_pipeline = (
    idf[
        (idf.year == year_slider) &
        (~ (idf.country.isin(continents)))
    ]
    .groupby(['country', 'year', 'gdp_per_capita'])['co2'].mean()
    .to_frame()
    .reset_index()
    .sort_values(by='year')
    .reset_index(drop=True)
)

In [None]:
co2_vs_gdp_scatterplot_pipeline

BokehModel(combine_events=True, render_bundle={'docs_json': {'cee27232-5690-44e3-a06a-e6eaffd489b3': {'version…

In [None]:
# Scatter plot creation for dashboard using hvplot
co2_vs_gdp_scatterplot = co2_vs_gdp_scatterplot_pipeline.hvplot(x='gdp_per_capita',
                                                                y='co2',
                                                                by='country',
                                                                size=80,
                                                                kind='scatter',
                                                                alpha=0.7,
                                                                legend=False,
                                                                height=500,
                                                                width=500)
co2_vs_gdp_scatterplot

BokehModel(combine_events=True, render_bundle={'docs_json': {'1a222293-08c6-4a67-9cfc-99047851b2a7': {'version…

## Bar Chart with CO2 Sources by Continent

In [None]:
# Bar chart creation 
yaxis_co2_source = pn.widgets.RadioButtonGroup(
    name='Y axis',
    options=['coal_co2', 'oil_co2', 'gas_co2'],
    button_type='success'
)

continents_excl_world = ['World', 'Asia', 'Oceania', 'Europe', 'Africa', 'North America', 'South America', 'Antartica']
co2_source_bar_pipeline = (
    idf[
        (idf.year == year_slider) &
        (idf.country.isin(continents_excl_world))
    ]
    .groupby(['year', 'country'])[yaxis_co2_source].sum()
    .to_frame()
    .reset_index()
    .sort_values(by='year')
    .reset_index(drop=True)
)

In [None]:
co2_source_bar_plot = co2_source_bar_pipeline.hvplot(kind='bar',
                                                    x='country',
                                                    y=yaxis_co2_source,
                                                    title='CO2 Source by Continent')
co2_source_bar_plot

BokehModel(combine_events=True, render_bundle={'docs_json': {'94d3e909-f525-46d5-8ae0-e5153d0bb65c': {'version…

## Creating The Dashboard

- Use of FastListTemplate from the documentation provided by panel.

In [None]:
# Layout via Template
template = pn.template.FastListTemplate(
    title='World CO2 Emission Dashboard',
    sidebar=[pn.pane.Markdown('# CO2 Emissions and Climate Change'),
            pn.pane.Markdown('### CO2 emissions are the primary driver of climate change. It iss widely recognised that to combat this issue we need to understand which contries/ continents drive this issue furthest, we can use complex data analysis to find solutions'),
            pn.pane.PNG('climate.change.png',
            sizing_mode='scale_both'),
            pn.pane.Markdown('## Settings'),
            year_slider],
    main=[pn.Row(pn.Column(yaxis_co2, co2_plot.panel(width=700), margin=(0,25)),co2_table.panel(width=500)),
        pn.Row(pn.Column(co2_vs_gdp_scatterplot.panel(width=600),margin=(0,25)),
            pn.Column(yaxis_co2_source, co2_source_bar_plot.panel(width=600)))],
    accent_base_color = '88d8b0',
    header_background = '88d8b0',
)
#template.show()
template.servable();