Library Import

In [None]:
# Importing required libraries
import os
import pandas as pd
import geopandas as gpd
import folium
from branca.colormap import StepColormap
from folium.plugins import MarkerCluster
from ipywidgets import interact, widgets
from IPython.display import display, IFrame, HTML
import matplotlib.pyplot as plt
import io
import base64
import plotly.graph_objects as go
from scipy.stats import gmean

Load and pre-process the dataset

In [None]:
# --- Emissions table ---------------------------------------------------------
# Read the CO2 dataset and keep only the rows in which every column used by
# the charts and maps below is populated.
data = pd.read_csv('owid-co2-data.csv')

required_columns = [
    'country', 'year', 'co2', 'co2_per_capita',
    'cement_co2_per_capita', 'coal_co2_per_capita', 'oil_co2_per_capita',
    'gas_co2_per_capita', 'flaring_co2_per_capita',
]
data_cleaned = data.dropna(subset=required_columns)

# --- Country coordinates -----------------------------------------------------
# Normalise the coordinate table's headers so they line up with the emissions
# table ('country') and with the lower-case names used further down.
coordinates_data = pd.read_csv('country-coordinates-world.csv')
coordinates_data = coordinates_data.rename(
    columns={'Country': 'country', 'Latitude': 'latitude', 'Longitude': 'longitude'}
)

# Keep only countries that have an entry in the coordinate table, then attach
# latitude/longitude to each remaining row.
filtered_data = data_cleaned[data_cleaned['country'].isin(coordinates_data['country'])]
data_with_coordinates = filtered_data.merge(
    coordinates_data[['country', 'latitude', 'longitude']],
    how='left',
    on='country',
)

# Trim to the analysis columns plus the two coordinate columns.
data_with_coordinates = data_with_coordinates[required_columns + ['latitude', 'longitude']]

# --- Country outlines --------------------------------------------------------
# Download the country polygons, simplify the geometries (attributes are left
# untouched) and store the result locally for the map layer to read.
geojson_url = "https://raw.githubusercontent.com/datasets/geo-countries/master/data/countries.geojson"
geojson_data = gpd.read_file(geojson_url)

geojson_simplified = geojson_data.copy()
simplified_shapes = geojson_simplified['geometry'].simplify(tolerance=0.05, preserve_topology=True)
geojson_simplified['geometry'] = simplified_shapes

geojson_simplified.to_file("simplified_countries.geojson", driver="GeoJSON")

Functions for creating maps and graphs

In [1]:
# Function to create the CO2 line graph as SVG with smaller figure size
from scipy.stats import gmean  # Import geometric mean function

def create_co2_line_graph_svg(country, selected_year):
    """Render one country's CO₂ emissions over time as an SVG document.

    The chart contains the emissions line, a dashed vertical rule and a marker
    at ``selected_year`` (only when that year exists for the country), and a
    horizontal rule at the geometric mean of the strictly positive values.

    Args:
        country: Country name, matched against
            ``data_with_coordinates['country']``.
        selected_year: Year to highlight on the chart.

    Returns:
        The SVG markup of the figure as a string.
    """
    country_data = data_with_coordinates[data_with_coordinates['country'] == country]
    country_data = country_data.sort_values(by='year')

    # The geometric mean is only defined for positive numbers. A boolean mask
    # drops zeros, negatives and missing values in one step and keeps the
    # numeric dtype (replacing zeros with pd.NA can turn the column into
    # object dtype before it reaches gmean).
    co2_series = country_data['co2']
    positive_co2 = co2_series[co2_series > 0]
    geometric_mean = float(gmean(positive_co2)) if not positive_co2.empty else None

    fig, ax = plt.subplots(figsize=(6, 4))  # Small figure: it is shown inside a map popup
    try:
        # Plot only the line, without per-point markers
        ax.plot(country_data['year'], country_data['co2'],
                linestyle='-', color='b', label='CO₂ Emissions')

        # Highlight the selected year when the country has a value for it
        if selected_year in country_data['year'].values:
            selected_data = country_data[country_data['year'] == selected_year]
            ax.axvline(x=selected_year, color='r', linestyle='--',
                       label=f'Selected Year: {selected_year}')
            ax.scatter(selected_year, selected_data['co2'].values[0], color='r', zorder=5)

        # Explicit None check: the mean is skipped only when it could not be computed
        if geometric_mean is not None:
            ax.axhline(y=geometric_mean, color='g', linestyle='-.',
                       label=f'Geometric Mean: {geometric_mean:.2f}')
            ax.text(country_data['year'].min(), geometric_mean, f'GM: {geometric_mean:.2f}',
                    color='g', va='bottom', ha='left', fontsize=8)

        ax.set_title(f"CO₂ Emissions for {country} Over Time", fontsize=12, loc='center')
        ax.set_xlabel("Year", fontsize=10)
        ax.set_ylabel("CO₂ Emissions (Million Tonnes)", fontsize=10)
        ax.grid(True)
        ax.legend(fontsize=8)

        # Serialise the figure to SVG in memory
        svg_buffer = io.StringIO()
        fig.savefig(svg_buffer, format='svg')
    finally:
        # Always release the figure, even if plotting or saving failed
        plt.close(fig)

    return svg_buffer.getvalue()


# Function to create a horizontal bar chart of per-capita sector contributions (small figure size)
def create_sectoral_contribution_chart_svg(country, selected_year):
    """Render a country's per-capita CO₂ emissions by sector for one year as SVG.

    Args:
        country: Country name, matched against
            ``data_with_coordinates['country']``.
        selected_year: Year to chart; converted with ``int()`` before filtering.

    Returns:
        The SVG markup of a horizontal bar chart (Cement, Coal, Oil, Gas,
        Flaring), or a small placeholder SVG reading "No data" when the
        country has no row for that year.
    """
    selected_year = int(selected_year)
    # Filter data for the specific country and year
    country_data = data_with_coordinates[
        (data_with_coordinates['country'] == country) &
        (data_with_coordinates['year'] == selected_year)
    ]

    # Ensure data is available
    if country_data.empty:
        print(f"No data available for {country} in {selected_year}")
        # Return a placeholder instead of failing. The xmlns attribute is
        # required for the markup to render as a standalone SVG image, which
        # is how generate_graphs embeds it (data: URI inside an <img>).
        return ("<svg xmlns='http://www.w3.org/2000/svg' width='200' height='100'>"
                "<text x='10' y='40' font-size='20'>No data</text></svg>")

    # Sector columns and the labels shown on the chart
    sectors = ['cement_co2_per_capita', 'coal_co2_per_capita', 'oil_co2_per_capita',
               'gas_co2_per_capita', 'flaring_co2_per_capita']
    sector_names = ['Cement', 'Coal', 'Oil', 'Gas', 'Flaring']
    sector_data = country_data[sectors].iloc[0]  # Sector values of the first matching row

    # High-contrast, fully opaque colours: red, blue, green, orange, purple
    colors = ['#ff4d4d', '#3399ff', '#33cc33', '#ff9900', '#6600cc']

    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        ax.barh(sector_names, sector_data, color=colors, edgecolor='black')

        # Add labels and title
        ax.set_xlabel("CO₂ Emissions (per capita)", fontsize=10)
        ax.set_ylabel("Sector", fontsize=10)
        ax.set_title(f"CO₂ Emissions by Sector for {country} in {selected_year}",
                     fontsize=12, loc='center')
        ax.grid(axis='x', linestyle='--', alpha=0.7)
        fig.tight_layout()

        # Serialise the figure to SVG in memory
        svg_buffer = io.StringIO()
        fig.savefig(svg_buffer, format='svg')
    finally:
        # Always release the figure, even if plotting or saving failed
        plt.close(fig)

    return svg_buffer.getvalue()

# Popup HTML builder (300x225 images)
def generate_graphs(country, selected_year):
    """Build the popup HTML showing both charts for a country side by side.

    The line chart and the sector chart are rendered to SVG, base64-encoded
    and embedded as ``data:`` URIs in two ``<img>`` tags of 300x225 pixels.

    NOTE: this file defines ``generate_graphs`` a second time further down
    (with 400x300 images). Once that later definition has run it replaces
    this one.

    Args:
        country: Country name passed through to both chart functions.
        selected_year: Year passed through to both chart functions.

    Returns:
        An HTML fragment as a string.
    """
    def as_base64(svg_text):
        # SVG text -> UTF-8 bytes -> base64 -> ASCII string
        return base64.b64encode(svg_text.encode('utf-8')).decode('utf-8')

    line_b64 = as_base64(create_co2_line_graph_svg(country, selected_year))
    sector_b64 = as_base64(create_sectoral_contribution_chart_svg(country, selected_year))

    return f'''
    <div style="display: flex; justify-content: space-between;">
        <div style="margin-right: 10px;">
            <h4>CO₂ Emissions Over Time</h4>
            <img src="data:image/svg+xml;base64,{line_b64}" width="300" height="225">
        </div>
        <div>
            <h4>Sectoral Contributions to Emissions</h4>
            <img src="data:image/svg+xml;base64,{sector_b64}" width="300" height="225">
        </div>
    </div>
    '''

# Plotly stacked bar chart: ten highest per-capita emitters, split by sector
def create_bar_chart_top_emitters(selected_year):
    """Show a stacked horizontal bar chart of the top 10 per-capita emitters.

    Rows of ``data_with_coordinates`` for ``selected_year`` are ordered by
    ``co2_per_capita`` (descending) and the first ten are kept. Each country
    gets one bar built from its five sector columns.

    Args:
        selected_year: Year whose rows are charted.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    sectors = ['cement_co2_per_capita', 'coal_co2_per_capita', 'oil_co2_per_capita',
               'gas_co2_per_capita', 'flaring_co2_per_capita']
    sector_names = ['Cement', 'Coal', 'Oil', 'Gas', 'Flaring']

    # Rows for the requested year, ranked by total per-capita emissions
    in_year = data_with_coordinates['year'] == selected_year
    ranked = data_with_coordinates[in_year][['country', 'co2_per_capita'] + sectors].sort_values(
        by='co2_per_capita', ascending=False)
    top_emitters = ranked.head(10)

    # One row per country, one column per sector
    sector_data = top_emitters[sectors].values

    fig = go.Figure()

    # Transposing yields one array per sector, in the same order as sector_names
    for label, per_country_values in zip(sector_names, sector_data.T):
        fig.add_trace(go.Bar(
            y=top_emitters['country'],
            x=per_country_values,
            name=label,
            orientation='h',  # Horizontal bars
            hovertemplate=label + ": %{x:.2f} Tonnes<br>Country: %{y}",  # Value and country on hover
        ))

    fig.update_layout(
        barmode='stack',  # Stack the sector segments into a single bar per country
        title=f"Top 10 Emitters by CO₂ Emissions per Capita and Sectoral Contributions in {selected_year}",
        xaxis_title="CO₂ Emissions per Capita (Tonnes)",
        yaxis_title="Country",
        template="plotly_white",  # Light theme
        showlegend=True
    )

    fig.show()

# Popup HTML builder (400x300 images); this later definition is the one in effect
def generate_graphs(country, selected_year):
    """Build the popup HTML showing both charts for a country side by side.

    Renders the CO₂ line chart and the sector chart to SVG, base64-encodes
    each one and embeds them as ``data:`` URIs in two ``<img>`` tags of
    400x300 pixels laid out in a flex row.

    Args:
        country: Country name passed through to both chart functions.
        selected_year: Year passed through to both chart functions.

    Returns:
        An HTML fragment as a string.
    """
    svg_documents = (
        create_co2_line_graph_svg(country, selected_year),
        create_sectoral_contribution_chart_svg(country, selected_year),
    )

    # SVG text -> UTF-8 bytes -> base64 -> ASCII string, for use in a data: URI
    co2_payload, sector_payload = (
        base64.b64encode(document.encode('utf-8')).decode('utf-8')
        for document in svg_documents
    )

    return f'''
    <div style="display: flex; justify-content: space-between;">
        <div style="margin-right: 10px;">
            <h4>CO₂ Emissions Over Time</h4>
            <img src="data:image/svg+xml;base64,{co2_payload}" width="400" height="300">
        </div>
        <div>
            <h4>Sectoral Contributions to Emissions</h4>
            <img src="data:image/svg+xml;base64,{sector_payload}" width="400" height="300">
        </div>
    </div>
    '''

# Function to create a choropleth map with markers and popups
def create_choropleth_map_with_markers(selected_year):
    """Build a folium world map coloured by per-capita CO₂ for one year.

    Countries are filled according to fixed emission bins; countries without
    a value for the year are drawn in grey. Every country that has
    coordinates also gets a clustered marker whose popup contains the charts
    produced by ``generate_graphs``.

    Args:
        selected_year: Year whose rows of ``data_with_coordinates`` are mapped.

    Returns:
        The ``folium.Map`` instance (not saved or displayed here).
    """
    # Filter the data for the selected year
    filtered_data = data_with_coordinates[data_with_coordinates['year'] == selected_year].copy()

    # Replace NaN values with -1 so "no data" can be told apart from real values
    filtered_data['co2_per_capita'] = filtered_data['co2_per_capita'].fillna(-1)

    # Custom bin edges with one colour per bin
    custom_bins = [0, 0.1, 0.2, 0.5, 1, 2, 5, 10, 20]
    colors = ['#ffffcc', '#ffeda0', '#fed976', '#feb24c', '#fd8d3c', '#f03b20', '#bd0026', '#800026']
    no_data_color = "#d9d9d9"

    colormap = StepColormap(
        colors=colors,
        vmin=0,
        vmax=20,
        index=custom_bins
    )

    # Country name -> fill colour (the -1 sentinel maps to the grey colour)
    country_color_map = {
        country: colormap(value) if value >= 0 else no_data_color
        for country, value in zip(filtered_data['country'], filtered_data['co2_per_capita'])
    }

    def style_function(feature):
        properties = feature['properties']
        # NOTE(review): the property holding the country name depends on the
        # GeoJSON release; fall back to 'name' when 'ADMIN' is absent so a
        # missing key cannot abort the whole map. Confirm against the saved file.
        country_name = properties.get('ADMIN', properties.get('name'))
        return {
            'fillColor': country_color_map.get(country_name, no_data_color),
            'fillOpacity': 0.7,
            'color': 'black',
            'weight': 0.5
        }

    # 'StamenTonerLite' is not a built-in tileset name in recent folium
    # releases (map creation fails asking for an attribution), so use a light
    # basemap that folium provides by name.
    m = folium.Map(location=[20, 0], zoom_start=2, tiles="CartoDB positron")

    # Add the choropleth layer with manual colour mapping
    folium.GeoJson(
        "simplified_countries.geojson",
        name="choropleth",
        style_function=style_function
    ).add_to(m)

    # Marker cluster for interactivity
    marker_cluster = MarkerCluster().add_to(m)

    # One marker per country with coordinates; the popup holds both charts
    valid_data = filtered_data.dropna(subset=['latitude', 'longitude'])
    for _, row in valid_data.iterrows():
        html = generate_graphs(row['country'], selected_year)
        folium.Marker(
            location=[row['latitude'], row['longitude']],
            popup=folium.Popup(html, max_width=800)
        ).add_to(marker_cluster)

    return m

# Pre-render one choropleth map per year into the "maps" folder
def generate_all_maps():
    """Create and save the choropleth map of every year found in the data.

    Maps are written to ``maps/choropleth_map_<year>.html``. A year whose
    file already exists is skipped, so the function can be re-run to fill in
    only the missing maps.
    """
    maps_folder = "maps"
    os.makedirs(maps_folder, exist_ok=True)

    # Walk the distinct years in the order they appear in the dataset
    for year in data_with_coordinates['year'].unique():
        map_file = os.path.join(maps_folder, f"choropleth_map_{year}.html")

        if os.path.exists(map_file):
            print(f"Map for year {year} already exists. Skipping.")
            continue

        print(f"Generating map for year {year}...")
        year_map = create_choropleth_map_with_markers(year)
        year_map.save(map_file)

# Uncomment the following line to generate all maps at once
# generate_all_maps()

# Slider callback: show the top-emitters chart and the (cached) map for a year
def display_charts_and_map(selected_year):
    """Display the top-emitters bar chart and the choropleth map for a year.

    The map is loaded from ``maps/choropleth_map_<year>.html`` when that file
    exists; otherwise it is generated and saved there first. The saved file is
    then shown in an ``IFrame``.

    Args:
        selected_year: Year to display; converted with ``int()``.
    """
    year = int(selected_year)

    # Location of the saved map for this year
    maps_folder = "maps"
    os.makedirs(maps_folder, exist_ok=True)
    map_file = os.path.join(maps_folder, f"choropleth_map_{year}.html")

    # Bar chart first; the Plotly figure is shown directly by the helper
    print(f"Top Emitters by CO₂ Emissions and Sectoral Contributions in {year}")
    create_bar_chart_top_emitters(year)

    # Reuse a previously saved map when there is one, otherwise build it now
    if os.path.exists(map_file):
        print(f"Map for {year} already exists. Loading the saved version...")
    else:
        print(f"Generating and saving the map for {year}...")
        create_choropleth_map_with_markers(year).save(map_file)

    # Embed the saved HTML file
    print(f"Choropleth Map for {year}")
    display(IFrame(map_file, width="100%", height="600px"))

# Create a year slider covering the years present in the data.
# The bounds are converted to plain ints for the widget, and the slider starts
# on the latest available year rather than a hard-coded one, so it stays valid
# when the dataset's year range changes.
first_year = int(data_with_coordinates['year'].min())
last_year = int(data_with_coordinates['year'].max())

year_slider = widgets.IntSlider(
    value=last_year,
    min=first_year,
    max=last_year,
    step=1,
    description="Year:"
)

# Link the slider to the display function
interact(display_charts_and_map, selected_year=year_slider)

interactive(children=(IntSlider(value=2023, description='Year:', max=2023, min=1860), Output()), _dom_classes=…

<function __main__.display_charts_and_map(selected_year)>