Library imports

In [2]:
# Importing required libraries
import os
import pandas as pd
import geopandas as gpd
import folium
from branca.colormap import StepColormap
from folium.plugins import MarkerCluster
from ipywidgets import interact, widgets
from IPython.display import display, IFrame
import matplotlib.pyplot as plt
import io
import base64
import plotly.graph_objects as go
from scipy.stats import gmean

Load and pre-process the dataset

In [4]:
# Load the original dataset
data = pd.read_csv('owid-co2-data.csv')

# Load the country coordinates dataset
coordinates_data = pd.read_csv('country-coordinates-world.csv')

# Rename the columns of coordinates dataset to match the ones in the original dataset
coordinates_data.rename(columns={'Country': 'country', 'Latitude': 'latitude', 'Longitude': 'longitude'}, inplace=True)

# Filter data to only include rows with valid ISO codes
data_cleaned = data[data['iso_code'].notna()]

# Merge the latitude and longitude from the coordinates dataset to the data_cleaned dataframe based on 'country'
data_with_coordinates = data_cleaned.merge(coordinates_data[['country', 'latitude', 'longitude']], on='country', how='left')

# Filter for only the required columns
data_with_coordinates = data_with_coordinates[['country', 'year', 'iso_code', 'co2', 'co2_per_capita', 'cement_co2_per_capita', 'coal_co2_per_capita', 'oil_co2_per_capita', 'gas_co2_per_capita', 'flaring_co2_per_capita', 'latitude', 'longitude']]

# Replace NaN values with 0 in every column EXCEPT the coordinates.
# A missing latitude/longitude must stay NaN: filling it with 0 would pin every
# country without a coordinate match to (0, 0) and would defeat the
# dropna(subset=['latitude', 'longitude']) guard applied before markers are drawn.
columns_to_fill = [column for column in data_with_coordinates.columns if column not in ('latitude', 'longitude')]
data_with_coordinates = data_with_coordinates.fillna({column: 0 for column in columns_to_fill})

# Load the GeoJSON file
geojson_url = "https://raw.githubusercontent.com/datasets/geo-countries/master/data/countries.geojson"
geojson_data = gpd.read_file(geojson_url)

# Simplify geometries while keeping properties
geojson_data['geometry'] = geojson_data.geometry.simplify(tolerance=0.05, preserve_topology=True)

# Normalise the ISO-3 property name to 'iso_code'. rename() silently ignores keys
# that are not present, so listing more than one candidate is safe.
# NOTE(review): newer revisions of the geo-countries file appear to publish the
# code as 'ISO3166-1-Alpha-3' instead of 'ISO_A3' - confirm against the live file.
geojson_data.rename(columns={'ISO_A3': 'iso_code', 'ISO3166-1-Alpha-3': 'iso_code'}, inplace=True)

# Perform the merge to add the matched ISO code geometries from geojson to the data_with_coordinates
final_geojson = pd.merge(data_with_coordinates, geojson_data[['iso_code', 'geometry']], how='left', on='iso_code')

# Ensure that the geometry column is a GeoSeries for GeoPandas operations
final_geojson = gpd.GeoDataFrame(final_geojson, geometry='geometry')

Functions for creating maps and graphs

In [6]:
# Function to create the CO2 line graph
def co2_line_graph(country, selected_year):
    """Render one country's CO₂ emission history as an SVG document.

    The chart plots every row of ``final_geojson`` for ``country`` in
    chronological order, marks ``selected_year`` when the country has a row
    for it, and overlays a mean of the strictly positive emission values
    weighted by their year number.

    Args:
        country: Value matched against the ``country`` column.
        selected_year: Year to highlight with a dashed line and a dot.

    Returns:
        str: The figure serialised as SVG markup.
    """
    # Rows for the requested country, oldest first
    history = final_geojson[final_geojson['country'] == country].sort_values(by='year')

    # Coerce emissions to numbers (invalid entries become NaN) and keep only
    # the strictly positive ones; NaN fails the comparison and drops out too
    emissions = pd.to_numeric(history['co2'], errors='coerce')
    emissions = emissions[emissions > 0]

    # Year numbers serve as weights, restricted to the rows that survived the filter
    year_weights = pd.to_numeric(history['year'], errors='coerce')[emissions.index]

    # Weighted mean stays None unless there is something to average
    weighted_mean = None
    if not (emissions.empty or year_weights.empty):
        try:
            weighted_mean = (year_weights * emissions).sum() / year_weights.sum()
        except ValueError as e:
            print(f"Error calculating weighted mean for {country}: {e}")
            weighted_mean = None

    # Compact figure: the image is embedded in a map popup
    fig, ax = plt.subplots(figsize=(6, 4))

    # Emission series
    ax.plot(history['year'], history['co2'], linestyle='-', color='b', label='CO₂ Emissions')

    # Marker for the selected year, only if the country has data for it
    if selected_year in history['year'].values:
        year_rows = history[history['year'] == selected_year]
        ax.axvline(x=selected_year, color='r', linestyle='--', label=f'Selected Year: {selected_year}')
        ax.scatter(selected_year, year_rows['co2'].values[0], color='r', zorder=5)

    # Horizontal reference line for the weighted mean, labelled at the left edge
    if weighted_mean:
        ax.axhline(y=weighted_mean, color='g', linestyle='-.', label=f'Weighted Mean: {weighted_mean:.2f}')
        ax.text(history['year'].min(), weighted_mean, f'WM: {weighted_mean:.2f}',
                color='g', va='bottom', ha='left', fontsize=8)

    # Title, axis labels, grid and legend
    ax.set_title(f"CO₂ Emissions for {country} Over Time", fontsize=12, loc='center')
    ax.set_xlabel("Year", fontsize=10)
    ax.set_ylabel("CO₂ Emissions (Million Tonnes)", fontsize=10)
    ax.grid(True)
    ax.legend(fontsize=8)

    # Serialise to SVG in memory, then release the figure
    buffer = io.StringIO()
    fig.savefig(buffer, format='svg')
    plt.close(fig)
    return buffer.getvalue()

# Function to create a horizontal bar chart of per-capita sectoral contributions
def create_sectoral_contribution_chart(country, selected_year):
    """Render the per-capita sector breakdown for one country and year as SVG.

    Args:
        country: Value matched against the ``country`` column of ``final_geojson``.
        selected_year: Year to chart; converted with ``int()`` before matching.

    Returns:
        str: SVG markup of a horizontal bar chart with one bar per sector, or
        a small placeholder SVG reading "No data" when no row matches.
    """
    selected_year = int(selected_year)

    # Rows matching both the country and the year
    is_match = (final_geojson['country'] == country) & (final_geojson['year'] == selected_year)
    matching_rows = final_geojson[is_match]

    # Nothing to draw: hand back a placeholder image instead
    if matching_rows.empty:
        return "<svg width='200' height='100'><text x='10' y='40' font-size='20'>No data</text></svg>"

    # Display label -> source column, in plotting order
    sector_columns = {
        'Cement': 'cement_co2_per_capita',
        'Coal': 'coal_co2_per_capita',
        'Oil': 'oil_co2_per_capita',
        'Gas': 'gas_co2_per_capita',
        'Flaring': 'flaring_co2_per_capita',
    }
    labels = list(sector_columns)
    # Values come from the first matching row
    values = matching_rows[list(sector_columns.values())].iloc[0]

    # High-contrast, fully opaque palette: red, blue, green, orange, purple
    palette = ['#ff4d4d', '#3399ff', '#33cc33', '#ff9900', '#6600cc']

    fig, ax = plt.subplots(figsize=(6, 4))
    ax.barh(labels, values, color=palette, edgecolor='black')

    # Labels, title and a light vertical grid
    ax.set_xlabel("CO₂ Emissions (per capita)", fontsize=10)
    ax.set_ylabel("Sector", fontsize=10)
    ax.set_title(f"CO₂ Emissions by Sector for {country} in {selected_year}", fontsize=12, loc='center')
    ax.grid(axis='x', linestyle='--', alpha=0.7)
    fig.tight_layout()

    # Serialise to SVG in memory, then release the figure
    buffer = io.StringIO()
    fig.savefig(buffer, format='svg')
    plt.close(fig)
    return buffer.getvalue()

# Function to generate the HTML for both graphs in a popup
def generate_graphs(country, selected_year, co2_per_capita, total_co2):
    """Build the popup HTML for one country: headline figures plus two charts.

    Args:
        country: Country name, shown in the heading and passed to both chart builders.
        selected_year: Year shown in the heading and passed to both chart builders.
        co2_per_capita: Per-capita figure, printed with two decimals.
        total_co2: Total emissions figure, printed with two decimals.

    Returns:
        str: An HTML fragment embedding both charts as base64 SVG data URIs.
    """
    def _svg_to_base64(svg_markup):
        # Data URIs need the SVG text as base64-encoded UTF-8
        return base64.b64encode(svg_markup.encode('utf-8')).decode('utf-8')

    # Line chart first, then the sector chart
    line_chart_b64 = _svg_to_base64(co2_line_graph(country, selected_year))
    sector_chart_b64 = _svg_to_base64(create_sectoral_contribution_chart(country, selected_year))

    # Popup layout: heading and figures on top, the two charts side by side below
    return f'''
    <div style="display: flex; flex-direction: column; align-items: flex-start;">
        <h4>{country} - {selected_year}</h4>
        <p><strong>CO₂ per Capita:</strong> {co2_per_capita:.2f} tons</p>
        <p><strong>Total CO₂ Emissions:</strong> {total_co2:.2f} million tonnes</p>
        <div style="display: flex; justify-content: space-between; width: 100%;">
            <div style="margin-right: 10px;">
                <h5>CO₂ Emissions Over Time</h5>
                <img src="data:image/svg+xml;base64,{line_chart_b64}" width="400" height="300">
            </div>
            <div>
                <h5>Sectoral Contributions to Emissions</h5>
                <img src="data:image/svg+xml;base64,{sector_chart_b64}" width="400" height="300">
            </div>
        </div>
    </div>
    '''

# Function to create a Stacked Bar Chart for Top Emitters by CO₂ Emissions and Sector Contributions
def bar_chart_top_emitters(selected_year):
    """Show a stacked horizontal bar chart of the ten highest per-capita emitters.

    Countries are ranked by ``co2_per_capita`` for ``selected_year``; each bar
    is split into the per-capita cement, coal, oil, gas and flaring columns.

    Args:
        selected_year: Year matched against the ``year`` column of ``final_geojson``.

    Returns:
        None. The Plotly figure is displayed via ``fig.show()``. When the year
        has no rows, a message is printed and nothing is drawn.
    """
    # Define sectors and their corresponding column names
    sectors = ['cement_co2_per_capita', 'coal_co2_per_capita', 'oil_co2_per_capita', 'gas_co2_per_capita', 'flaring_co2_per_capita']
    sector_names = ['Cement', 'Coal', 'Oil', 'Gas', 'Flaring']

    # Filter data for the selected year
    year_data = final_geojson[final_geojson['year'] == selected_year]

    # Keep the ten countries with the highest CO₂ emissions per capita
    top_emitters = (
        year_data[['country', 'co2_per_capita'] + sectors]
        .sort_values(by='co2_per_capita', ascending=False)
        .head(10)
    )

    # Ensure data consistency and avoid errors with empty datasets
    if top_emitters.empty:
        print(f"No data available for the year {selected_year}.")
        return

    # Create a Plotly stacked bar chart
    fig = go.Figure()

    # Add one trace per sector so the bars stack
    for sector, column in zip(sector_names, sectors):
        fig.add_trace(go.Bar(
            y=top_emitters['country'],
            x=top_emitters[column].values,
            name=sector,
            orientation='h',  # Horizontal bars
            # Plotly placeholders are written %{x} / %{y}; the braces are doubled
            # only to escape them inside the f-string
            hovertemplate=f"{sector}: %{{x:.2f}} Tonnes<br>Country: %{{y}}",
        ))

    # Update layout for the chart
    fig.update_layout(
        barmode='stack',  # Stacked bar chart
        title=f"Top 10 Emitters by CO₂ Emissions per Capita and Sectoral Contributions in {selected_year}",
        xaxis_title="CO₂ Emissions per Capita (Tonnes)",
        yaxis_title="Country",
        template="plotly_white",  # Use a light theme
        showlegend=True,
        height=400  # Ensure chart is large enough for readability
    )

    # Show the chart
    fig.show()

# Function to create a choropleth map with markers and popups
def choropleth_map_with_markers(selected_year):
    """Build a folium map of per-capita CO₂ emissions for one year.

    The map has three parts: a GeoJson layer whose fill colour follows fixed
    per-capita bins, a fixed-position HTML legend listing those bins, and a
    marker cluster with one marker per row that has coordinates, each with a
    popup produced by ``generate_graphs``.

    Args:
        selected_year: Year matched against the ``year`` column of ``final_geojson``.

    Returns:
        folium.Map: The assembled map.
    """
    # Rows for the requested year (copied so the fill below stays local)
    year_rows = final_geojson[final_geojson['year'] == selected_year].copy()

    # A negative sentinel stands for "No Data"
    year_rows['co2_per_capita'] = year_rows['co2_per_capita'].fillna(-1)

    # Bin edges, one colour per bin, and the colour used for the sentinel
    bin_edges = [0, 0.1, 0.2, 0.5, 1, 2, 5, 10, 20, float('inf')]
    bin_colors = ['#ffffcc', '#ffeda0', '#fed976', '#feb24c', '#fd8d3c', '#f03b20', '#bd0026', '#800026', '#4d004b']
    missing_color = "#d9d9d9"
    # (lower edge, upper edge, colour) for every bin
    bin_ranges = list(zip(bin_edges[:-1], bin_edges[1:], bin_colors))

    def color_for(value):
        # Negative values are the "No Data" sentinel
        if value < 0:
            return missing_color
        for lower, upper, shade in bin_ranges:
            if lower <= value < upper:
                return shade
        # Anything that matched no bin gets the darkest colour
        return bin_colors[-1]

    def feature_style(feature):
        # Per-feature style used by the GeoJson layer
        per_capita = feature['properties'].get('co2_per_capita', -1)
        return {
            'fillColor': color_for(per_capita),
            'fillOpacity': 0.7,
            'color': 'black',
            'weight': 0.5
        }

    # Legend markup: header, one entry per bin, the "No Data" entry, closing tags
    legend_parts = ["""
    <div style="position: fixed; 
                bottom: 10px; left: 10px; width: 200px; height: auto; 
                background-color: white !important; color: black !important; z-index: 1000; padding: 10px; 
                border: 2px solid grey; border-radius: 5px; box-shadow: 2px 2px 2px rgba(0,0,0,0.5);">
        <h4 style="margin: 0; text-align: center;">CO₂ Emissions</h4>
        <div style="display: flex; flex-direction: column; align-items: flex-start; gap: 4px;">
    """]

    for lower, upper, shade in bin_ranges:
        # The open-ended last bin is labelled "<low>+"; the rest show both edges
        if upper == float('inf'):
            label = f"{lower}+ tons"
        else:
            label = f"{lower} - {upper} tons"
        legend_parts.append(f"""
                <div style="display: flex; align-items: center; gap: 8px;">
                    <div style="width: 20px; height: 20px; background-color: {shade}; 
                                border: 1px solid black; margin-right: 8px; filter: none;"></div>
                    <span style="color: black;">{label}</span>
                </div>
            """)

    legend_parts.append(f"""
        <div style="display: flex; align-items: center; gap: 8px;">
            <div style="width: 20px; height: 20px; background-color: {missing_color}; 
                        border: 1px solid black; filter: none; margin-right: 8px;"></div>
            <span style="color: black;">No Data</span>
        </div>
    """)
    legend_parts.append("</div></div>")
    legend_html = "".join(legend_parts)

    # Base map
    # NOTE(review): some folium releases no longer ship the Stamen tile sets -
    # confirm that this tile name still resolves in the installed version
    m = folium.Map(location=[20, 0], zoom_start=2, tiles="StamenTonerLite")

    # Choropleth layer coloured through feature_style
    folium.GeoJson(
        year_rows,
        name="choropleth",
        style_function=feature_style
    ).add_to(m)

    # Attach the legend to the page
    m.get_root().html.add_child(folium.Element(legend_html))

    # Clustered markers, one per row that has coordinates
    cluster = MarkerCluster().add_to(m)
    located_rows = year_rows.dropna(subset=['latitude', 'longitude'])
    for _, record in located_rows.iterrows():
        # Popup content: headline figures plus both charts for this country
        popup_html = generate_graphs(
            record['country'],
            selected_year,
            record['co2_per_capita'],
            record['co2'],
        )
        folium.Marker(
            location=[record['latitude'], record['longitude']],
            popup=folium.Popup(popup_html, max_width=800)
        ).add_to(cluster)

    return m

Function to pre-generate all choropleth maps

In [8]:
# Function to generate all maps
def generate_all_maps(start_year=1985, end_year=2023):
    """Pre-generate and save one choropleth map per year into the ``maps`` folder.

    Years whose HTML file already exists are skipped, so the function can be
    re-run to fill in only the missing maps.

    Args:
        start_year: First year to generate, inclusive. Defaults to 1985.
        end_year: Last year to generate, inclusive. Defaults to 2023.

    Returns:
        None. Maps are written to ``maps/choropleth_map_<year>.html``.
    """
    maps_folder = "maps"
    os.makedirs(maps_folder, exist_ok=True)

    # Years present in the dataset within [start_year, end_year], in ascending
    # order. Casting to int keeps the file names identical to the ones
    # display_charts_and_map looks for, whatever dtype the column has.
    unique_years = sorted(
        int(year) for year in final_geojson['year'].unique()
        if start_year <= year <= end_year
    )
    for year in unique_years:
        map_file = os.path.join(maps_folder, f"choropleth_map_{year}.html")
        if os.path.exists(map_file):
            print(f"Map for year {year} already exists. Skipping.")
            continue
        print(f"Generating map for year {year}...")
        m = choropleth_map_with_markers(year)
        m.save(map_file)

# Uncomment the following line to generate all maps at once
#generate_all_maps()

Displaying maps, including the slider widget

In [10]:
# Function to display bar chart and map together, and save the map
def display_charts_and_map(selected_year):
    """Show the top-emitters chart and the choropleth map for one year.

    The map is read from ``maps/choropleth_map_<year>.html``; if that file
    does not exist yet it is generated and saved first.

    Args:
        selected_year: Year to display; converted with ``int()`` before use.

    Returns:
        None. The chart and the map are rendered as notebook output.
    """
    year = int(selected_year)

    # Saved maps live in this folder, one HTML file per year
    output_dir = "maps"
    os.makedirs(output_dir, exist_ok=True)
    html_path = os.path.join(output_dir, f"choropleth_map_{year}.html")

    # Bar chart first; the callee displays the Plotly figure itself
    print(f"Top Emitters by CO₂ Emissions and Sectoral Contributions in {year}")
    bar_chart_top_emitters(year)

    # Reuse the saved map when there is one, otherwise build and save it
    if os.path.exists(html_path):
        print(f"Map for {year} already exists. Loading the saved version...")
    else:
        print(f"Generating and saving the map for {year}...")
        year_map = choropleth_map_with_markers(year)
        year_map.save(html_path)

    # Embed the saved HTML file in the notebook
    print(f"Choropleth Map for {year}")
    display(IFrame(html_path, width="100%", height="600px"))

# Build the year slider: it spans every year in the dataset and starts on the latest one
year_column = final_geojson['year']
year_slider = widgets.IntSlider(
    description="Year:",
    min=year_column.min(),
    max=year_column.max(),
    value=year_column.max(),
    step=1,
)

# Re-run the display function every time the slider value changes
interact(display_charts_and_map, selected_year=year_slider)

interactive(children=(IntSlider(value=2023, description='Year:', max=2023, min=1750), Output()), _dom_classes=…

<function __main__.display_charts_and_map(selected_year)>