In [383]:
import pandas as pd
import altair as alt
from vega_datasets import data
import os

## Chart 1: Per capita CO₂ emissions, 2022
data: https://ourworldindata.org/co2-and-greenhouse-gas-emissions

reference: https://altair-viz.github.io/altair-tutorial/notebooks/09-Geographic-plots.html

reference: https://imagecolorpicker.com

In [385]:
def load_and_prepare_data(iso_mapping_path, emissions_data_path):
    """Return the 2022 per-capita CO₂ rows joined to their numeric ISO codes.

    Args:
        iso_mapping_path: CSV providing at least the columns 'alpha-3' and
            'country-code'.
        emissions_data_path: CSV providing at least the columns 'Year',
            'Code', 'Entity' and 'Annual CO₂ emissions (per capita)'.

    Returns:
        DataFrame with the columns 'country-code', 'country' and
        'co2_per_capita'. Rows whose 'Code' has no 'alpha-3' match are
        dropped by the (inner) merge.
    """
    iso_codes = pd.read_csv(iso_mapping_path)
    emissions = pd.read_csv(emissions_data_path)

    # Keep only the 2022 observations and give the columns short names
    emissions_2022 = emissions.loc[
        emissions['Year'] == 2022,
        ['Code', 'Entity', 'Annual CO₂ emissions (per capita)'],
    ].rename(columns={
        'Code': 'country_code',
        'Entity': 'country',
        'Annual CO₂ emissions (per capita)': 'co2_per_capita',
    })

    # Attach the numeric country code by matching on the 3-letter code
    joined = emissions_2022.merge(iso_codes, left_on='country_code', right_on='alpha-3')

    return joined[['country-code', 'country', 'co2_per_capita']]

def create_co2_emissions_map(final_data):
    """Build a world choropleth of per-capita CO₂ emissions, save it and return it.

    Args:
        final_data: DataFrame with the columns 'country-code' (matched against
            the TopoJSON feature ``id``), 'country' and 'co2_per_capita'.

    Returns:
        The configured Altair chart.

    Side effects:
        Writes 'images/co2_emissions_map.png', creating the 'images'
        directory first if it does not exist.

    Note:
        Reads the module-level name ``data`` (the ``vega_datasets`` import)
        for the world boundaries.
    """
    # Load the TopoJSON of countries (world country boundaries)
    countries = alt.topo_feature(data.world_110m.url, 'countries')

    # Create the map using the numeric country-code
    chart = alt.Chart(countries).mark_geoshape(
        stroke='grey'
    ).encode(
        color=alt.condition(
            # `!= None` (rather than `is not None`) is deliberate: the
            # comparison is overloaded to build the chart-side predicate.
            alt.datum.co2_per_capita != None,  # noqa: E711
            alt.Color('co2_per_capita:Q', scale=alt.Scale(
                domain=[0, final_data['co2_per_capita'].max()],
                range=[
                    '#f0eae8',  # lowest emissions: near-white
                    '#653117'   # highest emissions: darkest brown
                ]
            ), title='CO₂ per Capita'),
            alt.value('lightgray')  # Color for countries without data
        )
    ).transform_lookup(
        lookup='id',  # Link with the numeric field in the TopoJSON
        # No `default` is given: a failed lookup must leave the field null so
        # the condition above can paint that country light gray. The previous
        # `default=alt.value(0)` injected a {'value': 0} object instead.
        from_=alt.LookupData(final_data, 'country-code', ['co2_per_capita', 'country'])
    ).project(
        'equirectangular'  # Map projection
    ).properties(
        width=800,
        height=500,
        title='Per Capita CO₂ Emissions by Country in 2022'
    ).configure_view(
        stroke=None  # Remove border from the map
    ).configure_legend(
        orient='bottom',
        title=None,
        labelFontSize=12,
        titleFontSize=14
    )

    # Save the chart as an image (make sure the target folder exists first)
    os.makedirs('images', exist_ok=True)
    chart.save('images/co2_emissions_map.png')

    return chart

# Build, save and display Chart 1.
# Inputs: the ISO country-code mapping and the per-capita emissions CSV.
iso_mapping_path = 'data/all.csv'
emissions_data_path = 'data/2.co-emissions-per-capita.csv'

# Prepare the 2022 rows, then render the map (the chart function also writes a PNG)
final_data = load_and_prepare_data(iso_mapping_path, emissions_data_path)
map_chart = create_co2_emissions_map(final_data)

map_chart.display()


Description: This map visualizes per capita carbon dioxide (CO₂) emissions across countries for the year 2022, employing a color gradient that highlights the disparities in emissions levels worldwide. By examining this data, we can identify regions with the highest emissions, which may require targeted renewable energy interventions. Understanding these patterns is essential for policymakers aiming to assess the effectiveness of existing renewable energy initiatives and to strategize future investments in clean energy solutions.

## Chart 2: CO₂ Emissions per Capita vs. GDP per Capita (2022)

data: https://ourworldindata.org/grapher/co2-emissions-vs-gdp

Reference: https://altair-viz.github.io/gallery/multifeature_scatter_plot.html

In [386]:
# Option A
def create_gdp_scatter_plot(data):
    """Scatter CO₂ emissions per capita against GDP per capita, sized by population.

    Args:
        data: DataFrame with the columns 'GDP per capita',
            'Annual CO₂ emissions (per capita)', 'Entity' and
            'Population (historical)'.

    Returns:
        The Altair scatter chart.

    Side effects:
        Writes 'images/co2_vs_gdp_scatter_plot_2022.png', creating the
        'images' directory first if it does not exist.
    """
    # Create the scatter plot
    scatter_plot = alt.Chart(data).mark_circle().encode(
        x=alt.X('GDP per capita:Q', title='GDP per Capita'),
        y=alt.Y('Annual CO₂ emissions (per capita):Q', title='CO₂ Emissions per Capita'),
        color='Entity:N',  # Color by country
        size=alt.Size('Population (historical):Q', title='Population (Historical)', scale=alt.Scale(range=[20, 400]))
    ).properties(
        title='CO₂ Emissions per Capita vs. GDP per Capita (2022)',
        width=800,
        height=500
    )

    # This variant intentionally has no text-label layer.
    final_chart = scatter_plot

    # Save the chart as an image (make sure the target folder exists first)
    os.makedirs('images', exist_ok=True)
    final_chart.save('images/co2_vs_gdp_scatter_plot_2022.png')

    return final_chart

# Save and display
# NOTE(review): `load_and_prepare_gdp_data` is not defined in the visible part
# of this notebook; on a fresh kernel this cell raises NameError unless the
# loader is defined before it — confirm where it lives.
data_path = 'data/co2-emissions-vs-gdp.csv'
# Bound to `gdp_data` rather than `data`, so the module-level `data` imported
# from vega_datasets (read by the map functions) is not overwritten.
gdp_data = load_and_prepare_gdp_data(data_path)
gdp_scatter_plot = create_gdp_scatter_plot(gdp_data)

# Display the plot in the notebook
gdp_scatter_plot.display()


In [379]:
# Option B
def create_gdp_scatter_plot(data):
    """Scatter CO₂ emissions per capita against GDP per capita with country labels.

    Args:
        data: DataFrame with the columns 'GDP per capita',
            'Annual CO₂ emissions (per capita)' and 'Entity'.

    Returns:
        A layered Altair chart: fixed-size circles plus one text label per row.
        The chart is only returned; nothing is written to disk.
    """
    gdp_field = 'GDP per capita:Q'
    co2_field = 'Annual CO₂ emissions (per capita):Q'

    # Point layer: one circle per row, colored by country
    points = (
        alt.Chart(data)
        .mark_circle(size=100)
        .encode(
            x=alt.X(gdp_field, title='GDP per Capita'),
            y=alt.Y(co2_field, title='CO₂ Emissions per Capita'),
            color='Entity:N',
        )
        .properties(
            title='CO₂ Emissions per Capita vs. GDP per Capita (2022)',
            width=800,
            height=500,
        )
    )

    # Label layer: country name placed just to the right of each point
    labels = (
        alt.Chart(data)
        .mark_text(align='left', baseline='middle', dx=5, size=10)
        .encode(x=gdp_field, y=co2_field, text='Entity:N')
    )

    return points + labels

# Build and display
# NOTE(review): `load_and_prepare_gdp_data` is not defined in the visible part
# of this notebook; on a fresh kernel this cell raises NameError unless the
# loader is defined before it — confirm where it lives.
data_path = 'data/co2-emissions-vs-gdp.csv'
# Bound to `gdp_data` rather than `data`, so the module-level `data` imported
# from vega_datasets (read by the map functions) is not overwritten.
gdp_data = load_and_prepare_gdp_data(data_path)
gdp_scatter_plot = create_gdp_scatter_plot(gdp_data)

# Display the plot in the notebook
gdp_scatter_plot.display()


Description: This scatter plot illustrates the relationship between CO₂ emissions per capita and GDP per capita for various countries in 2022. Each point represents a country, with the size of the circle indicating its historical population. This visualization helps to identify trends, such as whether wealthier nations tend to have higher carbon emissions, which is crucial for understanding how economic growth impacts environmental sustainability. Insights drawn from this chart can inform strategies for promoting renewable energy in economies that heavily rely on fossil fuels, aligning economic development with environmental objectives.

## Chart 3: CO₂ emissions by fuel or industry type, World

data: https://ourworldindata.org/emissions-by-fuel

Reference: https://altair-viz.github.io/gallery/line_chart_with_custom_legend.html

Reference: https://stackoverflow.com/questions/61194028/adding-labels-at-end-of-line-chart-in-altair 

Reference: https://pandas.pydata.org/pandas-docs/version/1.0.0/reference/api/pandas.DataFrame.melt.html

In [387]:
def process_co2_data(filepath):
    """Load CO₂-by-source data and return yearly totals per fuel type in long format.

    If the file has an 'Entity' column containing a 'World' row, only the
    'World' rows are used. Summing every entity would otherwise count the
    same emissions several times whenever the file holds aggregate rows next
    to the individual countries. Without such a row, all rows are summed.

    Args:
        filepath: CSV with a 'Year' column and the six
            'Annual CO₂ emissions from ...' columns (numbers, optionally
            written with thousands separators).

    Returns:
        DataFrame with the columns 'Year', 'fuel_type' and 'emissions',
        restricted to 1990 onward.
    """
    # Load the data from the CSV file
    data = pd.read_csv(filepath)

    # Simplify emissions data
    fuel_types = [
        'Annual CO₂ emissions from other industry',
        'Annual CO₂ emissions from flaring',
        'Annual CO₂ emissions from cement',
        'Annual CO₂ emissions from gas',
        'Annual CO₂ emissions from oil',
        'Annual CO₂ emissions from coal'
    ]

    # Prefer the ready-made global aggregate to avoid double counting
    if 'Entity' in data.columns:
        world_rows = data['Entity'] == 'World'
        if world_rows.any():
            data = data[world_rows]

    # Filter for years from 1990 onward; copy so the column assignments below
    # operate on an independent frame rather than a slice
    data = data[data['Year'] >= 1990].copy()

    # Convert emissions columns to numeric after removing commas
    for fuel in fuel_types:
        data[fuel] = data[fuel].astype(str).str.replace(',', '').astype(float)

    # Aggregate emissions by Year and sum for each fuel type
    data_aggregated = data.groupby('Year')[fuel_types].sum().reset_index()

    # Melt the DataFrame to a long format for Altair
    data_long = pd.melt(data_aggregated, id_vars=['Year'], value_vars=fuel_types,
                        var_name='fuel_type', value_name='emissions')

    # Clean up the fuel_type column
    data_long['fuel_type'] = data_long['fuel_type'].str.replace('Annual CO₂ emissions from ', '')

    return data_long

def create_line_chart_with_labels(data_long):
    """Plot one emissions line per fuel type with a text label at the end of each line.

    Args:
        data_long: Long-format DataFrame with the columns 'Year', 'fuel_type'
            and 'emissions'. It is not modified.

    Returns:
        The layered Altair chart (lines plus end-of-line labels).

    Side effects:
        Writes 'images/co2_fuel_trends_with_labels.png', creating the
        'images' directory first if it does not exist.
    """
    # Stringify 'Year' on a copy (it is encoded as 'Year:T' below) so the
    # caller's DataFrame keeps its original dtype
    plot_data = data_long.assign(Year=data_long['Year'].astype(str))

    # Brown palette shared by the lines and their labels so the colors match
    fuel_scale = alt.Scale(
        range=['#8B4513', '#A0522D', '#CD853F', '#D2691E', '#DEB887', '#F4A460'])

    # Create the line chart with brown color scale for different fuel types
    line_chart = alt.Chart(plot_data).mark_line().encode(
        x=alt.X('Year:T', title='Year', axis=alt.Axis(labelAngle=-45, grid=False)),
        y=alt.Y('emissions:Q', title='CO₂ Emissions (in billions)',
                axis=alt.Axis(format='~s', titleFontSize=14, labelFontSize=12)),
        color=alt.Color('fuel_type:N', scale=fuel_scale, legend=None)
    ).properties(
        width=800,
        height=500,
        title='Trends in CO₂ Emissions by Fuel Type'
    )

    # Create labels at the end of each line (at the max Year for each fuel type)
    labels = alt.Chart(plot_data).mark_text(align='left', dx=5).encode(
        x=alt.X('Year:T', aggregate='max'),  # Position at the last year
        y=alt.Y('emissions:Q', aggregate={'argmax': 'Year'}),  # Get the emission value for the last year
        text=alt.Text('fuel_type:N'),  # Use fuel type names for labels
        color=alt.Color('fuel_type:N', scale=fuel_scale)
    )

    # Combine the line chart and labels
    final_chart = (line_chart + labels).configure_axis(
        labelFontSize=12,
        titleFontSize=14
    ).configure_view(
        stroke=None  # Remove outer border
    )

    # Save the chart as an image (make sure the target folder exists first)
    os.makedirs('images', exist_ok=True)
    final_chart.save('images/co2_fuel_trends_with_labels.png')

    return final_chart

# Build, save and display Chart 3.
file_path = 'data/co2-by-source.csv'
# Yearly emissions per fuel type (1990 onward) in long format
data_long = process_co2_data(file_path)
# Line chart with end-of-line labels; the chart function also writes a PNG
line_chart_with_labels = create_line_chart_with_labels(data_long)

line_chart_with_labels.display()


Description: This line chart shows global CO₂ emissions from 1990 onward, broken down by fuel or industry type, including coal, oil, gas, cement, flaring, and other industries. Each line represents the emission trends for a specific fuel type, revealing the dominant sources of emissions over time. This visualization highlights which fuel types contribute most to global CO₂ emissions, aiding in understanding how energy policies and industry practices need to change to mitigate climate impact. Decision-makers can use this data to assess the progress of transitioning away from high-emission fuels toward cleaner energy sources.

## Chart 4: Share of final energy use that comes from renewable sources, 2020

data: https://ourworldindata.org/sdgs/affordable-clean-energy

In [389]:
import pandas as pd
import altair as alt
from vega_datasets import data
import os

def load_and_prepare_renewable_data(iso_mapping_path, renewable_data_path):
    """Return the 2020 renewable-share rows joined to their numeric ISO codes.

    Args:
        iso_mapping_path: CSV providing at least the columns 'alpha-3' and
            'country-code'.
        renewable_data_path: CSV with exactly four columns which are renamed
            positionally to 'Entity', 'Code', 'Year' and 'share'. The share
            values may carry a trailing '%'.

    Returns:
        DataFrame with the columns 'country-code', 'country' and
        'renewable_share'. Rows whose 'Code' has no 'alpha-3' match are
        dropped by the (inner) merge.
    """
    iso_codes = pd.read_csv(iso_mapping_path)
    shares = pd.read_csv(renewable_data_path)

    # Positional rename: the share column's original header is replaced
    shares.columns = ['Entity', 'Code', 'Year', 'share']

    # Strip any percent sign and store the share as a float
    shares['share'] = shares['share'].astype(str).str.replace('%', '').astype(float)

    # Keep the 2020 observations and give the columns descriptive names
    shares_2020 = shares.loc[
        shares['Year'] == 2020,
        ['Code', 'Entity', 'share'],
    ].rename(columns={
        'Code': 'country_code',
        'Entity': 'country',
        'share': 'renewable_share',
    })

    # Attach the numeric country code by matching on the 3-letter code
    joined = shares_2020.merge(iso_codes, left_on='country_code', right_on='alpha-3')

    return joined[['country-code', 'country', 'renewable_share']]

def create_renewable_energy_map(final_data):
    """Build a world choropleth of renewable energy share, save it and return it.

    Args:
        final_data: DataFrame with the columns 'country-code' (matched against
            the TopoJSON feature ``id``), 'country' and 'renewable_share'.

    Returns:
        The configured Altair chart.

    Side effects:
        Writes 'images/renewable_energy_map_2020.png', creating the 'images'
        directory first if it does not exist.

    Note:
        Reads the module-level name ``data`` (the ``vega_datasets`` import)
        for the world boundaries.
    """
    # Load the TopoJSON of countries
    countries = alt.topo_feature(data.world_110m.url, 'countries')

    # Create the map using the numeric country-code
    chart = alt.Chart(countries).mark_geoshape(
        stroke='grey'
    ).encode(
        color=alt.condition(
            # `!= None` (rather than `is not None`) is deliberate: the
            # comparison is overloaded to build the chart-side predicate.
            alt.datum.renewable_share != None,  # noqa: E711
            alt.Color('renewable_share:Q', scale=alt.Scale(
                domain=[0, final_data['renewable_share'].max()],
                range=[
                    '#ffffff',  # 0 share in white
                    'green'     # largest share present in the data
                ]
            ), title='Renewable Energy Share'),
            alt.value('lightgray')  # Color for countries without data
        )
    ).transform_lookup(
        lookup='id',  # Link with the numeric field in the TopoJSON
        # No `default` is given: a failed lookup must leave the field null so
        # the condition above can paint that country light gray. The previous
        # `default=alt.value(0)` injected a {'value': 0} object instead.
        from_=alt.LookupData(final_data, 'country-code', ['renewable_share', 'country'])
    ).project(
        'equirectangular'  # Map projection
    ).properties(
        width=800,
        height=500,
        title='Share of Final Energy Consumption from Renewable Sources in 2020'
    ).configure_view(
        stroke=None  # Remove border from the map
    ).configure_legend(
        orient='bottom',
        title=None,
        labelFontSize=12,
        titleFontSize=14
    )

    # Save the chart as an image (make sure the target folder exists first)
    os.makedirs('images', exist_ok=True)
    chart.save('images/renewable_energy_map_2020.png')

    return chart  # Return the chart for display

# Build, save and display Chart 4.
# Inputs: the ISO country-code mapping and the renewable-share CSV.
iso_mapping_path = 'data/all.csv'
renewable_data_path = 'data/share-of-final-energy-consumption-from-renewable-sources (1).csv'

# Prepare the 2020 rows, then render the map (the chart function also writes a PNG)
final_data = load_and_prepare_renewable_data(iso_mapping_path, renewable_data_path)
chart = create_renewable_energy_map(final_data)

chart.display()


Description: This choropleth map visualizes the share of each country's final energy consumption that came from renewable sources in 2020. Countries with higher renewable energy shares are shaded in darker green, indicating a larger reliance on renewable energy, while countries with a low share appear in lighter shades approaching white. This global map provides an insightful overview of which nations have made the most progress in integrating renewable energy into their energy mix, which is essential for assessing international efforts toward sustainability and clean energy transitions.



-------------------------------------------------------------------------------------------------------------------------------

Details: 

Definition of the SDG indicator: Indicator 7.2.1 is “renewable energy share in the total final energy consumption” in the UN SDG framework.

This is measured as consumption of renewable energy – which includes solar, wind, geothermal, hydropower, bioenergy, and marine sources – as a share of final energy consumption. Final energy consumption is defined as the total energy consumption after subtracting non-energy use and energy losses.

Data for this indicator is shown in the interactive visualization.

Target: “By 2030, increase substantially the share of renewable energy in the global energy mix.”

More research: Further data and research on this topic can be found at the Our World in Data topic page on Energy.



## Chart 5 (Scatter): CO₂ Emissions per Capita vs. Share of Electricity Generation from Renewables (2022)

data: https://ourworldindata.org/grapher/co2-per-capita-vs-renewable-electricity

reference: https://altair-viz.github.io/user_guide/marks/text.html

reference: https://altair-viz.github.io/user_guide/generated/core/altair.LabelOverlap.html

In [391]:
def load_and_prepare_data(data_path):
    """Return 2022 CO₂-per-capita and renewable-electricity rows for a fixed country list.

    Args:
        data_path: CSV providing at least the columns 'Entity', 'Code',
            'Year', 'Annual CO₂ emissions (per capita)' and
            'Renewables - % electricity'.

    Returns:
        DataFrame with the columns 'country', 'country_code', 'year',
        'co2_per_capita' and 'renewable_share'. Rows missing either measure
        are dropped, and only the countries listed below are kept. The
        original row index is preserved.
    """
    # Countries shown in the chart, grouped roughly by region
    common_countries = [
        'United States', 'Canada', 'Mexico',
        'Brazil', 'Argentina',
        'Germany', 'France', 'United Kingdom', 'Italy',
        'Spain', 'Netherlands',
        'Nigeria', 'South Africa', 'Kenya',
        'China', 'India', 'Japan', 'South Korea', 'Indonesia',
        'Australia', 'New Zealand',
        'Saudi Arabia', 'United Arab Emirates', 'Iran',
        'Russia', 'Turkey', 'Thailand'
    ]
    short_names = {
        'Entity': 'country',
        'Code': 'country_code',
        'Year': 'year',
        'Annual CO₂ emissions (per capita)': 'co2_per_capita',
        'Renewables - % electricity': 'renewable_share',
    }

    raw = pd.read_csv(data_path)

    # 2022 rows only, restricted to the renamed columns of interest
    rows_2022 = raw.loc[raw['Year'] == 2022, list(short_names)].rename(columns=short_names)

    # Both measures must be present for a point to be plotted
    complete = rows_2022.dropna(subset=['co2_per_capita', 'renewable_share'])

    return complete[complete['country'].isin(common_countries)]

def create_scatter_plot(data):
    """Scatter CO₂ per capita against renewable electricity share, with country labels.

    Args:
        data: DataFrame with the columns 'renewable_share', 'co2_per_capita'
            and 'country'.

    Returns:
        A layered Altair chart: circles plus one text label per country.

    Side effects:
        Writes 'images/co2_vs_renewables_scatter.png', creating the 'images'
        directory first if it does not exist.
    """
    # Color scale spanning the observed range of CO₂ emissions per capita.
    # NOTE(review): with this domain/range the lowest emitters are drawn brown
    # and the highest green — confirm that direction is intended.
    color_scale = alt.Scale(
        domain=[data['co2_per_capita'].min(), data['co2_per_capita'].max()],
        range=['#8B4513', '#228B22']  # Brown to Green gradient
    )

    # Create the scatter plot with custom color scale
    scatter_plot = alt.Chart(data).mark_circle(size=100).encode(
        x=alt.X('renewable_share:Q', title='Renewable Share of Electricity (%)'),
        y=alt.Y('co2_per_capita:Q', title='CO₂ Emissions per Capita'),
        color=alt.Color('co2_per_capita:Q', scale=color_scale, legend=None),  # Custom color scale and no legend
    ).properties(
        title='CO₂ Emissions per Capita vs. Share of Electricity Generation from Renewables (2022)',
        width=1200,
        height=600
    )

    # Add country labels; derived from the scatter chart, so they reuse its
    # data, color encoding and size. dx/dy nudge the text off the marker.
    text_labels = scatter_plot.mark_text(
        align='left',
        baseline='middle',
        dx=7,
        dy=-5
    ).encode(
        x='renewable_share:Q',
        y='co2_per_capita:Q',
        text='country:N'
    )

    # Combine the scatter plot and the text labels
    final_chart = scatter_plot + text_labels

    # Save the chart as an image (make sure the target folder exists first)
    os.makedirs('images', exist_ok=True)
    final_chart.save('images/co2_vs_renewables_scatter.png')

    return final_chart

# Save and Display
data_path = 'data/co2-per-capita-vs-renewable-electricity.csv'
# Bound to `co2_renewables_data` rather than `data`, so the module-level
# `data` imported from vega_datasets (read by the map functions) is not
# overwritten.
co2_renewables_data = load_and_prepare_data(data_path)
scatter_plot = create_scatter_plot(co2_renewables_data)

scatter_plot.display()


Description: This scatter plot provides a clear visualization of the relationship between CO₂ emissions per capita and the share of electricity generation from renewable sources across several key countries, including the United States. By mapping out how much of a country's energy comes from renewables (on the x-axis) and comparing it to its CO₂ emissions per capita (on the y-axis), decision-makers can assess the impact of renewable energy policies on reducing pollution levels. This visualization supports the goal of understanding how renewable energy adoption influences carbon footprints, which is critical when evaluating policy effectiveness and planning future investments.

## Chart 6 (Trends, Lines): Global Renewable Energy Generation by Type (TWh)

data: https://ourworldindata.org/grapher/investment-in-renewable-energy-by-technology

In [392]:
def load_and_prepare_data(filepath):
    """Load renewable generation data and return yearly totals per energy type in long format.

    If the file has an 'Entity' column containing a 'World' row, only the
    'World' rows are used. Summing every entity would otherwise count the
    same generation several times whenever the file holds aggregate rows next
    to the individual countries. Without such a row, all rows are summed.

    Args:
        filepath: CSV with a 'Year' column and the four '... - TWh'
            generation columns named below.

    Returns:
        DataFrame with the columns 'Year', 'energy_type' ('Solar', 'Wind',
        'Hydro' or 'Other Renewables') and 'generation'.
    """
    # Source column -> readable label; defined once and reused for the column
    # selection, the melt and the renaming
    generation_labels = {
        'Solar generation - TWh': 'Solar',
        'Wind generation - TWh': 'Wind',
        'Hydro generation - TWh': 'Hydro',
        'Other renewables (including geothermal and biomass) electricity generation - TWh': 'Other Renewables'
    }
    generation_columns = list(generation_labels)

    # Load the data
    data = pd.read_csv(filepath)

    # Prefer the ready-made global aggregate to avoid double counting
    if 'Entity' in data.columns:
        world_rows = data['Entity'] == 'World'
        if world_rows.any():
            data = data[world_rows]

    # Filter relevant columns
    data_filtered = data[['Year'] + generation_columns]

    # Total generation per year for each energy type
    data_grouped = data_filtered.groupby('Year').sum().reset_index()

    # Unpivot data to have 'energy_type' and 'generation' columns
    melted_data = pd.melt(data_grouped, id_vars=['Year'],
                          value_vars=generation_columns,
                          var_name='energy_type', value_name='generation')

    # Replace long names with more readable ones
    melted_data['energy_type'] = melted_data['energy_type'].replace(generation_labels)

    return melted_data

def create_trend_chart(data_long):
    """Plot one generation line per energy type with a text label at the end of each line.

    Args:
        data_long: Long-format DataFrame with the columns 'Year',
            'energy_type' and 'generation'. It is not modified.

    Returns:
        The layered Altair chart (lines plus end-of-line labels).

    Side effects:
        Writes 'images/global_renewable_energy_trend.png', creating the
        'images' directory first if it does not exist.
    """
    # Stringify 'Year' on a copy (it is encoded as 'Year:T' below) so the
    # caller's DataFrame keeps its original dtype
    plot_data = data_long.assign(Year=data_long['Year'].astype(str))

    # Green scheme for the energy types
    color_scheme = ['#4CAF50', '#388E3C', '#1B5E20', '#A5D6A7']

    # Create the line chart
    line_chart = alt.Chart(plot_data).mark_line().encode(
        x=alt.X('Year:T', title='Year', axis=alt.Axis(labelAngle=-45, grid=False)),
        y=alt.Y('generation:Q', title='Generation (TWh)',
                axis=alt.Axis(format='~s', titleFontSize=14, labelFontSize=12, grid=True)),
        color=alt.Color('energy_type:N', scale=alt.Scale(range=color_scheme), legend=None)
    ).properties(
        width=800,
        height=400,
        title='Global Renewable Energy Generation by Type (TWh)'
    )

    # Create labels at the end of each line
    labels = alt.Chart(plot_data).mark_text(align='left', dx=5).encode(
        x=alt.X('Year:T', aggregate='max'),  # Position at the last year
        y=alt.Y('generation:Q', aggregate={'argmax': 'Year'}),  # Get the generation value for the last year
        text=alt.Text('energy_type:N'),
        color=alt.Color('energy_type:N', scale=alt.Scale(range=color_scheme))  # Match colors with lines
    )

    # Combine the line chart and labels
    final_chart = (line_chart + labels).configure_axis(
        labelFontSize=12,
        titleFontSize=14
    ).configure_view(
        stroke=None  # Remove outer border
    )

    # Save the chart as an image (make sure the target folder exists first)
    os.makedirs('images', exist_ok=True)
    final_chart.save('images/global_renewable_energy_trend.png')

    return final_chart

# Build, save and display Chart 6.
data_path = 'data/renewable-energy-gen.csv'
# Yearly generation per energy type in long format
clean_data = load_and_prepare_data(data_path)
# Line chart with end-of-line labels; the chart function also writes a PNG
trend_chart = create_trend_chart(clean_data)

trend_chart.display()


Description: This line chart shows the trends of renewable energy generation worldwide, broken down by energy type (solar, wind, hydro, and other renewables). It helps illustrate the growth of these energy sources over time, a key element in understanding how the world is moving toward reducing carbon emissions.

This chart aligns with the project's goal of exploring the intersection of pollution levels and renewable energy use. By visualizing the increase in renewable energy generation, it provides valuable data to assess the effectiveness of renewable energy policies. It can also aid decision-makers in understanding the progress made globally and comparing it with pollution and carbon footprint data, allowing for a comprehensive analysis of real-world impacts.

## Chart 7 (Area): Global Renewable Energy Consumption Over Time by Energy Type

data: https://ourworldindata.org/grapher/renewable-energy-consumption

reference: https://altair-viz.github.io/user_guide/marks/area.html

In [393]:
def load_and_prepare_renewable_data(file_path):
    """Load renewable consumption data and reshape it to one row per entity, year and energy type.

    Args:
        file_path: CSV providing 'Entity', 'Year' and the six
            '... (TWh, direct energy)' columns named below.

    Returns:
        DataFrame with the columns 'Entity', 'Year', 'Energy Type' and
        'Consumption (TWh)', limited to years from 1900 onward and with
        rows lacking a consumption value removed.
    """
    # Source column -> short display label (order defines the melt order)
    source_labels = {
        'Other renewables (TWh, direct energy)': 'Other Renewables',
        'Solar (TWh, direct energy)': 'Solar',
        'Biofuels (TWh, direct energy)': 'Biofuels',
        'Wind (TWh, direct energy)': 'Wind',
        'Hydropower (TWh, direct energy)': 'Hydropower',
        'Traditional biomass (TWh, direct energy)': 'Traditional Biomass'
    }
    source_columns = list(source_labels)
    key_columns = ['Entity', 'Year']

    raw = pd.read_csv(file_path)

    # Restrict to the columns of interest and to 1900 or later
    subset = raw[key_columns + source_columns]
    recent = subset[subset['Year'] >= 1900]

    # Wide -> long: one row per (entity, year, energy type)
    long_format = recent.melt(
        id_vars=key_columns,
        value_vars=source_columns,
        var_name='Energy Type',
        value_name='Consumption (TWh)'
    )

    # Swap the verbose column names for the short labels
    long_format['Energy Type'] = long_format['Energy Type'].replace(source_labels)

    # Drop observations without a consumption figure
    return long_format.dropna(subset=['Consumption (TWh)'])

def create_stacked_area_chart(data_melted):
    """Draw a stacked area chart of consumption per year, colored by energy type.

    Args:
        data_melted: Long-format DataFrame with the columns 'Year',
            'Energy Type' and 'Consumption (TWh)'.

    Returns:
        The configured Altair chart.

    Side effects:
        Creates the 'images' directory when it is missing and writes
        'images/renewable_energy_area_chart.png'.
    """
    # Make sure the export folder is there before anything is written
    output_dir = 'images'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Shades of green, one per energy type
    greens = ['#014422', '#006727', '#248823', '#25A032', '#A4D279', '#D0E6B1']
    energy_color = alt.Color('Energy Type:N', scale=alt.Scale(range=greens))

    # Areas stack the per-year sum of consumption for each energy type
    areas = alt.Chart(data_melted).mark_area().encode(
        x='Year:O',
        y='sum(Consumption (TWh)):Q',
        color=energy_color,
    )

    sized = areas.properties(
        width=800,
        height=500,
        title='Global Renewable Energy Consumption Over Time by Energy Type'
    )

    chart = sized.configure_axis(
        labelFontSize=12,
        titleFontSize=14
    ).configure_legend(
        titleFontSize=14,
        labelFontSize=12
    )

    # Export as PNG
    chart.save('images/renewable_energy_area_chart.png')

    return chart

# Build, save and display Chart 7.
file_path = 'data/renewable-energy-consumption.csv'
# Long-format consumption per entity, year and energy type (1900 onward)
data_melted = load_and_prepare_renewable_data(file_path)
# Stacked area chart; the chart function also writes a PNG
chart = create_stacked_area_chart(data_melted)

# Display the chart
chart.display()


Description: This stacked area chart shows global renewable energy consumption over time, categorized by energy type (solar, wind, biofuels, hydropower, traditional biomass, and other renewables). The visualization highlights the growing role of renewables in energy production from 1900 onward, illustrating the contribution of each type to the overall energy mix.

This chart supports the project's focus on analyzing the relationship between pollution levels and renewable energy use. By tracking renewable energy consumption over time, decision-makers can better assess how different energy sources contribute to reducing carbon emissions and how investments in these technologies evolve in comparison to other public policies. Understanding these trends helps to evaluate the effectiveness of renewable energy policies in reducing the carbon footprint globally.

## Chart 8: Investment in Renewable Energy by Technology (2019)

data: https://ourworldindata.org/grapher/investment-in-renewable-energy-by-technology

In [394]:
def load_and_prepare_data(data_path):
    """Load 2019 renewable-energy investment and reshape it to one row per technology.

    Args:
        data_path: CSV providing 'Entity', 'Code', 'Year' and one column per
            technology named below (numbers, optionally written with
            thousands separators).

    Returns:
        DataFrame with the columns 'Entity', 'Code', 'Year', 'technology'
        and 'investment' (float). Rows whose investment is missing or not
        numeric are dropped.
    """
    id_columns = ['Entity', 'Code', 'Year']
    technologies = [
        'Marine energy',
        'Small hydropower',
        'Geothermal energy',
        'Biofuels',
        'Biomass and waste-to-energy',
        'Wind energy',
        'Solar energy',
    ]

    raw = pd.read_csv(data_path)

    # 2019 rows only, restricted to the identifier and technology columns
    rows_2019 = raw.loc[raw['Year'] == 2019, id_columns + technologies]

    # Wide -> long: one row per (entity, technology)
    long_format = rows_2019.melt(
        id_vars=id_columns,
        value_vars=technologies,
        var_name='technology',
        value_name='investment',
    )

    # Strip thousands separators, then coerce to numbers (unparseable -> NaN)
    without_commas = long_format['investment'].astype(str).str.replace(',', '')
    long_format['investment'] = pd.to_numeric(without_commas, errors='coerce')

    # Discard technologies without a usable investment figure
    return long_format.dropna(subset=['investment'])

def create_investment_bar_chart(data):
    """Build a horizontal bar chart of investment per technology and save it as a PNG.

    Parameters
    ----------
    data : pandas.DataFrame
        Long-format frame with a 'technology' column and a numeric
        'investment' column.

    Returns
    -------
    alt.Chart
        The bar chart. As a side effect the chart is written to
        'images/investment_renewable_energy_by_technology_2019.png'.
    """
    # Seven greens, darkest to lightest, so each of the seven technologies gets
    # its own colour. With fewer colours than domain entries the ordinal scale
    # recycles the range and two technologies end up sharing a colour.
    color_scheme = ['#014422', '#006727', '#248823', '#25A032',
                    '#63B855', '#A4D279', '#D0E6B1']

    # Plain Python list so the domain serialises cleanly into the chart spec
    technologies = data['technology'].unique().tolist()

    # Create the bar chart
    bar_chart = alt.Chart(data).mark_bar().encode(
        y=alt.Y('technology:N', title='Technology', sort='-x'),  # Horizontal bars sorted by investment
        x=alt.X('investment:Q', title='Investment ($)'),
        color=alt.Color('technology:N', scale=alt.Scale(domain=technologies, range=color_scheme)),
    ).properties(
        title='Investment in Renewable Energy by Technology (2019)',
        width=800,
        height=400
    )

    # Save the bar chart as an image; create the target folder first so a
    # fresh checkout without an images/ directory does not fail here.
    output_path = 'images/investment_renewable_energy_by_technology_2019.png'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    bar_chart.save(output_path)

    return bar_chart

# Save and Display
# The frame gets its own name instead of `data`, which would overwrite the
# `data` module imported from vega_datasets at the top of the notebook and
# break any cell that later calls data.world_110m.
data_path = 'data/investment-in-renewable-energy-by-technology.csv'
investment_data = load_and_prepare_data(data_path)
bar_chart = create_investment_bar_chart(investment_data)

bar_chart.display()



Description: This bar chart illustrates investment in renewable energy technologies for the year 2019, highlighting various sources such as marine energy, small hydropower, geothermal energy, biofuels, biomass and waste-to-energy, wind energy, and solar energy. The chart provides a clear comparison of funding allocated to each technology, showcasing which areas received the most financial support.

This visualization is important to the project’s objective of exploring the intersection of pollution levels and the use of renewable energy. By analyzing investment patterns, decision-makers can better understand how financial resources are distributed among different renewable technologies, which can inform policies aimed at reducing carbon emissions. This insight helps assess the potential effectiveness of renewable energy initiatives in combating pollution and understanding their relationship to real-life environmental outcomes.

## Chart 9: Carbon Dioxide Emissions by Sector (2020)

data: https://ourworldindata.org/grapher/co-emissions-by-sector

In [396]:
# Load the sector-level CO2 emissions data.
# A dedicated name is used instead of `data` so the `data` module imported
# from vega_datasets at the top of the notebook is not overwritten.
sector_emissions_raw = pd.read_csv('data/co-emissions-by-sector.csv')

# Year that is actually plotted. The filter has always selected 2020; the
# previous variable name and comment said 2022, which was misleading.
EMISSIONS_YEAR = 2020
sector_emissions_year = sector_emissions_raw[sector_emissions_raw['Year'] == EMISSIONS_YEAR]

# Display label -> source column in the CSV
SECTOR_COLUMNS = {
    'Buildings': 'Carbon dioxide emissions from buildings',
    'Industry': 'Carbon dioxide emissions from industry',
    'Land Use Change & Forestry': 'Carbon dioxide emissions from land use change and forestry',
    'Other Fuel Combustion': 'Carbon dioxide emissions from other fuel combustion',
    'Transport': 'Carbon dioxide emissions from transport',
    'Manufacturing & Construction': 'Carbon dioxide emissions from manufacturing and construction',
    'Fugitive Emissions': 'Fugitive emissions of carbon dioxide from energy production',
    'Electricity & Heat': 'Carbon dioxide emissions from electricity and heat',
    'Bunker Fuels': 'Carbon dioxide emissions from bunker fuels',
}

# One total per sector, summed over every row of the selected year.
# NOTE(review): if the CSV contains aggregate entities (e.g. "World" or
# regions) alongside individual countries, these sums double-count; confirm
# against the file and filter on 'Entity' if so.
sector_data = pd.DataFrame({
    'Sector': list(SECTOR_COLUMNS),
    'CO2 Emissions': [sector_emissions_year[column].sum() for column in SECTOR_COLUMNS.values()],
})

# Define color scheme: one distinct earth tone per sector, in the same order
# as the 'Sector' column. The last entry used to repeat '#A0522D', giving
# 'Land Use Change & Forestry' and 'Bunker Fuels' the same colour.
color_scheme = ['#5B3A29',
                '#8B4513',
                '#A0522D',
                '#CD853F',
                '#D2691E',
                '#DAA520',
                '#8B0000',
                '#C04000',
                '#DEB887']


# Create a bar chart using Altair
def create_bar_chart(df):
    """Build the emissions-by-sector bar chart and save it as a PNG.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Sector' column (one row per sector) and a numeric
        'CO2 Emissions' column.

    Returns
    -------
    alt.Chart
        The bar chart, sorted by descending emissions. As a side effect the
        chart is written to 'images/co2_emissions_by_sector_2022.png'.

    Notes
    -----
    Colours come from the module-level ``color_scheme`` list and are matched
    to sectors by position.
    """
    # Plain list rather than a pandas Series so the scale domain serialises
    # as a JSON array in the chart spec.
    sectors = df['Sector'].tolist()

    chart = alt.Chart(df).mark_bar().encode(
        x=alt.X('Sector:N', sort='-y', title='Emission Sector'),
        y=alt.Y('CO2 Emissions:Q', title='CO2 Emissions (tons)'),
        color=alt.Color('Sector:N', scale=alt.Scale(domain=sectors, range=color_scheme)),
    ).properties(
        # The notebook filters the source data to Year == 2020, so the title
        # states 2020 (it previously claimed 2022).
        title='Carbon Dioxide Emissions by Sector (2020)',
        width=600,
        height=400
    )

    # Save chart as an image. The file name is left unchanged because the HTML
    # gallery cell references it. Create the folder first so a fresh checkout
    # without an images/ directory does not fail here.
    output_path = 'images/co2_emissions_by_sector_2022.png'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    chart.save(output_path)

    return chart

# Build the sector chart (the PNG is written as a side effect), then render it inline
bar_chart = create_bar_chart(df=sector_data)
bar_chart.display()


Description: This bar chart visualizes carbon dioxide emissions by sector for the year 2020, categorizing emissions from areas such as buildings, industry, transport, and electricity and heat production. Each sector's total emissions are represented, highlighting which areas contribute most to the overall carbon footprint.

This chart is important to the project’s exploration of the relationship between pollution levels and the use of renewable energy worldwide. By identifying the sectors responsible for the highest emissions, stakeholders can target specific areas for improvement through enhanced renewable energy policies and initiatives.

In [397]:
%%html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Pollution and Renewable Energy Visualizations</title>
    <style>
        /* Every rule is scoped to .viz-gallery. The output of a %%html cell is
           inserted into the notebook page itself, so bare body / h1 / p
           selectors would restyle the entire notebook, not just this gallery. */
        .viz-gallery {
            font-family: Arial, sans-serif;
            margin: 20px;
            padding: 20px;
            background-color: #f9f9f9;
        }
        .viz-gallery h1 {
            text-align: center;
            color: #333;
        }
        .viz-gallery p {
            text-align: center;
            color: #555;
            margin-bottom: 30px;
        }
        .viz-gallery .image-row {
            display: flex;
            justify-content: center;
            margin-bottom: 20px;
        }
        .viz-gallery .image-row img {
            margin: 0 10px;
            max-width: 300px;
            border: 2px solid #ddd;
            border-radius: 5px;
        }
    </style>
</head>
<body>

    <!-- Wrapper that carries the gallery styling -->
    <div class="viz-gallery">

        <h1>Visualizations of Pollution Levels and Renewable Energy Use</h1>
        <p>This collection of visualizations explores the intersection of pollution levels and renewable energy adoption around the world. Each image provides insights into the effectiveness of renewable energy policies and their relationship with carbon emissions.</p>

        <!-- Row 1: emissions overview. The sector chart plots 2020 data; its file name is historical. -->
        <div class="image-row">
            <img src="images/co2_emissions_by_sector_2022.png" alt="Carbon Dioxide Emissions by Sector (2020)">
            <img src="images/co2_emissions_map.png" alt="CO2 Emissions Map">
            <img src="images/co2_fuel_trends_with_labels.png" alt="CO2 Fuel Trends with Labels">
        </div>

        <!-- Row 2: emissions against GDP and renewables -->
        <div class="image-row">
            <img src="images/co2_vs_gdp_scatter_plot_2022.png" alt="CO2 vs GDP Scatter Plot (2022)">
            <img src="images/co2_vs_renewables_scatter.png" alt="CO2 vs Renewables Scatter">
            <img src="images/global_renewable_energy_trend.png" alt="Global Renewable Energy Trend">
        </div>

        <!-- Row 3: renewable energy investment and adoption -->
        <div class="image-row">
            <img src="images/investment_renewable_energy_by_technology_2019.png" alt="Investment in Renewable Energy by Technology (2019)">
            <img src="images/renewable_energy_area_chart.png" alt="Renewable Energy Area Chart">
            <img src="images/renewable_energy_map_2020.png" alt="Renewable Energy Map (2020)">
        </div>

    </div>

</body>
</html>

