In [26]:
import pandas as pd
import altair as alt
from vega_datasets import data
import os


## Chart 1: Carbon Dioxide Emissions by Sector (2020)

data: https://ourworldindata.org/grapher/co-emissions-by-sector

reference: https://altair-viz.github.io/gallery/stacked_bar_chart_with_text.html

In [27]:
# Read the per-sector CO₂ table and keep only the rows for 2020
data = pd.read_csv('../data/co-emissions-by-sector.csv')
data_2020 = data.loc[data['Year'] == 2020]

# Chart label -> name of the source column holding that sector's emissions
sector_columns = {
    'Buildings': 'Carbon dioxide emissions from buildings',
    'Industry': 'Carbon dioxide emissions from industry',
    'Land Use Change & Forestry': 'Carbon dioxide emissions from land use change and forestry',
    'Other Fuel Combustion': 'Carbon dioxide emissions from other fuel combustion',
    'Transport': 'Carbon dioxide emissions from transport',
    'Manufacturing & Construction': 'Carbon dioxide emissions from manufacturing and construction',
    'Fugitive Emissions': 'Fugitive emissions of carbon dioxide from energy production',
    'Electricity & Heat': 'Carbon dioxide emissions from electricity and heat',
    'Bunker Fuels': 'Carbon dioxide emissions from bunker fuels',
}

# One row per sector: the column total over every 2020 row in the file.
# NOTE(review): the sum runs over all rows for 2020; if the CSV also carries
# aggregate entities (e.g. a world total) they are counted as well — confirm.
sector_data = pd.DataFrame({
    'Sector': list(sector_columns),
    'CO2 Emissions': [data_2020[column].sum() for column in sector_columns.values()],
})

# Same totals divided by 1e6 (labelled Mt; presumably the source values are in tonnes)
sector_data['CO2 Emissions (Mt)'] = sector_data['CO2 Emissions'] / 1e6

# Largest sector first
sector_data = sector_data.sort_values(by='CO2 Emissions (Mt)', ascending=False)

# Create horizontal bar chart
def create_simplified_bar_chart(df):
    """Return a horizontal bar chart of sector emissions with a value label on each bar.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'Sector' and 'CO2 Emissions (Mt)'.

    Returns
    -------
    A layered Altair chart: the bars plus a text layer carrying the same values.
    """
    emissions_field = 'CO2 Emissions (Mt):Q'

    # Bars ordered by descending x value
    bars = (
        alt.Chart(df)
        .mark_bar(color='#776962')
        .encode(
            x=alt.X(emissions_field, title='CO₂ Emissions (millions of tons)'),
            y=alt.Y('Sector:N', sort='-x', title='Emission Sector'),
        )
        .properties(
            title='Carbon Dioxide Emissions by Sector (2020)',
            width=600,
            height=400,
        )
    )

    # Text layer reuses the bar encodings; values are formatted with no decimals
    # and placed 5px to the right of each bar end
    value_labels = bars.mark_text(align='left', baseline='middle', dx=5).encode(
        text=alt.Text(emissions_field, format=',.0f')
    )

    return bars + value_labels

# Build the Chart 1 bar chart from the aggregated sector totals and render it inline
bar_chart = create_simplified_bar_chart(sector_data)
bar_chart.display()


## Chart 2: Per capita CO₂ emissions, 2020
data: https://ourworldindata.org/co2-and-greenhouse-gas-emissions

reference: https://altair-viz.github.io/altair-tutorial/notebooks/09-Geographic-plots.html

reference: https://imagecolorpicker.com

reference: https://altair-viz.github.io/gallery/london_tube.html

In [28]:
import pandas as pd
import altair as alt
from vega_datasets import data
import os

def load_and_prepare_data(iso_mapping_path, emissions_data_path):
    """Load 2020 per-capita CO₂ emissions and attach the numeric ISO country code.

    Parameters
    ----------
    iso_mapping_path : str
        CSV that provides at least the columns 'alpha-3' and 'country-code'.
    emissions_data_path : str
        CSV that provides 'Year', 'Code', 'Entity' and
        'Annual CO₂ emissions (per capita)'.

    Returns
    -------
    pandas.DataFrame
        Columns 'country-code', 'country', 'co2_per_capita'; only rows whose
        'Code' matches an 'alpha-3' entry survive the (inner) merge.
    """
    iso_codes = pd.read_csv(iso_mapping_path)
    emissions = pd.read_csv(emissions_data_path)

    # Keep the 2020 rows and give the three columns we need shorter names
    emissions_2020 = emissions.loc[
        emissions['Year'] == 2020,
        ['Code', 'Entity', 'Annual CO₂ emissions (per capita)'],
    ].rename(columns={
        'Code': 'country_code',
        'Entity': 'country',
        'Annual CO₂ emissions (per capita)': 'co2_per_capita',
    })

    # Join on the 3-letter code to pick up the numeric country code
    joined = emissions_2020.merge(iso_codes, left_on='country_code', right_on='alpha-3')

    return joined[['country-code', 'country', 'co2_per_capita']]

def create_co2_emissions_map(final_data):
    """Build a world choropleth of per-capita CO₂ emissions with labels for selected countries.

    Parameters
    ----------
    final_data : pandas.DataFrame
        Must provide 'country-code' (matched against the TopoJSON feature ``id``),
        'country' and 'co2_per_capita'.

    Returns
    -------
    A layered Altair chart (map + text labels) with legend and view configuration applied.

    Notes
    -----
    Reads ``data.world_110m.url``, so the notebook-level name ``data`` must be the
    ``vega_datasets`` loader at call time.
    """
    # Label anchor points as (longitude, latitude)
    labeled_countries = {
        'China': (104.1954, 35.8617),
        'United States': (-95.7129, 37.0902),
        'Russia': (105.3188, 61.5240),
        'Japan': (138.2529, 36.2048),
        'Germany': (10.4515, 51.1657),
        'United Kingdom': (-3.4360, 55.3781),
        'India': (78.9629, 20.5937),
        'France': (2.2137, 46.6034),
        'Italy': (12.5674, 41.8719),
        'Canada': (-106.3468, 56.1304),
        'South Korea': (127.7669, 35.9078),
        'Brazil': (-51.9253, -14.2350)
    }

    # Materialise the dict view as a list so the column is built from a plain sequence
    labels_df = pd.DataFrame({
        'country': list(labeled_countries),
        'longitude': [coords[0] for coords in labeled_countries.values()],
        'latitude': [coords[1] for coords in labeled_countries.values()]
    })

    # Load the TopoJSON of countries (world country boundaries)
    countries = alt.topo_feature(data.world_110m.url, 'countries')

    # Piecewise colour scale: four fixed breakpoints plus the data maximum, one colour each
    emission_scale = alt.Scale(
        domain=[2.5, 5.5, 10.5, 22.5, final_data['co2_per_capita'].max()],
        range=['#faf9f6', '#dcd8d2', '#a79d8f', '#776962', '#3d3931']
    )

    chart = alt.Chart(countries).mark_geoshape(
        stroke='grey'
    ).encode(
        color=alt.Color('co2_per_capita:Q',
                        scale=emission_scale,
                        legend=alt.Legend(title='CO₂ Emissions per Capita (tons)'))
    ).transform_lookup(
        lookup='id',  # Link with the numeric field in the TopoJSON
        from_=alt.LookupData(final_data, 'country-code', ['co2_per_capita', 'country']),
        # Plain 0 for countries without data. alt.value(0) would hand the lookup the
        # dict {'value': 0} as the fallback, which is not a number the scale can map.
        default=0
    ).project(
        'equirectangular'  # Map projection
    ).properties(
        width=800,
        height=500,
        title='Per Capita CO₂ Emissions by Country in 2020'
    )

    # Text labels positioned by longitude/latitude, nudged 5px right and up
    labels = alt.Chart(labels_df).mark_text(
        fontSize=10,
        color='#0e0c0b',
        dx=5,
        dy=-5
    ).encode(
        longitude='longitude:Q',
        latitude='latitude:Q',
        text='country:N'
    )

    # Combine the map and labels; legend below the map, no border around the view
    final_chart = (chart + labels).configure_legend(
        orient='bottom',
        labelFontSize=14,
        titleFontSize=14
    ).configure_view(
        stroke=None
    )

    return final_chart

# Chart 2 driver: input files (relative to the notebook's working directory)
iso_mapping_path = '../data/all.csv'
emissions_data_path = '../data/2.co-emissions-per-capita.csv'

# Load + merge the 2020 per-capita emissions, then build the choropleth from the result
final_data = load_and_prepare_data(iso_mapping_path, emissions_data_path)
map_chart = create_co2_emissions_map(final_data)

# Render inline
map_chart.display()


## Chart 3: CO₂ Emissions per Capita vs. GDP per Capita (2020)

data: https://ourworldindata.org/grapher/co2-emissions-vs-gdp

Reference: https://altair-viz.github.io/gallery/multifeature_scatter_plot.html

In [29]:
# Entities to highlight and label in the Chart 3 scatter plot
labeled_countries = [
    'China', 'United States', 'Russia', 'Japan',
    'Germany', 'United Kingdom', 'India', 'France',
    'Italy', 'Canada', 'South Korea', 'Brazil',
]

# Load and prepare data function
def load_and_prepare_gdp_data(data_path):
    """Load the CO₂-vs-GDP table and return the 2020 rows usable for the scatter plot.

    A row is kept when all of the following hold:
    - 'Year' is 2020,
    - 'Entity' is not 'World',
    - 'GDP per capita' is at most 65,000,
    - none of 'GDP per capita', 'Annual CO₂ emissions (per capita)' and
      'Population (historical)' is missing.

    Parameters
    ----------
    data_path : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The surviving rows with all original columns and their original index.
    """
    raw = pd.read_csv(data_path)

    # A missing GDP value already fails the <= comparison, so this mask also drops it
    keep = (
        (raw['Year'] == 2020)
        & (raw['Entity'] != 'World')
        & (raw['GDP per capita'] <= 65000)
    )

    required = ['Annual CO₂ emissions (per capita)', 'Population (historical)']
    return raw[keep].dropna(subset=required)

# Create scatter plot with highlighted countries
def create_gdp_scatter_plot(data, highlighted_countries=None):
    """Scatter plot of CO₂ per capita against GDP per capita with selected entities highlighted.

    Points are sized by 'Population (historical)'. Entities named in
    ``highlighted_countries`` are drawn in dark brown and labelled; all others are
    light grey and unlabelled.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs 'Entity', 'GDP per capita', 'Annual CO₂ emissions (per capita)' and
        'Population (historical)'.
    highlighted_countries : iterable of str, optional
        Entity names to highlight. Defaults to the notebook-level
        ``labeled_countries`` list defined in this cell.

    Returns
    -------
    A layered Altair chart (points + text labels).

    Raises
    ------
    ValueError
        If ``highlighted_countries`` is given but empty.
    """
    from functools import reduce
    import operator

    if highlighted_countries is None:
        highlighted_countries = labeled_countries
    names = list(highlighted_countries)
    if not names:
        raise ValueError('highlighted_countries must contain at least one entity name')

    # One equality test per name, OR-ed together left to right. This is the same
    # expression as a hand-written `(Entity == a) | (Entity == b) | ...` chain,
    # built once and shared by the colour and the label conditions.
    is_highlighted = reduce(operator.or_, (alt.datum.Entity == name for name in names))

    # Points sized by population, coloured by whether the entity is highlighted
    main_layer = alt.Chart(data).mark_circle().encode(
        x=alt.X('GDP per capita:Q'),
        y=alt.Y('Annual CO₂ emissions (per capita):Q'),
        size=alt.Size('Population (historical):Q', scale=alt.Scale(range=[10, 1000]), title='Population'),
        color=alt.condition(
            is_highlighted,
            alt.value('#492f21'),  # Color for highlighted countries
            alt.value('lightgrey')  # Color for the rest
        ))

    # Text layer at the same positions; it also carries the explicit axis titles.
    # Entities outside the highlighted set get an empty label.
    text_labels = alt.Chart(data).mark_text(
        align='right',
        baseline='bottom',
        dx=50
    ).encode(
        x=alt.X('GDP per capita:Q', title='GDP per Capita'),
        y=alt.Y('Annual CO₂ emissions (per capita):Q', title='CO₂ Emissions per Capita'),
        text=alt.condition(
            is_highlighted,
            'Entity:N',
            alt.value('')  # No label for other countries
        )
    )

    # Combine the layers
    final_chart = (main_layer + text_labels).properties(
        title='CO₂ Emissions per Capita vs. GDP per Capita (2020)',
        width=800,
        height=500
    )

    return final_chart

# Chart 3 driver: load the filtered 2020 rows and build the scatter plot.
# NOTE: this rebinds the notebook-level name `data` (imported from vega_datasets in
# earlier cells) to a DataFrame; a later cell that needs the vega_datasets loader
# has to import it again.
data_path = '../data/co2-emissions-vs-gdp.csv'
data = load_and_prepare_gdp_data(data_path)
gdp_scatter_plot = create_gdp_scatter_plot(data)


# Render inline
gdp_scatter_plot.display()


## Chart 4: CO₂ emissions by fuel or industry type, World

data: https://ourworldindata.org/emissions-by-fuel

Reference: https://altair-viz.github.io/gallery/line_chart_with_custom_legend.html

Reference: https://stackoverflow.com/questions/61194028/adding-labels-at-end-of-line-chart-in-altair 

Reference: https://pandas.pydata.org/pandas-docs/version/1.0.0/reference/api/pandas.DataFrame.melt.html

In [30]:
def process_co2_data(filepath):
    """Load CO₂-by-source data and return yearly totals per source in long format.

    Steps: strip thousands separators from the six source columns and cast them to
    float, keep years from 1990 onward, sum every column per year, then melt to one
    row per (year, source).

    Parameters
    ----------
    filepath : str
        CSV with a 'Year' column and the six 'Annual CO₂ emissions from …' columns.

    Returns
    -------
    pandas.DataFrame
        Columns 'Year', 'fuel_type' (the column name without its
        'Annual CO₂ emissions from ' prefix) and 'emissions'.

    NOTE(review): totals are summed over all rows of a year; if the CSV contains
    aggregate entities next to individual countries they are added together — confirm.
    """
    prefix = 'Annual CO₂ emissions from '
    source_columns = [
        prefix + source
        for source in ('other industry', 'flaring', 'cement', 'gas', 'oil', 'coal')
    ]

    def _as_float(series):
        # Values may arrive as text with comma thousands separators
        return series.astype(str).str.replace(',', '').astype(float)

    raw = pd.read_csv(filepath)
    numeric = raw.assign(**{column: _as_float(raw[column]) for column in source_columns})

    since_1990 = numeric[numeric['Year'] >= 1990]
    yearly_totals = since_1990.groupby('Year')[source_columns].sum().reset_index()

    long_format = yearly_totals.melt(
        id_vars=['Year'],
        value_vars=source_columns,
        var_name='fuel_type',
        value_name='emissions',
    )
    long_format['fuel_type'] = long_format['fuel_type'].str.replace(prefix, '')

    return long_format

def create_line_chart_with_labels(data_long):
    """Line chart of emissions per fuel type over time, with a label at the end of each line.

    Parameters
    ----------
    data_long : pandas.DataFrame
        Long-format frame with 'Year', 'fuel_type' and 'emissions'. The frame is not
        modified; the string conversion of 'Year' happens on a copy.

    Returns
    -------
    A layered Altair chart (lines + end-of-line labels) with axis and view
    configuration applied.
    """
    # Year is encoded as temporal from its string form. Convert on a copy so that
    # re-running the cell or reusing the frame elsewhere sees the original dtype.
    chart_data = data_long.assign(Year=data_long['Year'].astype(str))

    # Color scheme for the fuel types
    selected_colors = ['#776962', '#a79d8f', '#cbbfa3', '#c5c3c1', '#ddc6a7', '#553C2E']

    # One line per fuel type; the legend is hidden because the lines are labelled directly
    line_chart = alt.Chart(chart_data).mark_line().encode(
        x=alt.X('Year:T', title='Year', axis=alt.Axis(labelAngle=-45, grid=False)),
        y=alt.Y('emissions:Q', title='CO₂ Emissions (in billions)',
                axis=alt.Axis(format='~s', titleFontSize=14, labelFontSize=12)),
        color=alt.Color('fuel_type:N', scale=alt.Scale(
            range=selected_colors),
            legend=None
        )
    ).properties(
        width=800,
        height=500,
        title='Trends in CO₂ Emissions by Fuel Type'
    )

    # One label per fuel type: x is the maximum year, y is the emissions value of
    # the row holding that maximum year (argmax over Year)
    labels = alt.Chart(chart_data).mark_text(align='left', dx=5).encode(
        x=alt.X('Year:T', aggregate='max'),
        y=alt.Y('emissions:Q', aggregate={'argmax': 'Year'}),
        text=alt.Text('fuel_type:N'),
        color=alt.Color('fuel_type:N', scale=alt.Scale(
            range=selected_colors)
        )
    )

    # Combine the line chart and labels
    final_chart = (line_chart + labels).configure_axis(
        labelFontSize=12,
        titleFontSize=14
    ).configure_view(
        stroke=None
    )

    return final_chart

# Chart 4 driver: aggregate the by-source emissions and build the labelled line chart
file_path = '../data/co2-by-source.csv'
data_long = process_co2_data(file_path)
line_chart_with_labels = create_line_chart_with_labels(data_long)

# Render inline
line_chart_with_labels.display()


## Chart 5: Share of final energy use that comes from renewable sources, 2020

data: https://ourworldindata.org/sdgs/affordable-clean-energy

In [31]:
import pandas as pd
import altair as alt
from vega_datasets import data
import os

# Function to load and prepare renewable energy data
def load_and_prepare_renewable_data(iso_mapping_path, renewable_data_path):
    """Load the 2020 renewable-energy share per country and attach the numeric ISO code.

    Parameters
    ----------
    iso_mapping_path : str
        CSV that provides at least the columns 'alpha-3' and 'country-code'.
    renewable_data_path : str
        CSV with exactly four columns, taken positionally as entity name,
        3-letter code, year and share (a trailing '%' in the share is removed).

    Returns
    -------
    pandas.DataFrame
        Columns 'country-code', 'country', 'renewable_share'; only rows whose code
        matches an 'alpha-3' entry survive the (inner) merge.
    """
    iso_codes = pd.read_csv(iso_mapping_path)
    shares = pd.read_csv(renewable_data_path)

    # Columns are renamed by position, so the file must have exactly these four
    shares.columns = ['country', 'country_code', 'Year', 'renewable_share']

    # Share may be stored as text such as "12.3%"; strip the sign and parse
    shares['renewable_share'] = (
        shares['renewable_share'].astype(str).str.replace('%', '').astype(float)
    )

    shares_2020 = shares.loc[
        shares['Year'] == 2020,
        ['country_code', 'country', 'renewable_share'],
    ]

    # Join on the 3-letter code to pick up the numeric country code
    joined = shares_2020.merge(iso_codes, left_on='country_code', right_on='alpha-3')

    return joined[['country-code', 'country', 'renewable_share']]

def create_renewable_energy_map(final_data):
    """Build a world choropleth of the renewable share of final energy use, with country labels.

    Parameters
    ----------
    final_data : pandas.DataFrame
        Must provide 'country-code' (matched against the TopoJSON feature ``id``),
        'country' and 'renewable_share'.

    Returns
    -------
    A layered Altair chart (map + text labels) with legend and view configuration applied.

    Notes
    -----
    Reads ``data.world_110m.url``, so the notebook-level name ``data`` must be the
    ``vega_datasets`` loader at call time.
    """
    # Load the TopoJSON of countries
    countries = alt.topo_feature(data.world_110m.url, 'countries')

    # Label anchor points as (longitude, latitude)
    labeled_countries = {
        'China': (104.1954, 35.8617),
        'United States': (-95.7129, 37.0902),
        'Russia': (105.3188, 61.5240),
        'Japan': (138.2529, 36.2048),
        'Germany': (10.4515, 51.1657),
        'United Kingdom': (-3.4360, 55.3781),
        'India': (78.9629, 20.5937),
        'France': (2.2137, 46.6034),
        'Italy': (12.5674, 41.8719),
        'Canada': (-106.3468, 56.1304),
        'South Korea': (127.7669, 35.9078),
        'Brazil': (-51.9253, -14.2350)
    }

    # Materialise the dict view as a list so the column is built from a plain sequence
    labels_df = pd.DataFrame({
        'country': list(labeled_countries),
        'longitude': [coords[0] for coords in labeled_countries.values()],
        'latitude': [coords[1] for coords in labeled_countries.values()]
    })

    # Green ramp used for countries with a share above zero
    share_color = alt.Color('renewable_share:Q', scale=alt.Scale(
        range=[
            '#ffffff',
            '#eff8f5',
            '#dff2eb',
            '#d0ebe1',
            '#c0e4d7',
            '#b0decd',
            '#a0d7c3',
            '#90d0b9',
            '#81c9af',
            '#71c3a5',
            '#61bc9b'  # last (darkest) colour of the ramp
        ]
    ), title='Renewable Energy Share (%)', legend=alt.Legend(
        gradientLength=300,
        titleFontSize=14,
        labelFontSize=12,
        tickCount=5
    ))

    chart = alt.Chart(countries).mark_geoshape(
        stroke='grey'
    ).encode(
        color=alt.condition(
            alt.datum.renewable_share > 0,
            share_color,
            alt.value('white')  # Shares of 0 (including the lookup fallback) are drawn white
        )
    ).transform_lookup(
        lookup='id',  # Link with the numeric field in the TopoJSON
        from_=alt.LookupData(final_data, 'country-code', ['renewable_share', 'country']),
        # Plain 0 for countries without data. alt.value(0) would hand the lookup the
        # dict {'value': 0} as the fallback instead of a number.
        default=0
    ).project(
        'equirectangular'  # Map projection
    ).properties(
        width=800,
        height=500,
        title='Share of Final Energy Consumption from Renewable Sources in 2020'
    )

    # Text labels positioned by longitude/latitude, nudged 5px right and up
    labels = alt.Chart(labels_df).mark_text(
        fontSize=10,
        color='#0e0c0b',
        dx=5,
        dy=-5
    ).encode(
        longitude='longitude:Q',
        latitude='latitude:Q',
        text='country:N'
    )

    # Combine the map and labels; legend below the map, no border around the view
    final_chart = (chart + labels).configure_legend(
        orient='bottom',
        titleFontSize=14,
        labelFontSize=12,
        tickCount=5
    ).configure_view(
        stroke=None
    )

    return final_chart

# Chart 5 driver: input files (relative to the notebook's working directory)
iso_mapping_path = '../data/all.csv'
renewable_data_path = '../data/share-of-final-energy-consumption-from-renewable-sources (1).csv'

# Load + merge the 2020 renewable shares, then build the choropleth from the result
final_data = load_and_prepare_renewable_data(iso_mapping_path, renewable_data_path)
chart = create_renewable_energy_map(final_data)


# Render inline
chart.display()


## Chart 6: CO₂ Emissions per Capita vs. Share of Electricity Generation from Renewables (2020)

data: https://ourworldindata.org/grapher/co2-per-capita-vs-renewable-electricity

reference: https://altair-viz.github.io/user_guide/marks/text.html

reference: https://altair-viz.github.io/user_guide/generated/core/altair.LabelOverlap.html

In [32]:
def load_and_prepare_data(data_path):
    """Load CO₂-per-capita vs. renewable-electricity data for twelve selected countries in 2020.

    Note: this redefines ``load_and_prepare_data`` from an earlier cell with a
    different signature.

    Parameters
    ----------
    data_path : str
        CSV providing 'Entity', 'Code', 'Year',
        'Annual CO₂ emissions (per capita)' and 'Renewables - % electricity'.

    Returns
    -------
    pandas.DataFrame
        Columns 'country', 'country_code', 'year', 'co2_per_capita',
        'renewable_share'; rows restricted to 2020, to the selected countries, and
        to entries where both measures are present.
    """
    focus_countries = (
        'China', 'United States', 'Russia', 'Japan',
        'Germany', 'United Kingdom', 'India', 'France',
        'Italy', 'Canada', 'South Korea', 'Brazil',
    )

    # Source column -> output column (dict order is the output column order)
    renamed = {
        'Entity': 'country',
        'Code': 'country_code',
        'Year': 'year',
        'Annual CO₂ emissions (per capita)': 'co2_per_capita',
        'Renewables - % electricity': 'renewable_share',
    }

    raw = pd.read_csv(data_path)
    rows_2020 = raw.loc[raw['Year'] == 2020, list(renamed)].rename(columns=renamed)

    # Both measures must be present for a point to be plotted
    complete = rows_2020.dropna(subset=['co2_per_capita', 'renewable_share'])

    return complete[complete['country'].isin(focus_countries)]

def create_scatter_plot(data):
    """Scatter plot of CO₂ per capita against renewable electricity share, one labelled point per country.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs 'country', 'renewable_share' and 'co2_per_capita'.

    Returns
    -------
    A layered Altair chart: the points plus a text layer with the country names.
    """
    emissions = data['co2_per_capita']

    # Two-stop colour scale spanning the observed range:
    # lowest emissions -> green (#25A032), highest -> brown (#A0522D)
    emission_colors = alt.Scale(
        domain=[emissions.min(), emissions.max()],
        range=['#25A032', '#A0522D'],
    )

    points = (
        alt.Chart(data)
        .mark_circle(size=250, stroke='#404040', strokeWidth=2)
        .encode(
            x=alt.X('renewable_share:Q', title='Renewable Share of Electricity (%)'),
            y=alt.Y('co2_per_capita:Q', title='CO₂ Emissions per Capita'),
            color=alt.Color('co2_per_capita:Q', scale=emission_colors, legend=None),
        )
        .properties(
            title='CO₂ Emissions per Capita vs. Share of Electricity Generation from Renewables (2020)',
            width=1200,
            height=600,
        )
    )

    # Labels are derived from the point layer (so they keep its colour encoding and
    # properties) and offset 10px right / 5px up from each point
    country_labels = points.mark_text(
        align='left', baseline='middle', dx=10, dy=-5
    ).encode(
        x='renewable_share:Q',
        y='co2_per_capita:Q',
        text='country:N',
    )

    return points + country_labels

# Chart 6 driver: load the 2020 rows for the selected countries and build the scatter plot.
# NOTE: this rebinds the notebook-level name `data` (imported from vega_datasets in
# earlier cells) to a DataFrame.
data_path = '../data/co2-per-capita-vs-renewable-electricity.csv'
data = load_and_prepare_data(data_path)
scatter_plot = create_scatter_plot(data)

# Render inline
scatter_plot.display()


## Chart 7 (Trends, Lines): Global Renewable Energy Generation by Type (TWh)

data: https://ourworldindata.org/grapher/modern-renewable-energy-consumption

## Chart 8 (Area): Global Renewable Energy Consumption Over Time by Energy Type

data: https://ourworldindata.org/grapher/renewable-energy-consumption

reference: https://altair-viz.github.io/user_guide/marks/area.html

## Chart 9: Investment in Renewable Energy by Technology (2019)

data: https://ourworldindata.org/grapher/investment-in-renewable-energy-by-technology

In [33]:
# Shared palette for Charts 7-9: energy type -> hex colour.
# Key order matters: the charts pass the keys as the colour-scale domain and the
# values as its range.
color_scheme = {
    'Other Renewables': '#3e6a67',
    'Hydropower': '#4cc3cc',
    'Wind': '#61bc9b',
    'Solar': '#a6ae46',
}

def load_and_prepare_data(filepath):
    """Load renewable generation data and return yearly totals per energy type in long format.

    Note: this redefines ``load_and_prepare_data`` from earlier cells.

    Parameters
    ----------
    filepath : str
        CSV with a 'Year' column and the four '… generation - TWh' columns
        (solar, wind, hydro, other renewables).

    Returns
    -------
    pandas.DataFrame
        Columns 'Year' (nullable integer), 'energy_type' (one of 'Solar', 'Wind',
        'Hydropower', 'Other Renewables') and 'generation' (sum over all rows of
        that year), for years from 1990 onward.
    """
    # Source column -> display label (dict order fixes the order of the output blocks)
    labels = {
        'Solar generation - TWh': 'Solar',
        'Wind generation - TWh': 'Wind',
        'Hydro generation - TWh': 'Hydropower',
        'Other renewables (including geothermal and biomass) electricity generation - TWh': 'Other Renewables',
    }

    raw = pd.read_csv(filepath)
    # Unparseable years become <NA> and fall out of the >= 1990 filter below
    raw['Year'] = pd.to_numeric(raw['Year'], errors='coerce').astype('Int64')

    generation = raw[['Year'] + list(labels)]
    since_1990 = generation[generation['Year'] >= 1990]
    yearly_totals = since_1990.groupby('Year').sum().reset_index()

    # Rename first, then melt, so 'energy_type' already holds the display labels
    return yearly_totals.rename(columns=labels).melt(
        id_vars=['Year'],
        value_vars=list(labels.values()),
        var_name='energy_type',
        value_name='generation',
    )

def create_trend_chart(data_long):
    """Line chart of renewable generation per energy type, labelled at the end of each line.

    The finished chart is also written to 'images/global_renewable_energy_trend.png'.

    Parameters
    ----------
    data_long : pandas.DataFrame
        Long-format frame with 'Year', 'energy_type' and 'generation'. The frame is
        not modified; the string conversion of 'Year' happens on a copy.

    Returns
    -------
    A layered Altair chart (lines + end-of-line labels) with axis and view
    configuration applied.
    """
    # Year is plotted as an ordinal string axis. Convert on a copy so the caller's
    # frame keeps its original dtype.
    chart_data = data_long.assign(Year=data_long['Year'].astype(str))

    # Fixed domain/range so each energy type gets its colour from color_scheme
    palette = alt.Scale(domain=list(color_scheme.keys()), range=list(color_scheme.values()))
    # Axis ticks every five years
    year_ticks = [str(y) for y in range(1990, 2025, 5)]

    line_chart = alt.Chart(chart_data).mark_line().encode(
        x=alt.X('Year:O', title='Year', axis=alt.Axis(labelAngle=-45, values=year_ticks)),
        y=alt.Y('generation:Q', title='Generation (TWh)',
                axis=alt.Axis(format='~s', titleFontSize=14, labelFontSize=12, grid=True)),
        color=alt.Color('energy_type:N', scale=palette, legend=None)
    ).properties(width=800, height=400, title='Global Renewable Energy Generation by Type (TWh)')

    # One label per energy type: x is the maximum year, y is the generation value
    # of the row holding that maximum year (argmax over Year)
    labels = alt.Chart(chart_data).mark_text(align='left', dx=5).encode(
        x=alt.X('Year:O', aggregate='max'),
        y=alt.Y('generation:Q', aggregate={'argmax': 'Year'}),
        text=alt.Text('energy_type:N'),
        color=alt.Color('energy_type:N', scale=palette)
    )

    final_chart = (line_chart + labels).configure_axis(labelFontSize=12, titleFontSize=14).configure_view(stroke=None)

    # Make sure the output folder exists so the export does not fail on a fresh checkout
    os.makedirs('images', exist_ok=True)
    final_chart.save('images/global_renewable_energy_trend.png')
    return final_chart

def load_and_prepare_renewable_data(file_path):
    """Load renewable energy consumption and return it in long format for the area chart.

    Note: this redefines ``load_and_prepare_renewable_data`` from an earlier cell
    with a different signature.

    Parameters
    ----------
    file_path : str
        CSV with 'Entity', 'Year' and the '(TWh, direct energy)' columns for other
        renewables, solar, biofuels, wind and hydropower.

    Returns
    -------
    pandas.DataFrame
        Columns 'Entity', 'Year' (int64), 'Energy Type' and 'Consumption (TWh)'.
        Only years from 1990 onward and rows with a consumption value are kept.
        Biofuels are reported under 'Other Renewables'; traditional biomass is
        excluded.
    """
    # Source column -> label used in the chart. Biofuels are folded into
    # 'Other Renewables'; 'Traditional biomass (TWh, direct energy)' is deliberately
    # left out, so it never enters the melt.
    energy_labels = {
        'Other renewables (TWh, direct energy)': 'Other Renewables',
        'Solar (TWh, direct energy)': 'Solar',
        'Biofuels (TWh, direct energy)': 'Other Renewables',
        'Wind (TWh, direct energy)': 'Wind',
        'Hydropower (TWh, direct energy)': 'Hydropower',
    }
    source_columns = list(energy_labels)

    data = pd.read_csv(file_path)

    # Coerce Year BEFORE filtering: the conversion has to act on the values the
    # filter compares, otherwise a non-numeric Year column breaks the >= test.
    year = pd.to_numeric(data['Year'], errors='coerce')
    is_recent = year >= 1990  # unparseable years are NaN and compare False

    data_filtered = data.loc[is_recent, ['Entity', 'Year'] + source_columns].copy()
    data_filtered['Year'] = year[is_recent].astype('int64')

    data_melted = data_filtered.melt(
        id_vars=['Entity', 'Year'],
        value_vars=source_columns,
        var_name='Energy Type',
        value_name='Consumption (TWh)',
    )
    data_melted['Energy Type'] = data_melted['Energy Type'].map(energy_labels)

    # Return a new frame instead of dropping in place on a derived one
    return data_melted.dropna(subset=['Consumption (TWh)'])

def create_stacked_area_chart(data_melted):
    """Stacked area chart of renewable energy consumption per energy type over time.

    The chart is also written to 'images/renewable_energy_area_chart.png'.

    Parameters
    ----------
    data_melted : pandas.DataFrame
        Long-format frame with 'Year', 'Energy Type' and 'Consumption (TWh)'.

    Returns
    -------
    The Altair area chart.
    """
    chart = alt.Chart(data_melted).mark_area().encode(
        # Ordinal year axis with a tick every five years
        x=alt.X('Year:O', title='Year', axis=alt.Axis(labelAngle=-45, values=[str(y) for y in range(1990, 2025, 5)])),
        # Consumption is summed per year and energy type (several rows can share both)
        y='sum(Consumption (TWh)):Q',
        color=alt.Color('Energy Type:N', scale=alt.Scale(domain=list(color_scheme.keys()), range=list(color_scheme.values()))),
    ).properties(width=800, height=500, title='Global Renewable Energy Consumption Over Time by Energy Type')

    # Make sure the output folder exists so the export does not fail on a fresh checkout
    os.makedirs('images', exist_ok=True)
    chart.save('images/renewable_energy_area_chart.png')
    return chart

def load_and_prepare_investment_data(data_path):
    """Load 2019 renewable-energy investment per technology in long format.

    Parameters
    ----------
    data_path : str
        CSV with 'Entity', 'Code', 'Year' and one column per source technology
        (marine, small hydropower, geothermal, biofuels, biomass and
        waste-to-energy, wind, solar).

    Returns
    -------
    pandas.DataFrame
        Columns 'Entity', 'Code', 'Year', 'technology' and 'investment' (float).
        Source technologies are grouped into 'Other Renewables', 'Hydropower',
        'Wind' and 'Solar', so one group can appear on several rows. Rows whose
        investment cannot be parsed as a number are dropped.
    """
    # Source column -> chart category (dict order fixes the order of the output blocks)
    technology_groups = {
        'Marine energy': 'Other Renewables',
        'Small hydropower': 'Hydropower',
        'Geothermal energy': 'Other Renewables',
        'Biofuels': 'Other Renewables',
        'Biomass and waste-to-energy': 'Other Renewables',
        'Wind energy': 'Wind',
        'Solar energy': 'Solar',
    }
    id_columns = ['Entity', 'Code', 'Year']
    source_columns = list(technology_groups)

    raw = pd.read_csv(data_path)
    rows_2019 = raw.loc[raw['Year'] == 2019, id_columns + source_columns]

    long_format = rows_2019.melt(
        id_vars=id_columns,
        value_vars=source_columns,
        var_name='technology',
        value_name='investment',
    )
    long_format['technology'] = long_format['technology'].replace(technology_groups)

    # Values may be text with comma thousands separators; anything that still does
    # not parse becomes NaN and is removed below
    without_commas = long_format['investment'].astype(str).str.replace(',', '')
    long_format['investment'] = pd.to_numeric(without_commas, errors='coerce')

    return long_format.dropna(subset=['investment'])

def create_investment_bar_chart(data):
    """Horizontal bar chart of total 2019 investment per technology, with the total as a label.

    Parameters
    ----------
    data : pandas.DataFrame
        Long-format frame with 'technology' and 'investment'. A technology may
        occur on several rows; the chart sums them.

    Returns
    -------
    A layered Altair chart: the bars plus a text layer showing each bar's total.
    """
    # Fixed domain/range so each technology gets its colour from color_scheme
    palette = alt.Scale(domain=list(color_scheme.keys()), range=list(color_scheme.values()))

    # Sum per technology: several source rows share one label (e.g. 'Other
    # Renewables'), so without the aggregate every row gets its own text mark and
    # the labels overlap at the individual values instead of showing the bar total.
    bar_chart = alt.Chart(data).mark_bar().encode(
        y=alt.Y('technology:N', title='Technology', sort='-x'),
        x=alt.X('sum(investment):Q', title='Investment ($)', axis=alt.Axis(format='~s')),  # SI-prefixed ticks for large numbers
        color=alt.Color('technology:N', scale=palette, legend=None),
    ).properties(
        title='Investment in Renewable Energy by Technology (2019)',
        width=800,
        height=400
    )

    # One label per bar, placed 5px right of the bar end, showing the summed value
    # with no decimals
    text = bar_chart.mark_text(
        align='left',
        baseline='middle',
        dx=5
    ).encode(
        text=alt.Text('sum(investment):Q', format=',.0f')
    )

    # Combine the bars and the text labels
    final_chart = bar_chart + text

    return final_chart

# Drivers for Charts 7-9. `load_and_prepare_data` and `load_and_prepare_renewable_data`
# resolve to the one-argument versions defined earlier in this cell, which replace the
# same-named functions from previous cells.

# Chart 7: yearly generation totals per energy type -> labelled line chart
data_path = '../data/renewable-energy-gen.csv'
clean_data = load_and_prepare_data(data_path)
trend_chart = create_trend_chart(clean_data)
trend_chart.display()

# Chart 8: consumption per energy type -> stacked area chart
data_path_2 = '../data/renewable-energy-consumption.csv'
data_melted = load_and_prepare_renewable_data(data_path_2)
area_chart = create_stacked_area_chart(data_melted)
area_chart.display()

# Chart 9: 2019 investment per technology -> bar chart
data_path_investment = '../data/investment-in-renewable-energy-by-technology.csv'  
investment_data = load_and_prepare_investment_data(data_path_investment)
investment_bar_chart = create_investment_bar_chart(investment_data)
investment_bar_chart.display()
