In [125]:
# Imports
import pandas as pd
import plotly.io as pio
import plotly.graph_objs as go
import plotly.express as px

ems = pd.read_csv('../../datasets/emissions.csv')
sus = pd.read_csv('../../datasets/sus.csv')
popu = pd.read_csv('../../datasets/popu.csv')


In [126]:
## Preprocessing

# Filter the dataset
filtered_ems = ems[((ems['Item'] == 'All sectors with LULUCF') | (ems['Item'] == 'All sectors without LULUCF')) & (ems['Element'] == 'Emissions (CO2)')]

# Select the required columns
filtered_ems = filtered_ems[['Area', 'Item', '2000', '2010', '2015', '2020']]

# Write to a new dataset
filtered_ems.to_csv('emissions.csv', index=False)


# Get unique values of 'Entity' and 'Land Area(Km2)'
area_values = sus[['Entity', 'Land Area(Km2)']]

# Write to a new dataset
area_values.to_csv('landAreas.csv', index=False)


# Get unique value of 'Country/Territory' and '2000 Population' to '2020 Population'
pop_values = popu[['Country/Territory', '2000 Population', '2010 Population', '2015 Population', '2020 Population']]

# Write to a new dataset
pop_values.to_csv('populations.csv', index=False)


In [127]:
# Load the new datasets
ems = pd.read_csv('emissions.csv')
area = pd.read_csv('landAreas.csv')
pop = pd.read_csv('populations.csv')

# Select the years to calculate the relative values for
years = ['2000', '2010', '2015', '2020']

# Merge ems with area
merge = ems.merge(area, left_on='Area', right_on='Entity', how='outer')

# Merge the result with pop
merge = merge.merge(pop, left_on='Area', right_on='Country/Territory', how='outer')

# Remove rows with any empty cells from merge
merge = merge.dropna()

# Calculate the relative values and store them in new columns
for year in years:
    merge[year+'/area'] = (merge[year] / merge['Land Area(Km2)'])
for year in years:
    merge[year+'/pop'] = (merge[year] / merge[year+' Population'])

merge = merge.drop(columns=['Entity', 'Country/Territory'])
merge = merge[merge['Area'] != 'Bermuda']


merge = merge.drop_duplicates()

# Write merge to a new CSV file
merge.to_csv('merge.csv', index=False)


In [128]:
# Plotting the graph

# Select the years to plot
years = ['2000', '2010', '2015', '2020']

# Create a new Figure
fig = go.Figure()

# Add a bar for each year
for year in years:
    fig.add_trace(go.Bar(
        x=mergeN['Area'],
        y=mergeN[year+'rel'],
        name=year
    ))

# Change the bar mode to group
fig.update_layout(barmode='group')

# Add labels and title
fig.update_layout(
    title='Values for the years 2000-2010-2015-2020',
    xaxis_title='Country',
    yaxis_title='Value'
)

# Show the plot
fig.show()

In [129]:
df = pd.read_csv('merge.csv')

# Calculate the average of the relative values for each country
merge['avg'] = merge[[year+ for year in years]].mean(axis=1)

# Sort by the average and select the top 10
top_countries = merge.sort_values('avg', ascending=False).head(10)

# Create a new Figure
fig = go.Figure()

# Add a bar for each year
for year in years:
    fig.add_trace(go.Bar(
        x=top_countries['Area'],
        y=top_countries[year+'rel'],
        name=year
    ))

# Change the bar mode to group
fig.update_layout(barmode='group')

# Add labels and title
fig.update_layout(
    title='Total CO2 emission per Country for the years 2000-2010-2015-2020 (without LULUCF) for the top 10 most emitting countries',
    xaxis_title='Country',
    yaxis_title='Value'
)

# Show the plot
fig.show()

SyntaxError: invalid syntax (471290190.py, line 4)

In [131]:
# Sort by 'total_CO' (or any other initial sort criteria) and select the top 10
merge = pd.read_csv('merge.csv')

# LULUCF filter:
# Removes all rows with 'All sectors with LULUCF' in the 'Item' column
merge = merge[merge['Item'] != 'All sectors with LULUCF']

# Calculate the average of the relative values for each country
merge['avg_total'] = merge[[year for year in years]].mean(axis=1)
merge['avg_area'] = merge[[year+'/area' for year in years]].mean(axis=1)
merge['avg_pop'] = merge[[year+'/pop' for year in years]].mean(axis=1)

# Sort by the average and select the top 10
total_top = merge.sort_values('avg_total', ascending=False).head(10)
area_top = merge.sort_values('avg_area', ascending=False).head(10)
pop_top = merge.sort_values('avg_pop', ascending=False).head(10)

# Create a new Figure
fig = go.Figure()

# Define a function to add traces for a specific category
def add_category_traces(fig, category, data):
    for year in years:
        fig.add_trace(go.Bar(
            x=data['Area'],
            y=data[category],
            name=year
        ))

# Usage examples
add_category_traces(fig, 'avg_total', total_top)
add_category_traces(fig, 'avg_area', area_top)
add_category_traces(fig, 'avg_pop', pop_top)

# Change the bar mode to group
fig.update_layout(barmode='group')

# Add labels and title
fig.update_layout(
    title='Emissions per Country for Selected Categories',
    xaxis_title='Country',
    yaxis_title='Value'
)

# Define dropdown menus for switching categories
fig.update_layout(
    updatemenus=[
        dict(
            buttons=[
                dict(label="Total CO Emissions",
                     method="update",
                     args=[{"y": [total_top['avg_total'] for _ in years]}, {"x": [total_top['Area']]}]),
                dict(label="CO2/Area",
                     method="update",
                     args=[{"y": [area_top['avg_area'] for _ in years]}, {"x": [area_top['Area']]}]),
                dict(label="CO2/Population",
                     method="update",
                     args=[{"y": [pop_top['avg_pop'] for _ in years]}, {"x": [pop_top['Area']]}])
            ],
            direction="down",
            pad={"r": 10, "t": 10},
            showactive=True,
            x=0.1,
            xanchor="left",
            y=1.1,
            yanchor="top"
        ),
    ]
)

# Show the plot
fig.show()

print(f'total_top:{total_top}')
print(f'area_top:{area_top}')
print(f'pop_top:{pop_top}')

total_top:             Area                        Item         2000         2010  \
66          China  All sectors without LULUCF  3839649.997  9311518.108   
140         India  All sectors without LULUCF  1078600.000  1767200.000   
154         Japan  All sectors without LULUCF  1283620.000  1240120.000   
116       Germany  All sectors without LULUCF   870492.000   794375.000   
57         Canada  All sectors without LULUCF   558790.000   543755.000   
264  Saudi Arabia  All sectors without LULUCF   311500.000   562700.000   
282  South Africa  All sectors without LULUCF   467400.000   527800.000   
142     Indonesia  All sectors without LULUCF   285294.300   447129.000   
198        Mexico  All sectors without LULUCF   426036.400   501269.000   
45         Brazil  All sectors without LULUCF   347025.000   438862.000   

           2015         2020  Land Area(Km2)  2000 Population  \
66   10722793.6  11515339.48       9596960.0     1.264099e+09   
140   2394200.0   2501200.00      

In [None]:
# Calculate the average of the relative values for each country
mergeN['avg'] = mergeN[[year+'rel' for year in years]].mean(axis=1)

# Sort by the average and select the bottom 10
top_countries = mergeN.sort_values('avg', ascending=True).head(10)

# Create a new Figure
fig = go.Figure()

# Add a bar for each year
for year in years:
    fig.add_trace(go.Bar(
        x=top_countries['Area'],
        y=top_countries[year+'rel'],
        name=year
    ))

# Change the bar mode to group
fig.update_layout(barmode='group')

# Add labels and title
fig.update_layout(
    title='Total CO2 emission per Country for the years 2000-2010-2015-2020 (without LULUCF) for the top 10 least emitting countries',
    xaxis_title='Country',
    yaxis_title='Value'
)

# Show the plot
fig.show()