In [18]:
# Imports
import pandas as pd
import plotly.io as pio
import plotly.graph_objs as go
import plotly.express as px

# Read the two raw CSV inputs located two directories above the notebook.
# `ems` holds the emissions table; `sus` is the table later used for the
# 'Entity' / 'Land Area(Km2)' lookup.
ems, sus = map(pd.read_csv, ('../../emissions.csv', '../../sus.csv'))


In [19]:
## Preprocessing

# Rows of interest: CO2 emissions reported for all sectors *including* LULUCF.
with_lulucf_rows = (ems['Item'] == 'All sectors with LULUCF') & (ems['Element'] == 'Emissions (CO2)')

# Columns of interest: the 'Area' and 'Item' labels plus the five year columns.
with_lulucf_columns = ['Area', 'Item', '2000', '2005', '2010', '2015', '2020']

# Row filter and column selection in a single indexing step.
filtered_ems = ems.loc[with_lulucf_rows, with_lulucf_columns]

# Persist the subset (without the index) so it can be reloaded from disk later.
filtered_ems.to_csv('emissionsYesLULUCF.csv', index=False)


In [20]:
# Rows of interest: CO2 emissions reported for all sectors *excluding* LULUCF.
without_lulucf_rows = (ems['Item'] == 'All sectors without LULUCF') & (ems['Element'] == 'Emissions (CO2)')

# Columns of interest: the 'Area' and 'Item' labels plus the five year columns.
without_lulucf_columns = ['Area', 'Item', '2000', '2005', '2010', '2015', '2020']

# Row filter and column selection in a single indexing step.
filtered_ems2 = ems.loc[without_lulucf_rows, without_lulucf_columns]

# Persist the subset (without the index) so it can be reloaded from disk later.
filtered_ems2.to_csv('emissionsNoLULUCF.csv', index=False)

In [21]:
# Build a lookup of every distinct ('Entity', 'Land Area(Km2)') pair in `sus`.
# NOTE(review): an entity that appears with more than one area value keeps one
# row per value here - confirm the source data has a single area per entity.
area_columns = ['Entity', 'Land Area(Km2)']
unique_values = sus.loc[:, area_columns].drop_duplicates()

# Save the lookup table, leaving the index out of the file.
unique_values.to_csv('landAreas.csv', index=False)

In [22]:
# Reload the intermediate CSV files written by the preprocessing cells.
emsY = pd.read_csv('emissionsYesLULUCF.csv')
emsN = pd.read_csv('emissionsNoLULUCF.csv')
area = pd.read_csv('landAreas.csv')

# Year columns for which a per-land-area value is derived.
years = ['2000', '2005', '2010', '2015', '2020']

# Attach each area's land area by matching 'Area' against 'Entity'. The outer
# join keeps unmatched rows from both sides, and dropna() then discards every
# row that has a missing value in any column, including those unmatched rows.
mergeN = emsN.merge(area, left_on='Area', right_on='Entity', how='outer').dropna()
mergeY = emsY.merge(area, left_on='Area', right_on='Entity', how='outer').dropna()

# For both frames, add '<year>rel' = emissions of that year / land area.
for frame in (mergeN, mergeY):
    for year in years:
        frame[year + 'rel'] = frame[year] / frame['Land Area(Km2)']

# Write both enriched tables to disk, without the index.
mergeN.to_csv('mergeN.csv', index=False)
mergeY.to_csv('mergeY.csv', index=False)


In [23]:
# Plotting the graph

# Years to plot; each one becomes its own bar series.
years = ['2000', '2005', '2010', '2015', '2020']

# Build the figure directly from one bar trace per year: the x-axis lists the
# 'Area' values of mergeN and the bar heights come from the '<year>rel' column.
fig = go.Figure(data=[
    go.Bar(x=mergeN['Area'], y=mergeN[plot_year + 'rel'], name=plot_year)
    for plot_year in years
])

# Group the yearly bars side by side and set the title and axis labels.
fig.update_layout(
    barmode='group',
    title='Values for the years 2000-2005-2010-2015-2020',
    xaxis_title='Country',
    yaxis_title='Value',
)

# Render the figure.
fig.show()

In [24]:
# Chart the ten rows of mergeN with the highest mean per-land-area CO2
# emissions (without LULUCF) across the selected years.

# Names of the per-year relative columns ('<year>rel' = emissions / land area).
rel_columns = [year + 'rel' for year in years]

# Row-wise mean of the relative values, stored on mergeN as the 'avg' column.
mergeN['avg'] = mergeN[rel_columns].mean(axis=1)

# Highest averages first; keep the first ten rows.
top_countries = mergeN.sort_values('avg', ascending=False).head(10)

# One bar trace per year, grouped side by side for each country.
fig = go.Figure()
for year in years:
    fig.add_trace(go.Bar(
        x=top_countries['Area'],
        y=top_countries[year + 'rel'],
        name=year
    ))

# The bars show emissions divided by land area, not total emissions, so the
# title and y-axis label name that quantity explicitly.
fig.update_layout(
    barmode='group',
    title=(
        'CO2 emissions per km² of land area (without LULUCF) for the years '
        '2000-2005-2010-2015-2020: the 10 countries with the highest average'
    ),
    xaxis_title='Country',
    yaxis_title='CO2 emissions per km² of land area'
)

# Render the figure.
fig.show()

In [25]:
# Chart the ten rows of mergeN with the lowest mean per-land-area CO2
# emissions (without LULUCF) across the selected years.

# Names of the per-year relative columns ('<year>rel' = emissions / land area).
rel_columns = [year + 'rel' for year in years]

# Row-wise mean of the relative values, stored on mergeN as the 'avg' column.
mergeN['avg'] = mergeN[rel_columns].mean(axis=1)

# Lowest averages first; keep the first ten rows.
bottom_countries = mergeN.sort_values('avg', ascending=True).head(10)

# This cell originally stored the ten lowest rows under `top_countries`; the
# alias is retained in case later cells still read that name.
top_countries = bottom_countries

# One bar trace per year, grouped side by side for each country.
fig = go.Figure()
for year in years:
    fig.add_trace(go.Bar(
        x=bottom_countries['Area'],
        y=bottom_countries[year + 'rel'],
        name=year
    ))

# The bars show emissions divided by land area, not total emissions, so the
# title and y-axis label name that quantity explicitly.
fig.update_layout(
    barmode='group',
    title=(
        'CO2 emissions per km² of land area (without LULUCF) for the years '
        '2000-2005-2010-2015-2020: the 10 countries with the lowest average'
    ),
    xaxis_title='Country',
    yaxis_title='CO2 emissions per km² of land area'
)

# Render the figure.
fig.show()