Madusanka Madiligama 01/04/2024

In [1]:
# standard plotly imports
import pandas as pd
import plotly as py
import plotly.graph_objs as go
import plotly.express as px
from plotly.offline import iplot, init_notebook_mode

In [2]:
# Load the emissions dataset (CSV hosted in the plotly/datasets GitHub repo)
df = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/plotly/datasets/master/Emissions%20Data.csv'
)
# Preview the first rows
df.head()

Unnamed: 0,Year,Country,Continent,Emission
0,2008,Aruba,South America,24.750133
1,2009,Aruba,South America,24.876706
2,2010,Aruba,South America,24.182702
3,2011,Aruba,South America,23.922412
4,2008,Andorra,Europe,6.296125


In [3]:
# Summarise the frame: column names, non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 788 entries, 0 to 787
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Year       788 non-null    int64  
 1   Country    788 non-null    object 
 2   Continent  788 non-null    object 
 3   Emission   788 non-null    float64
dtypes: float64(1), int64(1), object(2)
memory usage: 24.8+ KB


In [4]:
# Mean emission over all countries for each year, as a flat Year/Emission table
mean_emission_year = df.groupby('Year', as_index=False)['Emission'].mean()
mean_emission_year

Unnamed: 0,Year,Emission
0,2008,5.012454
1,2009,4.779365
2,2010,4.936749
3,2011,4.9049


In [5]:
# Plot Average Emission by Year
# One line-with-markers trace: x = year, y = mean emission for that year
trace1 = go.Scatter(
    name='Emission',
    mode='lines+markers',
    x=mean_emission_year['Year'],
    y=mean_emission_year['Emission'],
)

# Layout: one x tick per year (dtick=1.0), grid lines on both axes
layout = go.Layout(
    title='Average Emission by Year',
    xaxis={'title': 'Year', 'dtick': 1.0, 'showgrid': True},
    yaxis={'title': 'Emission', 'showgrid': True},
    width=600,
    height=500,
)

# Assemble the figure and render it inline
fig = go.Figure(data=[trace1], layout=layout)
iplot(fig)


In [6]:
# Mean emission over all years for each country, as a flat Country/Emission table
mean_emission_country = df.groupby('Country')['Emission'].mean().reset_index()
mean_emission_country

Unnamed: 0,Country,Emission
0,Afghanistan,0.284058
1,Albania,1.558990
2,Algeria,3.314901
3,Andorra,6.109688
4,Angola,1.388990
...,...,...
192,Vietnam,1.716749
193,West Bank And Gaza,0.560487
194,Yemen,1.006546
195,Zambia,0.188319


In [7]:
# Plot Average Emission by Country
# Single scatter trace: one point per country; hover text carries the country name
trace = go.Scatter(
    x=mean_emission_country['Country'],
    y=mean_emission_country['Emission'],
    text=mean_emission_country['Country'],
    mode='lines+markers',
    marker={
        'color': 'rgba(80, 26, 80, 0.8)',
        'size': 5,
        'line': {'color': 'DarkSlateGrey', 'width': 2},
    },
)

# Layout: x tick labels are hidden (names are available on hover instead),
# y axis ticks every 5 units, no legend for the single trace
layout = go.Layout(
    title='Average Emission by Country',
    xaxis={'title': 'Country', 'showticklabels': False},
    yaxis={'title': 'Average Emission', 'dtick': 5.0, 'showgrid': True},
    width=1600,
    height=500,
    showlegend=False,
)

# Assemble the figure and render it inline
fig = go.Figure(data=[trace], layout=layout)

iplot(fig)


In [8]:
import pycountry

# One record per pycountry entry: its name and its ISO alpha-3 code
countries_data = list(
    map(
        lambda entry: {'Country': entry.name, 'ISO_Code': entry.alpha_3},
        pycountry.countries,
    )
)

# Tabulate the records as a Country / ISO_Code lookup frame
country_codes_df = pd.DataFrame(countries_data, columns=['Country', 'ISO_Code'])

print(country_codes_df.head())

         Country ISO_Code
0          Aruba      ABW
1    Afghanistan      AFG
2         Angola      AGO
3       Anguilla      AIA
4  Åland Islands      ALA


In [9]:

# Split the data into one frame per year (these names are reused by later cells)
emission_2008, emission_2009, emission_2010, emission_2011 = (
    df[df['Year'] == year] for year in (2008, 2009, 2010, 2011)
)

# One lines+markers trace per year; hover text shows the country name
data = [
    go.Scatter(
        x=yearly['Country'],
        y=yearly['Emission'],
        text=yearly['Country'],
        mode='lines+markers',
        name=label,
    )
    for label, yearly in (
        ('2008', emission_2008),
        ('2009', emission_2009),
        ('2010', emission_2010),
        ('2011', emission_2011),
    )
]
trace1, trace2, trace3, trace4 = data

# Layout: hidden x tick labels, y ticks every 5 units,
# semi-transparent legend anchored at x=0, y=1 in plot coordinates
layout = go.Layout(
    title='Average Emission by Country (2008-2011)',
    xaxis={'title': 'Country', 'showticklabels': False},
    yaxis={'title': 'Average Emission', 'dtick': 5.0},
    width=1600,
    height=700,
    legend={'x': 0, 'y': 1, 'bgcolor': 'rgba(255, 255, 255, 0.5)'},
)

# Assemble the figure and render it inline
fig = go.Figure(data=data, layout=layout)
iplot(fig)


In [10]:
# Plot average emission by country on an interactive choropleth map.
#
# The frame built here has one row per country with:
#   Emission                 mean emission over all years (drives the colour)
#   Emission_2008 ... _2011  emission for each individual year (hover only)
#   ISO_Code                 ISO alpha-3 code used by Plotly to locate the country

# Mean emission per country over all available years
average_emission = df.groupby('Country')['Emission'].mean().reset_index()

# One column per year, built with a single pivot of `df`. This keeps exactly one
# row per country and avoids pulling unrelated Year/Continent columns into the
# frame. pivot_table averages if a country/year pair appears more than once.
yearly_emission = (
    df.pivot_table(values='Emission', index='Country', columns='Year')
      .add_prefix('Emission_')
      .rename_axis(columns=None)
      .reset_index()
)
average_emission = average_emission.merge(yearly_emission, on='Country', how='left')

# Attach ISO codes by exact name match against the pycountry table
average_emission = average_emission.merge(country_codes_df, on='Country', how='left')


def _lookup_iso_alpha3(country_name):
    """Return the ISO alpha-3 code pycountry finds for ``country_name``, else None.

    ``pycountry.countries.lookup`` is case-insensitive and also checks fields
    other than the primary name, so it resolves spellings such as
    "Trinidad And Tobago" that the exact-name merge misses.
    """
    try:
        return pycountry.countries.lookup(country_name).alpha_3
    except LookupError:
        return None


# Fallback for names the exact merge could not resolve
missing_iso = average_emission['ISO_Code'].isna()
average_emission.loc[missing_iso, 'ISO_Code'] = (
    average_emission.loc[missing_iso, 'Country'].map(_lookup_iso_alpha3)
)

# Rows without an ISO code cannot be drawn; report them instead of dropping silently
unmatched_countries = average_emission.loc[
    average_emission['ISO_Code'].isna(), 'Country'
].tolist()
if unmatched_countries:
    print(
        f'No ISO code found for {len(unmatched_countries)} countries '
        f'(omitted from the map): {unmatched_countries}'
    )

# Create the choropleth map using Plotly Express
fig = px.choropleth(
    average_emission,
    locations='ISO_Code',
    color='Emission',
    hover_name='Country',
    hover_data={
        'Emission': ':.2f',
        'Emission_2008': ':.2f',
        'Emission_2009': ':.2f',
        'Emission_2010': ':.2f',
        'Emission_2011': ':.2f',
        'ISO_Code': False  # hide the code itself from the hover box
    },
    color_continuous_scale='Portland',
)

# Adjust layout and style to include an outer frame
fig.update_layout(
    title_text='Average Emission by Country',
    geo=dict(
        showframe=True,
        showcoastlines=True,
        projection_type='natural earth'
    )
)

# Show the figure
fig.show()


In [13]:
import plotly.graph_objects as go
import plotly.colors as py_colors

# Names of the countries whose 2008 emission was above 12
high_emission_countries = emission_2008.loc[emission_2008['Emission'] > 12, 'Country'].values

# All rows (every year) for those countries
high_emission_data = df[df['Country'].isin(high_emission_countries)]

# Reshape to a Country x Year table of emission values
top_countries = high_emission_data.pivot_table(index='Country', columns='Year', values='Emission')

# Qualitative palette used for the bars
colors2 = py_colors.qualitative.Dark2

# One bar trace per year: 2008 uses palette entry 0, 2009 uses entry 1
data = [
    go.Bar(
        x=top_countries.index,
        y=top_countries[year],
        name=str(year),
        marker={
            'color': colors2[palette_pos],
            'opacity': 0.8,
            'line': {'color': 'rgb(0,0,0)', 'width': 1.5},
        },
    )
    for palette_pos, year in enumerate((2008, 2009))
]
trace1, trace2 = data

# Side-by-side (grouped) bars, y ticks every 5 units
layout = go.Layout(
    title='Average Emission by Country in 2008 and 2009',
    yaxis={'title': 'Average Emission', 'dtick': 5.0},
    barmode='group',
    width=1000,
    height=700,
)

# Assemble and display the figure
fig = go.Figure(data=data, layout=layout)
fig.show()


In [12]:
# Every row (all years) belonging to the high-emission countries
high_emission_data = df.loc[df['Country'].isin(high_emission_countries)]
high_emission_data

Unnamed: 0,Year,Country,Continent,Emission
0,2008,Aruba,South America,24.750133
1,2009,Aruba,South America,24.876706
2,2010,Aruba,South America,24.182702
3,2011,Aruba,South America,23.922412
20,2008,United Arab Emirates,Asia,23.033600
...,...,...,...,...
711,2011,Trinidad And Tobago,North America,37.140054
736,2008,United States,North America,18.489234
737,2009,United States,North America,17.192379
738,2010,United States,North America,17.484792


In [14]:


# Qualitative palette used for the bars
colors2 = py_colors.qualitative.Dark2

# One bar trace per year, each 0.6 wide; the second item of every pair is the
# palette entry used for that year
data = [
    go.Bar(
        x=top_countries.index,
        y=top_countries[year],
        width=0.6,
        name=str(year),
        marker={
            'color': colors2[palette_pos],
            'opacity': 0.8,
            'line': {'color': 'rgb(0,0,0)', 'width': 1.5},
        },
    )
    for year, palette_pos in ((2008, 4), (2009, 5), (2010, 2), (2011, 3))
]
trace1, trace2, trace3, trace4 = data

# barmode='relative' stacks the yearly bars; white backgrounds; y ticks every 20;
# semi-transparent legend anchored at x=0, y=1 in plot coordinates
layout = go.Layout(
    title='Average Emission by Country (2008-2011)',
    yaxis={'title': 'Average Emission', 'dtick': 20.0},
    barmode='relative',
    paper_bgcolor="rgba(255,255,255,1)",
    plot_bgcolor='rgba(255,255,255,1)',
    width=1000,
    height=700,
    legend={'x': 0, 'y': 1, 'bgcolor': 'rgba(255, 255, 255, 0.5)'},
)

# Assemble and display the figure
fig = go.Figure(data=data, layout=layout)

fig.show()


In [15]:

# Fractional change in emission from 2008 to 2011 for each high-emission country.
# The signed change is computed first so the direction can be recorded in
# 'Change' (1 = increase, 0 = no increase); 'Frac_changed' is then reduced to
# its magnitude, which drives the marker size.
top_countries['Frac_changed'] = (top_countries[2011] - top_countries[2008]) / top_countries[2008]
top_countries['Change'] = (top_countries['Frac_changed'] > 0).astype(int)
top_countries['Frac_changed'] = top_countries['Frac_changed'].abs()

# Scatter trace: y = 2008 emission, marker size = magnitude of change,
# marker colour = direction of change
trace = go.Scatter(
    x=top_countries.index,
    y=top_countries[2008],
    mode='markers',
    marker=dict(
        color=top_countries['Change'],  # 0 or 1
        size=100 * top_countries['Frac_changed'],  # fraction scaled to marker units
        colorscale=[(0, 'blue'), (1, 'yellow')],  # Blue for decrease, Yellow for increase
        # Pin the colour domain to the two possible 'Change' values. Without this
        # Plotly derives the range from the data, so if every country moved in the
        # same direction the range would collapse and the markers would no longer
        # be guaranteed to match the blue/yellow meaning shown on the colourbar.
        cmin=0,
        cmax=1,
        colorbar=dict(title='Change<br>(Blue: Decrease, Yellow: Increase)'),
        showscale=True
    ),
    text=top_countries.index  # country name on hover
)

# Layout: hidden x tick labels, y ticks every 10 units, and an explanatory
# annotation placed above the plot area (paper coordinates)
layout = go.Layout(
    title='Fractional Change in Emission by Country w.r.t. 2008',
    yaxis=dict(title='Emission in 2008', dtick=10.0),
    xaxis=dict(showticklabels=False),
    height=500,
    width=800,
    annotations=[dict(
        showarrow=False,
        text='Size of Marker: Magnitude of Change<br>Color of Marker: Type of Change',
        xref='paper',
        yref='paper',
        x=0.5, y=1.15,
        align='center'
    )]
)

# Create the figure with the data and layout
fig = go.Figure(data=[trace], layout=layout)
fig.show()
