In [9]:
import pandas as pd
import numpy as np
import math
import cufflinks as cf
import plotly
import plotly.express as px
import plotly.graph_objects as go
%matplotlib inline
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot 
init_notebook_mode(connected=True)
cf.go_offline()

In [11]:
# Load the raw CO₂ emissions table, parsing the Year column as datetimes,
# then print a structural summary (dtypes and non-null counts).
df_co = pd.read_csv(
    'Datasets/co2_emission.csv',
    parse_dates=['Year'],
)
df_co.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20853 entries, 0 to 20852
Data columns (total 4 columns):
 #   Column                          Non-Null Count  Dtype         
---  ------                          --------------  -----         
 0   Entity                          20853 non-null  object        
 1   Code                            18646 non-null  object        
 2   Year                            20853 non-null  datetime64[ns]
 3   Annual CO₂ emissions (tonnes )  20853 non-null  float64       
dtypes: datetime64[ns](1), float64(1), object(2)
memory usage: 651.8+ KB


In [12]:
# Isolate the rows that carry no country code and list which entities they
# belong to, so aggregates can be told apart from genuine countries.
df_co_na = df_co.loc[df_co['Code'].isna()]
df_co_na['Entity'].unique()

array(['Africa', 'Americas (other)', 'Antarctic Fisheries',
       'Asia and Pacific (other)', 'EU-28', 'Europe (other)',
       'International transport', 'Kyrgysztan', 'Middle East',
       'Statistical differences', 'Wallis and Futuna Islands'],
      dtype=object)

In [13]:
# Two of the code-less entities are kept and given a code by hand
# ('KGZ' and 'WLF').
# The fill is restricted to the Code column and returns a new frame, rather
# than calling fillna(inplace=True) on a slice of df_co, which raises
# SettingWithCopyWarning and would also fill any other column holding NaN.
df_kgz = df_co[df_co["Entity"] == "Kyrgysztan"].fillna({"Code": "KGZ"})
df_wlf = df_co[df_co["Entity"] == "Wallis and Futuna Islands"].fillna({"Code": "WLF"})

In [14]:
# Discard every row that still has a missing value (the code-less entities),
# then re-check the frame's structure.
df_co = df_co.dropna(how='any')
df_co.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18646 entries, 0 to 20852
Data columns (total 4 columns):
 #   Column                          Non-Null Count  Dtype         
---  ------                          --------------  -----         
 0   Entity                          18646 non-null  object        
 1   Code                            18646 non-null  object        
 2   Year                            18646 non-null  datetime64[ns]
 3   Annual CO₂ emissions (tonnes )  18646 non-null  float64       
dtypes: datetime64[ns](1), float64(1), object(2)
memory usage: 728.4+ KB


In [15]:
# Re-attach the two manually coded entities to the cleaned table.
# sort_values returns a new frame, so its result must be kept; the original
# call discarded it and the rows were never actually sorted.
# reset_index(drop=True) renumbers the rows 0..n-1 without leaving a stray
# "index" column behind.
df = (
    pd.concat([df_co, df_kgz, df_wlf])
    .sort_values(['Entity', 'Year'])
    .reset_index(drop=True)
)
df.head()

Unnamed: 0,Entity,Code,Year,Annual CO₂ emissions (tonnes )
0,Afghanistan,AFG,1949-01-01,14656.0
1,Afghanistan,AFG,1950-01-01,84272.0
2,Afghanistan,AFG,1951-01-01,91600.0
3,Afghanistan,AFG,1952-01-01,91600.0
4,Afghanistan,AFG,1953-01-01,106256.0


In [19]:
# Country-code lookup table: read only the three needed columns and rename
# them so the key column matches the emissions table ('Code').
df_cc = pd.read_csv(
    'Datasets/continents2.csv',
    usecols=["alpha-3", "region", "sub-region"],
).rename(
    columns={
        'alpha-3': 'Code',
        'region': 'Region',
        'sub-region': 'Sub-Region',
    }
)
df_cc.tail()

Unnamed: 0,Code,Region,Sub-Region
244,WLF,Oceania,Polynesia
245,ESH,Africa,Northern Africa
246,YEM,Asia,Western Asia
247,ZMB,Africa,Sub-Saharan Africa
248,ZWE,Africa,Sub-Saharan Africa


In [20]:
# Left join: keep every emissions row and attach Region / Sub-Region where
# the code is found in the lookup table (unmatched rows get NaN).
df_combination = df.merge(df_cc, on='Code', how='left')
df_combination.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18696 entries, 0 to 18695
Data columns (total 6 columns):
 #   Column                          Non-Null Count  Dtype         
---  ------                          --------------  -----         
 0   Entity                          18696 non-null  object        
 1   Code                            18696 non-null  object        
 2   Year                            18696 non-null  datetime64[ns]
 3   Annual CO₂ emissions (tonnes )  18696 non-null  float64       
 4   Region                          18297 non-null  object        
 5   Sub-Region                      18297 non-null  object        
dtypes: datetime64[ns](1), float64(1), object(4)
memory usage: 1022.4+ KB


In [21]:
# Control check: which entities found no region in the lookup table?
df_combination_control = df_combination.loc[df_combination['Region'].isna()]
df_combination_control['Entity'].unique()

array(['Czechoslovakia', 'World'], dtype=object)

In [22]:
# Drop the 'World' rows, then repeat the control check to see which
# entities are still missing a region.
df_combination = df_combination.loc[df_combination['Entity'].ne('World')]
df_combination_control = df_combination.loc[df_combination['Region'].isna()]
df_combination_control['Entity'].unique()

array(['Czechoslovakia'], dtype=object)

In [23]:
# Czechoslovakia is the only entity left without a region (see the control
# check above), so the remaining gaps are filled with its values.
# The fill is done on the frame with a per-column mapping and re-assigned:
# df[col].fillna(..., inplace=True) is chained assignment on a filtered
# frame, which warns and, under pandas Copy-on-Write, leaves the frame
# unchanged.
df_combination = df_combination.fillna(
    {'Region': 'Europe', 'Sub-Region': 'Eastern Europe'}
)
df_combination.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18429 entries, 0 to 18695
Data columns (total 6 columns):
 #   Column                          Non-Null Count  Dtype         
---  ------                          --------------  -----         
 0   Entity                          18429 non-null  object        
 1   Code                            18429 non-null  object        
 2   Year                            18429 non-null  datetime64[ns]
 3   Annual CO₂ emissions (tonnes )  18429 non-null  float64       
 4   Region                          18429 non-null  object        
 5   Sub-Region                      18429 non-null  object        
dtypes: datetime64[ns](1), float64(1), object(4)
memory usage: 1007.8+ KB


In [24]:
# Preview the first rows of the merged table (emissions plus Region / Sub-Region).
df_combination.head() 


Unnamed: 0,Entity,Code,Year,Annual CO₂ emissions (tonnes ),Region,Sub-Region
0,Afghanistan,AFG,1949-01-01,14656.0,Asia,Southern Asia
1,Afghanistan,AFG,1950-01-01,84272.0,Asia,Southern Asia
2,Afghanistan,AFG,1951-01-01,91600.0,Asia,Southern Asia
3,Afghanistan,AFG,1952-01-01,91600.0,Asia,Southern Asia
4,Afghanistan,AFG,1953-01-01,106256.0,Asia,Southern Asia


In [25]:
# Yearly emission totals per region.
total_reg = (
    df_combination
    .groupby(["Region", "Year"])["Annual CO₂ emissions (tonnes )"]
    .sum()
)
# Flatten the (Region, Year) index into columns. Region is moved out first
# and Year second so the resulting column order is Year, Region, value.
df_reg = total_reg.to_frame().reset_index(level="Region").reset_index()
df_reg.head()

Unnamed: 0,Year,Region,Annual CO₂ emissions (tonnes )
0,1884-01-01,Africa,21984.0
1,1885-01-01,Africa,36640.0
2,1886-01-01,Africa,47632.0
3,1887-01-01,Africa,47632.0
4,1888-01-01,Africa,80608.0


In [26]:
# Small-multiples area chart: one facet (and one colour) per region.
fig = px.area(
    df_reg,
    x="Year",
    y="Annual CO₂ emissions (tonnes )",
    color="Region",
    facet_col="Region",
    facet_col_wrap=5,
    labels={'Entity':'Country','Annual CO₂ emissions (tonnes )':'CO₂ Emission'},
    height=350,
)
# Centre the title and hide the legend; colour and facet both encode Region,
# so the legend would repeat the facet titles.
fig.update_layout(
    title="Change in CO₂ Emission Between Years 1750 and 2020 - Regions",
    title_x=0.50,
    showlegend=False,
)
fig.show()

In [27]:
# Emissions summed over all entities, one value per year.
total_year = (
    df_combination
    .groupby("Year")["Annual CO₂ emissions (tonnes )"]
    .sum()
)
# Turn the Year index back into an ordinary column for plotting.
df_total_year = total_year.to_frame().reset_index()
df_total_year.head()

Unnamed: 0,Year,Annual CO₂ emissions (tonnes )
0,1751-01-01,9350528.0
1,1752-01-01,9354192.0
2,1753-01-01,9354192.0
3,1754-01-01,9357856.0
4,1755-01-01,9361520.0


In [28]:
# Single area chart of the yearly emission totals.
fig = px.area(
    df_total_year,
    x="Year",
    y="Annual CO₂ emissions (tonnes )",
    hover_name='Year',
    hover_data=['Year','Annual CO₂ emissions (tonnes )'],
    labels={'Year':'Year','Annual CO₂ emissions (tonnes )':'CO₂ Emission'},
    height=600,
)
# Centre the title and hide the legend in one layout update.
fig.update_layout(
    title="Change in CO₂ Emission Between Years 1750 and 2017",
    title_x=0.50,
    showlegend=False,
)
fig.update(layout_coloraxis_showscale=True)
fig.show()


In [29]:
# Emissions summed over all years for each (Region, Entity) pair.
final = (
    df_combination
    .groupby(['Region', 'Entity'])["Annual CO₂ emissions (tonnes )"]
    .sum()
)
# Rank from largest to smallest total, then flatten the index. Region is
# moved out first and Entity second, giving columns Entity, Region, value.
df_final = (
    final.to_frame()
    .sort_values('Annual CO₂ emissions (tonnes )', ascending=False)
    .reset_index(level='Region')
    .reset_index()
)
df_final.head(7)

Unnamed: 0,Entity,Region,Annual CO₂ emissions (tonnes )
0,United States,Americas,399378300000.0
1,China,Asia,200136500000.0
2,Russia,Europe,100589100000.0
3,Germany,Europe,90565630000.0
4,United Kingdom,Europe,77071060000.0
5,Japan,Asia,62304610000.0
6,India,Asia,48557860000.0


In [32]:
# Population table: read only the columns needed and rename the country
# column to 'Entity' so it can be joined with the emissions totals.
# The path uses a forward slash like the other loads in this notebook; the
# original backslash made '\p' an invalid string escape and only resolved
# on Windows.
df_population = pd.read_csv(
    'Datasets/population_by_country_2020.csv',
    usecols=[
        "Country (or dependency)",
        "Population (2020)",
        "Density (P/Km²)",
        "Land Area (Km²)",
    ],
).rename(columns={'Country (or dependency)': 'Entity'})
df_population.head()

Unnamed: 0,Entity,Population (2020),Density (P/Km²),Land Area (Km²)
0,China,1440297825,153,9388211
1,India,1382345085,464,2973190
2,United States,331341050,36,9147420
3,Indonesia,274021604,151,1811570
4,Pakistan,221612785,287,770880


In [33]:
# Left join on the entity name: every emitter is kept, and population
# figures are NaN where the name has no match in the population table.
df_bubble = df_final.merge(df_population, on='Entity', how='left')
df_bubble.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 223 entries, 0 to 222
Data columns (total 6 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   Entity                          223 non-null    object 
 1   Region                          223 non-null    object 
 2   Annual CO₂ emissions (tonnes )  223 non-null    float64
 3   Population (2020)               200 non-null    float64
 4   Density (P/Km²)                 200 non-null    float64
 5   Land Area (Km²)                 200 non-null    float64
dtypes: float64(4), object(2)
memory usage: 12.2+ KB


In [34]:
# List the column labels of the merged frame (they are used verbatim as plot arguments below).
df_bubble.columns

Index(['Entity', 'Region', 'Annual CO₂ emissions (tonnes )',
       'Population (2020)', 'Density (P/Km²)', 'Land Area (Km²)'],
      dtype='object')

In [35]:
# Scratch check: show what Python type a single-column selection has.
x=type(df_bubble['Land Area (Km²)'])
print (x)

<class 'pandas.core.series.Series'>


In [36]:
# Scratch check: confirm the land-area column converts to a plain Python list.
kx = df_bubble['Land Area (Km²)'].tolist()
print(type(kx))

# Keep only the rows with no missing values. The original assigned the bound
# method `df_bubble.dropna` without calling it, so kx held a method object
# rather than data.
# NOTE(review): kx is not used by later cells; confirm this is the intended
# meaning, or delete the line.
kx = df_bubble.dropna()

<class 'list'>


In [37]:
# Colour value: log2 of (density + 1); the +1 keeps a density of 0 finite.
df_bubble['Log Scale'] = (df_bubble['Density (P/Km²)'] + 1).apply(math.log2)

# Scatter of summed emissions against 2020 population, both on log axes,
# coloured by the log-scaled density.
bubble = px.scatter(
    df_bubble,
    x="Annual CO₂ emissions (tonnes )",
    y="Population (2020)",
    color="Log Scale",
    color_continuous_scale="Temps",
    size_max=40,
    hover_name="Entity",
    hover_data=['Entity','Annual CO₂ emissions (tonnes )'],
    labels={
        'Entity':'Country',
        'Annual CO₂ emissions (tonnes )':'CO₂ Emission',
        'Population (2020)':'Population',
        'Log Scale':'Density',
    },
    log_x=True,
    log_y=True,
)
bubble.update_layout(title="Population v. CO2 Emission, 2020", title_x=0.5)
bubble.show()