In [1]:
import numpy as np
import pandas as pd
import plotly as py

import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

In [2]:
# CO2 emission data is one Mathilda uploaded.
df = pd.read_csv('annual_csv.csv')
df2 = pd.read_csv('CO2emissions.csv')

In [3]:
# Tempperature data is available until the year 2016
df.head()
df = df.rename(columns={"Mean" : "Temperature"})
df.tail()

Unnamed: 0,Source,Year,Temperature
269,GISTEMP,1882,-0.1
270,GCAG,1881,-0.0628
271,GISTEMP,1881,-0.12
272,GCAG,1880,-0.1148
273,GISTEMP,1880,-0.2


In [4]:
# CO2 data is available until the year 2012
df2 = df2.rename(columns={"Total CO2 Emissions Excluding Land-Use Change and Forestry (MtCO2)" : "CO2_Emission"})
df2.tail()

Unnamed: 0,Country,Year,CO2_Emission
30639,Vietnam,2012,173.0497
30640,World,2012,33843.0497
30641,Yemen,2012,20.5386
30642,Zambia,2012,2.76
30643,Zimbabwe,2012,9.98


In [5]:
# We use only one source of the Temperature Anomaly data.
# Based on my research, GISTEMP has more public confidence over GCAG. 
# If want, you can change the value below to 'GCAG'
df = df.loc[df['Source'] == 'GISTEMP'].sort_values('Year', ascending=True)

In [6]:
# Either sum of average wouldn't matter for inicating the increase in CO2 emission over time
# but I chose the total sum of worldwide CO2 emission 
# because the degree of CO2 emission effect on climate change is proportional to the total amount of CO2 emission
df2 = df2.groupby(['Year']).sum().reset_index().sort_values('Year', ascending = True) 

In [7]:
df2

Unnamed: 0,Year,CO2_Emission
0,1850,745.1351
1,1851,740.7692
2,1852,772.9207
3,1853,802.1741
4,1854,945.8544
...,...,...
158,2008,68084.3547
159,2009,66709.4218
160,2010,70057.4880
161,2011,71844.0564


In [8]:
df.isna().sum()

Source         0
Year           0
Temperature    0
dtype: int64

In [9]:
df2.isna().sum()

Year            0
CO2_Emission    0
dtype: int64

In [10]:
# Both of them contain no Null data but their starting year differs.
# For the consistency with other analysis (The effect of Industrialization)
# I chose the starting year to be 1900 and the end year to be 2012. 
# Feel free to change this years if necessary.
mask = (df['Year'] >= 1900) & (df['Year'] <= 2012)
mask2 = (df2['Year'] >= 1900) & (df2['Year'] <= 2012)

In [11]:
df = df.loc[mask]
df2 = df2.loc[mask2]

In [12]:
df

Unnamed: 0,Source,Year,Temperature
233,GISTEMP,1900,-0.09
231,GISTEMP,1901,-0.15
229,GISTEMP,1902,-0.27
227,GISTEMP,1903,-0.35
225,GISTEMP,1904,-0.44
...,...,...,...
17,GISTEMP,2008,0.54
15,GISTEMP,2009,0.64
13,GISTEMP,2010,0.71
11,GISTEMP,2011,0.60


In [13]:
df2

Unnamed: 0,Year,CO2_Emission
50,1900,6032.3620
51,1901,6133.6154
52,1902,6237.6670
53,1903,6707.2002
54,1904,6783.3023
...,...,...
158,2008,68084.3547
159,2009,66709.4218
160,2010,70057.4880
161,2011,71844.0564


In [14]:
# Now we merge these two dataframes with common column, 'Year'.
df_combine = pd.merge(df, df2, on=['Year'])

In [15]:
df_combine

Unnamed: 0,Source,Year,Temperature,CO2_Emission
0,GISTEMP,1900,-0.09,6032.3620
1,GISTEMP,1901,-0.15,6133.6154
2,GISTEMP,1902,-0.27,6237.6670
3,GISTEMP,1903,-0.35,6707.2002
4,GISTEMP,1904,-0.44,6783.3023
...,...,...,...,...
108,GISTEMP,2008,0.54,68084.3547
109,GISTEMP,2009,0.64,66709.4218
110,GISTEMP,2010,0.71,70057.4880
111,GISTEMP,2011,0.60,71844.0564


In [28]:
fig = px.bar(df_combine, x='Year', y='CO2_Emission',
hover_data=['CO2_Emission', 'Temperature'], color='Temperature',
title= '<b>Global Co2 Emission & Temperature Anomaly<b>', 
labels={'CO2_Emission':'CO2 Emission'}, height=520)

fig.update_layout(
font = dict(
    family = "Times New roman",
    size = 15),
title_font_family = "Times New Roman",
title_font_size = 21,
title_x = 0.5,
title_y = 0.87
)

fig.show()
