In [1]:
import numpy as np
import pandas as pd
import plotly as py

import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

In [2]:
# Load the raw per-country temperature records from the CSV in the working directory.
df = pd.read_csv(filepath_or_buffer='GlobalLandTemperaturesByCountry.csv')

In [5]:
# Count the missing values in every column; the dataset contains many nulls.
df.isna().sum()

dt                                   0
AverageTemperature               32651
AverageTemperatureUncertainty    31912
Country                              0
dtype: int64

# Data Cleaning

In [6]:
# AverageTemperatureUncertainty is not used in this analysis, so remove the column.
df = df.drop(columns=["AverageTemperatureUncertainty"])
df.head()

Unnamed: 0,dt,AverageTemperature,Country
0,1743-11-01,4.384,Åland
1,1743-12-01,,Åland
2,1744-01-01,,Åland
3,1744-02-01,,Åland
4,1744-03-01,,Åland
...,...,...,...
577457,2013-05-01,19.059,Zimbabwe
577458,2013-06-01,17.613,Zimbabwe
577459,2013-07-01,17.000,Zimbabwe
577460,2013-08-01,19.759,Zimbabwe


In [7]:
# Convert the 'dt' column from strings to datetimes so that the year and month
# can be extracted from it easily.
df = df.assign(dt=pd.to_datetime(df['dt']))

In [8]:
# Give the timestamp column a descriptive name.
df = df.rename({'dt': 'Date'}, axis='columns')

In [9]:
# Extract the calendar year and month from Date.
# The vectorised .dt accessor replaces a per-element Python lambda: it is faster
# and stays well-defined if a timestamp is ever missing (NaT).
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df.head()

Unnamed: 0,Date,AverageTemperature,Country,Year,Month
0,1743-11-01,4.384,Åland,1743,11
1,1743-12-01,,Åland,1743,12
2,1744-01-01,,Åland,1744,1
3,1744-02-01,,Åland,1744,2
4,1744-03-01,,Åland,1744,3


In [10]:
# Final cleaning step: discard every row that contains a null so that missing
# values do not affect the analysis.
# The earlier null count showed roughly 32,000 missing AverageTemperature values
# out of about 577,000 rows, so dropping them loses little data.
df = df.dropna(axis=0, how='any')

In [11]:
# Preview the first rows of the cleaned frame.
df.head(n=5)

Unnamed: 0,Date,AverageTemperature,Country,Year,Month
0,1743-11-01,4.384,Åland,1743,11
5,1744-04-01,1.530,Åland,1744,4
6,1744-05-01,6.702,Åland,1744,5
7,1744-06-01,11.609,Åland,1744,6
8,1744-07-01,15.342,Åland,1744,7
...,...,...,...,...,...
577456,2013-04-01,21.142,Zimbabwe,2013,4
577457,2013-05-01,19.059,Zimbabwe,2013,5
577458,2013-06-01,17.613,Zimbabwe,2013,6
577459,2013-07-01,17.000,Zimbabwe,2013,7


In [12]:
# Average the monthly records into one row per (Country, Year), then order the
# rows chronologically from the earliest year to the latest.
# The numeric columns are selected explicitly: a bare .mean() over the whole frame
# depends on the pandas version for what happens to the datetime 'Date' column
# (dropped silently by older releases, averaged and kept by newer ones).
df_countries = (
    df.groupby(['Country', 'Year'])[['AverageTemperature', 'Month']]
    .mean()
    .reset_index()
    .sort_values('Year', ascending=True)
)
df_countries

Unnamed: 0,Country,Year,AverageTemperature,Month
35593,San Marino,1743,11.809000,11.0
19925,Ireland,1743,7.730000,11.0
43633,United Kingdom (Europe),1743,7.176000,11.0
16169,Greece,1743,10.806000,11.0
12897,Europe,1743,3.942000,11.0
...,...,...,...,...
3489,Bahrain,2013,27.925375,4.5
24196,Liechtenstein,2013,6.002375,4.5
23929,Libya,2013,24.314000,4.5
8993,Comoros,2013,26.542875,4.5


In [13]:
# A lot of data is missing for the earlier years, so the time series is
# restricted to the window 1900 - 2013 (both bounds inclusive).
mask = df_countries['Year'].between(1900, 2013)

In [14]:
# Keep only the rows selected by the boolean year mask.
df_countries = df_countries[mask]

In [26]:
# Spot-check the result using Finland's rows.
df_countries[df_countries['Country'].eq('Finland')].head(20)

Unnamed: 0,Country,Year,AverageTemperature,Month
13994,Finland,1900,0.0005,6.5
13995,Finland,1901,1.594083,6.5
13996,Finland,1902,-0.962417,6.5
13997,Finland,1903,1.879667,6.5
13998,Finland,1904,0.556417,6.5
13999,Finland,1905,1.499333,6.5
14000,Finland,1906,1.846333,6.5
14001,Finland,1907,1.0065,6.5
14002,Finland,1908,1.1685,6.5
14003,Finland,1909,1.09925,6.5


In [16]:
# Show the first ten rows of the filtered yearly averages.
df_countries.iloc[:10]

Unnamed: 0,Country,Year,AverageTemperature,Month
42679,Turks And Caicas Islands,1900,26.794917,6.5
7905,Central African Republic,1900,25.053833,6.5
25208,Madagascar,1900,22.915583,6.5
6376,Bulgaria,1900,10.962833,6.5
9028,Congo,1900,24.287833,6.5
39374,Suriname,1900,26.138417,6.5
40696,Taiwan,1900,21.742583,6.5
8880,Comoros,1900,25.852083,6.5
18802,Hungary,1900,10.30625,6.5
8397,China,1900,6.444167,6.5


In [25]:
# Build the plotting frame: one annual mean temperature per (Year, Country),
# ordered by year so the animation frames play chronologically.
# mean() is used rather than sum(): adding up yearly averages has no physical
# meaning, and only the temperature column is aggregated so that helper columns
# (e.g. Month) or datetime columns cannot distort or break the aggregation.
df_countrydate = (
    df_countries
    .groupby(['Year', 'Country'], as_index=False)['AverageTemperature']
    .mean()
)

# Pin the colour scale to the global min/max so that a given colour means the
# same temperature in every animation frame instead of relying on auto-scaling.
temperature_range = [
    df_countrydate['AverageTemperature'].min(),
    df_countrydate['AverageTemperature'].max(),
]

# Creating the visualization: an animated world map coloured by mean temperature.
fig = px.choropleth(
    df_countrydate,
    locations="Country",
    locationmode="country names",
    color="AverageTemperature",
    color_continuous_scale="Burgyl",
    range_color=temperature_range,
    hover_name="Country",
    animation_frame="Year",
)

fig.update_layout(
    title_text='<b>Annual Average Temperature Change: 1900 - 2013</b>',
    title_x=0.5,
    geo=dict(
        showframe=False,
        showcoastlines=False,
    ),
)

fig.show()