In [1]:
import numpy as np
import pandas as pd
import plotly as py

import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

In [2]:
df = pd.read_csv('GlobalLandTemperaturesByCountry.csv')

In [3]:
# We can see that there are many Null-data in our dataset
df.isnull().sum()

1743-11-01                0
4.3839999999999995    32651
2.294                 31912
Åland                     0
dtype: int64

# Data Cleaning

In [4]:
# We don't need Avareage Temperature Uncertainty for our analysis so..
df = df.drop("AverageTemperatureUncertainty", axis=1)
df.head()

KeyError: "['AverageTemperatureUncertainty'] not found in axis"

In [None]:
# Chnage the column dt 'String' to datatime format to easily extract year and month from it.
df['dt'] = pd.to_datetime(df['dt'])

In [None]:
df = df.rename(columns={'dt':'Date'})

In [None]:
# Extract year and month data from the Date
df['Year'] = df['Date'].map(lambda x: x.year)
df['Month'] = df['Date'].map(lambda x: x.month)
df.head()

In [None]:
# Lastly for data cleaning, let’s drop the rows with the null values so that they don't effect our analysis. 
# As we checked earlier, we have around 32000 rows with null values in AverageTemperature column. 
# And in total we have around 577000 rows, so dropping them is not a big deal. 
df = df.dropna()

In [None]:
df.head()

In [None]:
# grouping the dataframe by Country and Year columns. And also, sorting the values by Year from earliest to latest time.
df_countries = df.groupby(['Country', 'Year']).mean().reset_index().sort_values('Year', ascending=True)
df_countries

In [None]:
# There are many missing data in 19th century.
# So the time-series data will be provided in the time frame: 1900 - 2013
mask = (df_countries['Year'] >= 1900) & (df_countries['Year'] <= 2013)

In [None]:
df_countries = df_countries.loc[mask]

In [None]:
# Testing with Finland
df_countries.loc[df_countries['Country'] == 'Finland'].head(20)

In [None]:
df_countries.head(10)

In [None]:
# Manipulating the original dataframe
df_countrydate = df_countries.groupby(['Year','Country']).sum().reset_index()

#C reating the visualization
fig = px.choropleth(df_countrydate,
color_continuous_scale="Burgyl",
locations="Country",
locationmode = "country names",
color="AverageTemperature",
hover_name="Country",
animation_frame="Year"
)

fig.update_layout(
title_text = '<b>Annual Average Temperature Change: 1900 - 2013</b>',
title_x = 0.5,
geo=dict(
showframe = False,
showcoastlines = False,
))

fig.show() 