In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Imports

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Configs

In [None]:
# Apply seaborn's 'darkgrid' style to the figures drawn in this notebook.
sns.set_style(style='darkgrid')

# Read in data

In [None]:
# Load the temperature-change CSV; the file is decoded as latin-1.
df = pd.read_csv(
    '/kaggle/input/temperature-change/Environment_Temperature_change_E_All_Data_NOFLAG.csv',
    encoding='latin-1',
)
df.head()

# Rename columns

In [None]:
# Normalise the headers: lower-case every name, then strip the leading 'y'
# from year-style columns (e.g. 'y1961' -> '1961').
# The pattern is anchored to a 'y' directly followed by a digit so that a 'y'
# inside any other column name is left untouched, and regex=True is spelled
# out because the default of `str.replace` differs between pandas versions.
df.columns = df.columns.str.lower().str.replace(r'^y(?=\d)', '', regex=True)
df.head()

# Remove unnecessary columns

In [None]:
# Drop the code/unit columns that the analysis below never reads.
# Rebinding `df` (instead of inplace=True) together with errors='ignore'
# keeps this cell re-runnable: a second execution no longer raises KeyError
# because the columns are already gone.
df = df.drop(columns=['area code', 'months code', 'element code', 'unit'], errors='ignore')
df.head()

# Rename month combinations

In [None]:
# Map the three-month combination labels to short quarter names.
# The keys use the character '\x96' as separator, which is how these labels
# appear once the CSV has been decoded as latin-1 (presumably an en dash in
# the source file -- TODO confirm).
months_replace = {
    'Dec\x96Jan\x96Feb': 'quarter_1',
    'Mar\x96Apr\x96May': 'quarter_2',
    'Jun\x96Jul\x96Aug': 'quarter_3',
    'Sep\x96Oct\x96Nov': 'quarter_4',
}
df = df.replace(to_replace=months_replace)

# Helper functions

In [None]:
def country_df(df, country):
    """Build a per-year frame of temperature changes for one area.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns 'area', 'months' and 'element'; every other
        column is treated as one year of data, and its label must be
        convertible to int (e.g. '1961').
    country : str
        Value of the 'area' column to select.

    Returns
    -------
    pandas.DataFrame
        One row per year column of `df`. There is one float column per value
        of 'months' for the selected area plus an int 'year' column. The
        index is a fresh range index whose name is `country`.
    """
    selected = (df['element'] == 'Temperature change') & (df['area'] == country)
    # Remove the two label columns by name rather than slicing the transposed
    # frame with [2:]: this no longer depends on column order, and the column
    # assignment below is made on a fresh frame instead of on a slice.
    dfn = (
        df.loc[selected]
        .drop(columns=['area', 'element'])
        .set_index('months')
        .transpose()
    )
    # After the transpose the index holds the year labels.
    dfn['year'] = dfn.index
    dfn = dfn.reset_index(drop=True)
    dfn.index.names = [country]
    # Everything becomes float first (year labels may be strings), then the
    # year column is narrowed to int.
    return dfn.astype('float').astype({'year': 'int'})

In [None]:
def months_df(df, country):
    """Return the temperature changes of one area in long format.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns 'area', 'months' and 'element'; every other
        column is treated as one year of data.
    country : str
        Value of the 'area' column to select.

    Returns
    -------
    pandas.DataFrame
        Columns 'months', 'year' (the original column label) and
        'delta_temperature', with one row per (months, year) pair. Every
        'months' value present for the area is kept; no filtering to
        calendar months is applied.
    """
    selected = (df['element'] == 'Temperature change') & (df['area'] == country)
    dfn = df.loc[selected].drop(columns=['element', 'area'])
    # With no explicit value_vars, melt unpivots every column except the id
    # column, so the result does not depend on 'months' being the first column.
    return pd.melt(dfn, id_vars=['months'], var_name='year', value_name='delta_temperature')

In [None]:
def seasons_df(df, country):
    """Build a per-year frame holding only the non-monthly periods of an area.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns 'area', 'months' and 'element'; every other
        column is treated as one year of data, and its label must be
        convertible to float (e.g. '1961').
    country : str
        Value of the 'area' column to select.

    Returns
    -------
    pandas.DataFrame
        One row per year column of `df`. The twelve calendar-month columns
        are removed, so what remains is whatever other 'months' values the
        area has, plus a 'year' column. All columns, including 'year', are
        float. The index is a fresh range index whose name is `country`.

    Raises
    ------
    KeyError
        If any of the twelve calendar months is missing for the area.
    """
    months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
    selected = (df['element'] == 'Temperature change') & (df['area'] == country)
    # No inplace operations on the filtered frame, and the label columns are
    # removed by name instead of slicing the transposed frame with [2:], so
    # the result does not depend on column order and nothing is assigned to
    # a slice of another frame.
    dfn = (
        df.loc[selected]
        .drop(columns=['area', 'element'])
        .rename(columns={'months': 'seasons'})
        .set_index('seasons')
        .transpose()
    )
    # After the transpose the index holds the year labels.
    dfn['year'] = dfn.index
    dfn = dfn.drop(columns=months).reset_index(drop=True)
    dfn.index.names = [country]
    return dfn.astype('float')

In [None]:
def country_regplot(data, period):
    """Draw a scatter of `period` against 'year' with a LOWESS trend line.

    `data` must have a 'year' column and a column named `period`; the name of
    its index is used as the plot title. A new 8x8 figure is opened for the
    plot.
    """
    plt.figure(figsize=(8, 8))
    scatter_style = {'alpha': 0.2}
    line_style = {'lw': 2, 'alpha': 0.78}
    sns.regplot(
        x='year',
        y=period,
        data=data,
        fit_reg=True,
        lowess=True,
        scatter_kws=scatter_style,
        line_kws=line_style,
    )
    plt.title(data.index.name)
    plt.xlabel('Year')
    plt.ylabel('∆ °C')

In [None]:
def country_densityplot(data):
    """Overlay one filled kernel-density curve per quarter and show the plot.

    `data` must provide the columns 'quarter_1' to 'quarter_4'. The curves
    are drawn onto the current matplotlib axes.
    """
    # (column, legend label, colour) for each curve.
    curves = [
        ('quarter_1', '1st quarter', 'r'),
        ('quarter_2', '2nd quarter', 'g'),
        ('quarter_3', '3rd quarter', 'b'),
        ('quarter_4', '4th quarter', 'y'),
    ]
    for column, label, color in curves:
        # `fill=True` is the current spelling of the deprecated `shade=True`.
        sns.kdeplot(data[column], fill=True, color=color, label=label)
    plt.xlabel('∆ °C')
    plt.legend()
    plt.show()

In [None]:
def country_boxplot(data, country):
    """Show one box of 'delta_temperature' per distinct 'months' value.

    `data` must have the columns 'months' and 'delta_temperature'; `country`
    is used as the plot title. A new 24x8 figure is opened for the plot.
    """
    plt.figure(figsize=(24, 8))
    sns.boxplot(data=data, x='months', y='delta_temperature')
    plt.title(country)
    plt.xlabel('Months')
    plt.ylabel('∆ °C')
    plt.show()

# Temperature analysis

## Germany

In [None]:
# Reshape Germany's rows, then plot the 'Meteorological year' column against year.
germany = country_df(df, country='Germany')
country_regplot(data=germany, period='Meteorological year')

In [None]:
# Distribution of the four quarter columns for Germany.
country_densityplot(data=germany)

In [None]:
# Long-format frame for Germany, shown as one box per 'months' value.
germany_monthly = months_df(df, 'Germany')
country_boxplot(germany_monthly, 'Germany')

## Australia

In [None]:
# Reshape Australia's rows, then plot the 'Meteorological year' column against year.
australia = country_df(df, country='Australia')
country_regplot(data=australia, period='Meteorological year')

In [None]:
# Distribution of the four quarter columns for Australia.
country_densityplot(data=australia)

In [None]:
# Long-format frame for Australia, shown as one box per 'months' value.
australia_monthly = months_df(df, 'Australia')
country_boxplot(australia_monthly, 'Australia')

## USA

In [None]:
# Reshape the USA rows, then plot the 'Meteorological year' column against year.
usa = country_df(df, country='United States of America')
country_regplot(data=usa, period='Meteorological year')

In [None]:
# Distribution of the four quarter columns for the USA.
country_densityplot(data=usa)

In [None]:
# Long-format frame for the USA, shown as one box per 'months' value.
usa_monthly = months_df(df, 'United States of America')
country_boxplot(usa_monthly, 'United States of America')

## China

In [None]:
# Reshape China's rows, then plot the 'Meteorological year' column against year.
china = country_df(df, country='China')
country_regplot(data=china, period='Meteorological year')

In [None]:
# Distribution of the four quarter columns for China.
country_densityplot(data=china)

In [None]:
# Long-format frame for China, shown as one box per 'months' value.
china_monthly = months_df(df, 'China')
country_boxplot(china_monthly, 'China')

## Russia

In [None]:
# Reshape the 'Russian Federation' rows, then plot the 'Meteorological year' column against year.
russia = country_df(df, country='Russian Federation')
country_regplot(data=russia, period='Meteorological year')

In [None]:
# Distribution of the four quarter columns for Russia.
country_densityplot(data=russia)

In [None]:
# The data is selected by the area name 'Russian Federation'; the plot is titled 'Russia'.
russia_monthly = months_df(df, 'Russian Federation')
country_boxplot(russia_monthly, 'Russia')

# Analyze Continents

In [None]:
# One seasons frame per region, built in the order of the names below.
# NOTE: this rebinds `australia`, which an earlier cell assigned from country_df.
africa, asia, europe, north_america, south_america, australia, antarctica = (
    seasons_df(df, area)
    for area in (
        'Africa',
        'Asia',
        'Europe',
        'Northern America',
        'South America',
        'Australia',
        'Antarctica',
    )
)

In [None]:
# Display names and the matching seasons frames, kept in the same order.
continents_str = [
    "Africa",
    "Asia",
    "Europe",
    "Northern America",
    "South America",
    "Australia",
    "Antarctica",
]
continents = [
    africa,
    asia,
    europe,
    north_america,
    south_america,
    australia,
    antarctica,
]

In [None]:
quarters = ['quarter_1', 'quarter_2', 'quarter_3', 'quarter_4']

# Put the region frames side by side, remove every quarter column, index the
# rows by Africa's 'year' values and remove the 'year' columns. The columns
# that remain are then labelled with the region names.
cont_yearly = (
    pd.concat(continents, axis=1, ignore_index=False)
    .drop(columns=quarters)
    .set_index(africa['year'])
    .drop(columns='year')
)
cont_yearly.columns = continents_str
cont_yearly.head()

In [None]:
def continents_violinplot(data):
    """Show one violin per column of `data`, with quartile lines inside.

    A new 15x10 figure is opened for the plot.
    """
    plt.figure(figsize=(15, 10))
    sns.violinplot(data=data, inner='quartile', cut=0, bw=0.3)
    plt.title('Continental Temperature Shifts  \nper year average')
    plt.ylabel('∆ °C', rotation=0)
    plt.show()
    
# Violin plot of the per-region yearly frame.
continents_violinplot(data=cont_yearly)

# Analyze World

In [None]:
# Element-wise, unweighted mean of the region frames. The divisor is taken
# from the list itself so it stays correct if `continents` gains or loses an
# entry. The 'year' column is averaged along with the rest.
world = sum(continents) / len(continents)
world.index.name = 'world'
world.head()

In [None]:
def world_regplot(data):
    """Overlay a LOWESS regression plot per quarter against 'year' and show it.

    Reads the notebook-level `quarters` list for the column names; `data`
    must contain those columns and a 'year' column. A new 8x8 figure is
    opened for the plot.
    """
    labels = ['1st quarter', '2nd quarter', '3rd quarter', '4th quarter']
    plt.figure(figsize=(8,8))

    scatter_style = {'alpha':0.2}
    line_style = {'lw':2, 'alpha':0.75}
    for idx in range(len(quarters)):
        sns.regplot(
            data=data,
            x='year',
            y=quarters[idx],
            fit_reg=True,
            lowess=True,
            label=labels[idx],
            scatter_kws=scatter_style,
            line_kws=line_style,
        )

    ax = plt.gca()
    ax.set_ylabel('∆ °C', rotation=0)
    ax.set_title('World ∆ Continental Temperatures')
    plt.legend(loc='best', frameon=False)
    plt.show()
    
# Quarter-by-quarter trend plot of the averaged frame.
world_regplot(data=world)