#### Chapter 01 - The machine learning landscape

In [2]:
from __future__ import division, print_function, unicode_literals

import numpy as np
import numpy.random as rnd
import os

rnd.seed(42)

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

PROJECT_ROOT_DIR = "."
CHAPTER_ID = "fundamentals"

def save_fig(fig_id, tight_layout=True):
    """Save the current matplotlib figure as a 300-dpi PNG.

    The image is written to
    ``<PROJECT_ROOT_DIR>/images/<CHAPTER_ID>/<fig_id>.png``; the directory
    is created first if it does not exist yet.

    Parameters
    ----------
    fig_id : str
        Base name of the image file, without the ``.png`` extension.
    tight_layout : bool, default True
        When true, ``plt.tight_layout()`` is applied before saving.
    """
    path = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID, fig_id + ".png")
    # Make sure the target folder exists; savefig does not create it.
    # (isdir/makedirs rather than exist_ok=True keeps Python 2 compatibility,
    # which the __future__ imports of this notebook indicate is wanted.)
    image_dir = os.path.dirname(path)
    if not os.path.isdir(image_dir):
        os.makedirs(image_dir)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    # The pyplot function is ``savefig``; ``plt.save_fig`` does not exist and
    # raised AttributeError on every call.
    plt.savefig(path, format='png', dpi=300)

In [17]:
import pandas as pd

datapath = "datasets/lifesat/"

# Read the OECD Better Life Index CSV, keep only the rows whose INEQUALITY
# value is "TOT", then reshape to one row per Country and one column per
# Indicator (cells hold the "Value" column).
oecd_bli = (
    pd.read_csv(datapath + "oecd_bli_2015.csv", thousands=',')
    .loc[lambda bli: bli['INEQUALITY'] == "TOT"]
    .pivot(index="Country", columns="Indicator", values="Value")
)
oecd_bli.head(2)

Indicator  Air pollution  Assault rate  Consultation on rule-making  \
Country                                                               
Australia           13.0           2.1                         10.5   
Austria             27.0           3.4                          7.1   

Indicator  Dwellings without basic facilities  Educational attainment  \
Country                                                                 
Australia                                 1.1                    76.0   
Austria                                   1.0                    83.0   

Indicator  Employees working very long hours  Employment rate  Homicide rate  \
Country                                                                        
Australia                              14.02             72.0            0.8   
Austria                                 7.61             72.0            0.4   

Indicator  Household net adjusted disposable income  \
Country                                       

In [None]:
# Preview the first five countries of the "Life satisfaction" indicator column.
oecd_bli.loc[:, "Life satisfaction"].head(n=5)

Country
Australia    73
Austria      69
Belgium      69
Brazil        7
Canada       73
Name: Life satisfaction, dtype: int64

In [18]:
# Read the GDP-per-capita table (tab-delimited, latin1-encoded, "n/a" treated
# as missing), rename the "2015" column to "GDP per capita" and index the
# rows by Country.
gdp_per_capita = (
    pd.read_csv(
        datapath + "gdp_per_capita.csv",
        thousands=',',
        delimiter='\t',
        encoding='latin1',
        na_values="n/a",
    )
    .rename(columns={"2015": "GDP per capita"})
    .set_index("Country")
)
gdp_per_capita.head(2)

                                            Subject Descriptor         Units  \
Country                                                                        
Afghanistan  Gross domestic product per capita, current prices  U.S. dollars   
Albania      Gross domestic product per capita, current prices  U.S. dollars   

             Scale                      Country/Series-specific Notes  \
Country                                                                 
Afghanistan  Units  See notes for:  Gross domestic product, curren...   
Albania      Units  See notes for:  Gross domestic product, curren...   

             GDP per capita  Estimates Start After  
Country                                             
Afghanistan         599.994                 2013.0  
Albania            3995.383                 2010.0  

In [19]:
# Join the two Country-indexed tables on their indexes (default inner join)
# and order the rows by ascending GDP per capita.
# NOTE(review): the variable name keeps its original spelling ("conutry")
# because cells not visible here may refer to it.
full_conutry_stats = (
    oecd_bli
    .merge(gdp_per_capita, left_index=True, right_index=True)
    .sort_values(by="GDP per capita")
)
full_conutry_stats

                 Air pollution  Assault rate  Consultation on rule-making  \
Country                                                                     
Brazil                    18.0           7.9                          4.0   
Mexico                    30.0          12.8                          9.0   
Russia                    15.0           3.8                          2.5   
Turkey                    35.0           5.0                          5.5   
Hungary                   15.0           3.6                          7.9   
Poland                    33.0           1.4                         10.8   
Chile                     46.0           6.9                          2.0   
Slovak Republic           13.0           3.0                          6.6   
Czech Republic            16.0           2.8                          6.8   
Estonia                    9.0           5.5                          3.3   
Greece                    27.0           3.7                          6.5   