In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans

from ipywidgets import interact, IntSlider, fixed, FloatSlider

In [2]:
%matplotlib inline

plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (12, 8)

In [3]:
data_set = pd.read_csv('../lab01/economic_freedom.csv', encoding="ISO-8859-1")
data_set = data_set.drop(["Country", "WEBNAME"], axis=1)

In [4]:
data_set.columns = ['id', 'name', 'region', 'world_rank',
       'region_rank', 'score', 'property_rights', 'judical_effectiveness',
       'gov_integrity', 'tax_burden', 'gov_spending', 'fiscal_health',
       'business_freedom', 'labor_freedom', 'monetary_freedom',
       'trade_freedom', 'investment_freedom ', 'financial_freedom',
       'tariff_rate', 'income_tax_rate', 'corp_tax_rate',
       'tax_burden', 'gov_expenditure',
       'population', 'gdp', 'gdp_growth_rate',
       'five_year_gdp_growth_rate', 'gdp_per_capita',
       'unemployment', 'inflation', 'fdi_inflow',
       'public_debt']

In [5]:
data_set.gdp = data_set.gdp.str[1:]
data_set.fdi_inflow = data_set.fdi_inflow.str[1:]
data_set.gdp_per_capita = data_set.gdp_per_capita.str[1:]
data_set.gdp = data_set.gdp.str.replace(',', '')
data_set.fdi_inflow = data_set.fdi_inflow.str.replace(',', '')
data_set.gdp_per_capita = data_set.gdp_per_capita.str.replace(',', '')

In [6]:
data_values = data_set[['property_rights', 'judical_effectiveness',
       'gov_integrity', 'tax_burden', 'gov_spending', 'fiscal_health',
       'business_freedom', 'labor_freedom', 'monetary_freedom',
       'trade_freedom', 'investment_freedom ', 'financial_freedom',
       'tariff_rate', 'income_tax_rate', 'corp_tax_rate',
       'tax_burden', 'gov_expenditure',
       'population', 'gdp', 'gdp_growth_rate',
       'five_year_gdp_growth_rate', 'gdp_per_capita',
       'unemployment', 'inflation', 'fdi_inflow',
       'public_debt']]
data_values

Unnamed: 0,property_rights,judical_effectiveness,gov_integrity,tax_burden,tax_burden.1,gov_spending,fiscal_health,business_freedom,labor_freedom,monetary_freedom,...,gov_expenditure,population,gdp,gdp_growth_rate,five_year_gdp_growth_rate,gdp_per_capita,unemployment,inflation,fdi_inflow,public_debt
0,19.6,29.6,25.2,91.7,5.0,80.3,99.3,49.2,60.4,76.7,...,25.6,35.5,69.6,2.5,2.9,1958,8.8,5.0,3.9,7.3
1,54.8,30.6,40.4,86.3,24.9,73.9,80.6,69.3,52.7,81.5,...,29.5,2.9,36.0,3.9,2.5,12507,13.9,2.0,119.1,71.2
2,31.6,36.2,28.9,76.4,24.5,48.7,18.7,61.6,49.9,74.9,...,41.4,41.5,632.9,2.0,3.1,15237,10.0,5.6,203.0,25.8
3,35.9,26.6,20.5,83.9,20.6,80.7,58.2,55.7,58.8,55.4,...,25.3,28.2,190.3,0.7,2.9,6753,8.2,31.7,2254.5,65.3
4,47.8,44.5,33.5,69.3,30.8,49.5,33.0,56.4,46.9,60.2,...,41.0,44.1,920.2,2.9,0.7,20876,8.7,25.7,1857.0,52.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
181,7.6,13.1,7.9,74.7,14.9,58.1,17.6,33.9,28.0,0.0,...,37.4,31.4,380.7,-14.0,-7.8,12114,7.7,1087.5,68.0,34.9
182,49.8,40.3,34.0,79.7,18.0,74.1,40.7,63.5,62.8,68.9,...,29.4,93.6,647.4,6.8,6.2,6913,2.1,3.5,4100.0,58.2
183,19.6,22.2,20.3,,,83.7,0.0,45.1,49.8,61.5,...,,30.0,38.6,-13.8,-16.1,1287,14.0,4.9,269.9,141.0
184,45.0,35.6,32.3,72.3,17.9,80.1,12.3,71.1,46.0,70.3,...,25.8,17.2,68.9,3.6,4.0,3996,7.8,6.6,091.2,62.2


In [None]:
def draw_clusters(n=2):
    #kmeans = KMeans(n_clusters=n, random_state=1).fit(X)
    kmeans = KMeans(n_clusters=n).fit(data_values)

    fig, ax = plt.subplots(1,1)
    ax.scatter(X[:,0], X[:,1], c=kmeans.labels_)
    
    ax.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
                  marker='o', c="white", alpha=1, s=200)
    
    ax.set_xlabel('$x_1$')
    ax.set_ylabel('$x_2$')

    for i, c in enumerate(kmeans.cluster_centers_):
        ax.scatter(c[0], c[1], marker='$%d$' % i, alpha=1, s=50)
    plt.show()
    
interact(drawClusters, n=IntSlider(min=2,max=8,step=1,value=2));