# Objective:
#   1. Analyze the World Happiness Report using the various clustering algorithms available in sklearn
#   2. Map the results using plotly world graphs


In [68]:
## Call libraries
import numpy as np            # Data manipulation
import pandas as pd           # Dataframe manipulatio 
import matplotlib.pyplot as plt                   # For graphics

from sklearn import cluster, mixture              # For clustering
from sklearn.preprocessing import StandardScaler  # For scaling dataset

import os                     # For os related operations

## To plot the world map using plotly
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

Read the dataset and describe the data

In [69]:
# Load the 2017 World Happiness Report; row 0 of the CSV supplies the column names
csv_path = "../input/2017.csv"
WH = pd.read_csv(csv_path, header=0)
# Summary statistics of the loaded dataset
WH.describe()

Removing Country and Happiness_Rank columns from the dataset for applying clustering algorithms

In [70]:
# Keep every column from position 2 onward, i.e. drop the first two columns
# (Country and Happiness_Rank). .copy() makes WH_X an independent frame, so
# columns added to it later neither touch WH nor trigger SettingWithCopyWarning.
WH_X = WH.iloc[:, 2:].copy()
# info is a method: it has to be called to print the column / dtype summary
WH_X.info()

Normalize dataset for easier parameter selection

- Standardize features by removing the mean and scaling to unit variance


In [71]:
# Instantiate the scaler object
ss = StandardScaler()
# Use it now to 'fit' & 'transform'. fit_transform returns a new array and
# leaves WH_X untouched, so the result must be kept; wrapping it in a DataFrame
# preserves the column names and the index of WH_X.
# NOTE(review): the clustering cell further down is still handed WH_X; pass
# WH_X_scaled there instead to cluster on the standardized features.
WH_X_scaled = pd.DataFrame(ss.fit_transform(WH_X), columns=WH_X.columns, index=WH_X.index)
# Show only the first rows rather than dumping the whole array
WH_X_scaled.head()

# Define a function that accepts the data and a model name, applies the chosen clustering algorithm, and plots the result in a scatter plot


In [72]:
# Names accepted by cluster_Algorithm_plot. The same names are used for the
# label columns the function stores in ``data``, which is how previously stored
# results are recognised and kept out of the features of later fits.
_CLUSTER_MODEL_NAMES = ('KMeans', 'MiniBatchKMeans', 'SpectralClustering', 'MeanShift',
                        'DBSCAN', 'AffinityPropagation', 'Birch', 'GaussianMixture')


def _build_cluster_model(model_name, default_base):
    """Return the unfitted sklearn estimator for ``model_name``, configured from ``default_base``."""
    # Optional seed: when the key is absent this is None, sklearn's own default.
    seed = default_base.get('random_state')

    if model_name == 'KMeans':                       ## KMeans
        return cluster.KMeans(n_clusters=default_base['n_clusters'], random_state=seed)
    if model_name == 'MiniBatchKMeans':              ## Mini Batch K-Means
        return cluster.MiniBatchKMeans(n_clusters=default_base['n_clusters'], random_state=seed)
    if model_name == 'SpectralClustering':           ## Spectral clustering
        return cluster.SpectralClustering(n_clusters=default_base['n_clusters'], random_state=seed)
    if model_name == 'MeanShift':                    ## Mean Shift
        return cluster.MeanShift(bandwidth=default_base['bandwidth'])
    if model_name == 'DBSCAN':                       ## DBSCAN
        return cluster.DBSCAN(eps=default_base['eps'])
    if model_name == 'AffinityPropagation':          ## Affinity Propagation
        return cluster.AffinityPropagation(damping=default_base['damping'],
                                           preference=default_base['preference'])
    if model_name == 'Birch':                        ## Birch
        return cluster.Birch(n_clusters=default_base['n_clusters'])
    if model_name == 'GaussianMixture':              ## Gaussian Mixture modeling
        return mixture.GaussianMixture(n_components=default_base['n_clusters'],
                                       covariance_type='full', random_state=seed)
    raise ValueError("Unsupported model_name: %r" % (model_name,))


def cluster_Algorithm_plot(data, model_name, default_base,plot_row,plot_col,plot_pos):
    """Cluster ``data`` with one algorithm, store the labels in it and scatter-plot them.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature table. It is modified in place: the labels are written to a
        column named ``model_name``. Columns named after a supported model
        (results of earlier calls) are excluded from the fit.
    model_name : str
        One of the names in ``_CLUSTER_MODEL_NAMES``.
    default_base : dict
        Estimator settings. Depending on the model, the keys 'n_clusters',
        'bandwidth', 'eps', 'damping' and 'preference' are read. An optional
        'random_state' key seeds KMeans, MiniBatchKMeans, SpectralClustering
        and GaussianMixture.
    plot_row, plot_col, plot_pos : int
        Passed to ``plt.subplot`` to select the panel of the current figure.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, now holding the ``model_name`` column.

    Raises
    ------
    ValueError
        If ``model_name`` is not a supported model.
    """
    # Reject unknown names up front; without this the function would fail
    # later with an UnboundLocalError on the never-assigned result.
    if model_name not in _CLUSTER_MODEL_NAMES:
        raise ValueError("Unsupported model_name: %r (expected one of %s)"
                         % (model_name, ", ".join(_CLUSTER_MODEL_NAMES)))

    cluster_obj = _build_cluster_model(model_name, default_base)

    # Callers accumulate one label column per model in the same frame. Those
    # columns are outputs, not features, so they must not feed the next model.
    label_columns = [col for col in data.columns if col in _CLUSTER_MODEL_NAMES]
    features = data.drop(columns=label_columns)

    if model_name in ('AffinityPropagation', 'GaussianMixture'):
        # These two are fitted first and then asked to predict.
        cluster_obj.fit(features)
        model_result = cluster_obj.predict(features)
    else:
        model_result = cluster_obj.fit_predict(features)

    ### Store the model results obtained from the clustering algorithm.
    # Assigning the label array directly is positional, so it cannot be
    # misaligned by a non-default index on ``data``.
    data[model_name] = model_result

    ## Plot the cluster result: columns at positions 4 and 5, coloured by label
    ax = plt.subplot(plot_row, plot_col, plot_pos)
    ax.set_title(model_name+" Cluster Result")
    ax.set_xlabel(str(data.columns[4]))
    ax.set_ylabel(str(data.columns[5]))
    ax.scatter(data.iloc[:, 4], data.iloc[:, 5], c=model_result)

    return data


# Define a Function to plot World Choropleth Map using plotly 


In [73]:
def world_map_Plot(df,map_column):
    """Draw an interactive plotly choropleth world map coloured by one column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Country' column (used as the plotly location, matched
        by country name, and as hover text) and the ``map_column`` column.
    map_column : str
        Name of the column whose values colour the countries; also used in
        the figure title and the colour-bar title.

    Returns
    -------
    tuple
        Always the empty tuple; the map is displayed via ``iplot`` as a side effect.
    """
    data = [ dict(
        type = 'choropleth',
        locations = df['Country'],
        locationmode = 'country names',
        z = df[map_column],
        text = df['Country'],
        marker = dict(
            line = dict (
                color = 'rgb(180,180,180)',
                width = 0.5
                ) ),
        colorbar = dict(
            title = 'World Happiness<br>Score for '+map_column)
                ) ]

    layout = dict(
            title = map_column+'<br><b>World Happiness Score</b><br>Source:\
            <a href="https://www.kaggle.com/unsdsn/world-happiness/data">\
            Kaggle World Happiness Report</a>',
            geo = dict(
                    showframe = False,
                    showcoastlines = False,
                    projection = dict(
                            # plotly's projection types are lowercase enum
                            # values; 'Mercator' is rejected by validating
                            # plotly versions.
                            type = 'mercator'
                            )
                    )
            )

    # Map in the world plot
    whmap = go.Figure( data=data, layout=layout )
    iplot(whmap)

    return ()


Call the function to apply various clustering methods and plot those

In [74]:
# Initialize variables for cluster plotting: a 4 x 2 grid, one panel per model
n_row = 4
n_col = 2
n_pos = 1
# Initialize the list of models and the parameters they read
clusters_list = ["KMeans", "MiniBatchKMeans", "SpectralClustering", "MeanShift",
                "DBSCAN", "Birch", "GaussianMixture", "AffinityPropagation"]
default_base = {'n_clusters':2, 'bandwidth':0.1, "damping":0.9, "eps":0.3,
               "preference":-200}

plt.figure(figsize=(20,20))

# cluster_Algorithm_plot writes its labels into the frame it is given. Working
# on a copy leaves WH_X untouched, so this cell can be re-run and later edits
# of WH_map (e.g. inserting the country column) never leak back into WH_X.
WH_map = WH_X.copy()

# Apply each model in the clusters list and plot the results
for plot_pos, model_name in enumerate(clusters_list, start=n_pos):
    WH_map = cluster_Algorithm_plot(WH_map, model_name, default_base, n_row, n_col, plot_pos)


In [75]:
# Prepare the dataset for mapping: add the country column from the initial
# dataset as the first column. The guard keeps this cell re-runnable, because
# DataFrame.insert raises ValueError when the column already exists.
if 'Country' not in WH_map.columns:
    WH_map.insert(0,'Country',WH['Country'])
WH_map.dtypes

Plotting all clustering results and the Happiness score using the plotly function defined above

In [76]:
# Columns to map: the happiness score first, then one map per clustering result
column_list = [
    "Happiness.Score",
    "KMeans",
    "MiniBatchKMeans",
    "SpectralClustering",
    "MeanShift",
    "DBSCAN",
    "Birch",
    "GaussianMixture",
    "AffinityPropagation",
]

# One choropleth per column
for map_column in column_list:
    world_map_Plot(WH_map, map_column)
