# Importing libraries:

In [None]:
import matplotlib as mpl
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Notebook-wide figure defaults: high-resolution rendering on a wide canvas.
# (plt.rcParams and mpl.rcParams refer to the same settings object.)
mpl.rcParams.update({
    'figure.dpi': 500,
    'figure.figsize': (12,6),
})

In [None]:
# Load the per-country indicator table and its accompanying data dictionary.
dataset = pd.read_csv('../input/countrycsv/country.csv')
info = pd.read_csv('../input/countrycsv/countryDictionary.csv')

# Columns 1..9 form the matrix of features; column 0 identifies the country.
# `countries` is kept as a single-column 2-D array (one row per country).
x = dataset.iloc[:, 1:10].to_numpy()
countries = dataset.iloc[:, [0]].to_numpy()

In [None]:
# Preview the first five rows to sanity-check the load.
dataset.head(n=5)

# Performing Standardisation on the matrix of features x

In [None]:
from sklearn.preprocessing import StandardScaler

# Rescale every feature column with StandardScaler so that features measured
# on large scales do not dominate the distance computations used for clustering.
# The fitted scaler is kept in `sc`.
sc = StandardScaler()
x = sc.fit(x).transform(x)

# Using the elbow method to find the optimal number of clusters

In [None]:
from sklearn.cluster import KMeans

# Fit K-Means for k = 1..10 and record the within-cluster sum of squares
# (inertia) of each fit; the "elbow" of this curve suggests a suitable k.
wcss = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 0)
    kmeans.fit(x)
    wcss.append(kmeans.inertia_)

plt.plot(range(1, 11), wcss)
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
# Save BEFORE showing: once plt.show() has displayed the figure, a later
# plt.savefig() no longer sees it and writes an empty image.
plt.savefig('Elbow Method',dpi = 300)
plt.show()


From the Elbow Method, we see that 3 or 4 could be the optimal number of clusters. However, let us also create a dendrogram for further analysis.

In [None]:
# Creating a dendrogram using Ward's method
import scipy.cluster.hierarchy as sch

# Agglomerative linkage over the feature matrix with the 'ward' criterion,
# then draw the resulting merge tree.
linkage_matrix = sch.linkage(x, method = 'ward')
dendrogram = sch.dendrogram(linkage_matrix)

plt.title('Dendrogram')
plt.xlabel('Countries')
plt.ylabel('Euclidean distances')
# Write the image to disk first, then display it.
plt.savefig('Dendrogram',dpi = 500)
plt.show()

The dendrogram gives us more insight into the number of clusters: we can see that the ideal number of clusters is 3.

# Applying the K-Means algorithm to form clusters

In [None]:
# Fit the final 3-cluster model (fit() returns the estimator itself) and
# assign every row of x to its cluster.
kmeans = KMeans(n_clusters = 3, init = 'k-means++', random_state = 0).fit(x)
y = kmeans.predict(x)
print(y)

Creating a list of names of features 

In [None]:
# Rows 1..9 of the first column of the data dictionary are taken as the
# feature names; flatten the single-column 2-D array into a plain list.
temp = info.iloc[1:10,[0]].values
features = [name for row in temp for name in row]
print(features)

Creating a list of the countries belonging to each cluster

In [None]:
# `countries` is a single-column 2-D array, so countries[i] is a one-element
# array and str(countries[i]) would yield "['Name']" instead of "Name".
# Flatten it first so each list holds the bare country names.
country_names = np.ravel(countries)
cluster_labels = np.asarray(y)

# c1 / c2 hold the countries labelled 0 / 1; c3 holds every remaining country
# (label 2 for the 3-cluster model).
c1 = [str(name) for name in country_names[cluster_labels == 0]]
c2 = [str(name) for name in country_names[cluster_labels == 1]]
c3 = [str(name) for name in country_names[(cluster_labels != 0) & (cluster_labels != 1)]]

print(len(c1))
print(len(c2))
print(len(c3))

To understand the significance of each feature, we plot every pair of features against each other and examine how the clusters separate in each plot.

In [None]:
def plotAll(y,features,x,centroids=None):
    """Draw, save and show one scatter plot per pair of feature columns.

    For every pair of columns (i, j) with i < j, the rows of ``x`` are
    plotted coloured by cluster label (0 -> red, 1 -> blue, 2 -> green),
    together with the cluster centroids projected onto the same two columns.
    Each figure is saved as '<feature i> vs <feature j>' and then displayed.

    Parameters
    ----------
    y : array of cluster labels, one per row of ``x``.
    features : sequence of feature names, one per column of ``x``.
    x : 2-D feature matrix (rows = samples, columns = features).
    centroids : optional 2-D array of cluster centres with the same columns
        as ``x``. Defaults to ``kmeans.cluster_centers_`` of the
        notebook-level fitted model.
    """
    if centroids is None:
        # Same source of centroids the function has always used.
        centroids = kmeans.cluster_centers_

    cluster_styles = (
        (0, 'red', 'Cluster 1'),
        (1, 'blue', 'Cluster 2'),
        (2, 'green', 'Cluster 3'),
    )
    # Derive the number of plottable columns from the inputs instead of
    # hard-coding 9.
    n_features = min(x.shape[1], len(features))

    for i in range(n_features):
        for j in range(i + 1, n_features):
            # Size the figure at creation time; changing rcParams afterwards
            # only affects later figures and leaks into global state.
            fig, ax = plt.subplots(figsize=(4, 3))
            for label, colour, name in cluster_styles:
                members = y == label
                ax.scatter(x[members, i], x[members, j], s = 2.5, c = colour, label = name, alpha = .8)
            # Project the centroids onto the SAME two columns being plotted
            # (previously columns 0 and 1 were used for every pair).
            ax.scatter(centroids[:, i], centroids[:, j], s = 2, c = 'yellow', label = 'Centroids')
            xtit = str(features[i])
            ytit = str(features[j])
            ax.set_title(xtit + ' vs ' + ytit)
            ax.set_xlabel(xtit)
            ax.set_ylabel(ytit)
            ax.legend()
            fig.savefig(xtit + ' vs ' + ytit,dpi =  300)
            plt.show()
            # Release the figure; dozens of open figures waste memory.
            plt.close(fig)
plotAll(y,features,x)

From these graphs we can see that the countries belonging to cluster c2 need our help, as they have very poor health indices as well as poor economic indices.

In [None]:
# Report the countries in cluster c2, heading first and the list on its own line.
print("List of 1st priority countries", c2, sep="\n")