In [None]:
#importing libraries
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
#Reading the dataset
# The CSV location may be overridden by setting the WHR2023_CSV environment
# variable, so the notebook can run on machines other than the author's.
# When the variable is unset, the original hard-coded path is used unchanged.
DEFAULT_CSV_PATH = r"C:\Users\user\Desktop\EDA\WHR2023.csv"
csv_path = os.environ.get("WHR2023_CSV", DEFAULT_CSV_PATH)
df = pd.read_csv(csv_path)
# Preview the first ten rows of the raw frame.
df.head(10)

In [None]:
# (number of rows, number of columns) of the raw frame loaded above.
df.shape

In [None]:
# Columns of the raw frame that are kept for the analysis: the country
# identifier, the ladder (happiness) score and six explanatory measures.
df_columns = [
    'Country name',
    'Ladder score',
    'Logged GDP per capita',
    'Social support',
    'Healthy life expectancy',
    'Freedom to make life choices',
    'Generosity',
    'Perceptions of corruption',
]

In [None]:
# Independent copy of the selected columns, so later edits to `data`
# leave the raw `df` untouched.
data = df.loc[:, df_columns].copy()

In [None]:
# Preview the first five rows of the working frame.
data.head(n=5)

In [None]:
# Count of missing values in each column of the working frame.
data.isna().sum()

In [None]:
# Impute missing numeric values with the mean of their OWN column.
# A frame-wide fillna with the 'Healthy life expectancy' mean would write that
# figure into any other column containing a NaN (including the text column
# 'Country name'). Passing a per-column Series of means fills
# 'Healthy life expectancy' exactly as before and keeps every other numeric
# column on its own scale; non-numeric columns are left untouched.
# Re-binding instead of inplace=True keeps the cell safe to re-run.
column_means = data.mean(numeric_only=True)
data = data.fillna(column_means)

In [None]:
# Global look for every figure created after this cell: seaborn dark grid,
# larger font, default figure size and a yellow figure background.
sns.set_style('darkgrid')
plt.rcParams.update({
    'font.size': 15,
    'figure.figsize': (10, 7),
    'figure.facecolor': '#F7D560',
})

In [None]:
# Bar chart of the ten rows of the raw frame with the largest
# 'Logged GDP per capita'.
top_gdp = df.sort_values(by='Logged GDP per capita', ascending=False).head(10)
fig, ax = plt.subplots(figsize=(16, 6))
ax.set_title(" Top 10 Countries with highest GDP per capita")
# Tick-label rotation is applied before drawing, matching the original order.
plt.xticks(rotation=40, ha='right')
sns.barplot(data=top_gdp, x='Country name', y='Logged GDP per capita', ax=ax)
ax.set_ylabel("GDP per capita")

In [None]:
# Bar chart of the ten rows of the raw frame with the largest 'Social support'.
top_support = df.sort_values(by='Social support', ascending=False).head(10)
fig, ax = plt.subplots(figsize=(16, 6))
ax.set_title(" Top 10 Countries with highest social support")
# Tick-label rotation is applied before drawing, matching the original order.
plt.xticks(rotation=40, ha='right')
sns.barplot(data=top_support, x='Country name', y='Social support', ax=ax)


In [None]:
# Bar chart of the ten rows of the raw frame with the highest
# 'Perceptions of corruption'.
most_corrupt = df.sort_values(by='Perceptions of corruption', ascending=False).head(10)
fig, ax = plt.subplots(figsize=(16, 6))
ax.set_title(" Countries with most Perceptions of Corruption")
# Tick-label rotation is applied before drawing, matching the original order.
plt.xticks(rotation=30, ha='right')
sns.barplot(data=most_corrupt, x='Country name', y='Perceptions of corruption', ax=ax)

In [None]:
# Bar chart of the ten rows of the raw frame with the lowest
# 'Perceptions of corruption' (ascending sort, first ten rows).
least_corrupt = df.sort_values(by='Perceptions of corruption', ascending=True).head(10)
fig, ax = plt.subplots(figsize=(16, 6))
ax.set_title("Top 10 Countries with lowest Perceptions of Corruption")
sns.barplot(data=least_corrupt, x='Country name', y='Perceptions of corruption', ax=ax)

In [None]:
# Healthy life expectancy for the ten rows with the highest 'Ladder score'.
# Rows are ranked by happiness, but the bars show life expectancy.
happiest = df.sort_values(by='Ladder score', ascending=False).head(10)
fig, ax = plt.subplots(figsize=(16, 6))
ax.set_title("Top 10 Happiest Countries Healthy Life Expectancy")
sns.barplot(data=happiest, x='Country name', y='Healthy life expectancy', ax=ax)

In [None]:
# Healthy life expectancy for the ten rows with the lowest 'Ladder score'.
# Rows are ranked by happiness, but the bars show life expectancy.
least_happy = df.sort_values(by='Ladder score', ascending=True).head(10)
fig, ax = plt.subplots(figsize=(16, 6))
ax.set_title("Bottom 10 least happy Countries Healthy Life Expectancy")
# Tick-label rotation is applied before drawing, matching the original order.
plt.xticks(rotation=30, ha='right')
sns.barplot(data=least_happy, x='Country name', y='Healthy life expectancy', ax=ax)


In [None]:
# Bar chart of the ten rows of the raw frame with the largest 'Generosity'.
most_generous = df.sort_values(by='Generosity', ascending=False).head(10)
fig, ax = plt.subplots(figsize=(20, 5))
ax.set_title('Top 10 Most Generous Countries')
sns.barplot(data=most_generous, x='Country name', y='Generosity', ax=ax)

In [None]:
# Scatter of 'Ladder score' (x) against 'Logged GDP per capita' (y).
# The rcParams change is global, so it also affects figures created later.
plt.rcParams.update({'figure.figsize': (15, 7)})
gdp_ax = sns.scatterplot(data=data, x='Ladder score', y='Logged GDP per capita')
gdp_ax.set_title('Plot between Happiness Score and GDP')
gdp_ax.set_xlabel('Happiness Score')
gdp_ax.set_ylabel('GDP per Capita')

In [None]:
# Scatter of 'Ladder score' (x) against 'Healthy life expectancy' (y), in red.
# The rcParams change is global, so it also affects figures created later.
plt.rcParams.update({'figure.figsize': (15, 7)})
life_ax = sns.scatterplot(
    data=data,
    x='Ladder score',
    y='Healthy life expectancy',
    color='red',
)
life_ax.set_title('Plot between Happiness Score and Healthy life expectancy')
life_ax.set_xlabel('Happiness Score')
life_ax.set_ylabel('Healthy life expectancy')

In [None]:
#Correlation Map
# Pearson correlation between the numeric columns only. `data` still holds the
# text column 'Country name', which makes DataFrame.corr raise on pandas >= 2.0,
# so non-numeric columns are dropped before correlating.
cor = data.select_dtypes(include="number").corr(method="pearson")
f, ax = plt.subplots(figsize=(10, 5))
# The `np.bool` alias was removed in NumPy 1.24; the builtin `bool` is the
# supported spelling. The mask is all False, so no cell of the heatmap is hidden.
sns.heatmap(cor, mask=np.zeros_like(cor, dtype=bool), cmap="Blues", square=True, ax=ax)

In [None]:
# Grid of pairwise plots across the columns of the working frame.
sns.pairplot(data=data)

In [None]:
# Scatter of 'Freedom to make life choices' (x) against 'Ladder score' (y).
# The x label is left as the column name; only the y label is overridden.
plt.rcParams.update({'figure.figsize': (15, 7)})
freedom_ax = sns.scatterplot(data=data, x='Freedom to make life choices', y='Ladder score')
freedom_ax.set_title('Plot between Happiness score and Freedom to make life choices')
freedom_ax.set_ylabel('Happiness score')


In [None]:
#corruption vs happiness
# Scatter of 'Ladder score' (x) against 'Perceptions of corruption' (y).
# The rcParams change is global, so it also affects figures created later.
plt.rcParams['figure.figsize']=(15,7)
plt.title('Corruption vs Happiness')
sns.scatterplot(x=data['Ladder score'],y= data['Perceptions of corruption'])
plt.xlabel('Happiness Score')
# Axis label spelling corrected (was 'Coruption').
plt.ylabel('Corruption')

##### Hierarchical Clustering

In [None]:
# Feature frame for clustering: the working frame without 'Country name'.
data_no = data.drop(columns=['Country name'])

In [None]:
from sklearn.preprocessing import normalize

# With its default arguments, normalize() rescales each ROW (sample) to unit
# L2 norm; it does not standardise each column.
# NOTE(review): confirm row-wise normalisation is what the clustering needs.
unit_rows = normalize(data_no)
# Wrap the resulting array back into a frame carrying the feature names.
data_scaled = pd.DataFrame(unit_rows, columns=data_no.columns)
# Separate copy of the scaled frame kept under its own name.
datasc = data_scaled.copy()
data_scaled.head()

In [None]:
import scipy.cluster.hierarchy as shc

# Build the Ward linkage over the rows of data_scaled, then draw it as a
# dendrogram on a fresh 10x8 figure.
ward_links = shc.linkage(data_scaled, method='ward')
plt.figure(figsize=(10, 8))
plt.title("Dendrograms")
dend = shc.dendrogram(ward_links)

In [None]:
from sklearn.cluster import AgglomerativeClustering

# Two-cluster agglomerative model with Ward linkage.
# The `affinity=` keyword was deprecated in scikit-learn 1.2 and removed in 1.4
# (renamed to `metric`). Ward linkage only accepts Euclidean distances, which is
# also the default, so omitting the argument gives the same model on old and
# new scikit-learn versions.
cluster = AgglomerativeClustering(n_clusters=2, linkage='ward')
# Fit on the scaled features and show the cluster label of every row.
cluster.fit_predict(data_scaled)

->0 belongs to cluster 1

->1 belongs to cluster 2

In [None]:
# Scaled GDP against scaled corruption perception, with each point coloured by
# its agglomerative cluster label.
gdp_vals = data_scaled['Logged GDP per capita']
corruption_vals = data_scaled['Perceptions of corruption']
plt.figure(figsize=(6, 5))
cluster_points = plt.scatter(gdp_vals, corruption_vals, c=cluster.labels_)
plt.xlabel('GDP per Capita')
plt.ylabel('Perceptions of Corruption')
# Colour bar for the scatter drawn just above.
plt.colorbar(cluster_points)

##### K-Means Clustering 

In [None]:
# Two scaled features selected into X, plotted here without any cluster colours.
kmeans_features = ["Social support", "Healthy life expectancy"]
X = data_scaled[kmeans_features]
plt.scatter(X[kmeans_features[0]], X[kmeans_features[1]], c='pink')
plt.xlabel("Social Support")
plt.ylabel("Healthy life expectancy")
plt.show()

In [None]:
#Performing K MEANS CLUSTERING with 3 clusters
from sklearn.cluster import KMeans

# random_state fixes the centroid initialisation so labels and centroids are
# identical on every run. n_init is set explicitly because its default differs
# between scikit-learn versions (10 historically, 'auto' in newer releases).
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42).fit(X)
centroids = kmeans.cluster_centers_
print(centroids)

# Points coloured by assigned cluster, with the three centroids overlaid in red.
plt.scatter(X['Social support'], X['Healthy life expectancy'], c= kmeans.labels_.astype(float), alpha=0.5)
plt.scatter(centroids[:, 0], centroids[:, 1], c='red')
# Axis labels so the figure can be read on its own.
plt.xlabel("Social Support")
plt.ylabel("Healthy life expectancy")
plt.show()