In [76]:
import numpy as np
import pandas as pd
from scipy.spatial.distance import pdist, squareform
from sklearn.cluster import AgglomerativeClustering, KMeans as SKLearnKMeans
from sklearn.metrics import pairwise_distances
from sklearn.metrics import silhouette_score

In [77]:
# Read the raw inflation and unemployment tables from the local datasets folder.
EU_in = pd.read_csv('datasets/inflation.csv')
EU_un = pd.read_csv('datasets/unemployment.csv')

# Keep only the country / period / observed-value columns and give them
# analysis-friendly names; the value column is named after its indicator.
EU_in_cleaned = (
    EU_in.loc[:, ['geo', 'TIME_PERIOD', 'OBS_VALUE']]
    .rename(columns={'geo': 'Country', 'TIME_PERIOD': 'Year', 'OBS_VALUE': 'Inflation'})
)
EU_un_cleaned = (
    EU_un.loc[:, ['geo', 'TIME_PERIOD', 'OBS_VALUE']]
    .rename(columns={'geo': 'Country', 'TIME_PERIOD': 'Year', 'OBS_VALUE': 'Unemployment'})
)


In [78]:
# A notebook cell renders only its final bare expression, so a bare
# `EU_in_cleaned.head()` placed before another expression is evaluated and
# thrown away. display() renders both previews from the same cell.
display(EU_in_cleaned.head(), EU_un_cleaned.head())

Unnamed: 0,Country,Year,Unemployment
0,Austria,2015,6.1
1,Austria,2016,6.5
2,Austria,2017,5.9
3,Austria,2018,5.2
4,Austria,2019,4.8


In [79]:

# Inner-join the two indicators so each row carries both the inflation and the
# unemployment value for one (Country, Year) pair; pairs present in only one
# table are dropped.
EU_in_un = EU_in_cleaned.merge(EU_un_cleaned, how='inner', on=['Country', 'Year'])
EU_in_un.head()

Unnamed: 0,Country,Year,Inflation,Unemployment
0,Austria,2015,0.8,6.1
1,Austria,2016,1.0,6.5
2,Austria,2017,2.2,5.9
3,Austria,2018,2.1,5.2
4,Austria,2019,1.5,4.8


In [80]:
# Reshape to one row per country and one column per year, then hand the plain
# array to the clusterer.
# NOTE(review): missing (Country, Year) cells are filled with 0, which the
# clustering then treats as a genuine 0 unemployment value - confirm this is
# intended rather than imputing or dropping incomplete countries.
unemployment_data = (
    EU_un_cleaned
    .pivot(index='Country', columns='Year', values='Unemployment')
    .fillna(0)
    .to_numpy()
)

# Ward-linkage agglomerative clustering of the countries into three groups.
hierarchical_clustering = AgglomerativeClustering(linkage='ward', n_clusters=3)
labels_hierarchical = hierarchical_clustering.fit_predict(unemployment_data)

# Quality check: silhouette score of the resulting partition.
silhouette_avg = silhouette_score(unemployment_data, labels_hierarchical)
print(f"Silhouette Score for Hierarchical Clustering: {silhouette_avg}")

Silhouette Score for Hierarchical Clustering: 0.39511226784796655


In [81]:
def dunn_index(data, labels):
    """Return the Dunn index of a clustering.

    The index is the smallest Euclidean distance between two points that lie
    in different clusters (minimum separation) divided by the largest
    Euclidean distance between two points that share a cluster (maximum
    cluster diameter). Larger values mean tighter, better separated clusters.

    Parameters
    ----------
    data : array-like of shape (n_samples, n_features)
        Observations, one row per sample.
    labels : array-like of shape (n_samples,)
        Cluster label assigned to each row of ``data``.

    Returns
    -------
    float
        ``min_separation / max_diameter``. When every cluster has zero
        diameter (e.g. all clusters are single points) the result is ``inf``
        if the clusters are apart and ``nan`` if they also coincide.

    Raises
    ------
    ValueError
        If ``labels`` contains fewer than two distinct clusters, since no
        inter-cluster distance exists in that case.
    """
    points = np.asarray(data, dtype=float)
    labels = np.asarray(labels)

    cluster_ids = np.unique(labels)
    if cluster_ids.size < 2:
        raise ValueError(
            "dunn_index needs at least two clusters, "
            f"got {cluster_ids.size}"
        )

    # A single pairwise matrix serves both the diameter and the separation
    # look-ups, so distances are computed once instead of once per cluster pair.
    distances = squareform(pdist(points))
    masks = [labels == cluster_id for cluster_id in cluster_ids]

    # Diameter of a cluster = largest distance between two of its members
    # (0 for a single-point cluster, because the diagonal is 0).
    diameters = [distances[mask][:, mask].max() for mask in masks]

    # Separation of a cluster pair = smallest distance between their members.
    separations = [
        distances[masks[i]][:, masks[j]].min()
        for i in range(len(masks))
        for j in range(i + 1, len(masks))
    ]

    max_diameter = np.max(diameters)
    min_separation = np.min(separations)

    if max_diameter == 0:
        # Avoid a divide-by-zero RuntimeWarning while keeping the values the
        # plain division would have produced.
        return np.inf if min_separation > 0 else np.nan
    return min_separation / max_diameter

In [82]:
# Treat every merged row as one 2-D point: (Inflation, Unemployment).
joint_data = EU_in_un.loc[:, ['Inflation', 'Unemployment']].to_numpy()

# Three-cluster K-Means; random_state is fixed so the run is repeatable.
kmeans = SKLearnKMeans(random_state=42, n_clusters=3)
labels_kmeans = kmeans.fit_predict(joint_data)

# Score the resulting partition with the Dunn index.
dunn_index_value = dunn_index(joint_data, labels_kmeans)
print(f"Dunn Index for K-Means Clustering: {dunn_index_value}")


Dunn Index for K-Means Clustering: 0.021939030086455955
