# Gerekli Kütüphaneler

In [1]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn import metrics

# Single seed shared by every random number generator used in the notebook,
# so that a fresh "Restart & Run All" reproduces the same data and results.
seed = 42
random.seed(seed)
np.random.seed(seed)

# Veri Seti

In [2]:
# Synthetic data set: 100 two-dimensional points generated around 3 centers.
# `y` holds the generating center of each point and is only used for scoring.
X, y = make_blobs(
    n_samples=100,
    n_features=2,
    centers=3,
    random_state=seed,
)

# Agglomerative Clustering

In [3]:
class AgglomerativeClustering:
    """Bottom-up (agglomerative) hierarchical clustering on Euclidean distances.

    Every sample starts as its own cluster; the two closest clusters are then
    merged repeatedly until only ``n_clusters`` clusters remain.

    Parameters
    ----------
    n_clusters : int, default=2
        Number of clusters at which merging stops.
    linkage : {"single", "complete", "average"}, default="single"
        How the distance between two clusters is derived from the pairwise
        sample distances: their minimum, maximum or mean, respectively.

    Attributes
    ----------
    labels_ : numpy.ndarray of int, shape (n_samples,), or None
        Cluster index of every sample; ``None`` until ``fit`` has been called.
    """

    _LINKAGES = ("single", "complete", "average")

    def __init__(self, n_clusters=2, linkage="single"):
        # Number of clusters
        self.n_clusters = n_clusters
        # Linkage -> ['single', 'complete', 'average']
        self.linkage = linkage
        # Labels (filled in by fit)
        self.labels_ = None

    def fit(self, X):
        """Cluster the rows of ``X`` and store the result in ``labels_``.

        Parameters
        ----------
        X : array-like, shape (n_samples, ...)
            One sample per entry along the first axis. Any trailing axes are
            flattened into a single feature vector per sample.

        Returns
        -------
        None
            The labels are stored on the instance as ``labels_``.

        Raises
        ------
        ValueError
            If ``linkage`` is not a supported name, ``n_clusters`` is smaller
            than 1, or ``X`` contains NaN or infinite values.
        """
        if self.linkage not in self._LINKAGES:
            raise ValueError(
                f"Unknown linkage {self.linkage!r}; expected one of {self._LINKAGES}"
            )
        if self.n_clusters < 1:
            raise ValueError("n_clusters must be at least 1")

        X = np.asarray(X, dtype=float)
        n_samples = X.shape[0]
        if n_samples == 0:
            self.labels_ = np.zeros(0, dtype=int)
            return
        if not np.all(np.isfinite(X)):
            raise ValueError("X must contain only finite values")
        X = X.reshape(n_samples, -1)

        # Every sample starts out as its own cluster, keyed by its index.
        # A merged cluster keeps the smaller key of the pair.
        members = {i: [i] for i in range(n_samples)}
        sizes = np.ones(n_samples)

        # Cluster-to-cluster distances. Row/column k always describes the
        # cluster whose key is k. The diagonal and the rows/columns of
        # absorbed clusters are set to +inf so argmin can never select them.
        diffs = X[:, np.newaxis, :] - X[np.newaxis, :, :]
        cluster_dist = np.sqrt(np.sum(diffs ** 2, axis=-1))
        np.fill_diagonal(cluster_dist, np.inf)

        while len(members) > self.n_clusters:
            # Closest pair of clusters. The matrix is symmetric and argmin
            # returns the first minimum in row-major order, so keep < absorb.
            keep, absorb = divmod(int(np.argmin(cluster_dist)), n_samples)

            row_keep = cluster_dist[keep]
            row_absorb = cluster_dist[absorb]

            # Distance from the merged cluster to every other cluster,
            # computed from the two old rows according to the linkage.
            if self.linkage == "single":
                merged = np.minimum(row_keep, row_absorb)
            elif self.linkage == "complete":
                merged = np.maximum(row_keep, row_absorb)
            else:
                # The mean over all sample pairs equals the size-weighted
                # mean of the two clusters' previous average distances.
                merged = (sizes[keep] * row_keep + sizes[absorb] * row_absorb) / (
                    sizes[keep] + sizes[absorb]
                )
            merged[keep] = np.inf
            merged[absorb] = np.inf

            # Update the distance matrix: overwrite the surviving cluster's
            # row/column and retire the absorbed cluster's row/column.
            cluster_dist[keep, :] = merged
            cluster_dist[:, keep] = merged
            cluster_dist[absorb, :] = np.inf
            cluster_dist[:, absorb] = np.inf

            # Merge the two clusters
            sizes[keep] += sizes[absorb]
            members[keep].extend(members.pop(absorb))

        # Label the clusters 0..k-1 in ascending order of their surviving key
        labels = np.zeros(n_samples, dtype=int)
        for cluster_idx, sample_indices in enumerate(members.values()):
            labels[sample_indices] = cluster_idx

        self.labels_ = labels

# Eğitim

In [4]:
# Fit the hand-written model using complete linkage.
# NOTE(review): the data set was generated with 3 centers but only 2 clusters
# are requested here -- confirm this is intentional.
agg_cluster = AgglomerativeClustering(linkage="complete", n_clusters=2)
agg_cluster.fit(X)

# Sonuçlar

In [5]:
# NOTE(review): despite its name, this variable holds the per-sample cluster
# labels assigned by fit(), not cluster centers. The name is kept because the
# metrics cell below reads it.
cluster_centers = agg_cluster.labels_

In [6]:
# Scores that compare the predicted labels against the generating labels `y`
homogeneity = metrics.homogeneity_score(y, cluster_centers)
print(f"Homogeneity: {homogeneity:.3f}")

completeness = metrics.completeness_score(y, cluster_centers)
print(f"Completeness: {completeness:.3f}")

v_measure = metrics.v_measure_score(y, cluster_centers)
print(f"V-measure: {v_measure:.3f}")

adjusted_rand = metrics.adjusted_rand_score(y, cluster_centers)
print(f"Adjusted Rand Index: {adjusted_rand:.3f}")

adjusted_mutual_info = metrics.adjusted_mutual_info_score(y, cluster_centers)
print(f"Adjusted Mutual Information: {adjusted_mutual_info:.3f}")

# Silhouette needs no ground truth: it is computed from the data and the labels
silhouette = metrics.silhouette_score(X, cluster_centers)
print(f"Silhouette Coefficient: {silhouette:.3f}")

Homogeneity: 0.577
Completeness: 1.000
V-measure: 0.732
Adjusted Rand Index: 0.563
Adjusted Mutual Information: 0.729
Silhouette Coefficient: 0.702
