EXP-05 (kmean_1d)

In [None]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt

def euclidean_distance(point1, point2):
    """Return the distance between two 1D points (magnitude of their difference)."""
    gap = point1 - point2
    return np.absolute(gap)

def k_means_clustering_1d(data, k, max_iterations=100):
    """Perform K-means clustering on 1D data.

    The initial centers are ``k`` distinct positions of ``data`` chosen with
    ``random.sample``. Each round assigns every point to its nearest center
    and then moves each center to the mean of its members; the loop stops
    when the centers no longer change or after ``max_iterations`` rounds.
    The initial centers and the per-iteration distances (rounded to two
    decimals for display only) are printed.

    Args:
        data: 1D NumPy array of numeric values.
        k: Number of clusters; must satisfy ``1 <= k <= len(data)``.
        max_iterations: Maximum number of assign/update rounds (at least 1).

    Returns:
        A list of ``k`` NumPy arrays; entry ``i`` holds the points assigned
        to cluster ``i``. An entry is empty when no point was closest to
        that center.

    Raises:
        ValueError: If ``k`` or ``max_iterations`` is out of range.
    """
    n_points = data.shape[0]
    if not 1 <= k <= n_points:
        raise ValueError(f"k must be between 1 and {n_points}, got {k}")
    if max_iterations < 1:
        raise ValueError(f"max_iterations must be at least 1, got {max_iterations}")

    initial_indices = random.sample(range(n_points), k)
    centers = data[initial_indices]

    print(f"Initial cluster centers (randomly selected): {centers}")

    # Store distances for each iteration
    all_distances = []

    for _ in range(max_iterations):
        # (3) Assign each object to the closest cluster.
        # Row i of the matrix holds the distances from point i to every center.
        distance_matrix = np.abs(data[:, np.newaxis] - centers[np.newaxis, :])
        # Use the exact distances here: rounding first would create
        # artificial ties between centers that are very close together.
        assignments = np.argmin(distance_matrix, axis=1)

        # Rounded copies are kept purely for the printed report below.
        all_distances.append(
            [(point, list(np.round(row, 2))) for point, row in zip(data, distance_matrix)]
        )

        # (4) Update the cluster means
        new_centers = []
        for j in range(k):
            members = data[assignments == j]
            # An empty cluster keeps its previous center; taking the mean of
            # nothing would give NaN and poison every later assignment.
            new_centers.append(members.mean() if len(members) else centers[j])
        new_centers = np.array(new_centers)

        # Check for convergence
        if np.array_equal(centers, new_centers):
            break
        centers = new_centers

    # Create final clusters
    clusters = [data[assignments == j] for j in range(k)]

    # Print distances for each iteration
    for iter_idx, distances in enumerate(all_distances):
        print(f"\nIteration {iter_idx + 1} distances:")
        for point, distance_values in distances:
            print(f"Distances from point {point} to centers: {distance_values}")

    return clusters

def plot_clusters_1d(data, clusters):
    """Draw every 1D cluster as colored markers along a single horizontal line.

    ``data`` is accepted for call compatibility but is not read; only
    ``clusters`` (an iterable of 1D arrays) is plotted.
    """
    palette = ['red', 'green', 'blue', 'orange', 'purple', 'cyan']
    plt.figure(figsize=(8, 4))
    for cluster_id, members in enumerate(clusters):
        # The palette wraps around when there are more clusters than colors.
        marker_color = palette[cluster_id % len(palette)]
        # Every point is drawn at y = 0 so only the feature value varies.
        baseline = np.zeros_like(members)
        plt.scatter(members, baseline, color=marker_color, label=f'Cluster {cluster_id}')
    plt.title('1D K-means Clustering')
    plt.xlabel('Feature Value')
    plt.yticks([])  # The y-axis carries no information, so hide its ticks
    plt.legend()
    plt.grid()
    plt.show()

# Script entry point: cluster the 1D sample file, report and plot the result
if __name__ == "__main__":
    # Read the CSV, then collapse every cell into one flat 1D array
    data = pd.read_csv('kmean_1d.csv').values
    data = data.flatten()

    k = 3  # Number of clusters
    clusters = k_means_clustering_1d(data, k)

    # Report the members of each cluster
    for label, members in enumerate(clusters):
        print(f"Cluster {label}: {members}")

    # Show the clusters graphically
    plot_clusters_1d(data, clusters)


EXP-05 (kmean_2d)

In [None]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt

def euclidean_distance(point1, point2):
    """Return the straight-line (L2) distance separating two points."""
    offset = point1 - point2
    squared_total = np.sum(offset ** 2)
    return np.sqrt(squared_total)

def k_means_clustering_2d(data, k, max_iterations=100):
    """Perform K-means clustering on 2D data.

    The initial centers are ``k`` distinct rows of ``data`` chosen with
    ``random.sample``. Each round assigns every row to its nearest center
    (Euclidean distance) and then moves each center to the mean of its
    members; the loop stops when the centers no longer change or after
    ``max_iterations`` rounds. The initial centers and the per-iteration
    distances (rounded to two decimals for display only) are printed.

    Args:
        data: NumPy array whose first axis indexes the points, e.g. shape
            ``(n_points, 2)``.
        k: Number of clusters; must satisfy ``1 <= k <= len(data)``.
        max_iterations: Maximum number of assign/update rounds (at least 1).

    Returns:
        A list of ``k`` NumPy arrays; entry ``i`` holds the rows assigned to
        cluster ``i``. An entry has zero rows when no point was closest to
        that center.

    Raises:
        ValueError: If ``k`` or ``max_iterations`` is out of range.
    """
    n_points = data.shape[0]
    if not 1 <= k <= n_points:
        raise ValueError(f"k must be between 1 and {n_points}, got {k}")
    if max_iterations < 1:
        raise ValueError(f"max_iterations must be at least 1, got {max_iterations}")

    initial_indices = random.sample(range(n_points), k)
    centers = data[initial_indices]

    print(f"Initial cluster centers (randomly selected): {centers}")

    # Flattening each point to a feature vector lets one broadcasted
    # subtraction produce all point-to-center offsets at once.
    flat_points = data.reshape(n_points, -1)

    # Store distances for each iteration
    all_distances = []

    for _ in range(max_iterations):
        # (3) Assign each object to the closest cluster.
        # Row i of the matrix holds the distances from point i to every center.
        flat_centers = centers.reshape(k, -1)
        offsets = flat_points[:, np.newaxis, :] - flat_centers[np.newaxis, :, :]
        distance_matrix = np.sqrt(np.sum(offsets ** 2, axis=2))
        # Use the exact distances here: rounding first would create
        # artificial ties between centers that are very close together.
        assignments = np.argmin(distance_matrix, axis=1)

        # Rounded copies are kept purely for the printed report below.
        all_distances.append(
            [(point, list(np.round(row, 2))) for point, row in zip(data, distance_matrix)]
        )

        # (4) Update the cluster means
        new_centers = []
        for j in range(k):
            members = data[assignments == j]
            # An empty cluster keeps its previous center; taking the mean of
            # nothing would give NaN and poison every later assignment.
            new_centers.append(members.mean(axis=0) if len(members) else centers[j])
        new_centers = np.array(new_centers)

        # Check for convergence
        if np.array_equal(centers, new_centers):
            break
        centers = new_centers

    # Create final clusters
    clusters = [data[assignments == j] for j in range(k)]

    # Print distances for each iteration
    for iter_idx, distances in enumerate(all_distances):
        print(f"\nIteration {iter_idx + 1} distances:")
        for point, distance_values in distances:
            print(f"Distances from point {point} to centers: {distance_values}")

    return clusters

def plot_clusters(data, clusters):
    """Scatter-plot each 2D cluster in its own color.

    ``data`` is accepted for call compatibility but is not read; only
    ``clusters`` (an iterable of arrays with at least two columns) is plotted.
    """
    palette = ['red', 'green', 'blue', 'orange', 'purple', 'cyan']
    plt.figure(figsize=(8, 6))
    for cluster_id, members in enumerate(clusters):
        # The palette wraps around when there are more clusters than colors.
        marker_color = palette[cluster_id % len(palette)]
        # Column 0 goes on the x-axis and column 1 on the y-axis.
        xs = members[:, 0]
        ys = members[:, 1]
        plt.scatter(xs, ys, color=marker_color, label=f'Cluster {cluster_id}')
    plt.title('2D K-means Clustering')
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.legend()
    plt.grid()
    plt.show()

# Script entry point: cluster the 2D sample file, report and plot the result
if __name__ == "__main__":
    # Read the CSV; the underlying array keeps one row per point
    data = pd.read_csv('kmean_2d.csv').values

    k = 3  # Number of clusters
    clusters = k_means_clustering_2d(data, k)

    # Report the members of each cluster
    for label, members in enumerate(clusters):
        print(f"Cluster {label}: {members}")

    # Show the clusters graphically
    plot_clusters(data, clusters)
