# In [1]:
# Course code: CSE381
# Course Name: Introduction to Machine Learning
# Instructor: Dr. Mahmoud Khalil
# TA: Eng. Engy Ahmed Hassan
# Lab Assignment 1

# Task:
# Implement the BSAS algorithm in Python. The output should be a JSON-formatted list where each element represents a cluster containing:
# - "Cluster": The cluster number.
# - "Members": A list of data points in the cluster.
# - "Mean": The mean value of the cluster.

import numpy as np
import json
# NOTE: To test this code with different data, edit the `data_points` list below.
# Input data points (processed sequentially, in the order provided)
data_points = [
    (2, 3), (5, 4), (9, 2), (2, 5), (1, 4),
    (6, 4), (5, 3), (2, 2), (3, 3), (8, 2),
    (2, 4), (10, 2), (11, 2), (10, 3), (9, 1)
]

# Parameters
Theta = 2.5  # Distance threshold: a point farther than this from every representative may start a new cluster
q = 15  # Maximum number of clusters that may be created


def bsas(points, theta, max_clusters):
    """Cluster *points* with the Basic Sequential Algorithmic Scheme (BSAS).

    Points are visited once, in order. The first point always starts a
    cluster. Every later point is compared (Euclidean distance) against the
    current cluster representatives; it starts a new cluster when its
    distance to the nearest representative exceeds ``theta`` and fewer than
    ``max_clusters`` clusters exist, otherwise it joins the nearest cluster,
    whose representative is then recomputed as the mean of its members.

    Args:
        points: Iterable of equal-length coordinate sequences (e.g. tuples).
        theta: Distance threshold above which a new cluster is started.
        max_clusters: Upper bound on the number of clusters created after
            the first one has been seeded.

    Returns:
        A ``(clusters, representatives)`` pair. ``clusters`` is a list of
        lists holding the original point objects; ``representatives`` is a
        parallel list of NumPy arrays (one per cluster).
    """
    clusters = []  # Members of each cluster, in insertion order
    representatives = []  # One representative (centroid) per cluster

    for point in points:
        # Convert once per point instead of once per representative.
        vector = np.array(point)

        if not clusters:  # The first data point seeds the first cluster
            clusters.append([point])
            representatives.append(vector)
            continue

        # Distance from the point to every existing representative
        distances = [np.linalg.norm(vector - rep) for rep in representatives]
        min_distance = min(distances)
        # index() returns the first match, so ties go to the oldest cluster
        closest_cluster_idx = distances.index(min_distance)

        if min_distance > theta and len(clusters) < max_clusters:
            # Too far from everything and still room: open a new cluster
            clusters.append([point])
            representatives.append(vector)
        else:
            # Join the closest cluster and refresh its mean representative
            clusters[closest_cluster_idx].append(point)
            members = np.array(clusters[closest_cluster_idx])
            representatives[closest_cluster_idx] = members.mean(axis=0)

    return clusters, representatives


# Run BSAS on the configured input; later cells/sections read these names.
clusters, representatives = bsas(data_points, Theta, q)

def summarize_clusters(clusters):
    """Build the JSON-ready summary for each cluster.

    Args:
        clusters: List of clusters, each a non-empty list of equal-length
            coordinate sequences.

    Returns:
        A list of dicts with keys ``"Cluster"`` (1-based cluster number),
        ``"Members"`` (points as lists of floats) and ``"Mean"`` (per-axis
        mean rounded to 2 decimals). Works for any number of coordinates
        rather than assuming exactly two.
    """
    summary = []
    for number, members in enumerate(clusters, start=1):
        mean = np.array(members).mean(axis=0).tolist()
        summary.append({
            "Cluster": number,
            "Members": [[float(coord) for coord in member] for member in members],
            "Mean": [round(value, 2) for value in mean],
        })
    return summary


def _format_cluster(entry):
    """Render one cluster summary as an indented JSON object (no trailing comma)."""
    return (
        "    {\n"
        f"        \"Cluster\": {entry['Cluster']},\n"
        f"        \"Members\": {json.dumps(entry['Members'])},\n"
        f"        \"Mean\": {json.dumps(entry['Mean'])}\n"
        "    }"
    )


def format_clusters(entries):
    """Render cluster summaries as a JSON list with one compact line per field.

    Args:
        entries: List of dicts as produced by ``summarize_clusters``.

    Returns:
        The JSON text. Objects are separated by ``",\\n"``; an empty list
        renders as ``"[\\n]"``.
    """
    blocks = [_format_cluster(entry) for entry in entries]
    # Joining puts commas only *between* objects, so nothing has to be
    # stripped afterwards; the newline before "]" is needed only when
    # at least one object was written.
    return "[\n" + ",\n".join(blocks) + ("\n" if blocks else "") + "]"


# Prepare the JSON-formatted output
output = summarize_clusters(clusters)

# Format the output exactly as required
formatted_output = format_clusters(output)

# Print the formatted output
print(formatted_output)


[
    {
        "Cluster": 1,
        "Members": [[2.0, 3.0], [2.0, 5.0], [1.0, 4.0], [2.0, 2.0], [3.0, 3.0], [2.0, 4.0]],
        "Mean": [2.0, 3.5]
    },
    {
        "Cluster": 2,
        "Members": [[5.0, 4.0], [6.0, 4.0], [5.0, 3.0]],
        "Mean": [5.33, 3.67]
    },
    {
        "Cluster": 3,
        "Members": [[9.0, 2.0], [8.0, 2.0], [10.0, 2.0], [11.0, 2.0], [10.0, 3.0], [9.0, 1.0]],
        "Mean": [9.5, 2.0]
    }
]
