Defining Class and Methods:

In [69]:
import numpy as np

# Matrix class with required methods
class matrix:
    def __init__(self, file_path=None):
        self.array_2d = None
        if file_path:
            self.load_from_csv(file_path)

    def load_from_csv(self, file_path):
        """Load data from a CSV file into the matrix."""
        self.array_2d = np.loadtxt(file_path, delimiter=',')

    def standardise(self):
        """Standardise the matrix data."""
        mean = np.mean(self.array_2d, axis=0)
        max_val = np.max(self.array_2d, axis=0)
        min_val = np.min(self.array_2d, axis=0)
        self.array_2d = (self.array_2d - mean) / (max_val - min_val)

    def get_distance(self, other_matrix, row_i):
        """Compute Euclidean distance from a specific row."""
        return np.sqrt(np.sum((self.array_2d[row_i] - other_matrix.array_2d) ** 2, axis=1))

    def get_weighted_distance(self, other_matrix, weights, row_i):
        """Compute Weighted Euclidean distance from a specific row."""
        print(weights)
        diff = self.array_2d[row_i] - other_matrix.array_2d
        return np.sqrt(np.sum(weights * (diff ** 2), axis=1))

    def get_count_frequency(self):
        """Return the frequency of each element in the matrix."""
        unique, counts = np.unique(self.array_2d, return_counts=True)
        return dict(zip(unique, counts))

  if np.all(S == new_S):


K=2, Frequency: {-0.538244306915821: 1, -0.5185836782968641: 1, -0.49145985101041223: 1, -0.48413384368440493: 2, -0.4804708400214012: 1, -0.4694818290323902: 3, -0.45850225877447015: 1, -0.4584928180433792: 1, -0.4534870205346765: 1, -0.4374602501589995: 1, -0.4365147960653573: 1, -0.42757442372292376: 1, -0.41859232563069754: 2, -0.41858367829686394: 1, -0.418199777750339: 1, -0.4172651453724082: 1, -0.4145367740873353: 1, -0.4121077101898489: 1, -0.4080573625073905: 1, -0.4054257835600217: 1, -0.40354776309832435: 2, -0.3948663308795041: 1, -0.38889574844630964: 1, -0.3881702749611763: 1, -0.3863373103208618: 1, -0.385232744783306: 3, -0.37910999408633755: 1, -0.37790673745729864: 2, -0.374243733794295: 1, -0.3725233048594623: 1, -0.37058073013129134: 1, -0.3693687307269702: 2, -0.36595209934949574: 1, -0.3657187536198309: 1, -0.36332052040212737: 1, -0.363254722805284: 2, -0.3619885520457919: 5, -0.36038357225881446: 1, -0.3595917191422804: 1, -0.3580573625073902: 1, -0.35642612509

Defining Functions:

In [None]:
# Additional functions needed for clustering
def get_initial_weights(m):
    """Generate initial random weights that sum to 1."""
    weights = np.random.rand(m)
    return weights / np.sum(weights)


def get_centroids(data, S, K):
    """Compute centroids for clusters."""
    centroids = np.zeros((K, data.array_2d.shape[1]))
    for k in range(K):
        rows_in_cluster = data.array_2d[S == k]
        if len(rows_in_cluster) > 0:
            centroids[k] = np.mean(rows_in_cluster, axis=0)
    return centroids


def get_separation_within(data, centroids, S, K):
    """Calculate separation within clusters."""
    a = np.zeros(data.array_2d.shape[1])
    for j in range(data.array_2d.shape[1]):
        for k in range(K):
            rows_in_cluster = data.array_2d[S == k]
            a[j] += np.sum(np.linalg.norm(rows_in_cluster[:, j] - centroids[k, j]))
    return a


def get_separation_between(data, centroids, S, K):
    """Calculate separation between clusters."""
    b = np.zeros(data.array_2d.shape[1])
    for j in range(data.array_2d.shape[1]):
        for k in range(K):
            count_k = np.sum(S == k)
            b[j] += count_k * np.linalg.norm(centroids[k, j] - np.mean(data.array_2d[:, j]))
    return b


def get_groups(data, K):
    """Assign groups based on the nearest centroids."""
    S = np.zeros(data.array_2d.shape[0], dtype=int)
    centroids = data.array_2d[np.random.choice(data.array_2d.shape[0], K, replace=False)]
    while True:
        new_S = np.array([np.argmin(np.linalg.norm(data.array_2d - c, axis=1)) for c in centroids])
        if np.all(S == new_S):
            break
        S = new_S
    return S


def get_new_weights(data, centroids, weights, S, K):
    """Update the weights vector."""
    a = get_separation_within(data, centroids, S, K)
    b = get_separation_between(data, centroids, S, K)
    new_weights = 0.5 * (weights + (b / a) / np.sum(b / a))
    return new_weights

Run Test for get count frequency with get groups:

In [None]:
# Test function with file path input
def run_test(file_path):
    """Run tests using a custom file path."""
    m = matrix(file_path)
    m.standardise()
    for k in range(2, 11):
        for i in range(20):
            S = get_groups(m, k)
            print(f'K={k}, Frequency: {m.get_count_frequency()}')


# Run the code by providing a file path interactively
file_path = r"C:\\Users\\rohit\\Desktop\\anubavam\\Data (2).csv"

# Call the test function
run_test(file_path)

Run Test for Orginal Matrix and standardized matrix:

In [70]:
m = matrix(file_path)
print("Original Matrix:")
print(m.array_2d)

m.standardise()
print("Standardized Matrix:")
print(m.array_2d)

Original Matrix:
[[1.423e+01 1.710e+00 2.430e+00 ... 1.040e+00 3.920e+00 1.065e+03]
 [1.320e+01 1.780e+00 2.140e+00 ... 1.050e+00 3.400e+00 1.050e+03]
 [1.316e+01 2.360e+00 2.670e+00 ... 1.030e+00 3.170e+00 1.185e+03]
 ...
 [1.327e+01 4.280e+00 2.260e+00 ... 5.900e-01 1.560e+00 8.350e+02]
 [1.317e+01 2.590e+00 2.370e+00 ... 6.000e-01 1.620e+00 8.400e+02]
 [1.413e+01 4.100e+00 2.740e+00 ... 6.100e-01 1.600e+00 5.600e+02]]
Standardized Matrix:
[[ 0.32352158 -0.12378425  0.03394821 ...  0.06711428  0.47923612
   0.22689497]
 [ 0.05246895 -0.10995026 -0.12113201 ...  0.07524436  0.28875993
   0.21619596]
 [ 0.04194264  0.00467425  0.16229045 ...  0.0589842   0.20451084
   0.31248698]
 ...
 [ 0.07089001  0.38412089 -0.05696088 ... -0.29873938 -0.38523274
   0.06284361]
 [ 0.04457422  0.05012879  0.00186264 ... -0.2906093  -0.36325472
   0.06640994]
 [ 0.2972058   0.34854776  0.19972361 ... -0.28247922 -0.37058073
  -0.13330475]]


Run Test for Get Distance using Euclidean Distance:

In [71]:
other_matrix = matrix()
other_matrix.array_2d = m.array_2d[:3]  # Subset for testing
row_index = 10
distance = m.get_distance(other_matrix, row_index)
print(f"Euclidean Distance from row {row_index}: {distance}")

Euclidean Distance from row 10: [0.55781967 0.70745246 0.48441441]


Run Test for Get Initial Weights:

In [92]:
num_features = m.array_2d.shape[1]
weights = get_initial_weights(num_features)
print(f"Initial Weights: {weights}, Sum: {np.sum(weights)}")

Initial Weights: [0.11367353 0.085919   0.03663466 0.08346334 0.12550699 0.06688809
 0.04894353 0.05177439 0.10482383 0.0335841  0.12239446 0.06093962
 0.06545445], Sum: 1.0


Run Test for Get Weighted Distance:

In [72]:
weighted_distance = m.get_weighted_distance(other_matrix, weights, row_index)
print(f"Weighted Euclidean Distance from row {row_index}: {weighted_distance}")

[0.01439821 0.11993677 0.02086905 0.04420468 0.10294545 0.0020436
 0.14802217 0.16518933 0.15250165 0.0552025  0.15618935 0.01198203
 0.00651521]
Weighted Euclidean Distance from row 10: [0.12821384 0.18556395 0.11869475]


Run Test for Get count Frequency:

In [73]:
frequency = m.get_count_frequency()
print(f"Frequency of each element: {frequency}")

Frequency of each element: {-0.538244306915821: 1, -0.5185836782968641: 1, -0.49145985101041223: 1, -0.48413384368440493: 2, -0.4804708400214012: 1, -0.4694818290323902: 3, -0.45850225877447015: 1, -0.4584928180433792: 1, -0.4534870205346765: 1, -0.4374602501589995: 1, -0.4365147960653573: 1, -0.42757442372292376: 1, -0.41859232563069754: 2, -0.41858367829686394: 1, -0.418199777750339: 1, -0.4172651453724082: 1, -0.4145367740873353: 1, -0.4121077101898489: 1, -0.4080573625073905: 1, -0.4054257835600217: 1, -0.40354776309832435: 2, -0.3948663308795041: 1, -0.38889574844630964: 1, -0.3881702749611763: 1, -0.3863373103208618: 1, -0.385232744783306: 3, -0.37910999408633755: 1, -0.37790673745729864: 2, -0.374243733794295: 1, -0.3725233048594623: 1, -0.37058073013129134: 1, -0.3693687307269702: 2, -0.36595209934949574: 1, -0.3657187536198309: 1, -0.36332052040212737: 1, -0.363254722805284: 2, -0.3619885520457919: 5, -0.36038357225881446: 1, -0.3595917191422804: 1, -0.3580573625073902: 1, -0.

Run Test for Get Groups:

In [93]:
K= 2
S = get_groups(m, K)
print(f"Cluster assignments: {S}")

Cluster assignments: [136 159]


  if np.all(S == new_S):


Run Test for Get Centroids:

In [89]:
K = 3  # Number of clusters
S = np.random.randint(0, K, size=m.array_2d.shape[0])  # Random cluster assignments
centroids = get_centroids(m, S, K)
print(f"Centroids for {K} clusters: {centroids}")

Centroids for 3 clusters: [[ 0.00254414 -0.03451265  0.04487258  0.01130628  0.01445495  0.04212874
   0.04729091  0.00108422  0.012515   -0.01932751  0.03628966  0.06170603
   0.04326953]
 [ 0.00990201  0.02415972 -0.03163018 -0.02361349  0.01177161  0.0046497
  -0.0066562  -0.02402099  0.02141106 -0.00125909 -0.01375864 -0.01526937
  -0.02292359]
 [-0.01455891  0.02036827 -0.02623844  0.01087312 -0.03299664 -0.06302049
  -0.05746981  0.02535884 -0.04110746  0.02793518 -0.03443203 -0.06762878
  -0.03376907]]


Run Test for Separation Within and Separation Between:

In [76]:
separation_within = get_separation_within(m, centroids, S, K)
print(f"Separation within clusters: {separation_within}")

separation_between = get_separation_between(m, centroids, S, K)
print(f"Separation between clusters: {separation_between}")

Separation within clusters: [4.88544432 5.00637855 3.30023688 3.91529724 3.56232188 4.93579352
 4.82393599 5.37036482 4.14864092 4.54428503 4.24705338 5.93555807
 5.11022743]
Separation between clusters: [3.60470136 3.07549851 3.31562819 3.76479787 2.17269174 3.53758233
 1.80412933 3.67246131 0.98210045 1.46239982 3.27638622 2.67209944
 5.07314591]


Run Test for Get New weights:

In [84]:
new_weights = get_new_weights(m, centroids, weights, S, K)
print(f"Updated Weights: {new_weights}")

Updated Weights: [0.05662783 0.07664442 0.08590753 0.13793147 0.05429476 0.09918596
 0.08286838 0.10949027 0.03373413 0.07636458 0.04579463 0.07615277
 0.06500326]
