In [1]:
import json
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

In [3]:
def normalize_data(data):
    """Standardize ``data`` with a freshly fitted ``StandardScaler``.

    A new scaler is created on every call, fitted on ``data`` and applied to
    that same data in one step; the fitted scaler object is not kept.

    Args:
        data: Array-like input accepted by ``StandardScaler.fit_transform``.

    Returns:
        The transformed data exactly as returned by ``fit_transform``.

    Note:
        Swap in ``MinMaxScaler()`` if [0, 1] scaling is needed instead.
    """
    return StandardScaler().fit_transform(data)

In [5]:
def load_json(file_path):
    """Load visual-field sessions from a JSON file into a 2-D NumPy array.

    The file must contain a top-level ``'data'`` object keyed by patient id.
    Each patient may have an ``'R'`` and/or an ``'L'`` entry holding an
    iterable of sessions, and every session must provide an ``'hvf'`` matrix.
    Each matrix is flattened to a 1-D vector and becomes one row of the
    result.  Rows are ordered by patient (file order), then right eye before
    left eye, then session order within the eye.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        np.ndarray: One row per session; the shape is also printed.

    Raises:
        ValueError: If the flattened ``'hvf'`` matrices do not all have the
            same length, so they cannot form a rectangular array.
        KeyError: If ``'data'`` or a session's ``'hvf'`` key is missing.
    """
    # JSON text is UTF-8 by specification; do not depend on the platform's
    # default encoding, which differs between operating systems and locales.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)['data']

    # Extract visual field matrices from all patients and both eyes
    vectors = [
        np.array(session['hvf']).flatten()  # Flatten each matrix to a 1D vector
        for patient_id in data
        for eye in ['R', 'L']  # Process both right and left eyes
        if eye in data[patient_id]  # Check if data for the eye exists
        for session in data[patient_id][eye]  # Extract sessions for the given eye
    ]

    # Fail early with a clear message instead of letting NumPy build an
    # object array (older versions) or raise an opaque shape error (newer).
    lengths = {vector.size for vector in vectors}
    if len(lengths) > 1:
        raise ValueError(
            "Inconsistent 'hvf' sizes across sessions: "
            f"found flattened lengths {sorted(lengths)}"
        )

    # Convert the list of vectors to a NumPy array
    X = np.array(vectors)

    print(f"Loaded dataset with shape: {X.shape}")  # Log the shape of the dataset
    return X

In [7]:
def vector_quantization(data, n_clusters=10, random_state=42):
    """Quantize ``data`` with k-means and return the labels and the codebook.

    Args:
        data: Samples to cluster, passed directly to ``KMeans``.
        n_clusters: Number of clusters (codebook entries) to fit.
        random_state: Seed forwarded to ``KMeans``; the default of 42 keeps
            the previously hard-coded value so existing callers get the same
            reproducible behavior.

    Returns:
        tuple: ``(quantized_data, codebook)`` where ``quantized_data`` is the
        cluster label of each input sample and ``codebook`` is the fitted
        model's ``cluster_centers_``.
    """
    # Initialize KMeans with a fixed random state for reproducibility
    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)

    # fit_predict trains the model and returns the training-set labels in one
    # call, avoiding a second assignment pass over the same data.
    quantized_data = kmeans.fit_predict(data)

    return quantized_data, kmeans.cluster_centers_

In [9]:
if __name__ == "__main__":
    # End-to-end run: load -> normalize -> quantize -> report.
    n_codewords = 10  # number of clusters requested for the codebook

    # Read the visual field vectors from the JSON file
    data = load_json('alldata.json')

    # Rescale the data before clustering
    normalized_data = normalize_data(data)

    # Cluster the rescaled vectors; labels are the discrete representation
    quantized_data, codebook = vector_quantization(
        normalized_data,
        n_clusters=n_codewords,
    )

    # Report both results, labels first and then the cluster centers
    for heading, value in (
        ("Discrete Data (Cluster Labels):", quantized_data),
        ("Codebook (Cluster Centers):", codebook),
    ):
        print(heading, value)


Loaded dataset with shape: (28943, 72)
Discrete Data (Cluster Labels): [1 6 6 ... 8 1 6]
Codebook (Cluster Centers): [[ 0.00000000e+00  0.00000000e+00  0.00000000e+00 -8.76879362e-01
  -9.37866991e-01 -1.28523376e+00 -1.46841779e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00 -6.78131690e-01
  -5.64198753e-01 -7.14950820e-01 -1.18730253e+00 -1.42159622e+00
  -1.57793628e+00  0.00000000e+00  0.00000000e+00 -6.38784974e-01
  -4.31068266e-01 -3.98096872e-01 -4.74531677e-01 -1.02523127e+00
  -1.21360942e+00 -1.58176109e+00 -1.90314755e+00 -6.69387528e-01
  -5.23712517e-01 -3.73452183e-01 -3.06530675e-01 -5.05406503e-01
  -1.23576226e+00 -1.33758229e+00 -1.18745749e+00 -2.08132974e+00
  -7.05867033e-01 -5.25695850e-01 -3.24917299e-01 -3.44677284e-01
  -6.32810412e-01 -1.39871554e+00 -1.59065790e+00 -2.23329632e-01
  -2.04627235e+00  0.00000000e+00 -6.40196090e-01 -3.45667237e-01
  -3.49658128e-01 -4.50377441e-01 -1.05556048e+00 -1.20955644e+00
  -1.58264677e+00 -1.8837