In [1]:
!pip install --upgrade pytorch-lightning

Collecting pytorch-lightning
  Using cached pytorch_lightning-2.1.3-py3-none-any.whl.metadata (21 kB)
Using cached pytorch_lightning-2.1.3-py3-none-any.whl (777 kB)
Installing collected packages: pytorch-lightning
  Attempting uninstall: pytorch-lightning
    Found existing installation: pytorch-lightning 1.9.0
    Uninstalling pytorch-lightning-1.9.0:
      Successfully uninstalled pytorch-lightning-1.9.0
Successfully installed pytorch-lightning-2.1.3


Perform K-means:
If you get an import error about `utils` or `multimarginal_OT`, change the path passed to `sys.path.append` from '../../' to '../', or vice versa.


In [2]:

import warnings
import sys
warnings.filterwarnings('ignore')
sys.path.append('../')
import json
import numpy as np
from utils.kmeans_utils import perform_kmeans_clustering
from utils.clustering_utils import clusters

def load_json_data(file_path):
    """Read a JSON file and return the parsed content.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON is exchanged as UTF-8; do not depend on the platform's locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

def extract_pressure_matrices(json_data):
    """Collect the pressure matrix of every entry in ``json_data['pressureData']``.

    Returns a list with one new single-key dict ``{"pressureMatrix": ...}`` per
    entry, in the original order; any other keys of the entry are left out.
    """
    return [
        {"pressureMatrix": record["pressureMatrix"]}
        for record in json_data['pressureData']
    ]

def extract_features_from_pressure_matrices(pressure_matrices):
    """Flatten each item's ``"pressureMatrix"`` into a 1-D NumPy array.

    Returns a list holding one flattened array per input item, in input order.
    """
    feature_vectors = []
    for item in pressure_matrices:
        matrix = np.array(item["pressureMatrix"])
        feature_vectors.append(matrix.flatten())
    return feature_vectors

def combine_features(features_posture, features_continuous):
    """Join the two feature collections with the ``+`` operator.

    For Python lists this yields a new list with the posture features
    followed by the continuous features.
    NOTE(review): for NumPy arrays ``+`` adds element-wise instead of
    concatenating -- confirm callers only pass lists.
    """
    combined = features_posture + features_continuous
    return combined

def main():
    """Cluster two continuous recordings with k-means and save the labels.

    Loads the two hard-coded JSON recordings, flattens every pressure matrix,
    runs ``perform_kmeans_clustering`` on each recording, wraps the results in
    ``clusters`` objects, and writes the reference labels and the mapped labels
    of the second recording to ``Posture_Data/Results/KMeans``.
    """
    import os  # local import: this cell's import block does not provide ``os``

    # Load continuous sitting data
    continuous_data = load_json_data('Posture_Data/Data/Mayara/Continuous Data/SensingMatData_240112_154605.json')
    continuous_1 = load_json_data('Posture_Data/Data/Aaron/SensingMatData_231126_230808.json')

    # Extract features from the data
    pressure_matrice_continuous = extract_pressure_matrices(continuous_data)
    pressure_matrice_domain1 = extract_pressure_matrices(continuous_1)

    features_continuous = extract_features_from_pressure_matrices(pressure_matrice_continuous)
    features_1 = extract_features_from_pressure_matrices(pressure_matrice_domain1)
    # Summarise instead of dumping every feature vector into the cell output.
    print("Continuous recording:", len(features_continuous), "feature vectors")

    num_clusters = 7

    # Perform k-means clustering for continuous data of the reference subject.
    # NOTE(review): reshape(-1, 1) turns every single sensor value into its own
    # one-dimensional sample, so one label is produced per value rather than
    # per pressure frame -- confirm this is intended.
    cluster_labels_continuous, _ = perform_kmeans_clustering(
        np.array(features_continuous).reshape(-1, 1), num_clusters)
    cluster_labels_continuous1, _ = perform_kmeans_clustering(
        np.array(features_1).reshape(-1, 1), num_clusters)

    # Create cluster objects for each domain
    continuous_domain = clusters(features_continuous, cluster_labels_continuous, num_clusters)
    domain_1 = clusters(features_1, cluster_labels_continuous1, num_clusters)

    # Cluster data for each domain
    continuous_domain.cluster_data()
    domain_1.cluster_data()

    # Save the results. The path is built with os.path.join: the previous
    # backslash-separated literals relied on invalid escape sequences and, on
    # POSIX systems, named a single file instead of a directory path.
    results_directory = os.path.join('Posture_Data', 'Results', 'KMeans')
    os.makedirs(results_directory, exist_ok=True)
    # NOTE(review): the later cells of this notebook load 'Clusters_M_A.npy' and
    # 'MappedLabels_Continuous_M_A.npy' -- confirm which file names are expected.
    np.save(os.path.join(results_directory, 'Clusters.npy'), cluster_labels_continuous)
    mapped_labels_continuous = domain_1.clusters_mapping(continuous_domain.cluster_tensors)
    np.save(os.path.join(results_directory, 'MappedLabels_Continuous.npy'), mapped_labels_continuous)


if __name__ == "__main__":
    main()



ImportError: cannot import name 'LazyTensor' from 'ot.utils' (/usr/users/detectionpositionassise/ayat_may/.local/lib/python3.8/site-packages/ot/utils.py)

To initialize the atoms, we need pytorch-lightning 1.9 (installed by the following cell) or a different environment. We add the following cell to avoid errors.

In [3]:
!pip install pytorch-lightning==1.9



A powerful machine or a GPU is needed for the atoms initialization.


In [8]:
import torch
import numpy as np
import json
import warnings
import os
import sys
sys.path.append('../../')
from dictionary_learning.weighted_barycenters import compute_barycenters
warnings.filterwarnings('ignore')

def load_json_data(file_path):
    """Parse the JSON file at ``file_path`` and return its content.

    Args:
        file_path: Location of the JSON file.

    Returns:
        The value ``json.load`` builds from the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Explicit UTF-8 keeps the result independent of the platform's default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

def extract_pressure_matrices(json_data):
    """Return one ``{"pressureMatrix": ...}`` dict per entry of ``json_data['pressureData']``.

    Only the ``"pressureMatrix"`` value of each entry is carried over; the
    order of the entries is preserved.
    """
    frames = json_data['pressureData']
    return [{"pressureMatrix": frame["pressureMatrix"]} for frame in frames]

def extract_features_from_pressure_matrices(pressure_matrices):
    """Flatten every item's ``"pressureMatrix"`` and join them into one 1-D array.

    The flattened matrices are concatenated along axis 0 in input order, so the
    result holds all values of all items back to back.
    """
    per_item = []
    for item in pressure_matrices:
        per_item.append(np.array(item["pressureMatrix"]).flatten())
    joined = np.concatenate(per_item, axis=0)
    return joined

def combine_features(features_posture, features_continuous):
    """Return a two-element list: the posture features, then the continuous features.

    The two inputs are stored as-is; they are not merged or copied.
    """
    paired = [features_posture]
    paired.append(features_continuous)
    return paired

def main():
    """Initialise the atoms from the two recordings and save them as ``.npy`` files.

    Loads the two hard-coded JSON recordings and the stored k-means labels,
    one-hot encodes the labels, reshapes the features to one row per label,
    calls ``compute_barycenters`` and writes the first and second element of
    every returned atom to ``Posture_Data/Results/Atoms`` as ``xatom_<i>.npy``
    and ``yatom_<i>.npy``.
    """
    # Load continuous sitting data
    continuous_data = load_json_data('Posture_Data/Data/Mayara/Continuous Data/SensingMatData_240112_154605.json')
    continuous_1 = load_json_data('Posture_Data/Data/Aaron/SensingMatData_231126_230808.json')

    pressure_matrices_continuous = extract_pressure_matrices(continuous_data)
    pressure_1 = extract_pressure_matrices(continuous_1)

    features_continuous = extract_features_from_pressure_matrices(pressure_matrices_continuous)
    features_1 = extract_features_from_pressure_matrices(pressure_1)

    # Load the k-means labels. os.path.join replaces the backslash-separated
    # literals, which relied on invalid escape sequences and did not denote a
    # directory path on POSIX systems.
    labels_directory = os.path.join('Posture_Data', 'Results', 'KMeans')
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
    # only load files from a trusted source.
    Y1 = np.load(os.path.join(labels_directory, 'Clusters_M_A.npy'), allow_pickle=True)
    Y2 = np.load(os.path.join(labels_directory, 'MappedLabels_Continuous_M_A.npy'), allow_pickle=True)

    # Define hyperparameters
    n_classes = 7
    n_samples = 100
    batch_size = 64
    ϵ = 0.01
    η_A = 0.0
    lr = 1e-1
    num_iter_max = 20
    num_iter_dil = 100

    # Prepare data for the barycenter computation
    Ys = [
        torch.nn.functional.one_hot(torch.from_numpy(labels).long(), num_classes=n_classes).float()
        for labels in (Y1, Y2)
    ]

    # One feature row per label.
    # NOTE(review): in the recorded run this gave shapes (29952, 1) and
    # (248320, 1), i.e. one sample per scalar sensor value, and the run then
    # failed allocating 59,501,445,120 bytes = 29952 * 248320 * 8 -- confirm
    # whether one sample per pressure frame was intended.
    features_continuous_tensor = torch.from_numpy(features_continuous).view(Ys[0].shape[0], -1)
    features_1_tensor = torch.from_numpy(features_1).view(Ys[1].shape[0], -1)

    Xs = [features_continuous_tensor, features_1_tensor]
    print("Before fit - Xs shapes:", [x.shape for x in Xs])
    print("Before fit - Ys shapes:", [y.shape for y in Ys])

    # Compute the barycenters
    atoms = compute_barycenters(Xs, Ys, n_samples, batch_size, num_iter_dil,
                                n_classes, ϵ, η_A, lr, num_iter_max)

    # Getting initialized atoms: element 0 of each atom is used as the support,
    # element 1 as the labels.
    XP = [xatom[0] for xatom in atoms]
    YP = [yatom[1] for yatom in atoms]

    print("Before converting types - XP and YP dtypes:", XP[0].dtype, YP[0].dtype)

    XP = [x.to(torch.float32) for x in XP]
    YP = [y.to(torch.float32) for y in YP]

    print("After converting types - XP and YP dtypes:", XP[0].dtype, YP[0].dtype)

    # Create the Results/Atoms directory if it doesn't exist
    results_directory = "Posture_Data/Results/Atoms"
    os.makedirs(results_directory, exist_ok=True)

    # Save atoms as NumPy files. Detach and move to CPU first so the conversion
    # also works for tensors that require grad or live on another device.
    for i, x_value in enumerate(XP):
        np.save(os.path.join(results_directory, f'xatom_{i}.npy'), x_value.detach().cpu().numpy())

    for i, y_value in enumerate(YP):
        np.save(os.path.join(results_directory, f'yatom_{i}.npy'), y_value.detach().cpu().numpy())


if __name__ == "__main__":
    main()

Before fit - Xs shapes: [torch.Size([29952, 1]), torch.Size([248320, 1])]
Before fit - Xs shapes: [torch.Size([29952, 7]), torch.Size([248320, 7])]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name    | Type                 | Params
-------------------------------------------------
0 | loss_fn | JointWassersteinLoss | 0     
-------------------------------------------------
4         Trainable params
0         Non-trainable params
4         Total params
0.000     Total estimated model params size (MB)


Before fit - Xs shapes: [torch.Size([29952, 1]), torch.Size([248320, 1])]
Before fit - Ys shapes: [torch.Size([29952, 7]), torch.Size([248320, 7])]


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: No training batches.


After fit - Xs shapes: [torch.Size([29952, 1]), torch.Size([248320, 1])]
After fit - Ys shapes: [torch.Size([29952, 7]), torch.Size([248320, 7])]
---------------------------------------------------------------------------------------------------------
|        Iteration        |          Loss           |          δLoss          |      Elapsed Time       |
---------------------------------------------------------------------------------------------------------


RuntimeError: [enforce fail at alloc_cpu.cpp:80] data. DefaultCPUAllocator: not enough memory: you tried to allocate 59501445120 bytes.

In [None]:
import json
import os
import sys
import warnings

import numpy as np
import torch

sys.path.append('../../')
warnings.filterwarnings('ignore')

from utils.clustering_utils import clusters
from dictionary_learning.DaDiL_clustering import *
# Now you can use a simple import statement



def load_json_data(file_path):
    """Load the JSON document stored at ``file_path``.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The parsed content as returned by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Read as UTF-8 explicitly instead of using the locale's default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

def extract_pressure_matrices(json_data):
    """Build a list of ``{"pressureMatrix": ...}`` dicts from ``json_data['pressureData']``.

    Every entry contributes one dict containing only its ``"pressureMatrix"``
    value; entry order is kept.
    """
    return list(
        {"pressureMatrix": sample["pressureMatrix"]}
        for sample in json_data['pressureData']
    )

def extract_features_from_pressure_matrices(pressure_matrices):
    """Convert each item's ``"pressureMatrix"`` into a flattened NumPy array.

    Returns a Python list with one 1-D array per item, in input order; the
    arrays are not concatenated.
    """
    flattened = []
    for entry in pressure_matrices:
        as_array = np.array(entry["pressureMatrix"])
        flattened.append(as_array.flatten())
    return flattened

def combine_features(features_posture, features_continuous):
    """Apply ``+`` to the two feature collections and return the result.

    With Python lists the result is a new list: posture features first,
    continuous features after them.
    NOTE(review): NumPy arrays would be added element-wise by ``+`` --
    confirm only lists are passed.
    """
    merged = features_posture + features_continuous
    return merged

def main():
    """Run DaDiL clustering on the two recordings, starting from the saved atoms.

    Loads the two hard-coded JSON recordings and the stored k-means labels,
    one-hot encodes the labels, reshapes the features to one row per label,
    loads the atom files, calls ``dadil_clustering``, wraps the resulting
    labels in ``clusters`` objects and saves the first domain's labels and the
    second domain's mapped labels under ``Results/DaDiL``.
    """
    # Define hyperparameters
    n_classes = 7
    n_samples = 3000
    batch_size = 128
    n_components = 3
    reg = 0.0
    reg_labels = 0.0
    num_iter_max = 100

    # Load continuous sitting data
    continuous_data = load_json_data('Posture_Data/Data/Mayara/Continuous Data/SensingMatData_240112_154605.json')
    continuous_1 = load_json_data('Posture_Data/Data/Aaron/SensingMatData_231126_230808.json')

    pressure_matrices_continuous = extract_pressure_matrices(continuous_data)
    pressure_1 = extract_pressure_matrices(continuous_1)

    # The extractor defined in this cell returns a list of per-frame vectors;
    # torch.from_numpy only accepts an ndarray, so join the vectors first.
    features_continuous = np.concatenate(
        extract_features_from_pressure_matrices(pressure_matrices_continuous), axis=0)
    features_1 = np.concatenate(
        extract_features_from_pressure_matrices(pressure_1), axis=0)

    # Load the k-means labels. os.path.join replaces the backslash-separated
    # literals, which relied on invalid escape sequences and did not denote a
    # directory path on POSIX systems.
    labels_directory = os.path.join('Posture_Data', 'Results', 'KMeans')
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
    # only load files from a trusted source.
    Y1 = np.load(os.path.join(labels_directory, 'Clusters_M_A.npy'), allow_pickle=True)
    Y2 = np.load(os.path.join(labels_directory, 'MappedLabels_Continuous_M_A.npy'), allow_pickle=True)

    # One-hot encode the labels of both domains
    Ys = [
        torch.nn.functional.one_hot(torch.from_numpy(labels).long(), num_classes=n_classes).float()
        for labels in (Y1, Y2)
    ]

    # One feature row per label
    features_continuous_tensor = torch.from_numpy(features_continuous).view(Ys[0].shape[0], -1)
    features_1_tensor = torch.from_numpy(features_1).view(Ys[1].shape[0], -1)

    features = [features_continuous_tensor, features_1_tensor]
    Xs = features
    print("Before fit - Xs shapes:", [x.shape for x in Xs])
    print("Before fit - Ys shapes:", [y.shape for y in Ys])

    # Load XP and YP NumPy files from the directory the atoms-initialisation
    # cell of this notebook writes to.
    atoms_directory = "Posture_Data/Results/Atoms"
    XP = []
    YP = []
    for i in range(len(features)):
        loaded_x = np.load(os.path.join(atoms_directory, f'xatom_{i}.npy'))
        XP.append(torch.tensor(loaded_x))

        loaded_y = np.load(os.path.join(atoms_directory, f'yatom_{i}.npy'))
        YP.append(torch.tensor(loaded_y))

    # Perform the DaDiL clustering
    cluster_labels = dadil_clustering(
        Xs, Ys, XP, YP, n_samples, n_components, reg, reg_labels, batch_size, n_classes, num_iter_max)

    domain_1 = clusters(features[0], cluster_labels[0], n_classes)
    domain_2 = clusters(features[1], cluster_labels[1], n_classes)

    # Cluster data for each domain
    domain_1.cluster_data()
    domain_2.cluster_data()

    # NOTE(review): every other result of this notebook lives under
    # 'Posture_Data/Results' -- confirm that 'Results/DaDiL' is the intended
    # location.
    dadil_directory = 'Results/DaDiL'
    # np.save does not create missing directories.
    os.makedirs(dadil_directory, exist_ok=True)
    np.save(os.path.join(dadil_directory, 'MappedLabels_Posture.npy'),
            cluster_labels[0])

    mapped_labels_domain_2 = domain_2.clusters_mapping(
        domain_1.cluster_tensors)
    np.save(os.path.join(dadil_directory, 'MappedLabels_Continuous.npy'),
            mapped_labels_domain_2)


if __name__ == "__main__":
    main()
