In [1]:
# Notebook-level configuration.
# Tag attached to every Comet experiment; also used as the results file name.
TAG: str = "baseline-kmeans"
# Dataset name handed to `load_dataset`; also names the results sub-folder.
DATASET: str = "EyeMovements"
# Number of K-Means fits; the repetition index doubles as the `random_state` seed.
REPETITIONS: int = 5

In [2]:
import sys
# Put the parent directory on the import path; presumably that is where the
# project-local `utils` and `evaluation` modules imported below live (verify).
# NOTE(review): "../" is resolved against the kernel's working directory, so
# the notebook must be launched from its own folder for these imports to work.
sys.path.append("../")

In [3]:
import comet_ml

In [4]:
import numpy as np
import tqdm
from torch.utils import data
from utils.load_dataset import load_dataset
from evaluation import evaluation
import os
from sklearn.cluster import KMeans

In [5]:
import pandas as pd

In [6]:
train_dataset, test_dataset = load_dataset(DATASET)

# Both splits are also kept together as a single torch dataset.
dataset = data.ConcatDataset([train_dataset, test_dataset])

# Distinct label count (taken from the training split only) and the size of
# the second dimension of the training features.
class_num = len(train_dataset.tensors[1].unique())
X_shape = train_dataset.tensors[0].shape[1]

# Stack train and test into plain NumPy arrays: tensor 0 of each split holds
# the features, tensor 1 the labels.
X = np.concatenate(
    [split.tensors[0].numpy() for split in dataset.datasets],
    axis=0,
)
y = np.concatenate(
    [split.tensors[1].numpy() for split in dataset.datasets],
    axis=0,
)


Using custom data configuration inria-soda--tabular-benchmark-18c6bb030d1e25c5
Found cached dataset csv (/home/MCB/wwydmanski/.cache/huggingface/datasets/inria-soda___csv/inria-soda--tabular-benchmark-18c6bb030d1e25c5/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317)


  0%|          | 0/1 [00:00<?, ?it/s]

In [9]:
# Sanity check: display the class count and the shape of the stacked features.
# NOTE(review): the saved output shows `class_num` as an array of labels, while
# the loading cell computes it with `len(...)` (an int) — the stored output
# looks stale; restart the kernel and re-run all cells.
class_num, X.shape

(array([0, 1]), (7608, 20))

## Initialize network

## Implement Barlow Twins loss

## Prepare clustering evaluation

In [10]:
def cluster(model, X, y):
    """Score a fitted clustering model against reference labels.

    Args:
        model: Fitted estimator exposing ``predict(X)``.
        X: Samples to assign to clusters.
        y: Reference labels for the same samples.

    Returns:
        The fourth value returned by ``evaluation.evaluate`` (named ``acc``
        in the original code, presumably clustering accuracy — confirm
        against the ``evaluation`` module).

    Note:
        Reads the module-level ``class_num`` defined in the data-loading cell.
    """
    predicted_labels = model.predict(X)
    # evaluate() yields four scores; only the last one is reported here.
    _nmi, _ari, _f_score, accuracy = evaluation.evaluate(y, predicted_labels, class_num)
    return accuracy

## Train the model

In [11]:
# Fit K-Means REPETITIONS times, logging each run to Comet and collecting the
# per-run accuracy in `final_accs`.
final_accs = []
for rep in range(REPETITIONS):
    # SECURITY: never hardcode the Comet API key in a notebook. With `api_key`
    # omitted, comet_ml resolves it from the COMET_API_KEY environment variable
    # or the Comet config file. The key that was previously committed here
    # should be treated as leaked and rotated.
    experiment = comet_ml.Experiment(
        project_name="subtab_cluster",
        workspace="wwydmanski",
    )
    try:
        experiment.add_tag(TAG)
        experiment.log_code()

        # The repetition index seeds K-Means, so runs are reproducible yet distinct.
        # n_init is pinned to 10 (what the deprecated default, logged as "warn",
        # resolved to) so results do not shift when scikit-learn changes its default.
        model = KMeans(n_clusters=class_num, n_init=10, random_state=rep).fit(X)
        acc = cluster(model, X, y)
        final_accs.append(acc)
        experiment.log_metric("acc", acc)
    finally:
        # Close the experiment explicitly so every repetition is flushed and
        # finalised, even if fitting or evaluation raises.
        experiment.end()

COMET INFO: Experiment is live on comet.com https://www.comet.com/wwydmanski/subtab-cluster/42b50b6cb68f4e8e844ac0d58cfdfaa9

COMET INFO: ---------------------------
COMET INFO: Comet.ml Experiment Summary
COMET INFO: ---------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.com/wwydmanski/subtab-cluster/42b50b6cb68f4e8e844ac0d58cfdfaa9
COMET INFO:   Metrics:
COMET INFO:     acc : 0.5055205047318612
COMET INFO:   Parameters:
COMET INFO:     algorithm    : lloyd
COMET INFO:     copy_x       : True
COMET INFO:     init         : k-means++
COMET INFO:     max_iter     : 300
COMET INFO:     n_clusters   : 2
COMET INFO:     n_init       : warn
COMET INFO:     random_state : 0
COMET INFO:     tol          : 0.0001
COMET INFO:     verbose      : 0
COMET INFO:   Uploads:
COMET INFO:     environment details      : 1
COMET INFO:     filename                 : 1
COMET INFO:     git metadata             : 1
C

In [12]:
# Persist the aggregate accuracy as "<mean>~<std>", both rounded to 3 decimals,
# in results/<DATASET>/<TAG>.txt.
results_dir = f"results/{DATASET}"
# exist_ok=True creates the folder only when it is missing, without a separate
# (race-prone) os.path.exists() check.
os.makedirs(results_dir, exist_ok=True)

mean_acc = round(np.mean(final_accs), 3)
std_acc = round(np.std(final_accs), 3)
with open(f"{results_dir}/{TAG}.txt", "w") as results_file:
    results_file.write(str(mean_acc) + "~" + str(std_acc))