In [1]:
import os
import time
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.metrics import f1_score, recall_score
from sklearn.cluster import KMeans, MiniBatchKMeans
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import scipy.io as sio
from tqdm import tqdm
import copy
import math
import random

# reproducibility
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
random.seed(SEED)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# -------------------- Load Data --------------------
data_path = r"C:\Users\NNadi\Downloads\DKM-HybridSN-GRSL\Dataset\Hyperspectral oil spill detection datasets"
data = sio.loadmat(os.path.join(data_path, 'GM18.mat'))['img']
labels = sio.loadmat(os.path.join(data_path, 'GM18.mat'))['map']

# -------------------- Helper Functions --------------------
def splitTrainTestSet(X, y, testRatio, randomState=345):
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=testRatio, random_state=randomState, stratify=y
    )
    return X_train, X_test, y_train, y_test

def applyPCA(X, numComponents=75):
    newX = X.reshape(-1, X.shape[2])
    pca = PCA(n_components=numComponents, whiten=True)
    newX = pca.fit_transform(newX)
    newX = newX.reshape(X.shape[0], X.shape[1], numComponents)
    return newX, pca

def padWithZeros(X, margin=2):
    newX = np.zeros((X.shape[0]+2*margin, X.shape[1]+2*margin, X.shape[2]))
    newX[margin:X.shape[0]+margin, margin:X.shape[1]+margin, :] = X
    return newX

def createImageCubes(X, y, windowSize=5, removeZeroLabels=True):
    margin = (windowSize - 1) // 2
    paddedX = padWithZeros(X, margin=margin)
    patchesData = np.zeros((X.shape[0]*X.shape[1], windowSize, windowSize, X.shape[2]))
    patchesLabels = np.zeros((X.shape[0]*X.shape[1]))
    idx = 0
    for r in range(margin, paddedX.shape[0]-margin):
        for c in range(margin, paddedX.shape[1]-margin):
            patch = paddedX[r-margin:r+margin+1, c-margin:c+margin+1]
            patchesData[idx] = patch
            patchesLabels[idx] = y[r-margin, c-margin]
            idx += 1
    if removeZeroLabels:
        mask = patchesLabels > 0
        patchesData = patchesData[mask]
        patchesLabels = patchesLabels[mask] - 1
    return patchesData, patchesLabels

# Dataset / preprocessing parameters (kept same as your code)
windowSize = 3
test_ratio = 0.25
dataset = 'IP'
K = 30 if dataset == 'IP' else 15

X, pca = applyPCA(data, numComponents=K)
X, y = createImageCubes(X, labels, windowSize=windowSize)

# Split and convert to torch tensors
X_train_np, X_test_np, y_train_np, y_test_np = splitTrainTestSet(X, y, test_ratio)
X_train = torch.tensor(X_train_np, dtype=torch.float32, device=DEVICE)
y_train = torch.tensor(y_train_np, dtype=torch.long, device=DEVICE)
X_test  = torch.tensor(X_test_np, dtype=torch.float32, device=DEVICE)
y_test  = torch.tensor(y_test_np, dtype=torch.long, device=DEVICE)

S = X_train.shape[1]
L = X_train.shape[3]
n_classes = len(np.unique(y_train_np))

print("X_train shape:", X_train.shape)
print("X_test  shape:", X_test.shape)

X_train shape: torch.Size([55442, 3, 3, 30])
X_test  shape: torch.Size([18481, 3, 3, 30])


In [2]:

class SpectralSpatialTransformer(nn.Module):
    """
    Spectral-Spatial Transformer head replacing the conv2d + flatten step.
    Keeps the 3D conv stem similar to HybridSN and then applies a Transformer
    over spatial tokens where each token is a (channel * depth) feature vector.
    """
    def __init__(self, input_shape, n_classes,
                 embed_dim=256, n_heads=8, n_layers=4, ff_dim=512, attn_dropout=0.0, drop=0.4):
        super(SpectralSpatialTransformer, self).__init__()
        # input_shape expected as (S, _, L) or (S, S, L) - we only need S and L
        S, _, L = input_shape

        # 3D conv stem (keeps same kernel choices as your HybridSN)
        self.conv3d_1 = nn.Conv3d(1, 8, kernel_size=(3,3,7))
        self.conv3d_2 = nn.Conv3d(8,16, kernel_size=(1,1,5))
        self.conv3d_3 = nn.Conv3d(16,32, kernel_size=(1,1,3))

        # Transformer projection + encoder
        self.embed_dim = embed_dim
        # token projection (project per-token feature vector to embed_dim)
        self.token_proj = None  # will be created after dummy forward
        self.pos_emb = None
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_dim,
            nhead=n_heads,
            dim_feedforward=ff_dim,
            dropout=attn_dropout,
            batch_first=True  # so inputs are (B, seq_len, embed_dim)
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=n_layers)

        # FC head (kept same sizes as your original code)
        self.fc1 = None
        self.dropout1 = nn.Dropout(drop)
        self.fc2 = None
        self.dropout2 = nn.Dropout(drop)
        self.fc3 = None

        # Build modules that depend on S/L by running a dummy forward pass
        with torch.no_grad():
            # create dummy input in same layout as forward: (B,1,H,W,L)
            x_dummy = torch.zeros(1,1,S,S,L)
            x_dummy = F.relu(self.conv3d_1(x_dummy))
            x_dummy = F.relu(self.conv3d_2(x_dummy))
            x_dummy = F.relu(self.conv3d_3(x_dummy))
            B, C, H, W, D = x_dummy.shape  # H,W are spatial dims after convs
            token_dim = C * D              # per-spatial-location vector size

            # token projection: project token_dim -> embed_dim
            self.token_proj = nn.Linear(token_dim, embed_dim)

            # positional embedding for H*W tokens (learnable)
            seq_len = H * W
            self.pos_emb = nn.Parameter(torch.zeros(1, seq_len, embed_dim))

            # transformer's output is pooled -> pass to FCs
            # we'll pool to get a fixed vector of size embed_dim
            self.fc1 = nn.Linear(embed_dim, 256)
            self.fc2 = nn.Linear(256, 128)
            self.fc3 = nn.Linear(128, n_classes)

            # initialize pos_emb small
            nn.init.trunc_normal_(self.pos_emb, std=0.02)
            # init token_proj
            nn.init.xavier_uniform_(self.token_proj.weight)
            if self.token_proj.bias is not None:
                nn.init.zeros_(self.token_proj.bias)

    def forward(self, x):
        # x : (B, H, W, L) as in your dataset tensors
        x = x.unsqueeze(1)  # -> (B,1,H,W,L)
        x = F.relu(self.conv3d_1(x))
        x = F.relu(self.conv3d_2(x))
        x = F.relu(self.conv3d_3(x))
        B, C, H, W, D = x.shape

        # reshape to tokens: (B, H*W, C*D)
        x = x.view(B, C*D, H, W)          # (B, C*D, H, W)
        x = x.permute(0, 2, 3, 1).contiguous()  # (B, H, W, C*D)
        x = x.view(B, H*W, C*D)           # (B, seq_len, token_dim)

        # project tokens to embedding dim
        x = self.token_proj(x)            # (B, seq_len, embed_dim)

        # add positional embedding (broadcast batch)
        # self.pos_emb shape: (1, seq_len, embed_dim)
        x = x + self.pos_emb

        # transformer encoding (batch_first=True)
        x = self.transformer(x)           # (B, seq_len, embed_dim)

        # global pooling (mean over tokens)
        x = x.mean(dim=1)                 # (B, embed_dim)

        # fc head (similar to HybridSN)
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = F.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.fc3(x)                   # (B, n_classes)
        return x


# -------------------- Training --------------------
def train_model(model, X_train, y_train, epochs=1, lr=0.001, batch_size=None):
    # Note: original code did full-batch; keep same behavior for fairness unless batch_size is given
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    t0 = time.time()
    
    if batch_size is None:
        # full-batch (original)
        for epoch in tqdm(range(epochs), desc="Training", ncols=100, unit="epoch"):
            optimizer.zero_grad()
            outputs = model(X_train)
            loss = criterion(outputs, y_train)
            loss.backward()
            optimizer.step()
            tqdm.write(f"Epoch [{epoch+1}/{epochs}] - Loss: {loss.item():.4f}")
    else:
        # mini-batch training
        dataset = torch.utils.data.TensorDataset(X_train, y_train)
        loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
        for epoch in tqdm(range(epochs), desc="Training", ncols=100, unit="epoch"):
            epoch_loss = 0.0
            for xb, yb in loader:
                optimizer.zero_grad()
                outputs = model(xb)
                loss = criterion(outputs, yb)
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()
            tqdm.write(f"Epoch [{epoch+1}/{epochs}] - Loss: {epoch_loss/len(loader):.4f}")
    
    total_time = time.time() - t0
    return model, total_time

# -------------------- Evaluation --------------------
def evaluate_model(model, X_test, y_test):
    model.eval()
    t1 = time.time()
    with torch.no_grad():
        outputs = model(X_test)
        preds = outputs.argmax(dim=1).cpu().numpy()
    y_true = y_test.cpu().numpy()
    acc = np.mean(preds==y_true)
    f1 = f1_score(y_true, preds, average='macro')
    recall = recall_score(y_true, preds, average='macro')
    inf_time = time.time() - t1
    return inf_time, acc, f1, recall

# -------------------- DKM (existing) --------------------
def dkm_cluster_weights(model, k=8, iterations=10, layer_mask=None):
    """
    layer_mask: function name->bool to indicate whether to cluster that parameter. If None cluster all.
    Returns: compressed_params_effective (estimate in terms of k * vector_dim per clustered weight)
    """
    compressed_params = 0
    for name, param in model.named_parameters():
        if "weight" in name:
            if (layer_mask is not None) and (not layer_mask(name)):
                # skip clustering for this weight
                compressed_params += param.numel()
                continue
            W = param.data
            W_flat = W.view(W.shape[0], -1).clone()  # (out_channels, vector_dim)
            # initialize centroids by random selection
            idx = torch.randperm(W_flat.size(0))[:k]
            C = W_flat[idx].clone()
            for _ in range(iterations):
                dist = torch.cdist(W_flat, C)  # (N, k)
                A = torch.softmax(-dist, dim=1)  # soft assignments across k
                C = torch.mm(A.t(), W_flat) / (A.sum(0).unsqueeze(1)+1e-6)
            clustered_W = torch.mm(A, C)
            param.data = clustered_W.view(param.shape)
            compressed_params += k * W_flat.shape[1]  # store effective param count for cluster codebook
        elif "bias" in name:
            compressed_params += param.numel()
    return compressed_params

# -------------------- KMeans / MiniBatchKMeans clustering adapted to weights --------------------
def kmeans_cluster_weights(model, k=8, iterations=20, layer_mask=None, minibatch=False):
    compressed_params = 0
    for name, param in model.named_parameters():
        if "weight" in name:
            if (layer_mask is not None) and (not layer_mask(name)):
                compressed_params += param.numel()
                continue
            W = param.data
            W_flat = W.view(W.shape[0], -1).cpu().numpy()  # shape (N, D)
            if W_flat.shape[0] <= k:
                # cannot cluster fewer vectors than k
                compressed_params += W_flat.size
                continue
            if minibatch:
                km = MiniBatchKMeans(n_clusters=k, random_state=SEED, max_iter=iterations)
            else:
                km = KMeans(n_clusters=k, random_state=SEED, n_init=3, max_iter=iterations)
            labels = km.fit_predict(W_flat)
            centroids = km.cluster_centers_  # (k, D)
            # assign back using centroid of assigned cluster (hard assignment)
            clustered = centroids[labels]
            clustered_t = torch.tensor(clustered, dtype=param.dtype, device=param.device)
            param.data = clustered_t.view(param.shape)
            compressed_params += k * W_flat.shape[1]
        elif "bias" in name:
            compressed_params += param.numel()
    return compressed_params

# -------------------- EFDPC (density peak clustering variant) for weights --------------------
def efdpc_cluster_weights(model, k=8, layer_mask=None):
    """
    Adaptation of EFDPC (density-peak based) for clustering weight-vectors.
    We compute pairwise distances between weight vectors in a layer,
    compute density (rho) using exponential kernel, compute delta (distance to nearest higher-rho),
    compute gamma = rho * delta^2, pick top-k as centroids, and soft-assign using softmax(-dist).
    Returns compressed params estimate k * vector_dim per clustered weight.
    """
    compressed_params = 0
    for name, param in model.named_parameters():
        if "weight" in name:
            if (layer_mask is not None) and (not layer_mask(name)):
                compressed_params += param.numel()
                continue
            W = param.data
            W_flat = W.view(W.shape[0], -1).cpu().numpy()  # (N, D)
            N = W_flat.shape[0]
            if N <= k:
                compressed_params += W_flat.size
                continue
            # compute pairwise distance matrix (NxN)
            # to save memory, compute condensed distances
            from scipy.spatial.distance import pdist, squareform
            dist_vec = pdist(W_flat, metric='euclidean')
            Dist_matrix = squareform(dist_vec)
            # choose dc using a small percentile, similar to MATLAB percent=2
            percent = 2.0
            tril_vals = Dist_matrix[np.tril_indices(N, k=-1)]
            sda = np.sort(tril_vals)
            if len(sda) == 0:
                dc = 1.0
            else:
                pos = max(0, int(len(sda) * (percent/100.0) ) - 1)
                pos = min(len(sda)-1, pos)
                dini = sda[pos]
                # add small epsilon to avoid zero
                dc = max(dini / math.exp(10.0/N), 1e-6)

            # compute rho
            rho = np.zeros(N)
            for i in range(N):
                # exponential kernel
                rho[i] = np.sum(np.exp(- (Dist_matrix[i,:] / (dc+1e-12))**2 )) - 1.0  # exclude self
            # compute delta
            ordrho = np.argsort(-rho)  # descending by rho
            delta = np.zeros(N)
            maxD = np.max(Dist_matrix)
            delta[ordrho[0]] = maxD
            for idx in range(1, N):
                i = ordrho[idx]
                # distance to nearest higher density point
                higher = ordrho[:idx]
                delta[i] = np.min(Dist_matrix[i, higher])
            # normalize rho and delta
            if rho.max() - rho.min() > 0:
                rho_n = (rho - rho.min()) / (rho.max() - rho.min())
            else:
                rho_n = rho
            if delta.max() - delta.min() > 0:
                delta_n = (delta - delta.min()) / (delta.max() - delta.min())
            else:
                delta_n = delta
            gamma = rho_n * (delta_n**2)
            # pick top-k gamma indices as centroids
            topk_idx = np.argsort(-gamma)[:k]
            C = W_flat[topk_idx]  # (k, D)
            # soft assignment via distances
            from scipy.spatial.distance import cdist
            dist = cdist(W_flat, C)  # (N, k)
            A = np.exp(-dist)  # soft weights
            A = A / (A.sum(axis=1, keepdims=True) + 1e-12)
            clustered = A.dot(C)
            clustered_t = torch.tensor(clustered, dtype=param.dtype, device=param.device)
            param.data = clustered_t.view(param.shape)
            compressed_params += k * W_flat.shape[1]
        elif "bias" in name:
            compressed_params += param.numel()
    return compressed_params

# -------------------- Layer selection helper --------------------
def make_layer_mask_exclude_first_last(model):
    """
    Returns a function that given parameter name returns True if that parameter should be clustered.
    This implementation finds the ordered list of 'weight' parameter names and excludes the first and last.
    """
    weight_names = [name for name, _ in model.named_parameters() if "weight" in name]
    exclude = set()
    if len(weight_names) >= 1:
        exclude.add(weight_names[0])
    if len(weight_names) >= 2:
        exclude.add(weight_names[-1])
    def mask(name):
        if "weight" not in name:
            return False  # don't cluster biases in layer-selection sense (we keep biases unclustered count-wise above)
        return name not in exclude
    return mask


In [3]:

def make_layer_mask_all(model):
    def mask(name):
        return "weight" in name
    return mask

# -------------------- Utility: count total params --------------------
def count_params(model):
    return sum(p.numel() for p in model.parameters())

# -------------------- Utility: get compressed params from clustering run (already returned) --------------------

# -------------------- Utility: estimate inference time proportionally --------------------
def estimate_inf_time(num_params, baseline_params, baseline_inf_time):
    # avoid zero division
    if baseline_params == 0:
        return baseline_inf_time
    return baseline_inf_time * (num_params / baseline_params)

# -------------------- Run Baseline (kept exactly like your original configuration 1) --------------------
model = SpectralSpatialTransformer((S,S,L), n_classes).to(DEVICE)
print("\n-- Training baseline model (this preserves your original configuration 1 behavior) --")
model, train_time = train_model(model, X_train, y_train, epochs=5)  # identical to your original
inf_time_measured, acc, f1, rec = evaluate_model(model, X_test, y_test)

baseline_params = count_params(model)
baseline_inf_time_measured = inf_time_measured
baseline_results = {
    "train_time": train_time,
    "measured_inf_time": baseline_inf_time_measured,
    "params": baseline_params,
    "acc": acc,
    "f1": f1,
    "recall": rec
}

print("\n===== Baseline HybridSN =====")
print(f"Training Time : {train_time:.3f} sec")
print(f"Measured Inference Time: {inf_time_measured:.6f} sec")
print(f"Parameters    : {baseline_params}")
print(f"Accuracy      : {acc:.4f}")
print(f"F1 Score      : {f1:.4f}")
print(f"Recall        : {rec:.4f}")

# -------------------- Full experimental grid --------------------
# Training stages: 'posthoc' (full baseline train -> cluster -> finetune),
#                  'partial' (warm-up epochs -> cluster -> finetune)
training_stages = ['posthoc', 'partial']
layer_selections = ['all', 'exclude_first_last']
clustering_methods = ['KMeans', 'MiniBatchKMeans', 'EFDPC']  # 4 clustering options (DKM is original)

# hyperparameters for experiments
k_values = [4, 8, 16, 32]  # cluster counts to test
warmup_epochs = 2  # partial warm-up epochs (you can change)
fine_tune_epochs = 5  # fine-tune after clustering
lr_finetune = 0.001

# We'll store results
exp_results = []

# For fairness, reuse baseline trained model weights as starting point (posthoc) or for partial warmup start from scratch and warmup
for stage in training_stages:
    for layer_sel in layer_selections:
        for method in clustering_methods:
            for k in k_values:
                # Prepare a fresh model for this run
                model_k = SpectralSpatialTransformer((S,S,L), n_classes).to(DEVICE)
                # Copy baseline weights for posthoc scenario
                if stage == 'posthoc':
                    model_k.load_state_dict(model.state_dict())
                    warmup_time = 0.0
                elif stage == 'partial':
                    # For partial warmup we start from new random init and train warmup_epochs
                    model_k, warmup_time = train_model(model_k, X_train, y_train, epochs=warmup_epochs, lr=0.001)
                else:
                    raise ValueError("Unknown training stage")

                # Choose layer mask
                if layer_sel == 'all':
                    layer_mask = make_layer_mask_all(model_k)
                else:
                    layer_mask = make_layer_mask_exclude_first_last(model_k)

                # Apply clustering based on method
                if method == 'DKM':
                    compressed_params = dkm_cluster_weights(model_k, k=k, iterations=10, layer_mask=layer_mask)
                elif method == 'KMeans':
                    compressed_params = kmeans_cluster_weights(model_k, k=k, iterations=20, layer_mask=layer_mask, minibatch=False)
                elif method == 'MiniBatchKMeans':
                    compressed_params = kmeans_cluster_weights(model_k, k=k, iterations=50, layer_mask=layer_mask, minibatch=True)
                elif method == 'EFDPC':
                    compressed_params = efdpc_cluster_weights(model_k, k=k, layer_mask=layer_mask)
                else:
                    raise ValueError("Unknown clustering method")

                # Fine-tune the clustered model
                model_k, ft_time = train_model(model_k, X_train, y_train, epochs=fine_tune_epochs, lr=lr_finetune)

                # Evaluate after clustering + fine-tuning
                inf_time_measured_k, acc_k, f1_k, rec_k = evaluate_model(model_k, X_test, y_test)
                total_params_k = count_params(model_k)

                # Compute estimated inference time based only on parameter count relative to baseline
                estimated_inf_time_k = estimate_inf_time(total_params_k, baseline_params, baseline_inf_time_measured)

                # Save & print results
                result = {
                    "stage": stage,
                    "layer_selection": layer_sel,
                    "method": method,
                    "k": k,
                    "warmup_time": warmup_time if stage == 'partial' else 0.0,
                    "cluster_time": None,
                    "fine_tune_time": ft_time,
                    "measured_inf_time": inf_time_measured_k,
                    "estimated_inf_time": estimated_inf_time_k,
                    "params": total_params_k,
                    "compressed_params_effective": compressed_params,
                    "acc": acc_k,
                    "f1": f1_k,
                    "recall": rec_k
                }
                exp_results.append(result)

                # Print concise summary for this config
                print("\n----- Experiment Result -----")
                print(f"Stage           : {stage}")
                print(f"Layer selection : {layer_sel}")
                print(f"Clustering      : {method}")
                print(f"k (clusters)    : {k}")
                if stage == 'partial':
                    print(f"Warmup Time     : {warmup_time:.3f} sec")
                print(f"Fine-tune Time  : {ft_time:.3f} sec")
                print(f"Measured Inference Time: {inf_time_measured_k:.6f} sec")
                print(f"Estimated Inference Time (proportional to params): {estimated_inf_time_k:.6f} sec")
                print(f"Parameters (model): {total_params_k}")
                print(f"Compressed Params (effective/codebook): {compressed_params}")
                print(f"Accuracy         : {acc_k:.4f}")
                print(f"F1 Score         : {f1_k:.4f}")
                print(f"Recall           : {rec_k:.4f}")

# Optionally, print a short table summary
print("\n\n===== Summary of all experiments (first 10 shown) =====")
for r in exp_results[:10]:
    print(f"{r['stage']}/{r['layer_selection']}/{r['method']}/k={r['k']} -> params: {r['params']}, est_inf_time: {r['estimated_inf_time']:.6f}, acc: {r['acc']:.4f}, f1: {r['f1']:.4f}")

# Save results to a numpy file for downstream analysis if desired
import json
out_file = r"C:\Users\NNadi\Downloads\DKM-HybridSN-GRSL\res\oill_spill_GM18_SST.json"
with open(out_file, "w") as f:
    json.dump(exp_results, f, indent=2)
print(f"\nSaved experiment results to {out_file}")



-- Training baseline model (this preserves your original configuration 1 behavior) --


Training:  20%|██████████▍                                         | 1/5 [00:16<01:04, 16.04s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:28<00:41, 13.68s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:43<00:29, 14.59s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:05<00:17, 17.51s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:22<00:00, 16.57s/epoch]


Epoch [5/5] - Loss: 0.0000

===== Baseline HybridSN =====
Training Time : 82.838 sec
Measured Inference Time: 2.012929 sec
Parameters    : 2357937
Accuracy      : 1.0000
F1 Score      : 1.0000
Recall        : 1.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:19<01:17, 19.33s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:49<01:17, 25.93s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:16<00:52, 26.16s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:48<00:28, 28.52s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:23<00:00, 28.78s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : KMeans
k (clusters)    : 4
Fine-tune Time  : 143.897 sec
Measured Inference Time: 4.288664 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35509
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:25<01:41, 25.44s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:53<01:20, 26.77s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:19<00:53, 26.54s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:46<00:26, 26.93s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:17<00:00, 27.54s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : KMeans
k (clusters)    : 8
Fine-tune Time  : 137.693 sec
Measured Inference Time: 3.714200 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:30<02:00, 30.24s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [01:01<01:33, 31.08s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:30<01:00, 30.04s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [02:04<00:31, 31.57s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:36<00:00, 31.35s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : KMeans
k (clusters)    : 16
Fine-tune Time  : 156.777 sec
Measured Inference Time: 3.087648 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:31<02:05, 31.37s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [01:00<01:29, 29.83s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:21<00:52, 26.19s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:48<00:26, 26.47s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:15<00:00, 27.15s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : KMeans
k (clusters)    : 32
Fine-tune Time  : 135.784 sec
Measured Inference Time: 2.419172 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

  super()._check_params_vs_input(X, default_n_init=3)
Training:  20%|██████████▍                                         | 1/5 [00:23<01:32, 23.04s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:55<01:25, 28.62s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:21<00:54, 27.34s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:44<00:25, 25.58s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:03<00:00, 24.79s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : MiniBatchKMeans
k (clusters)    : 4
Fine-tune Time  : 123.937 sec
Measured Inference Time: 2.836590 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35509
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Training:  20%|██████████▍                                         | 1/5 [00:25<01:42, 25.66s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:43<01:03, 21.19s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:05<00:43, 21.57s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:32<00:23, 23.43s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:06<00:00, 25.34s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : MiniBatchKMeans
k (clusters)    : 8
Fine-tune Time  : 126.738 sec
Measured Inference Time: 2.974828 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:54<01:22, 27.53s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:27<00:59, 29.76s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:50<00:27, 27.18s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:10<00:00, 26.03s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : MiniBatchKMeans
k (clusters)    : 16
Fine-tune Time  : 130.153 sec
Measured Inference Time: 2.777523 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:49<01:14, 24.70s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:14<00:49, 24.83s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:38<00:24, 24.80s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:05<00:00, 25.05s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : MiniBatchKMeans
k (clusters)    : 32
Fine-tune Time  : 125.256 sec
Measured Inference Time: 2.671755 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  20%|██████████▍                                         | 1/5 [00:26<01:46, 26.50s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:51<01:17, 25.69s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:19<00:53, 26.56s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:46<00:26, 26.96s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:15<00:00, 27.04s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : EFDPC
k (clusters)    : 4
Fine-tune Time  : 135.199 sec
Measured Inference Time: 3.876447 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35509
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  20%|██████████▍                                         | 1/5 [00:26<01:46, 26.53s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:54<01:21, 27.32s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:17<00:50, 25.32s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:45<00:26, 26.53s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:10<00:00, 26.19s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : EFDPC
k (clusters)    : 8
Fine-tune Time  : 130.942 sec
Measured Inference Time: 2.622089 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  20%|██████████▍                                         | 1/5 [00:21<01:26, 21.61s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:46<01:11, 23.75s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:12<00:48, 24.49s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:33<00:23, 23.08s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:58<00:00, 23.74s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : EFDPC
k (clusters)    : 16
Fine-tune Time  : 118.705 sec
Measured Inference Time: 2.190477 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  20%|██████████▍                                         | 1/5 [00:25<01:42, 25.55s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:45<01:06, 22.32s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:08<00:45, 22.51s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:33<00:23, 23.66s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:59<00:00, 23.83s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : EFDPC
k (clusters)    : 32
Fine-tune Time  : 119.145 sec
Measured Inference Time: 2.512018 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:17<01:10, 17.64s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:42<01:05, 21.85s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:08<00:47, 23.74s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:31<00:23, 23.47s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:47<00:00, 21.57s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : KMeans
k (clusters)    : 4
Fine-tune Time  : 107.838 sec
Measured Inference Time: 1.730061 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35761
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:24<01:38, 24.70s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:49<01:14, 24.78s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:09<00:45, 22.76s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:27<00:20, 20.86s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:52<00:00, 22.41s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : KMeans
k (clusters)    : 8
Fine-tune Time  : 112.038 sec
Measured Inference Time: 2.587043 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:26<01:46, 26.62s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:42<01:01, 20.47s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:07<00:45, 22.62s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:40<00:26, 26.59s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:06<00:00, 25.25s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : KMeans
k (clusters)    : 16
Fine-tune Time  : 126.239 sec
Measured Inference Time: 2.442310 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:24<01:36, 24.09s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:48<01:12, 24.25s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:10<00:46, 23.36s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:35<00:23, 23.84s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:59<00:00, 23.98s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : KMeans
k (clusters)    : 32
Fine-tune Time  : 119.903 sec
Measured Inference Time: 2.381916 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Training:  20%|██████████▍                                         | 1/5 [00:23<01:34, 23.66s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:47<01:11, 23.95s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:13<00:49, 24.59s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:35<00:23, 23.79s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:00<00:00, 24.05s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : MiniBatchKMeans
k (clusters)    : 4
Fine-tune Time  : 120.235 sec
Measured Inference Time: 2.427937 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35761
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Training:  20%|██████████▍                                         | 1/5 [00:14<00:56, 14.04s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:28<00:42, 14.08s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:46<00:32, 16.24s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:02<00:15, 15.84s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:17<00:00, 15.52s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : MiniBatchKMeans
k (clusters)    : 8
Fine-tune Time  : 77.633 sec
Measured Inference Time: 1.734296 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:30<00:46, 15.44s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:51<00:35, 17.77s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:06<00:16, 16.62s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:21<00:00, 16.28s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : MiniBatchKMeans
k (clusters)    : 16
Fine-tune Time  : 81.394 sec
Measured Inference Time: 1.364493 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:30<00:45, 15.19s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:45<00:30, 15.24s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:00<00:15, 15.29s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:16<00:00, 15.25s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : MiniBatchKMeans
k (clusters)    : 32
Fine-tune Time  : 76.253 sec
Measured Inference Time: 1.374760 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  20%|██████████▍                                         | 1/5 [00:14<00:57, 14.42s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:25<00:37, 12.43s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:36<00:23, 11.73s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:47<00:11, 11.67s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:59<00:00, 11.89s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : EFDPC
k (clusters)    : 4
Fine-tune Time  : 59.476 sec
Measured Inference Time: 1.098281 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35761
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  20%|██████████▍                                         | 1/5 [00:11<00:44, 11.11s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:21<00:32, 10.68s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:34<00:23, 11.74s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:49<00:13, 13.09s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:01<00:00, 12.39s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : EFDPC
k (clusters)    : 8
Fine-tune Time  : 61.977 sec
Measured Inference Time: 1.039564 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  20%|██████████▍                                         | 1/5 [00:11<00:44, 11.19s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:21<00:32, 10.87s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:33<00:22, 11.07s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:44<00:11, 11.07s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:55<00:00, 11.04s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : EFDPC
k (clusters)    : 16
Fine-tune Time  : 55.220 sec
Measured Inference Time: 1.063979 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  20%|██████████▍                                         | 1/5 [00:10<00:42, 10.74s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:21<00:32, 10.93s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:32<00:21, 10.89s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:44<00:11, 11.31s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:58<00:00, 11.69s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : EFDPC
k (clusters)    : 32
Fine-tune Time  : 58.454 sec
Measured Inference Time: 1.248219 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:14<00:14, 14.12s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:39<00:00, 19.94s/epoch]


Epoch [2/2] - Loss: 0.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:21<01:26, 21.73s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:41<01:01, 20.48s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:02<00:41, 20.96s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:22<00:20, 20.29s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:42<00:00, 20.55s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : KMeans
k (clusters)    : 4
Warmup Time     : 39.880 sec
Fine-tune Time  : 102.760 sec
Measured Inference Time: 1.864999 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35509
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:17<00:17, 17.06s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:33<00:00, 16.64s/epoch]


Epoch [2/2] - Loss: 0.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:16<01:07, 16.84s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:32<00:47, 15.86s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:46<00:30, 15.34s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:01<00:15, 15.18s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:17<00:00, 15.42s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : KMeans
k (clusters)    : 8
Warmup Time     : 33.296 sec
Fine-tune Time  : 77.110 sec
Measured Inference Time: 1.650622 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:22<00:22, 22.21s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:47<00:00, 23.62s/epoch]


Epoch [2/2] - Loss: 0.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:26<01:46, 26.67s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:52<01:18, 26.15s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:18<00:51, 25.96s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:43<00:25, 25.72s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:08<00:00, 25.69s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : KMeans
k (clusters)    : 16
Warmup Time     : 47.256 sec
Fine-tune Time  : 128.438 sec
Measured Inference Time: 2.598216 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:25<00:25, 25.36s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:50<00:00, 25.04s/epoch]


Epoch [2/2] - Loss: 0.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:26<01:45, 26.46s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:51<01:17, 25.86s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:18<00:52, 26.33s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:45<00:26, 26.40s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:10<00:00, 26.06s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : KMeans
k (clusters)    : 32
Warmup Time     : 50.093 sec
Fine-tune Time  : 130.296 sec
Measured Inference Time: 2.766576 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:26<00:26, 26.01s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:51<00:00, 25.90s/epoch]
  super()._check_params_vs_input(X, default_n_init=3)


Epoch [2/2] - Loss: 0.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Training:  20%|██████████▍                                         | 1/5 [00:25<01:41, 25.47s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:50<01:15, 25.27s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:15<00:50, 25.09s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:39<00:24, 24.66s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:04<00:00, 24.98s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : MiniBatchKMeans
k (clusters)    : 4
Warmup Time     : 51.818 sec
Fine-tune Time  : 124.935 sec
Measured Inference Time: 2.454604 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35509
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:25<00:25, 25.79s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:51<00:00, 25.54s/epoch]


Epoch [2/2] - Loss: 0.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Training:  20%|██████████▍                                         | 1/5 [00:24<01:36, 24.09s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:49<01:14, 24.91s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:16<00:51, 25.65s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:42<00:25, 25.98s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:08<00:00, 25.60s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : MiniBatchKMeans
k (clusters)    : 8
Warmup Time     : 51.086 sec
Fine-tune Time  : 128.010 sec
Measured Inference Time: 2.432828 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:25<00:25, 25.71s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:52<00:00, 26.16s/epoch]


Epoch [2/2] - Loss: 0.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:51<01:16, 25.65s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:20<00:54, 27.01s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:45<00:26, 26.52s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:11<00:00, 26.36s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : MiniBatchKMeans
k (clusters)    : 16
Warmup Time     : 52.325 sec
Fine-tune Time  : 131.820 sec
Measured Inference Time: 3.425890 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:25<00:25, 25.87s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:51<00:00, 25.82s/epoch]


Epoch [2/2] - Loss: 0.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:57<01:26, 28.96s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:24<00:56, 28.21s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:51<00:27, 27.73s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:18<00:00, 27.67s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : MiniBatchKMeans
k (clusters)    : 32
Warmup Time     : 51.654 sec
Fine-tune Time  : 138.359 sec
Measured Inference Time: 2.857999 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:29<00:29, 29.43s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:57<00:00, 28.94s/epoch]


Epoch [2/2] - Loss: 0.0000


Training:  20%|██████████▍                                         | 1/5 [00:26<01:47, 26.91s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:55<01:23, 27.95s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:23<00:55, 27.89s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:51<00:27, 27.81s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:18<00:00, 27.66s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : EFDPC
k (clusters)    : 4
Warmup Time     : 57.889 sec
Fine-tune Time  : 138.305 sec
Measured Inference Time: 2.448055 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35509
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:25<00:25, 25.87s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:52<00:00, 26.23s/epoch]


Epoch [2/2] - Loss: 0.0000


Training:  20%|██████████▍                                         | 1/5 [00:26<01:44, 26.18s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:54<01:22, 27.50s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:21<00:54, 27.10s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:46<00:26, 26.55s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:15<00:00, 27.03s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : EFDPC
k (clusters)    : 8
Warmup Time     : 52.475 sec
Fine-tune Time  : 135.166 sec
Measured Inference Time: 3.127180 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:27<00:27, 27.17s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:53<00:00, 26.85s/epoch]


Epoch [2/2] - Loss: 0.0000


Training:  20%|██████████▍                                         | 1/5 [00:25<01:41, 25.41s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:50<01:14, 24.93s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:14<00:49, 24.94s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:40<00:25, 25.31s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:06<00:00, 25.27s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : EFDPC
k (clusters)    : 16
Warmup Time     : 53.714 sec
Fine-tune Time  : 126.354 sec
Measured Inference Time: 2.872574 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:26<00:26, 26.64s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:52<00:00, 26.12s/epoch]


Epoch [2/2] - Loss: 0.0000


Training:  20%|██████████▍                                         | 1/5 [00:25<01:41, 25.45s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:52<01:19, 26.50s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:17<00:51, 25.59s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:42<00:25, 25.50s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:06<00:00, 25.30s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : EFDPC
k (clusters)    : 32
Warmup Time     : 52.251 sec
Fine-tune Time  : 126.522 sec
Measured Inference Time: 2.526747 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:23<00:23, 23.98s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:48<00:00, 24.22s/epoch]


Epoch [2/2] - Loss: 0.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:23<01:34, 23.58s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:47<01:12, 24.04s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:11<00:47, 23.97s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:35<00:23, 23.95s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:59<00:00, 23.91s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : KMeans
k (clusters)    : 4
Warmup Time     : 48.447 sec
Fine-tune Time  : 119.557 sec
Measured Inference Time: 2.420771 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35761
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:24<00:24, 24.16s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:48<00:00, 24.07s/epoch]


Epoch [2/2] - Loss: 0.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:24<01:36, 24.24s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:48<01:12, 24.05s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:11<00:47, 23.91s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:36<00:24, 24.10s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [02:00<00:00, 24.12s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : KMeans
k (clusters)    : 8
Warmup Time     : 48.146 sec
Fine-tune Time  : 120.599 sec
Measured Inference Time: 2.348650 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:24<00:24, 24.03s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:49<00:00, 24.55s/epoch]


Epoch [2/2] - Loss: 0.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:23<01:33, 23.41s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:47<01:10, 23.53s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [01:11<00:48, 24.09s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:35<00:24, 24.05s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:59<00:00, 23.93s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : KMeans
k (clusters)    : 16
Warmup Time     : 49.118 sec
Fine-tune Time  : 119.664 sec
Measured Inference Time: 2.696483 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:25<00:25, 25.31s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:50<00:00, 25.00s/epoch]


Epoch [2/2] - Loss: 0.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:16<01:06, 16.72s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:36<00:55, 18.57s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:52<00:34, 17.49s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:09<00:17, 17.23s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:26<00:00, 17.38s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : KMeans
k (clusters)    : 32
Warmup Time     : 50.017 sec
Fine-tune Time  : 86.934 sec
Measured Inference Time: 2.108520 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:17<00:17, 17.89s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:36<00:00, 18.44s/epoch]
  super()._check_params_vs_input(X, default_n_init=3)


Epoch [2/2] - Loss: 0.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:35<00:52, 17.66s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:57<00:39, 19.78s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:21<00:21, 21.24s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:49<00:00, 21.97s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : MiniBatchKMeans
k (clusters)    : 4
Warmup Time     : 36.883 sec
Fine-tune Time  : 109.874 sec
Measured Inference Time: 3.077153 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35761
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:23<00:23, 23.27s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:42<00:00, 21.33s/epoch]
  super()._check_params_vs_input(X, default_n_init=3)


Epoch [2/2] - Loss: 0.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:30<00:45, 15.32s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:45<00:30, 15.34s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:01<00:15, 15.32s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:16<00:00, 15.31s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : MiniBatchKMeans
k (clusters)    : 8
Warmup Time     : 42.673 sec
Fine-tune Time  : 76.551 sec
Measured Inference Time: 1.504400 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:15<00:15, 15.53s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:32<00:00, 16.35s/epoch]
  super()._check_params_vs_input(X, default_n_init=3)


Epoch [2/2] - Loss: 0.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:29<00:44, 14.83s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:44<00:29, 14.95s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:01<00:15, 15.51s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:21<00:00, 16.23s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : MiniBatchKMeans
k (clusters)    : 16
Warmup Time     : 32.710 sec
Fine-tune Time  : 81.172 sec
Measured Inference Time: 1.864132 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:17<00:17, 17.60s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:33<00:00, 16.72s/epoch]
  super()._check_params_vs_input(X, default_n_init=3)


Epoch [2/2] - Loss: 0.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:33<00:49, 16.60s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:49<00:32, 16.36s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:05<00:16, 16.50s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:22<00:00, 16.49s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : MiniBatchKMeans
k (clusters)    : 32
Warmup Time     : 33.447 sec
Fine-tune Time  : 82.443 sec
Measured Inference Time: 1.556634 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:16<00:16, 16.11s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:32<00:00, 16.20s/epoch]


Epoch [2/2] - Loss: 0.0000


Training:  20%|██████████▍                                         | 1/5 [00:15<01:03, 15.83s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:31<00:46, 15.66s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:46<00:31, 15.53s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:02<00:15, 15.77s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:19<00:00, 16.00s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : EFDPC
k (clusters)    : 4
Warmup Time     : 32.407 sec
Fine-tune Time  : 80.002 sec
Measured Inference Time: 1.473874 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35761
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:16<00:16, 16.18s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:32<00:00, 16.27s/epoch]


Epoch [2/2] - Loss: 0.0000


Training:  20%|██████████▍                                         | 1/5 [00:16<01:07, 16.94s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:32<00:48, 16.17s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:48<00:31, 15.85s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:03<00:15, 15.59s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:18<00:00, 15.70s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : EFDPC
k (clusters)    : 8
Warmup Time     : 32.545 sec
Fine-tune Time  : 78.492 sec
Measured Inference Time: 1.498789 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:15<00:15, 15.11s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:33<00:00, 16.95s/epoch]


Epoch [2/2] - Loss: 0.0000


Training:  20%|██████████▍                                         | 1/5 [00:17<01:08, 17.15s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:34<00:50, 16.99s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:50<00:33, 16.72s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:07<00:16, 16.84s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:23<00:00, 16.71s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : EFDPC
k (clusters)    : 16
Warmup Time     : 33.930 sec
Fine-tune Time  : 83.572 sec
Measured Inference Time: 2.101874 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:17<00:17, 17.80s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:37<00:00, 18.54s/epoch]


Epoch [2/2] - Loss: 0.0000


Training:  20%|██████████▍                                         | 1/5 [00:17<01:09, 17.44s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:33<00:50, 16.73s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:49<00:32, 16.43s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [01:06<00:16, 16.55s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:22<00:00, 16.49s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : EFDPC
k (clusters)    : 32
Warmup Time     : 37.100 sec
Fine-tune Time  : 82.442 sec
Measured Inference Time: 1.397671 sec
Estimated Inference Time (proportional to params): 2.012929 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


===== Summary of all experiments (first 10 shown) =====
posthoc/all/KMeans/k=4 -> params: 2357937, est_inf_time: 2.012929, acc: 1.0000, f1: 1.0000
posthoc/all/KMeans/k=8 -> params: 2357937, est_inf_time: 2.012929, acc: 1.0000, f1: 1.0000
posthoc/all/KMeans/k=16 -> params: 2357937, est_inf_time: 2.012929, acc: 1.0000, f1: 1.0000
posthoc/all/KMeans/k=32 -> params: 2357937, est_inf_time: 2.012929, acc: 1.0000, f1: 1.0000
posthoc/all/MiniBatchKMeans/k=4 -> params: 2357937, est_inf_time: 2.012929, acc: 1.0000, f1: 1.00