In [1]:
import os
import time
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.metrics import f1_score, recall_score
from sklearn.cluster import KMeans, MiniBatchKMeans
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import scipy.io as sio
from tqdm import tqdm
import copy
import math
import random

# reproducibility
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
random.seed(SEED)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# -------------------- Load Data --------------------
data_path = r"C:\Users\NNadi\Downloads\DKM-HybridSN-GRSL\Dataset\Hyperspectral oil spill detection datasets"
data = sio.loadmat(os.path.join(data_path, 'GM01.mat'))['img']
labels = sio.loadmat(os.path.join(data_path, 'GM01.mat'))['map']

# -------------------- Helper Functions --------------------
def splitTrainTestSet(X, y, testRatio, randomState=345):
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=testRatio, random_state=randomState, stratify=y
    )
    return X_train, X_test, y_train, y_test

def applyPCA(X, numComponents=75):
    newX = X.reshape(-1, X.shape[2])
    pca = PCA(n_components=numComponents, whiten=True)
    newX = pca.fit_transform(newX)
    newX = newX.reshape(X.shape[0], X.shape[1], numComponents)
    return newX, pca

def padWithZeros(X, margin=2):
    newX = np.zeros((X.shape[0]+2*margin, X.shape[1]+2*margin, X.shape[2]))
    newX[margin:X.shape[0]+margin, margin:X.shape[1]+margin, :] = X
    return newX

def createImageCubes(X, y, windowSize=5, removeZeroLabels=True):
    margin = (windowSize - 1) // 2
    paddedX = padWithZeros(X, margin=margin)
    patchesData = np.zeros((X.shape[0]*X.shape[1], windowSize, windowSize, X.shape[2]))
    patchesLabels = np.zeros((X.shape[0]*X.shape[1]))
    idx = 0
    for r in range(margin, paddedX.shape[0]-margin):
        for c in range(margin, paddedX.shape[1]-margin):
            patch = paddedX[r-margin:r+margin+1, c-margin:c+margin+1]
            patchesData[idx] = patch
            patchesLabels[idx] = y[r-margin, c-margin]
            idx += 1
    if removeZeroLabels:
        mask = patchesLabels > 0
        patchesData = patchesData[mask]
        patchesLabels = patchesLabels[mask] - 1
    return patchesData, patchesLabels

# Dataset / preprocessing parameters (kept same as your code)
windowSize = 3
test_ratio = 0.25
dataset = 'IP'
K = 30 if dataset == 'IP' else 15

X, pca = applyPCA(data, numComponents=K)
X, y = createImageCubes(X, labels, windowSize=windowSize)

# Split and convert to torch tensors
X_train_np, X_test_np, y_train_np, y_test_np = splitTrainTestSet(X, y, test_ratio)
X_train = torch.tensor(X_train_np, dtype=torch.float32, device=DEVICE)
y_train = torch.tensor(y_train_np, dtype=torch.long, device=DEVICE)
X_test  = torch.tensor(X_test_np, dtype=torch.float32, device=DEVICE)
y_test  = torch.tensor(y_test_np, dtype=torch.long, device=DEVICE)

S = X_train.shape[1]
L = X_train.shape[3]
n_classes = len(np.unique(y_train_np))

print("X_train shape:", X_train.shape)
print("X_test  shape:", X_test.shape)

X_train shape: torch.Size([22002, 3, 3, 30])
X_test  shape: torch.Size([7334, 3, 3, 30])


In [2]:
class SpectralSpatialTransformer(nn.Module):
    """
    Spectral-Spatial Transformer head replacing the conv2d + flatten step.
    Keeps the 3D conv stem similar to HybridSN and then applies a Transformer
    over spatial tokens where each token is a (channel * depth) feature vector.
    """
    def __init__(self, input_shape, n_classes,
                 embed_dim=256, n_heads=8, n_layers=4, ff_dim=512, attn_dropout=0.0, drop=0.4):
        super(SpectralSpatialTransformer, self).__init__()
        # input_shape expected as (S, _, L) or (S, S, L) - we only need S and L
        S, _, L = input_shape

        # 3D conv stem (keeps same kernel choices as your HybridSN)
        self.conv3d_1 = nn.Conv3d(1, 8, kernel_size=(3,3,7))
        self.conv3d_2 = nn.Conv3d(8,16, kernel_size=(1,1,5))
        self.conv3d_3 = nn.Conv3d(16,32, kernel_size=(1,1,3))

        # Transformer projection + encoder
        self.embed_dim = embed_dim
        # token projection (project per-token feature vector to embed_dim)
        self.token_proj = None  # will be created after dummy forward
        self.pos_emb = None
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_dim,
            nhead=n_heads,
            dim_feedforward=ff_dim,
            dropout=attn_dropout,
            batch_first=True  # so inputs are (B, seq_len, embed_dim)
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=n_layers)

        # FC head (kept same sizes as your original code)
        self.fc1 = None
        self.dropout1 = nn.Dropout(drop)
        self.fc2 = None
        self.dropout2 = nn.Dropout(drop)
        self.fc3 = None

        # Build modules that depend on S/L by running a dummy forward pass
        with torch.no_grad():
            # create dummy input in same layout as forward: (B,1,H,W,L)
            x_dummy = torch.zeros(1,1,S,S,L)
            x_dummy = F.relu(self.conv3d_1(x_dummy))
            x_dummy = F.relu(self.conv3d_2(x_dummy))
            x_dummy = F.relu(self.conv3d_3(x_dummy))
            B, C, H, W, D = x_dummy.shape  # H,W are spatial dims after convs
            token_dim = C * D              # per-spatial-location vector size

            # token projection: project token_dim -> embed_dim
            self.token_proj = nn.Linear(token_dim, embed_dim)

            # positional embedding for H*W tokens (learnable)
            seq_len = H * W
            self.pos_emb = nn.Parameter(torch.zeros(1, seq_len, embed_dim))

            # transformer's output is pooled -> pass to FCs
            # we'll pool to get a fixed vector of size embed_dim
            self.fc1 = nn.Linear(embed_dim, 256)
            self.fc2 = nn.Linear(256, 128)
            self.fc3 = nn.Linear(128, n_classes)

            # initialize pos_emb small
            nn.init.trunc_normal_(self.pos_emb, std=0.02)
            # init token_proj
            nn.init.xavier_uniform_(self.token_proj.weight)
            if self.token_proj.bias is not None:
                nn.init.zeros_(self.token_proj.bias)

    def forward(self, x):
        # x : (B, H, W, L) as in your dataset tensors
        x = x.unsqueeze(1)  # -> (B,1,H,W,L)
        x = F.relu(self.conv3d_1(x))
        x = F.relu(self.conv3d_2(x))
        x = F.relu(self.conv3d_3(x))
        B, C, H, W, D = x.shape

        # reshape to tokens: (B, H*W, C*D)
        x = x.view(B, C*D, H, W)          # (B, C*D, H, W)
        x = x.permute(0, 2, 3, 1).contiguous()  # (B, H, W, C*D)
        x = x.view(B, H*W, C*D)           # (B, seq_len, token_dim)

        # project tokens to embedding dim
        x = self.token_proj(x)            # (B, seq_len, embed_dim)

        # add positional embedding (broadcast batch)
        # self.pos_emb shape: (1, seq_len, embed_dim)
        x = x + self.pos_emb

        # transformer encoding (batch_first=True)
        x = self.transformer(x)           # (B, seq_len, embed_dim)

        # global pooling (mean over tokens)
        x = x.mean(dim=1)                 # (B, embed_dim)

        # fc head (similar to HybridSN)
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = F.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.fc3(x)                   # (B, n_classes)
        return x


# -------------------- Training --------------------
def train_model(model, X_train, y_train, epochs=1, lr=0.001, batch_size=None):
    # Note: original code did full-batch; keep same behavior for fairness unless batch_size is given
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    t0 = time.time()
    
    if batch_size is None:
        # full-batch (original)
        for epoch in tqdm(range(epochs), desc="Training", ncols=100, unit="epoch"):
            optimizer.zero_grad()
            outputs = model(X_train)
            loss = criterion(outputs, y_train)
            loss.backward()
            optimizer.step()
            tqdm.write(f"Epoch [{epoch+1}/{epochs}] - Loss: {loss.item():.4f}")
    else:
        # mini-batch training
        dataset = torch.utils.data.TensorDataset(X_train, y_train)
        loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
        for epoch in tqdm(range(epochs), desc="Training", ncols=100, unit="epoch"):
            epoch_loss = 0.0
            for xb, yb in loader:
                optimizer.zero_grad()
                outputs = model(xb)
                loss = criterion(outputs, yb)
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()
            tqdm.write(f"Epoch [{epoch+1}/{epochs}] - Loss: {epoch_loss/len(loader):.4f}")
    
    total_time = time.time() - t0
    return model, total_time

# -------------------- Evaluation --------------------
def evaluate_model(model, X_test, y_test):
    model.eval()
    t1 = time.time()
    with torch.no_grad():
        outputs = model(X_test)
        preds = outputs.argmax(dim=1).cpu().numpy()
    y_true = y_test.cpu().numpy()
    acc = np.mean(preds==y_true)
    f1 = f1_score(y_true, preds, average='macro')
    recall = recall_score(y_true, preds, average='macro')
    inf_time = time.time() - t1
    return inf_time, acc, f1, recall

# -------------------- DKM (existing) --------------------
def dkm_cluster_weights(model, k=8, iterations=10, layer_mask=None):
    """
    layer_mask: function name->bool to indicate whether to cluster that parameter. If None cluster all.
    Returns: compressed_params_effective (estimate in terms of k * vector_dim per clustered weight)
    """
    compressed_params = 0
    for name, param in model.named_parameters():
        if "weight" in name:
            if (layer_mask is not None) and (not layer_mask(name)):
                # skip clustering for this weight
                compressed_params += param.numel()
                continue
            W = param.data
            W_flat = W.view(W.shape[0], -1).clone()  # (out_channels, vector_dim)
            # initialize centroids by random selection
            idx = torch.randperm(W_flat.size(0))[:k]
            C = W_flat[idx].clone()
            for _ in range(iterations):
                dist = torch.cdist(W_flat, C)  # (N, k)
                A = torch.softmax(-dist, dim=1)  # soft assignments across k
                C = torch.mm(A.t(), W_flat) / (A.sum(0).unsqueeze(1)+1e-6)
            clustered_W = torch.mm(A, C)
            param.data = clustered_W.view(param.shape)
            compressed_params += k * W_flat.shape[1]  # store effective param count for cluster codebook
        elif "bias" in name:
            compressed_params += param.numel()
    return compressed_params

# -------------------- KMeans / MiniBatchKMeans clustering adapted to weights --------------------
def kmeans_cluster_weights(model, k=8, iterations=20, layer_mask=None, minibatch=False):
    compressed_params = 0
    for name, param in model.named_parameters():
        if "weight" in name:
            if (layer_mask is not None) and (not layer_mask(name)):
                compressed_params += param.numel()
                continue
            W = param.data
            W_flat = W.view(W.shape[0], -1).cpu().numpy()  # shape (N, D)
            if W_flat.shape[0] <= k:
                # cannot cluster fewer vectors than k
                compressed_params += W_flat.size
                continue
            if minibatch:
                km = MiniBatchKMeans(n_clusters=k, random_state=SEED, max_iter=iterations)
            else:
                km = KMeans(n_clusters=k, random_state=SEED, n_init=3, max_iter=iterations)
            labels = km.fit_predict(W_flat)
            centroids = km.cluster_centers_  # (k, D)
            # assign back using centroid of assigned cluster (hard assignment)
            clustered = centroids[labels]
            clustered_t = torch.tensor(clustered, dtype=param.dtype, device=param.device)
            param.data = clustered_t.view(param.shape)
            compressed_params += k * W_flat.shape[1]
        elif "bias" in name:
            compressed_params += param.numel()
    return compressed_params

# -------------------- EFDPC (density peak clustering variant) for weights --------------------
def efdpc_cluster_weights(model, k=8, layer_mask=None):
    """
    Adaptation of EFDPC (density-peak based) for clustering weight-vectors.
    We compute pairwise distances between weight vectors in a layer,
    compute density (rho) using exponential kernel, compute delta (distance to nearest higher-rho),
    compute gamma = rho * delta^2, pick top-k as centroids, and soft-assign using softmax(-dist).
    Returns compressed params estimate k * vector_dim per clustered weight.
    """
    compressed_params = 0
    for name, param in model.named_parameters():
        if "weight" in name:
            if (layer_mask is not None) and (not layer_mask(name)):
                compressed_params += param.numel()
                continue
            W = param.data
            W_flat = W.view(W.shape[0], -1).cpu().numpy()  # (N, D)
            N = W_flat.shape[0]
            if N <= k:
                compressed_params += W_flat.size
                continue
            # compute pairwise distance matrix (NxN)
            # to save memory, compute condensed distances
            from scipy.spatial.distance import pdist, squareform
            dist_vec = pdist(W_flat, metric='euclidean')
            Dist_matrix = squareform(dist_vec)
            # choose dc using a small percentile, similar to MATLAB percent=2
            percent = 2.0
            tril_vals = Dist_matrix[np.tril_indices(N, k=-1)]
            sda = np.sort(tril_vals)
            if len(sda) == 0:
                dc = 1.0
            else:
                pos = max(0, int(len(sda) * (percent/100.0) ) - 1)
                pos = min(len(sda)-1, pos)
                dini = sda[pos]
                # add small epsilon to avoid zero
                dc = max(dini / math.exp(10.0/N), 1e-6)

            # compute rho
            rho = np.zeros(N)
            for i in range(N):
                # exponential kernel
                rho[i] = np.sum(np.exp(- (Dist_matrix[i,:] / (dc+1e-12))**2 )) - 1.0  # exclude self
            # compute delta
            ordrho = np.argsort(-rho)  # descending by rho
            delta = np.zeros(N)
            maxD = np.max(Dist_matrix)
            delta[ordrho[0]] = maxD
            for idx in range(1, N):
                i = ordrho[idx]
                # distance to nearest higher density point
                higher = ordrho[:idx]
                delta[i] = np.min(Dist_matrix[i, higher])
            # normalize rho and delta
            if rho.max() - rho.min() > 0:
                rho_n = (rho - rho.min()) / (rho.max() - rho.min())
            else:
                rho_n = rho
            if delta.max() - delta.min() > 0:
                delta_n = (delta - delta.min()) / (delta.max() - delta.min())
            else:
                delta_n = delta
            gamma = rho_n * (delta_n**2)
            # pick top-k gamma indices as centroids
            topk_idx = np.argsort(-gamma)[:k]
            C = W_flat[topk_idx]  # (k, D)
            # soft assignment via distances
            from scipy.spatial.distance import cdist
            dist = cdist(W_flat, C)  # (N, k)
            A = np.exp(-dist)  # soft weights
            A = A / (A.sum(axis=1, keepdims=True) + 1e-12)
            clustered = A.dot(C)
            clustered_t = torch.tensor(clustered, dtype=param.dtype, device=param.device)
            param.data = clustered_t.view(param.shape)
            compressed_params += k * W_flat.shape[1]
        elif "bias" in name:
            compressed_params += param.numel()
    return compressed_params

# -------------------- Layer selection helper --------------------
def make_layer_mask_exclude_first_last(model):
    """
    Returns a function that given parameter name returns True if that parameter should be clustered.
    This implementation finds the ordered list of 'weight' parameter names and excludes the first and last.
    """
    weight_names = [name for name, _ in model.named_parameters() if "weight" in name]
    exclude = set()
    if len(weight_names) >= 1:
        exclude.add(weight_names[0])
    if len(weight_names) >= 2:
        exclude.add(weight_names[-1])
    def mask(name):
        if "weight" not in name:
            return False  # don't cluster biases in layer-selection sense (we keep biases unclustered count-wise above)
        return name not in exclude
    return mask


In [3]:

def make_layer_mask_all(model):
    def mask(name):
        return "weight" in name
    return mask

# -------------------- Utility: count total params --------------------
def count_params(model):
    return sum(p.numel() for p in model.parameters())

# -------------------- Utility: get compressed params from clustering run (already returned) --------------------

# -------------------- Utility: estimate inference time proportionally --------------------
def estimate_inf_time(num_params, baseline_params, baseline_inf_time):
    # avoid zero division
    if baseline_params == 0:
        return baseline_inf_time
    return baseline_inf_time * (num_params / baseline_params)

# -------------------- Run Baseline (kept exactly like your original configuration 1) --------------------
model = SpectralSpatialTransformer((S,S,L), n_classes).to(DEVICE)
print("\n-- Training baseline model (this preserves your original configuration 1 behavior) --")
model, train_time = train_model(model, X_train, y_train, epochs=5)  # identical to your original
inf_time_measured, acc, f1, rec = evaluate_model(model, X_test, y_test)

baseline_params = count_params(model)
baseline_inf_time_measured = inf_time_measured
baseline_results = {
    "train_time": train_time,
    "measured_inf_time": baseline_inf_time_measured,
    "params": baseline_params,
    "acc": acc,
    "f1": f1,
    "recall": rec
}

print("\n===== Baseline HybridSN =====")
print(f"Training Time : {train_time:.3f} sec")
print(f"Measured Inference Time: {inf_time_measured:.6f} sec")
print(f"Parameters    : {baseline_params}")
print(f"Accuracy      : {acc:.4f}")
print(f"F1 Score      : {f1:.4f}")
print(f"Recall        : {rec:.4f}")

# -------------------- Full experimental grid --------------------
# Training stages: 'posthoc' (full baseline train -> cluster -> finetune),
#                  'partial' (warm-up epochs -> cluster -> finetune)
training_stages = ['posthoc', 'partial']
layer_selections = ['all', 'exclude_first_last']
clustering_methods = ['KMeans', 'MiniBatchKMeans', 'EFDPC']  # 4 clustering options (DKM is original)

# hyperparameters for experiments
k_values = [4, 8, 16, 32]  # cluster counts to test
warmup_epochs = 2  # partial warm-up epochs (you can change)
fine_tune_epochs = 5  # fine-tune after clustering
lr_finetune = 0.001

# We'll store results
exp_results = []

# For fairness, reuse baseline trained model weights as starting point (posthoc) or for partial warmup start from scratch and warmup
for stage in training_stages:
    for layer_sel in layer_selections:
        for method in clustering_methods:
            for k in k_values:
                # Prepare a fresh model for this run
                model_k = SpectralSpatialTransformer((S,S,L), n_classes).to(DEVICE)
                # Copy baseline weights for posthoc scenario
                if stage == 'posthoc':
                    model_k.load_state_dict(model.state_dict())
                    warmup_time = 0.0
                elif stage == 'partial':
                    # For partial warmup we start from new random init and train warmup_epochs
                    model_k, warmup_time = train_model(model_k, X_train, y_train, epochs=warmup_epochs, lr=0.001)
                else:
                    raise ValueError("Unknown training stage")

                # Choose layer mask
                if layer_sel == 'all':
                    layer_mask = make_layer_mask_all(model_k)
                else:
                    layer_mask = make_layer_mask_exclude_first_last(model_k)

                # Apply clustering based on method
                if method == 'DKM':
                    compressed_params = dkm_cluster_weights(model_k, k=k, iterations=10, layer_mask=layer_mask)
                elif method == 'KMeans':
                    compressed_params = kmeans_cluster_weights(model_k, k=k, iterations=20, layer_mask=layer_mask, minibatch=False)
                elif method == 'MiniBatchKMeans':
                    compressed_params = kmeans_cluster_weights(model_k, k=k, iterations=50, layer_mask=layer_mask, minibatch=True)
                elif method == 'EFDPC':
                    compressed_params = efdpc_cluster_weights(model_k, k=k, layer_mask=layer_mask)
                else:
                    raise ValueError("Unknown clustering method")

                # Fine-tune the clustered model
                model_k, ft_time = train_model(model_k, X_train, y_train, epochs=fine_tune_epochs, lr=lr_finetune)

                # Evaluate after clustering + fine-tuning
                inf_time_measured_k, acc_k, f1_k, rec_k = evaluate_model(model_k, X_test, y_test)
                total_params_k = count_params(model_k)

                # Compute estimated inference time based only on parameter count relative to baseline
                estimated_inf_time_k = estimate_inf_time(total_params_k, baseline_params, baseline_inf_time_measured)

                # Save & print results
                result = {
                    "stage": stage,
                    "layer_selection": layer_sel,
                    "method": method,
                    "k": k,
                    "warmup_time": warmup_time if stage == 'partial' else 0.0,
                    "cluster_time": None,
                    "fine_tune_time": ft_time,
                    "measured_inf_time": inf_time_measured_k,
                    "estimated_inf_time": estimated_inf_time_k,
                    "params": total_params_k,
                    "compressed_params_effective": compressed_params,
                    "acc": acc_k,
                    "f1": f1_k,
                    "recall": rec_k
                }
                exp_results.append(result)

                # Print concise summary for this config
                print("\n----- Experiment Result -----")
                print(f"Stage           : {stage}")
                print(f"Layer selection : {layer_sel}")
                print(f"Clustering      : {method}")
                print(f"k (clusters)    : {k}")
                if stage == 'partial':
                    print(f"Warmup Time     : {warmup_time:.3f} sec")
                print(f"Fine-tune Time  : {ft_time:.3f} sec")
                print(f"Measured Inference Time: {inf_time_measured_k:.6f} sec")
                print(f"Estimated Inference Time (proportional to params): {estimated_inf_time_k:.6f} sec")
                print(f"Parameters (model): {total_params_k}")
                print(f"Compressed Params (effective/codebook): {compressed_params}")
                print(f"Accuracy         : {acc_k:.4f}")
                print(f"F1 Score         : {f1_k:.4f}")
                print(f"Recall           : {rec_k:.4f}")

# Optionally, print a short table summary
print("\n\n===== Summary of all experiments (first 10 shown) =====")
for r in exp_results[:10]:
    print(f"{r['stage']}/{r['layer_selection']}/{r['method']}/k={r['k']} -> params: {r['params']}, est_inf_time: {r['estimated_inf_time']:.6f}, acc: {r['acc']:.4f}, f1: {r['f1']:.4f}")

# Save results to a numpy file for downstream analysis if desired
import json
out_file = r"C:\Users\NNadi\Downloads\DKM-HybridSN-GRSL\res\oill_spill_GM01_SST.json"
with open(out_file, "w") as f:
    json.dump(exp_results, f, indent=2)
print(f"\nSaved experiment results to {out_file}")



-- Training baseline model (this preserves your original configuration 1 behavior) --


Training:  20%|██████████▍                                         | 1/5 [00:05<00:20,  5.22s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:10<00:15,  5.10s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:15<00:10,  5.08s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:20<00:04,  4.97s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:25<00:00,  5.01s/epoch]


Epoch [5/5] - Loss: 0.0000

===== Baseline HybridSN =====
Training Time : 25.072 sec
Measured Inference Time: 0.419203 sec
Parameters    : 2357937
Accuracy      : 1.0000
F1 Score      : 1.0000
Recall        : 1.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:04<00:19,  4.84s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:09<00:14,  4.81s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:14<00:09,  4.86s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:19<00:04,  4.87s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:24<00:00,  4.86s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : KMeans
k (clusters)    : 4
Fine-tune Time  : 24.297 sec
Measured Inference Time: 0.460060 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35509
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:04<00:18,  4.72s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:09<00:14,  4.78s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:14<00:09,  4.86s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:19<00:04,  4.83s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:23<00:00,  4.78s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : KMeans
k (clusters)    : 8
Fine-tune Time  : 23.923 sec
Measured Inference Time: 0.417167 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:04<00:17,  4.35s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:09<00:13,  4.58s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:14<00:09,  4.76s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:18<00:04,  4.72s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:23<00:00,  4.70s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : KMeans
k (clusters)    : 16
Fine-tune Time  : 23.497 sec
Measured Inference Time: 0.437920 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:04<00:19,  4.86s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:09<00:14,  4.87s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:14<00:09,  4.89s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:19<00:04,  4.89s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:24<00:00,  4.92s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : KMeans
k (clusters)    : 32
Fine-tune Time  : 24.629 sec
Measured Inference Time: 0.509842 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

  super()._check_params_vs_input(X, default_n_init=3)
Training:  20%|██████████▍                                         | 1/5 [00:04<00:19,  4.77s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:09<00:14,  4.75s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:14<00:09,  4.74s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:19<00:04,  4.79s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:23<00:00,  4.79s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : MiniBatchKMeans
k (clusters)    : 4
Fine-tune Time  : 23.981 sec
Measured Inference Time: 0.460842 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35509
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Training:  20%|██████████▍                                         | 1/5 [00:04<00:18,  4.50s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:09<00:14,  4.70s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:14<00:09,  4.71s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:18<00:04,  4.70s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:23<00:00,  4.70s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : MiniBatchKMeans
k (clusters)    : 8
Fine-tune Time  : 23.500 sec
Measured Inference Time: 0.380163 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:09<00:14,  4.84s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:14<00:09,  4.89s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:19<00:04,  4.89s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:24<00:00,  4.91s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : MiniBatchKMeans
k (clusters)    : 16
Fine-tune Time  : 24.574 sec
Measured Inference Time: 0.429277 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:09<00:14,  4.84s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:14<00:09,  4.92s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:19<00:04,  4.91s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:24<00:00,  4.88s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : MiniBatchKMeans
k (clusters)    : 32
Fine-tune Time  : 24.414 sec
Measured Inference Time: 0.435836 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  20%|██████████▍                                         | 1/5 [00:04<00:19,  4.85s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:09<00:14,  4.82s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:14<00:09,  4.73s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:18<00:04,  4.67s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:23<00:00,  4.70s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : EFDPC
k (clusters)    : 4
Fine-tune Time  : 23.506 sec
Measured Inference Time: 0.400063 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35509
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  20%|██████████▍                                         | 1/5 [00:04<00:19,  4.79s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:09<00:14,  4.71s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:13<00:09,  4.62s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:18<00:04,  4.44s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:22<00:00,  4.51s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : EFDPC
k (clusters)    : 8
Fine-tune Time  : 22.570 sec
Measured Inference Time: 0.424653 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  20%|██████████▍                                         | 1/5 [00:04<00:18,  4.72s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:09<00:13,  4.47s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:13<00:09,  4.57s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:18<00:04,  4.67s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:23<00:00,  4.69s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : EFDPC
k (clusters)    : 16
Fine-tune Time  : 23.450 sec
Measured Inference Time: 0.422081 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  20%|██████████▍                                         | 1/5 [00:04<00:19,  4.92s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:09<00:14,  4.74s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:13<00:09,  4.60s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:18<00:04,  4.46s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:22<00:00,  4.57s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : all
Clustering      : EFDPC
k (clusters)    : 32
Fine-tune Time  : 22.857 sec
Measured Inference Time: 0.390438 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:04<00:17,  4.26s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:08<00:13,  4.53s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:13<00:09,  4.57s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:17<00:04,  4.46s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:22<00:00,  4.50s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : KMeans
k (clusters)    : 4
Fine-tune Time  : 22.516 sec
Measured Inference Time: 0.419998 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35761
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:04<00:18,  4.65s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:09<00:14,  4.80s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:14<00:09,  4.80s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:19<00:04,  4.78s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:23<00:00,  4.71s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : KMeans
k (clusters)    : 8
Fine-tune Time  : 23.568 sec
Measured Inference Time: 0.441072 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:04<00:17,  4.36s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:08<00:13,  4.51s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:13<00:09,  4.63s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:18<00:04,  4.69s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:23<00:00,  4.64s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : KMeans
k (clusters)    : 16
Fine-tune Time  : 23.188 sec
Measured Inference Time: 0.448674 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:04<00:17,  4.43s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:08<00:13,  4.42s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:13<00:09,  4.58s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:18<00:04,  4.64s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:23<00:00,  4.64s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : KMeans
k (clusters)    : 32
Fine-tune Time  : 23.218 sec
Measured Inference Time: 0.440308 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Training:  20%|██████████▍                                         | 1/5 [00:04<00:19,  4.93s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:09<00:14,  4.97s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:15<00:10,  5.44s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:21<00:05,  5.37s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:26<00:00,  5.37s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : MiniBatchKMeans
k (clusters)    : 4
Fine-tune Time  : 26.845 sec
Measured Inference Time: 0.474144 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35761
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Training:  20%|██████████▍                                         | 1/5 [00:04<00:17,  4.43s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:09<00:13,  4.55s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:14<00:10,  5.05s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:21<00:05,  5.72s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:27<00:00,  5.57s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : MiniBatchKMeans
k (clusters)    : 8
Fine-tune Time  : 27.880 sec
Measured Inference Time: 0.661434 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:13<00:19,  6.56s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:19<00:13,  6.57s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:30<00:08,  8.26s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:41<00:00,  8.27s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : MiniBatchKMeans
k (clusters)    : 16
Fine-tune Time  : 41.352 sec
Measured Inference Time: 1.061635 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:23<00:35, 11.77s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:36<00:24, 12.29s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:48<00:12, 12.28s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:59<00:00, 11.82s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : MiniBatchKMeans
k (clusters)    : 32
Fine-tune Time  : 59.109 sec
Measured Inference Time: 0.999545 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  20%|██████████▍                                         | 1/5 [00:15<01:03, 15.98s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:28<00:41, 13.88s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:43<00:28, 14.39s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:57<00:14, 14.17s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:11<00:00, 14.23s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : EFDPC
k (clusters)    : 4
Fine-tune Time  : 71.154 sec
Measured Inference Time: 0.828614 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35761
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  20%|██████████▍                                         | 1/5 [00:11<00:46, 11.68s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:21<00:31, 10.46s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:31<00:20, 10.38s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:43<00:11, 11.18s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:55<00:00, 11.09s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : EFDPC
k (clusters)    : 8
Fine-tune Time  : 55.447 sec
Measured Inference Time: 1.023996 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  20%|██████████▍                                         | 1/5 [00:10<00:41, 10.48s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:21<00:32, 10.89s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:34<00:23, 11.80s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:44<00:11, 11.14s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:58<00:00, 11.75s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : EFDPC
k (clusters)    : 16
Fine-tune Time  : 58.748 sec
Measured Inference Time: 1.551382 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  20%|██████████▍                                         | 1/5 [00:12<00:49, 12.34s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:21<00:30, 10.21s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:35<00:24, 12.14s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:46<00:11, 11.76s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:57<00:00, 11.53s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : posthoc
Layer selection : exclude_first_last
Clustering      : EFDPC
k (clusters)    : 32
Fine-tune Time  : 57.668 sec
Measured Inference Time: 1.762851 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:15<00:15, 15.05s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:25<00:00, 12.67s/epoch]


Epoch [2/2] - Loss: 0.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:14<00:58, 14.55s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:26<00:38, 12.89s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:40<00:27, 13.63s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:54<00:13, 13.62s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:06<00:00, 13.37s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : KMeans
k (clusters)    : 4
Warmup Time     : 25.349 sec
Fine-tune Time  : 66.856 sec
Measured Inference Time: 1.052132 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35509
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:13<00:13, 13.90s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:25<00:00, 12.51s/epoch]


Epoch [2/2] - Loss: 0.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:07<00:29,  7.32s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:20<00:32, 10.68s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:33<00:23, 11.69s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:46<00:12, 12.15s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:58<00:00, 11.69s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : KMeans
k (clusters)    : 8
Warmup Time     : 25.030 sec
Fine-tune Time  : 58.470 sec
Measured Inference Time: 0.956068 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:09<00:09,  9.77s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:20<00:00, 10.31s/epoch]


Epoch [2/2] - Loss: 0.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:11<00:44, 11.13s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:20<00:30, 10.19s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:33<00:23, 11.53s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:44<00:11, 11.20s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:54<00:00, 10.85s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : KMeans
k (clusters)    : 16
Warmup Time     : 20.644 sec
Fine-tune Time  : 54.276 sec
Measured Inference Time: 0.756611 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:09<00:09,  9.22s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:16<00:00,  8.32s/epoch]


Epoch [2/2] - Loss: 0.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:13<00:54, 13.57s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:27<00:41, 13.96s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:40<00:26, 13.24s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:51<00:12, 12.58s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:01<00:00, 12.38s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : KMeans
k (clusters)    : 32
Warmup Time     : 16.650 sec
Fine-tune Time  : 61.897 sec
Measured Inference Time: 0.952147 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:10<00:10, 10.17s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:21<00:00, 10.74s/epoch]
  super()._check_params_vs_input(X, default_n_init=3)


Epoch [2/2] - Loss: 0.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Training:  20%|██████████▍                                         | 1/5 [00:10<00:40, 10.06s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:17<00:25,  8.53s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:23<00:15,  7.58s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:30<00:07,  7.06s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:41<00:00,  8.21s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : MiniBatchKMeans
k (clusters)    : 4
Warmup Time     : 21.486 sec
Fine-tune Time  : 41.057 sec
Measured Inference Time: 0.985475 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35509
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:09<00:09,  9.93s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:19<00:00,  9.95s/epoch]
  super()._check_params_vs_input(X, default_n_init=3)


Epoch [2/2] - Loss: 0.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:20<00:30, 10.22s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:31<00:21, 10.52s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:41<00:10, 10.46s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:55<00:00, 11.11s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : MiniBatchKMeans
k (clusters)    : 8
Warmup Time     : 19.911 sec
Fine-tune Time  : 55.557 sec
Measured Inference Time: 1.044177 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:14<00:14, 14.97s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:26<00:00, 13.30s/epoch]
  super()._check_params_vs_input(X, default_n_init=3)


Epoch [2/2] - Loss: 0.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:23<00:35, 11.93s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:36<00:24, 12.47s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:46<00:11, 11.49s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:59<00:00, 11.81s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : MiniBatchKMeans
k (clusters)    : 16
Warmup Time     : 26.620 sec
Fine-tune Time  : 59.057 sec
Measured Inference Time: 1.618371 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:13<00:13, 13.42s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:25<00:00, 12.85s/epoch]
  super()._check_params_vs_input(X, default_n_init=3)


Epoch [2/2] - Loss: 0.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:16<00:24,  8.24s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:23<00:14,  7.36s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:30<00:07,  7.42s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:37<00:00,  7.48s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : MiniBatchKMeans
k (clusters)    : 32
Warmup Time     : 25.718 sec
Fine-tune Time  : 37.387 sec
Measured Inference Time: 0.529928 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:10<00:10, 10.86s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:20<00:00, 10.47s/epoch]


Epoch [2/2] - Loss: 0.0000


Training:  20%|██████████▍                                         | 1/5 [00:09<00:39,  9.94s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:19<00:29,  9.93s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:29<00:19,  9.99s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:40<00:10, 10.16s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:50<00:00, 10.08s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : EFDPC
k (clusters)    : 4
Warmup Time     : 20.946 sec
Fine-tune Time  : 50.419 sec
Measured Inference Time: 0.945567 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35509
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:09<00:09,  9.50s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:20<00:00, 10.38s/epoch]


Epoch [2/2] - Loss: 0.0000


Training:  20%|██████████▍                                         | 1/5 [00:11<00:46, 11.54s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:22<00:33, 11.02s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:30<00:19,  9.87s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:40<00:09,  9.80s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:51<00:00, 10.32s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : EFDPC
k (clusters)    : 8
Warmup Time     : 20.766 sec
Fine-tune Time  : 51.596 sec
Measured Inference Time: 0.969323 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:10<00:10, 10.25s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:20<00:00, 10.30s/epoch]


Epoch [2/2] - Loss: 0.0000


Training:  20%|██████████▍                                         | 1/5 [00:13<00:53, 13.28s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:24<00:36, 12.05s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:35<00:22, 11.46s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:45<00:10, 10.87s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:57<00:00, 11.51s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : EFDPC
k (clusters)    : 16
Warmup Time     : 20.599 sec
Fine-tune Time  : 57.548 sec
Measured Inference Time: 1.034179 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:12<00:12, 12.07s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:25<00:00, 12.55s/epoch]


Epoch [2/2] - Loss: 0.0000


Training:  20%|██████████▍                                         | 1/5 [00:10<00:41, 10.33s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:22<00:34, 11.55s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:32<00:21, 10.72s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:42<00:10, 10.42s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:52<00:00, 10.52s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : all
Clustering      : EFDPC
k (clusters)    : 32
Warmup Time     : 25.106 sec
Fine-tune Time  : 52.593 sec
Measured Inference Time: 1.739072 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:11<00:11, 11.88s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:22<00:00, 11.20s/epoch]


Epoch [2/2] - Loss: 0.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:09<00:39,  9.92s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:23<00:36, 12.06s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:33<00:22, 11.13s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:43<00:10, 10.75s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:53<00:00, 10.76s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : KMeans
k (clusters)    : 4
Warmup Time     : 22.416 sec
Fine-tune Time  : 53.823 sec
Measured Inference Time: 0.852098 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35761
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:09<00:09,  9.75s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:17<00:00,  8.92s/epoch]


Epoch [2/2] - Loss: 0.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:10<00:40, 10.09s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:20<00:30, 10.06s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:30<00:20, 10.12s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:40<00:10, 10.16s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:50<00:00, 10.19s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : KMeans
k (clusters)    : 8
Warmup Time     : 17.852 sec
Fine-tune Time  : 50.964 sec
Measured Inference Time: 0.883286 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:09<00:09,  9.94s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:20<00:00, 10.03s/epoch]


Epoch [2/2] - Loss: 0.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:10<00:40, 10.19s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:20<00:30, 10.27s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:30<00:20, 10.01s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:37<00:09,  9.03s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:48<00:00,  9.69s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : KMeans
k (clusters)    : 16
Warmup Time     : 20.066 sec
Fine-tune Time  : 48.467 sec
Measured Inference Time: 1.045360 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:10<00:10, 10.42s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:20<00:00, 10.33s/epoch]


Epoch [2/2] - Loss: 0.0000


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
Training:  20%|██████████▍                                         | 1/5 [00:10<00:41, 10.33s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:20<00:30, 10.23s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:30<00:20, 10.30s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:41<00:10, 10.45s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:51<00:00, 10.34s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : KMeans
k (clusters)    : 32
Warmup Time     : 20.666 sec
Fine-tune Time  : 51.691 sec
Measured Inference Time: 1.019161 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:10<00:10, 10.17s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:19<00:00,  9.57s/epoch]
  super()._check_params_vs_input(X, default_n_init=3)


Epoch [2/2] - Loss: 0.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:20<00:31, 10.36s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:30<00:19,  9.92s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:40<00:10, 10.02s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:52<00:00, 10.50s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : MiniBatchKMeans
k (clusters)    : 4
Warmup Time     : 19.154 sec
Fine-tune Time  : 52.487 sec
Measured Inference Time: 1.022154 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35761
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:10<00:10, 10.21s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:20<00:00, 10.02s/epoch]
  super()._check_params_vs_input(X, default_n_init=3)


Epoch [2/2] - Loss: 0.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:17<00:26,  8.80s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:26<00:18,  9.31s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:36<00:09,  9.49s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:46<00:00,  9.32s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : MiniBatchKMeans
k (clusters)    : 8
Warmup Time     : 20.061 sec
Fine-tune Time  : 46.613 sec
Measured Inference Time: 1.051184 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:09<00:09,  9.68s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:19<00:00,  9.89s/epoch]
  super()._check_params_vs_input(X, default_n_init=3)


Epoch [2/2] - Loss: 0.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:19<00:29,  9.96s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:29<00:19,  9.83s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:36<00:08,  8.80s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:44<00:00,  8.96s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : MiniBatchKMeans
k (clusters)    : 16
Warmup Time     : 19.804 sec
Fine-tune Time  : 44.811 sec
Measured Inference Time: 1.103717 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:12<00:12, 12.27s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:21<00:00, 11.00s/epoch]
  super()._check_params_vs_input(X, default_n_init=3)


Epoch [2/2] - Loss: 0.0000


  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_input(X, default_n_init=3)
  super()._check_params_vs_i

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:25<00:37, 12.62s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:40<00:27, 13.51s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:53<00:13, 13.22s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [01:03<00:00, 12.61s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : MiniBatchKMeans
k (clusters)    : 32
Warmup Time     : 22.007 sec
Fine-tune Time  : 63.043 sec
Measured Inference Time: 0.991542 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:09<00:09,  9.02s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:15<00:00,  7.73s/epoch]


Epoch [2/2] - Loss: 0.0000


Training:  20%|██████████▍                                         | 1/5 [00:08<00:35,  8.75s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:18<00:28,  9.47s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:28<00:19,  9.54s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:38<00:09,  9.75s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:48<00:00,  9.61s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : EFDPC
k (clusters)    : 4
Warmup Time     : 15.470 sec
Fine-tune Time  : 48.049 sec
Measured Inference Time: 1.054472 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 35761
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:09<00:09,  9.89s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:19<00:00,  9.78s/epoch]


Epoch [2/2] - Loss: 0.0000


Training:  20%|██████████▍                                         | 1/5 [00:09<00:39,  9.84s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:19<00:29,  9.91s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:29<00:19,  9.98s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:40<00:10, 10.04s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:49<00:00,  9.92s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : EFDPC
k (clusters)    : 8
Warmup Time     : 19.563 sec
Fine-tune Time  : 49.618 sec
Measured Inference Time: 0.996089 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 60977
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:06<00:06,  6.64s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:12<00:00,  6.45s/epoch]


Epoch [2/2] - Loss: 0.0000


Training:  20%|██████████▍                                         | 1/5 [00:10<00:40, 10.16s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:19<00:29,  9.91s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:29<00:19,  9.79s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:39<00:09,  9.76s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:48<00:00,  9.76s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : EFDPC
k (clusters)    : 16
Warmup Time     : 12.909 sec
Fine-tune Time  : 48.792 sec
Measured Inference Time: 1.083853 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 111409
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


Training:  50%|██████████████████████████                          | 1/2 [00:10<00:10, 10.20s/epoch]

Epoch [1/2] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 2/2 [00:21<00:00, 10.86s/epoch]


Epoch [2/2] - Loss: 0.0000


Training:  20%|██████████▍                                         | 1/5 [00:10<00:40, 10.18s/epoch]

Epoch [1/5] - Loss: 0.0000


Training:  40%|████████████████████▊                               | 2/5 [00:19<00:29,  9.82s/epoch]

Epoch [2/5] - Loss: 0.0000


Training:  60%|███████████████████████████████▏                    | 3/5 [00:30<00:20, 10.09s/epoch]

Epoch [3/5] - Loss: 0.0000


Training:  80%|█████████████████████████████████████████▌          | 4/5 [00:39<00:09,  9.99s/epoch]

Epoch [4/5] - Loss: 0.0000


Training: 100%|████████████████████████████████████████████████████| 5/5 [00:48<00:00,  9.63s/epoch]


Epoch [5/5] - Loss: 0.0000

----- Experiment Result -----
Stage           : partial
Layer selection : exclude_first_last
Clustering      : EFDPC
k (clusters)    : 32
Warmup Time     : 21.738 sec
Fine-tune Time  : 48.162 sec
Measured Inference Time: 0.514715 sec
Estimated Inference Time (proportional to params): 0.419203 sec
Parameters (model): 2357937
Compressed Params (effective/codebook): 211633
Accuracy         : 1.0000
F1 Score         : 1.0000
Recall           : 1.0000


===== Summary of all experiments (first 10 shown) =====
posthoc/all/KMeans/k=4 -> params: 2357937, est_inf_time: 0.419203, acc: 1.0000, f1: 1.0000
posthoc/all/KMeans/k=8 -> params: 2357937, est_inf_time: 0.419203, acc: 1.0000, f1: 1.0000
posthoc/all/KMeans/k=16 -> params: 2357937, est_inf_time: 0.419203, acc: 1.0000, f1: 1.0000
posthoc/all/KMeans/k=32 -> params: 2357937, est_inf_time: 0.419203, acc: 1.0000, f1: 1.0000
posthoc/all/MiniBatchKMeans/k=4 -> params: 2357937, est_inf_time: 0.419203, acc: 1.0000, f1: 1.00