In [None]:
import torch
from torch.utils.data import DataLoader
from models.cvae.feature_extraction import extract_features
from models.cvae.cvae import get_model
import os
import cv2
import numpy as np
from modules.dataset import SignatureFigDataset
import yaml
import random
from modules.clustering_utils import find_optimal_clusters, plot_results, analyze_clusters
from collections import defaultdict
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)


In [None]:
# Load the experiment configuration. A malformed YAML file makes every later
# cell meaningless, so yaml.YAMLError is allowed to propagate instead of being
# printed and then followed by an unrelated NameError (or a stale `config`
# left over from a previous run of this cell).
with open("config/config.yaml", "r") as f:
    config = yaml.safe_load(f)
print(config)

# Seed every random number generator used in this notebook.
seed = config['train_params']['seed']
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
# `device` is a torch.device object, not a string: compare its `.type`.
# Comparing the object itself with 'cuda' never matched, so this branch
# was previously skipped even on GPU machines.
if device.type == 'cuda':
    torch.cuda.manual_seed_all(seed)

In [None]:
# Data
signatures_fig_path = os.path.join("data", "a_fig")

if not os.path.exists(signatures_fig_path):
    raise FileNotFoundError(f"The directory {signatures_fig_path} does not exist.")

signature_fig_list = []
filenames = []  # Filename stems, kept index-aligned with signature_fig_list
# os.listdir returns entries in arbitrary order; sort them so the sample order
# (and therefore everything derived from it) is the same on every run.
for signature_fig_file in sorted(os.listdir(signatures_fig_path)):
    signature_fig_file_path = os.path.join(signatures_fig_path, signature_fig_file)
    signature_fig = cv2.imread(signature_fig_file_path, cv2.IMREAD_GRAYSCALE)
    if signature_fig is None:
        # cv2.imread signals an unreadable / non-image file by returning None
        # instead of raising; skip it rather than crash on .shape below.
        print(f"Skipping unreadable file: {signature_fig_file_path}")
        continue

    # Record the name only once the image is known to be usable, so names and
    # images cannot get out of step. The stem is stored without its extension.
    filenames.append(os.path.splitext(signature_fig_file)[0])
    # Add a leading sample axis and a trailing single-channel axis.
    signature_fig_list.append(signature_fig.reshape(1, *signature_fig.shape, 1))

if not signature_fig_list:
    raise ValueError(f"No readable images found in {signatures_fig_path}.")

signature_figs = np.concatenate(signature_fig_list, axis=0) # (N, H, W, C)

signature_fig_dataset = SignatureFigDataset(signature_figs, filenames)

dataloader = DataLoader(signature_fig_dataset, batch_size=config['train_params']['batch_size'], shuffle=True)

# The model is not built here: the checkpoint-loading cell constructs it right
# before loading the weights, so an instance created here would be discarded.

In [None]:
# Restore the trained CVAE from the checkpoint named in the config.
model_path = os.path.join(config['train_params']['task_name'], config['train_params']['ckpt_name'])

model = get_model(config).to(device)
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine
# (and vice versa) by remapping the stored tensors onto the active device.
model.load_state_dict(torch.load(model_path, map_location=device))
# Switch to inference mode before extracting features; this is a no-op if
# extract_features already does so.
model.eval()

# `filenames` is rebound to the names returned with the features, so that it
# stays aligned with the rows of `features` (the dataloader shuffles).
features, filenames = extract_features(model, dataloader, device)

# Choose the number of clusters via the project helper, then fit K-means.
max_clusters = 10
optimal_n_clusters, silhouette_scores = find_optimal_clusters(features, max_clusters)
print(f"\nOptimal number of clusters: {optimal_n_clusters}")

kmeans = KMeans(n_clusters=optimal_n_clusters, random_state=42)
cluster_labels = kmeans.fit_predict(features)

# Create cluster mapping: cluster id -> list of filenames assigned to it
clusters = defaultdict(list)
for filename, label in zip(filenames, cluster_labels):
    clusters[int(label)].append(filename)

# Reduce dimensionality for visualization
pca = PCA(n_components=2)
features_2d = pca.fit_transform(features)

plot_results(features_2d, cluster_labels, silhouette_scores, 
            'K-means Clustering of CVAE Features', 
            'Silhouette', start_n_clusters=2)

# Print cluster analysis
analyze_clusters(clusters)
