# In [2]:
# clustering_and_autoencoder.py
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import pairwise_distances_argmin_min
from sklearn.preprocessing import StandardScaler

from resnet_extractor import extract_embeddings

# Load the embedding matrix together with its accompanying file names
# (presumably one row per image under "test_images" -- confirm against
# resnet_extractor.extract_embeddings).
X, filenames = extract_embeddings("test_images")

# -------- KMEANS --------
# Build the estimator first, then fit it in place on the embeddings.
kmeans = KMeans(n_clusters=3, random_state=0)
kmeans.fit(X)

# For every row of X: the index of its nearest cluster centre (`closest`)
# and the distance to that centre (`distances`), used below as the
# KMeans deviation score.
closest, distances = pairwise_distances_argmin_min(
    X,
    kmeans.cluster_centers_,
)

# -------- AUTOENCODER --------
class AutoEncoder(nn.Module):
    """Fully connected autoencoder that compresses vectors and rebuilds them.

    The encoder maps ``input_dim -> hidden_dim -> latent_dim`` and the
    decoder mirrors it back to ``input_dim``; each half has a single ReLU
    between its two linear layers. The default sizes (512, 128, 64)
    reproduce the architecture this class originally hard-coded, so
    ``AutoEncoder()`` behaves exactly as before.

    Args:
        input_dim: Length of each input vector (and of each reconstruction).
        hidden_dim: Width of the intermediate layer in encoder and decoder.
        latent_dim: Length of the bottleneck code produced by the encoder.
    """

    def __init__(
        self,
        input_dim: int = 512,
        hidden_dim: int = 128,
        latent_dim: int = 64,
    ) -> None:
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, latent_dim),
        )
        # No activation on the final layer: the output is an unconstrained
        # reconstruction of the input vector.
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Encode ``x`` to the latent space and return its reconstruction.

        Args:
            x: Tensor whose last dimension equals ``input_dim``.

        Returns:
            A tensor with the same shape as ``x``.
        """
        z = self.encoder(x)
        return self.decoder(z)

# Seed torch so weight initialisation -- and therefore the loss curve and
# the reconstruction errors -- is repeatable, in line with the fixed
# random_state used for KMeans above.
torch.manual_seed(0)

model = AutoEncoder()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Full-batch training: every epoch runs one optimisation step on all of X,
# with the input itself as the reconstruction target.
X_tensor = torch.tensor(X, dtype=torch.float32)
model.train()
for epoch in range(30):
    output = model(X_tensor)
    loss = criterion(output, X_tensor)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

# Inference only: switch to eval mode and skip autograd bookkeeping instead
# of building a graph and detaching it afterwards.
model.eval()
with torch.no_grad():
    recon = model(X_tensor).numpy()

# Per-sample score: mean squared difference between each embedding and its
# reconstruction, averaged over the feature axis.
recon_error = np.mean((X - recon) ** 2, axis=1)

# -------- PLOT --------
# NOTE(review): `distances` are distances to the nearest centroid while
# `recon_error` is a mean *squared* error, so the two histograms share an
# axis but not a scale -- confirm that overlaying them is intended.
plt.hist(distances, bins=20, alpha=0.5, label="KMeans Deviation")
plt.hist(recon_error, bins=20, alpha=0.5, label="Autoencoder Error")
plt.legend()
plt.title("Deviation Score Comparison")
# Save before show(): show() may leave an empty figure behind once the
# window is closed, depending on the backend.
plt.savefig("score_comparison.png")
plt.show()


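# Cell output: ModuleNotFoundError: No module named 'sklearn'
# (scikit-learn is not installed in this environment; install it with
# `pip install scikit-learn` before running this cell.)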