In [None]:
import torchvision.models as models

# Build ResNet-18 with its pretrained weights (fetched into the torch hub cache
# on first use) and put it straight into inference mode. `eval()` hands back
# the module itself, so the chained call still binds the network to `model`.
model = models.resnet18(pretrained=True).eval()
# Bare expression as the last line so the notebook renders the architecture.
model


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 75.1MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
import json
import os

import torch
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
from google.colab import drive
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score
from sklearn.mixture import GaussianMixture

# Step 1: Load the CIFAR-10 test dataset and extract features from the neural network
#
# torchvision's pretrained classification weights expect inputs standardised
# with the ImageNet per-channel mean/std, so those statistics are used here
# instead of a generic 0.5/0.5 scaling.
# NOTE(review): images are fed at the dataset's native resolution (no resize).
# torchvision documents the pretrained models as expecting H, W >= 224; adding
# transforms.Resize(224) may improve the features at a much higher compute
# cost -- confirm before enabling.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
# shuffle=False keeps the dataset order, so position i in `features` /
# `predicted_clusters` corresponds to test image i.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False)

# Load a pre-trained model (e.g., ResNet18); run on the GPU when one is
# available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.resnet18(pretrained=True).to(device)
model.eval()

features = []
true_labels = []

# no_grad: inference only, so skip building the autograd graph.
with torch.no_grad():
    for images, labels in test_loader:
        output = model(images.to(device))
        # Move each batch back to the CPU so the concatenated result can be
        # converted to a NumPy array regardless of the device used.
        features.append(output.cpu())
        true_labels.extend(labels.tolist())

# The "features" are the network's final-layer outputs, one row per image.
features = torch.cat(features).numpy()
assert len(true_labels) == len(features), "labels and features are misaligned"

# Step 2: Apply Gaussian Mixture Model (GMM)
n_components = 10  # Assuming 10 clusters for CIFAR-10
gmm = GaussianMixture(n_components=n_components, random_state=42)
gmm.fit(features)

# Predict clusters
predicted_clusters = gmm.predict(features)

# Step 3: Evaluate Clustering Performance
ari = adjusted_rand_score(true_labels, predicted_clusters)
nmi = normalized_mutual_info_score(true_labels, predicted_clusters)

print(f"Adjusted Rand Index (ARI): {ari:.4f}")
print(f"Normalized Mutual Information (NMI): {nmi:.4f}")

# Step 4: Save Cluster Predictions
# Note: `:03` only guarantees a minimum width of three digits; indices >= 1000
# are written with more digits. The id format is kept as-is.
cluster_dict = {f"image_{i:03}": int(cluster) for i, cluster in enumerate(predicted_clusters)}

# Mount Google Drive
drive.mount('/content/drive')

# Define the path where you want to save the file. The target folder is
# created if missing so the writes below cannot fail with FileNotFoundError
# after all the work above has already been done.
save_dir = '/content/drive/My Drive/submission'
os.makedirs(save_dir, exist_ok=True)
save_path_json = os.path.join(save_dir, 'gmm_clusters.json')
save_path_txt = os.path.join(save_dir, 'gmm_clusters.txt')

# Save as JSON
with open(save_path_json, 'w') as f:
    json.dump(cluster_dict, f)

# Save as TXT: one "<image id> <cluster>" line per image, written from the
# same mapping as the JSON file so the two outputs always agree.
with open(save_path_txt, 'w') as f:
    for image_id, cluster in cluster_dict.items():
        f.write(f"{image_id} {cluster}\n")


Files already downloaded and verified
Adjusted Rand Index (ARI): 0.0807
Normalized Mutual Information (NMI): 0.1568
Mounted at /content/drive
