In [2]:
import torch
from torch import nn
import torchvision
from torchvision import transforms
from collections import defaultdict
import matplotlib.pyplot as plt
from sklearn import manifold
from torch.utils.data import DataLoader
import numpy as np
device='cpu'

In [3]:
# Define a Convolutional Autoencoder model
class ConvAutoencoder(nn.Module):
    def __init__(self, embedding_dim=8):
        super(ConvAutoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, stride=2, padding=0),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(128, 128, kernel_size=3, stride=2, padding=0, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 64, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 32, kernel_size=3, stride=1, padding=1),
              nn.ReLU(),
            nn.ConvTranspose2d(32, 32, kernel_size=3, stride=2, padding=0, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(32, 1, kernel_size=3, stride=1, padding=0),
            nn.Sigmoid(),
        )
        self.embedding_encoder = nn.Sequential(
            nn.Linear(128 * 2 * 2, 512),
            nn.ReLU(),
            nn.Linear(512, embedding_dim)
            )
        self.embedding_decoder = nn.Sequential(
            nn.Linear(embedding_dim, 512),
            nn.ReLU(),
            nn.Linear(512, 128 * 2 * 2)
            )
        self.embedding_classifier = nn.Sequential(
            nn.Linear(embedding_dim,64),
            nn.ReLU(),
            nn.Linear(64,64),
            nn.ReLU(),
            nn.Linear(64,62))

    def forward(self, x):
        x = self.encoder(x)
        x = x.view(x.size(0), -1)  # Flatten        
        embedding = self.embedding_encoder(x)
        logits = self.embedding_classifier(embedding)
        #embedding = embedding/torch.norm(embedding,p=2,dim=-1,keepdim=True)
        xhat = self.decoder(self.embedding_decoder(embedding).view(x.size(0), 128, 2, 2))
        return xhat, embedding,logits

In [4]:
batch_size=128

In [5]:
transform = transforms.ToTensor()
emnist_data = torchvision.datasets.EMNIST(root='./data', train=True,split='byclass', download=True, transform=transform)
emnist_test_loader = DataLoader(emnist_data, batch_size=batch_size, shuffle=False)
# Load your trained autoencoder
autoencoder = ConvAutoencoder(embedding_dim=6).to(device)
autoencoder.load_state_dict(torch.load('AE_EMNIST_1.pt'))  # Load your model
autoencoder.eval()

HTTPError: HTTP Error 503: Service Unavailable

In [18]:
autoencoder.eval()
test_encode, test_targets,X_list,label_list = [], [],[],[]
for x_val, y_val in emnist_test_loader:
    x_val = x_val.to(device)

    xhat,zhat,_ = autoencoder(x_val)
    # yhat = model.decoder(zhat)
    test_encode.append(zhat.detach())
    test_targets.append(y_val.detach())
    X_list.append(zhat.detach().numpy())
    label_list.append(y_val.detach().numpy())
X_list=np.vstack(X_list)
label_list=np.concatenate(label_list)
EMNIST=(X_list,label_list)
torch.save(EMNIST,'../data/EMNIST.pt')

print('Embeddings are calculated')


Embeddings are calculated


In [23]:
selected_labels=np.random.randint(0,20,10)
test_encode = torch.cat(test_encode).cpu().numpy()
test_targets = torch.cat(test_targets).cpu().numpy()

# Select a subset of classes
selected_classes = np.random.randint(0,20,10)  # Replace with your chosen class indices
mask = np.isin(test_targets, selected_classes)

# Filter the data
z_subset = test_encode[mask]
Y_subset = test_targets[mask]

# Apply t-SNE to the subset
tsne = manifold.TSNE(n_components=2, init="pca", random_state=0)
X_2d_subset = tsne.fit_transform(z_subset)

TypeError: cat(): argument 'tensors' (position 1) must be tuple of Tensors, not numpy.ndarray

In [None]:
import numpy as np

In [1]:
fig = plt.figure(figsize=(10, 10))

LABELS = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 
          'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
          'a', 'b', 'd', 'e', 'f', 'g', 'h', 'n', 'q', 'r', 't']

# Iterate over each class in the selected_classes and plot them separately
for class_index in np.unique(Y_subset):
    # Select data points that belong to the current class
    indices = Y_subset == class_index
    plt.scatter(X_2d_subset[indices, 0], X_2d_subset[indices, 1], label=f' Labels[class_index]', s=1)
plt.legend(bbox_to_anchor=(0.63, 0.6), loc="upper left")

# Adding legend
#plt.legend()

# Show the plot
plt.show()

NameError: name 'plt' is not defined