In [5]:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn.model_selection import train_test_split

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as datasets
from tqdm import tqdm
from torchvision import transforms
from torchvision.utils import save_image
from torch.utils.data import DataLoader
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim

In [7]:
# CNN module without encoder and decoder
import torch.nn.functional as F

class CNN(nn.Module):
    def __init__(self, num_classes=10):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=69, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=1)

        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(2176, 128)  # Adjust input features based on your image size
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu2(x)
        x = self.pool2(x)

        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.relu3(x)
        x = self.fc2(x)

        return x



In [8]:
# set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')   

In [9]:
# Hyperparameters
num_classes = 10
learning_rate =0.001
batch_size = 50
num_epochs = 10

In [10]:
images = np.load('images.npy')
labels = np.load('./labels.npy')

In [11]:
tensor_data = torch.from_numpy(images).float()
tensor_labels = torch.from_numpy(labels).long()

# Split the dataset into training and test sets
test_size = 0.2  
train_data, test_data, train_labels, test_labels = train_test_split(
    tensor_data, tensor_labels, test_size=test_size, random_state=2
)

# Create PyTorch datasets
train_dataset = TensorDataset(train_data, train_labels)
test_dataset = TensorDataset(test_data, test_labels)

# Create PyTorch DataLoaders
batch_size = 50  # Adjust as needed
train_dataloader = DataLoader(train_dataset,batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset,batch_size=batch_size, shuffle=False)

In [12]:
model=CNN(num_classes=10).to(device)
optimizer=torch.optim.Adam(model.parameters(),lr=0.001,weight_decay=0.0001)
loss_function=nn.CrossEntropyLoss()

In [13]:
train_count = len(train_data)
test_count = len(test_data)
print(train_count)

17428


In [14]:
#Model training and saving best model
from torch.autograd import Variable
best_accuracy=0.0

for epoch in range(num_epochs):
    
    #Evaluation and training on training dataset
    model.train()
    train_accuracy=0.0
    train_loss=0.0
    
    for i, (images,labels) in enumerate(train_dataloader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        optimizer.zero_grad()
        
        outputs=model(images)
        loss=loss_function(outputs,labels)
        loss.backward()
        optimizer.step()
        
        
        train_loss+= loss.cpu().data*images.size(0)
        _,prediction=torch.max(outputs.data,1)
        
        train_accuracy+=int(torch.sum(prediction==labels.data))
        
    train_accuracy=train_accuracy/train_count
    train_loss=train_loss/train_count
    
    
    # Evaluation on testing dataset
    model.eval()
    
    test_accuracy=0.0
    for i, (images,labels) in enumerate(test_dataloader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        outputs=model(images)
        _,prediction=torch.max(outputs.data,1)
        test_accuracy+=int(torch.sum(prediction==labels.data))
    
    test_accuracy=test_accuracy/test_count
    
    
    print('Epoch: '+str(epoch)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy)+' Test Accuracy: '+str(test_accuracy))


Epoch: 0 Train Loss: tensor(1.1140) Train Accuracy: 0.5817649759008492 Test Accuracy: 0.6414964425063117
Epoch: 1 Train Loss: tensor(0.8761) Train Accuracy: 0.6744319485884783 Test Accuracy: 0.6972687629102593
Epoch: 2 Train Loss: tensor(0.7944) Train Accuracy: 0.7046132660087217 Test Accuracy: 0.6991048886848749
Epoch: 3 Train Loss: tensor(0.7340) Train Accuracy: 0.7287124168005509 Test Accuracy: 0.7399586871700712
Epoch: 4 Train Loss: tensor(0.7037) Train Accuracy: 0.7389258664218499 Test Accuracy: 0.7477622217121873
Epoch: 5 Train Loss: tensor(0.6654) Train Accuracy: 0.7526968097314666 Test Accuracy: 0.7254991966949736
Epoch: 6 Train Loss: tensor(0.6425) Train Accuracy: 0.7608446178563232 Test Accuracy: 0.7553362405324765
Epoch: 7 Train Loss: tensor(0.6149) Train Accuracy: 0.7742139086527428 Test Accuracy: 0.7482212531558412
Epoch: 8 Train Loss: tensor(0.5945) Train Accuracy: 0.782648611429883 Test Accuracy: 0.7206793665366078
Epoch: 9 Train Loss: tensor(0.5704) Train Accuracy: 0.79

In [19]:
"""In general, using a Variational Autoencoder (VAE) for image classification may not yield competitive results compared to
a well-designed Convolutional Neural Network (CNN). The primary purpose of a VAE is to learn a probabilistic latent space 
representation of input data, which is useful for generative tasks and image reconstruction. However, VAEs are not specifically
designed for classification tasks."""

"""A Variational Autoencoder (VAE) is a type of generative model in the family of autoencoders. Unlike traditional autoencoders,
VAEs are designed to learn a probabilistic representation of input data, typically for the purpose of generating new samples that
resemble the training data."""

'''The provided PyTorch code implements a Convolutional Variational Autoencoder (ConvVAE) for image classification. 
The ConvVAE architecture consists of an encoder, which progressively reduces the spatial dimensions of the input image
through convolutional layers, and a decoder, which reconstructs the original input from the latent space representation.
The reparameterization trick introduces stochasticity during training by sampling from the latent space using the mean
and log variance. The training process involves minimizing a loss function that combines Binary Cross Entropy (BCE) loss
for image reconstruction and Kullback-Leibler Divergence (KLD) loss for regularization. The code includes functions for both
training and validation, iterating through epochs and printing the training and validation losses along with accuracy.
The model's performance is evaluated on a dataset split into training and testing sets, and a learning rate scheduler is
employed to adjust the learning rate during training. While VAEs are known for generative tasks, this code demonstrates their 
application to a classification task by combining the strengths of both generative and discriminative approaches.'''


import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import numpy as np
from tqdm import tqdm
import torchvision.transforms as transforms


# Define the Variational Autoencoder (VAE) architecture
class ConvVAE(nn.Module):
    def __init__(self, image_channels=3, kernel_size=4, latent_dim=256, init_channels=8):
        super(ConvVAE, self).__init__()

        # Encoder
        self.enc1 = nn.Conv2d(
            in_channels=image_channels,
            out_channels=init_channels,
            kernel_size=kernel_size,
            stride=2,
            padding=1
        )
        self.enc2 = nn.Conv2d(
            in_channels=init_channels,
            out_channels=init_channels*2,
            kernel_size=kernel_size,
            stride=2,
            padding=1
        )
        self.enc3 = nn.Conv2d(
            in_channels=init_channels*2,
            out_channels=init_channels*4,
            kernel_size=kernel_size,
            stride=2,
            padding=1
        )
        self.enc4 = nn.Conv2d(
            in_channels=init_channels*4,
            out_channels=64,
            kernel_size=kernel_size,
            stride=2,
            padding=0
        )

        # Fully connected layers for learning representations
        self.fc1 = nn.Linear(64, 128)
        self.fc_mu = nn.Linear(128, latent_dim)
        self.fc_log_var = nn.Linear(128, latent_dim)
        self.fc2 = nn.Linear(latent_dim, 64)

        # Decoder
        self.dec1 = nn.ConvTranspose2d(
            in_channels=64, out_channels=init_channels*8, kernel_size=kernel_size, stride=1, padding=0
        )
        self.dec2 = nn.ConvTranspose2d(
            in_channels=init_channels*8, out_channels=init_channels*4, kernel_size=kernel_size, stride=2, padding=1
        )
        self.dec3 = nn.ConvTranspose2d(
            in_channels=init_channels*4, out_channels=init_channels*2, kernel_size=kernel_size, stride=2, padding=1
        )
        self.dec4 = nn.ConvTranspose2d(
            in_channels=init_channels*2, out_channels=image_channels, kernel_size=kernel_size, stride=2, padding=1
        )

    def reparameterize(self, mu, log_var):
        std = torch.exp(0.5*log_var)
        eps = torch.randn_like(std)
        sample = mu + (eps * std)
        return sample

    def forward(self, x):
        x = F.relu(self.enc1(x))
        x = F.relu(self.enc2(x))
        x = F.relu(self.enc3(x))
        x = F.relu(self.enc4(x))
        batch, _, _, _ = x.shape
        x = F.adaptive_avg_pool2d(x, 1).reshape(batch, -1)
        hidden = self.fc1(x)
        mu = self.fc_mu(hidden)
        log_var = self.fc_log_var(hidden)
        z = self.reparameterize(mu, log_var)
        z = self.fc2(z)
        z = z.view(batch, 64, 1, 1)  # Adjust the size before decoding
        x = F.relu(self.dec1(z))
        x = F.relu(self.dec2(x))
        x = F.relu(self.dec3(x))
        reconstruction = torch.sigmoid(self.dec4(x))
        return reconstruction, mu, log_var

# Function for calculating the final loss of VAE
def final_loss(bce_loss, mu, logvar):
    BCE = bce_loss
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD
    
# Training function
def train(model, dataloader, dataset_size, device, optimizer, criterion):
    model.train()
    running_loss = 0.0
    counter = 0
    correct_predictions = 0
    total_samples = dataset_size
    for i, data in tqdm(enumerate(dataloader), total=int(dataset_size/dataloader.batch_size)):
        counter += 1
        data, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        reconstruction, mu, logvar = model(data)
        bce_loss = criterion(reconstruction, data)
        loss = final_loss(bce_loss, mu, logvar)
        loss.backward()
        running_loss += loss.item()
        optimizer.step()
        # Get predicted labels from reconstructed images
        _, predicted_labels = torch.max(reconstruction, 1)
        predicted_labels = predicted_labels.view(-1)
        predicted_labels = predicted_labels[:len(labels)]
        correct_predictions += (predicted_labels== labels).sum().item()
        if i == int(dataset_size/dataloader.batch_size) - 1:
            recon_images = reconstruction

    train_loss = running_loss / counter
    accuracy = correct_predictions / total_samples
    return accuracy,train_loss


# validation function
def validate(model, dataloader, dataset, device, criterion):
    model.eval()
    running_loss = 0.0
    counter = 0
    correct_predictions = 0
    total_samples = dataset
    with torch.no_grad():
        for i, data in tqdm(enumerate(dataloader), total=int(dataset/dataloader.batch_size)):
            counter += 1
            data, labels = data[0].to(device), data[1].to(device)
            reconstruction, mu, logvar = model(data)
            bce_loss = criterion(reconstruction, data)
            loss = final_loss(bce_loss, mu, logvar)
            running_loss += loss.item()
            # Get predicted labels from reconstructed images
            _, predicted_labels = torch.max(reconstruction, 1)
            predicted_labels = predicted_labels.view(-1)
            predicted_labels = predicted_labels[:len(labels)]
            correct_predictions += (predicted_labels== labels).sum().item()
            if i == int(dataset/dataloader.batch_size) - 1:
                recon_images = reconstruction

    val_loss = running_loss / counter
    val_accuracy = correct_predictions / total_samples
    return val_loss, recon_images,val_accuracy


# Set device (GPU if available, else CPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize VAE model
model = ConvVAE().to(device)

# Set hyperparameters
lr = 0.001
epochs = 10
batch_size = 64

# loading image and label
images = np.load('images.npy')
labels = np.load('labels.npy')

# Convert to PyTorch tensors
tensor_data = torch.from_numpy(images.transpose(0, 3, 1, 2)).float()
tensor_labels = torch.from_numpy(labels).long()


# Image resizing transformation
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((32, 32)),
    transforms.ToTensor()
])

# Apply the transformation to the tensor_data
tensor_data_resized = torch.stack([transform(img) for img in tensor_data])

# Split the dataset into training and test sets
test_size = 0.2
train_data, test_data, train_labels, test_labels = train_test_split(
    tensor_data_resized, tensor_labels, test_size=test_size, random_state=2
)

# Create PyTorch datasets
train_dataset = TensorDataset(train_data, train_labels)
test_dataset = TensorDataset(test_data, test_labels)

# Create PyTorch DataLoaders
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Setup optimizer
optimizer = optim.Adam(model.parameters(), lr=lr)
# Set up criterion (Mean Squared Error Loss)
criterion = nn.MSELoss(reduction='mean')

# Lists to store training and validation losses
train_loss = []
valid_loss = []

# Learning rate scheduler
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

for epoch in range(epochs):
    print(f"Epoch {epoch+1} of {epochs}")

    # Training phase
    train_accuracy,train_epoch_loss = train(
        model, train_dataloader, len(train_dataset), device, optimizer, criterion
    )

    # Validation phase
    valid_epoch_loss, recon_images,val_accuracy = validate(
        model, test_dataloader, len(test_dataset), device, criterion
    )

    # Store losses in lists
    train_loss.append(train_epoch_loss)
    valid_loss.append(valid_epoch_loss)

     # Print and log results
    print(f"Train Loss : {train_epoch_loss:.4f}")
    print(f"Val Loss : {valid_epoch_loss:.4f}")
    print(f"Train accuracy : {train_accuracy:.4f}")
    print(f"Test accuracy : {val_accuracy:.4f}")

    # Adjust learning rate
    scheduler.step()


Epoch 1 of 10


273it [00:02, 100.53it/s]                                                                                                                                  
69it [00:00, 276.42it/s]                                                                                                                                   


Train Loss : 0.9362
Val Loss : 0.0131
Train accuracy : 0.3044
Test accuracy : 0.3036
Epoch 2 of 10


273it [00:02, 107.97it/s]                                                                                                                                  
69it [00:00, 251.92it/s]                                                                                                                                   


Train Loss : 0.0126
Val Loss : 0.0123
Train accuracy : 0.3176
Test accuracy : 0.3124
Epoch 3 of 10


273it [00:02, 101.90it/s]                                                                                                                                  
69it [00:00, 260.86it/s]                                                                                                                                   


Train Loss : 0.0123
Val Loss : 0.0120
Train accuracy : 0.3162
Test accuracy : 0.3105
Epoch 4 of 10


273it [00:02, 102.88it/s]                                                                                                                                  
69it [00:00, 226.64it/s]                                                                                                                                   


Train Loss : 0.0120
Val Loss : 0.0119
Train accuracy : 0.3128
Test accuracy : 0.3108
Epoch 5 of 10


273it [00:02, 106.57it/s]                                                                                                                                  
69it [00:00, 244.75it/s]                                                                                                                                   


Train Loss : 0.0120
Val Loss : 0.0118
Train accuracy : 0.3129
Test accuracy : 0.3108
Epoch 6 of 10


273it [00:02, 102.65it/s]                                                                                                                                  
69it [00:00, 239.76it/s]                                                                                                                                   


Train Loss : 0.0120
Val Loss : 0.0118
Train accuracy : 0.3126
Test accuracy : 0.3110
Epoch 7 of 10


273it [00:02, 102.90it/s]                                                                                                                                  
69it [00:00, 253.89it/s]                                                                                                                                   


Train Loss : 0.0119
Val Loss : 0.0118
Train accuracy : 0.3134
Test accuracy : 0.3112
Epoch 8 of 10


273it [00:02, 105.43it/s]                                                                                                                                  
69it [00:00, 230.42it/s]                                                                                                                                   


Train Loss : 0.0119
Val Loss : 0.0118
Train accuracy : 0.3126
Test accuracy : 0.3115
Epoch 9 of 10


273it [00:02, 104.11it/s]                                                                                                                                  
69it [00:00, 247.63it/s]                                                                                                                                   


Train Loss : 0.0119
Val Loss : 0.0118
Train accuracy : 0.3141
Test accuracy : 0.3110
Epoch 10 of 10


273it [00:02, 98.24it/s]                                                                                                                                   
69it [00:00, 177.44it/s]                                                                                                                                   

Train Loss : 0.0119
Val Loss : 0.0118
Train accuracy : 0.3135
Test accuracy : 0.3112



