<a href="https://colab.research.google.com/github/shreyasvinaya/SAiDL-Summer-Assignment-2023/blob/main/SAIDL_Assignment_CORE_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# !pip install tensorflow_probability

In [1]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.utils import to_categorical


import tensorflow_probability as tfp


In [2]:
# Load CIFAR-100 dataset
(train_images, train_labels), (test_images, test_labels) = datasets.cifar100.load_data()
train_images, test_images = train_images / 255.0, test_images / 255.0
train_labels, test_labels = to_categorical(train_labels), to_categorical(test_labels)


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz


## Standard Softmax

In [3]:
# CNN model with standard softmax
model = models.Sequential([
    
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.5),

    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.Conv2D(256, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.5),

    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(100, activation='softmax')
])
model.summary()
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
history = model.fit(train_images, train_labels, epochs=30,
                    validation_data=(test_images, test_labels))



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 30, 30, 32)        896       
                                                                 
 conv2d_1 (Conv2D)           (None, 28, 28, 64)        18496     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 64)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 14, 14, 64)        0         
                                                                 
 conv2d_2 (Conv2D)           (None, 12, 12, 128)       73856     
                                                                 
 conv2d_3 (Conv2D)           (None, 10, 10, 256)       295168    
                                                        

In [4]:
# Evaluate the model
test_loss, test_acc = model.evaluate(test_images, test_labels)
print(f'Test accuracy: {test_acc}')

Test accuracy: 0.4171000123023987


In [5]:
# Calculate other metrics (precision, recall, F1 score, confusion matrix) using test set predictions
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

In [6]:
predictions = model.predict(test_images)
predicted_classes = np.argmax(predictions, axis=1)
true_classes = np.argmax(test_labels, axis=1)

print(classification_report(true_classes, predicted_classes))
print(confusion_matrix(true_classes, predicted_classes))

              precision    recall  f1-score   support

           0       0.68      0.67      0.68       100
           1       0.58      0.53      0.55       100
           2       0.27      0.35      0.31       100
           3       0.25      0.19      0.22       100
           4       0.12      0.42      0.19       100
           5       0.37      0.34      0.35       100
           6       0.46      0.47      0.46       100
           7       0.52      0.45      0.48       100
           8       0.45      0.53      0.49       100
           9       0.68      0.50      0.57       100
          10       0.39      0.22      0.28       100
          11       0.24      0.21      0.22       100
          12       0.45      0.41      0.43       100
          13       0.39      0.35      0.37       100
          14       0.56      0.34      0.42       100
          15       0.35      0.24      0.29       100
          16       0.61      0.35      0.45       100
          17       0.62    

## Gumbell Softmax

In [8]:
tfd = tfp.distributions




In [9]:

# Custom Gumbel-Softmax layer
class GumbelSoftmaxLayer(layers.Layer):
    def __init__(self, num_classes, temperature, **kwargs):
        super(GumbelSoftmaxLayer, self).__init__(**kwargs)
        self.num_classes = num_classes
        self.temperature = temperature

    def call(self, inputs):
        gumbel_dist = tfd.RelaxedOneHotCategorical(self.temperature, logits=inputs)
        return gumbel_dist.sample()


In [13]:

# CNN model with Gumbel-Softmax
model_gumbel = models.Sequential([
    
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.5),

    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.Conv2D(256, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.5),

    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(100),
    GumbelSoftmaxLayer(100, temperature=0.5)
])

model_gumbel.compile(optimizer='adam',
                     loss='categorical_crossentropy',
                     metrics=['accuracy'])

# Train the model
history_gumbel = model_gumbel.fit(train_images, train_labels, epochs=30,
                                  validation_data=(test_images, test_labels))



Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [14]:
# Evaluate the model
test_loss_gumbel, test_acc_gumbel = model_gumbel.evaluate(test_images, test_labels)
print(f'Test accuracy (Gumbel-Softmax): {test_acc_gumbel}')



Test accuracy (Gumbel-Softmax): 0.32409998774528503


In [15]:
# Calculate other metrics (precision, recall, F1 score, confusion matrix) using test set predictions
predictions_gumbel = model_gumbel.predict(test_images)
predicted_classes_gumbel = np.argmax(predictions_gumbel, axis=1)

print(classification_report(true_classes, predicted_classes_gumbel))
print(confusion_matrix(true_classes, predicted_classes_gumbel))

              precision    recall  f1-score   support

           0       0.67      0.58      0.62       100
           1       0.42      0.48      0.45       100
           2       0.23      0.19      0.21       100
           3       0.12      0.08      0.10       100
           4       0.10      0.17      0.12       100
           5       0.36      0.21      0.26       100
           6       0.28      0.46      0.35       100
           7       0.36      0.40      0.38       100
           8       0.30      0.37      0.33       100
           9       0.63      0.40      0.49       100
          10       0.19      0.14      0.16       100
          11       0.22      0.11      0.15       100
          12       0.31      0.28      0.30       100
          13       0.34      0.30      0.32       100
          14       0.29      0.23      0.26       100
          15       0.26      0.20      0.23       100
          16       0.42      0.31      0.36       100
          17       0.59    

## Bonus Task

In [16]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.29.2-py3-none-any.whl (7.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m47.3 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m23.1 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m35.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.14.1 tokenizers-0.13.3 transformers-4.29.2


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from transformers import ViTFeatureExtractor, ViTForImageClassification

# Load and preprocess the CIFAR-100 dataset
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_data = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
test_data = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)

# Define the transformer-based architecture
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224', num_labels=100, ignore_mismatched_sizes=True)

# Replace the standard softmax with alternative softmax function (e.g., Gumbel-Softmax)
# Implement the alternative softmax function here

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Compile the model (optimizer, loss function, and evaluation metrics)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        data, targets = data.to(device), targets.to(device)

        # Forward pass
        outputs = model(data)
        loss = criterion(outputs.logits, targets)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()

        # Update weights
        optimizer.step()

# Evaluate the model
correct = 0
total = 0
with torch.no_grad():
    for data, targets in test_loader:
        data, targets = data.to(device), targets.to(device)
        outputs = model(data)
        _, predicted = torch.max(outputs.logits.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

print(f'Accuracy: {100 * correct / total}%')

Files already downloaded and verified
Files already downloaded and verified


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([100, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([100]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
