# SHUBHAM SHARMA
## IIT BOMBAY
This code contains a basic model of VGG for 3D MNIST dataset. It can be an introductory code for understanding 3D images

In [0]:
#For preparing the dataset
import h5py
import numpy as np
#For making the VGG model
import torch
from torch import nn, optim
import torchvision
from torchvision import datasets, transforms
from torchsummary import summary
import torch.nn.functional as F

In [0]:
with h5py.File("full_dataset_vectors.h5", "r") as hf:   
    ls=list(hf.keys())
    print('List of datasets in this file : \n',ls)

List of datasets in this file : 
 ['X_test', 'X_train', 'y_test', 'y_train']


In [0]:
with h5py.File("full_dataset_vectors.h5", "r") as hf:   
    X_train = hf["X_train"][:]
    y_train = hf["y_train"][:]    
    X_test = hf["X_test"][:]  
    y_test = hf["y_test"][:]
    
X_train=X_train.reshape(10000,1, 16,16,16)
X_test=X_test.reshape(2000,1, 16,16,16)
print('The shape of X_train is :', X_train.shape)
print('The shape of y_train is :', y_train.shape)
print('The shape of X_test is :', X_test.shape)
print('The shape of y_test is :', y_test.shape)

The shape of X_train is : (10000, 1, 16, 16, 16)
The shape of y_train is : (10000,)
The shape of X_test is : (2000, 1, 16, 16, 16)
The shape of y_test is : (2000,)


In [0]:
print('The max of X_train is :',np.max(X_train))
print('The min of X_train is :',np.min(X_train))
X_train = torch.from_numpy(X_train)
X_test = torch.from_numpy(X_test)
y_train = torch.from_numpy(y_train)
y_test = torch.from_numpy(y_test)

The max of X_train is : 1.0
The min of X_train is : 0.0


Lets work on the model now

In [0]:
#CHecking the number of GPU and then setting the GPU id
print(torch.cuda.current_device())#To know thw current active device
print(torch.cuda.get_device_capability())#the major and minor cuda capability of the device
device = torch.device('cuda')

0
(7, 5)


In [0]:
class Net(nn.Module):
  
  def __init__(self):
    super(Net,self).__init__()
    
    
    #For first layer input=(1,16,16,16)
    self.conv1a=nn.Conv3d(in_channels=1, out_channels=64, kernel_size=(3,3,3), stride=1, padding=1)
    self.conv1b=nn.Conv3d(in_channels=64, out_channels=64, kernel_size=(3,3,3), stride=1, padding=1)
    self.BN1=nn.BatchNorm3d(64)
    
    #For second layer input=(64,8,8,8)
    self.conv2a=nn.Conv3d(in_channels=64, out_channels=128, kernel_size=(3,3,3), stride=1, padding=1)
    self.conv2b=nn.Conv3d(in_channels=128, out_channels=128, kernel_size=(3,3,3), stride=1, padding=1)
    self.BN2=nn.BatchNorm3d(128)

    #For third layer input=(128,4,4,4)
    self.conv3a=nn.Conv3d(in_channels=128, out_channels=256, kernel_size=(3,3,3), stride=1, padding=1)
    self.conv3b=nn.Conv3d(in_channels=256, out_channels=256, kernel_size=(3,3,3), stride=1, padding=1)
    self.BN3=nn.BatchNorm3d(256)

    #For forth layer input=(256,2,2,2)
    self.conv4a=nn.Conv3d(in_channels=256, out_channels=512, kernel_size=(3,3,3), stride=1, padding=1)
    self.conv4b=nn.Conv3d(in_channels=512, out_channels=512, kernel_size=(3,3,3), stride=1, padding=1)
    self.BN4=nn.BatchNorm3d(512)
    
    #Max pool layer
    self.max_pool=nn.MaxPool3d(kernel_size=(2,2,2), stride=2)

    #Now thw fully connected layers
    self.fc1 = nn.Linear(512,100)
    self.fc2 = nn.Linear(100,10)
    
    #Defining activations
    self.relu = nn.ReLU()
    
    ####################################################################
  def forward(self,x):
    y=self.conv1a(x)
    y=self.BN1(y)
    y=self.relu(y)
    y=self.conv1b(y)
    y=self.BN1(y)
    y=self.relu(y)
    y=self.max_pool(y)
    ##
    y=self.conv2a(y)
    y=self.BN2(y)
    y=self.relu(y)
    y=self.conv2b(y)
    y=self.BN2(y)
    y=self.relu(y)
    y=self.max_pool(y)
    ##
    y=self.conv3a(y)
    y=self.BN3(y)
    y=self.relu(y)
    y=self.conv3b(y)
    y=self.BN3(y)
    y=self.relu(y)
    y=self.max_pool(y)
    ##
    y=self.conv4a(y)
    y=self.BN4(y)
    y=self.relu(y)
    y=self.conv4b(y)
    y=self.BN4(y)
    y=self.relu(y)
    y=self.max_pool(y)
    ## Fully Connected Layers
    y=y.view(-1, 512)
    y=self.fc1(y)
    y=self.relu(y)
    y=self.fc2(y)
    
    return y

      
      
##########################################################################
model = Net().to(device)#not necessary to add to device
summary(model, input_size=(1, 16, 16, 16))
    
    

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv3d-1       [-1, 64, 16, 16, 16]           1,792
       BatchNorm3d-2       [-1, 64, 16, 16, 16]             128
              ReLU-3       [-1, 64, 16, 16, 16]               0
            Conv3d-4       [-1, 64, 16, 16, 16]         110,656
       BatchNorm3d-5       [-1, 64, 16, 16, 16]             128
              ReLU-6       [-1, 64, 16, 16, 16]               0
         MaxPool3d-7          [-1, 64, 8, 8, 8]               0
            Conv3d-8         [-1, 128, 8, 8, 8]         221,312
       BatchNorm3d-9         [-1, 128, 8, 8, 8]             256
             ReLU-10         [-1, 128, 8, 8, 8]               0
           Conv3d-11         [-1, 128, 8, 8, 8]         442,496
      BatchNorm3d-12         [-1, 128, 8, 8, 8]             256
             ReLU-13         [-1, 128, 8, 8, 8]               0
        MaxPool3d-14         [-1, 128, 

In [0]:
lr=1e-5
num_of_epochs=50
# Loss and optimizer
loss_fn = nn.CrossEntropyLoss()#NO NEED TO BE USING SOFTMAX WHEN USING Crioss Entropy loss
optimizer = optim.Adam(model.parameters(), lr=lr)  

In [0]:
trainset = torch.utils.data.TensorDataset(X_train,y_train)# create your datset
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

valset = torch.utils.data.TensorDataset(X_test,y_test)# create your datset
valloader = torch.utils.data.DataLoader(valset, batch_size=1, shuffle=True)
no_of_epochs=20

In [0]:
#Training the model
total_step = len(trainloader)
for epochs in range(num_of_epochs):
  for i, (images, labels) in enumerate(trainloader):  
    #CHECK THE SHAPE OF BOTH IMAGES AND LABELS
    images = images.to(device)
    images= images.float()
    labels = labels.to(device)  
    
    #Forward pass
    outputs = model(images)
    loss = loss_fn(outputs, labels)
    
    # Backpropagation and then optimization
    optimizer.zero_grad()#Initially setting the gradient values to zero so backward() can find the gradient
    loss.backward()#backpropagate and then optimize
    optimizer.step()
    if (i+1) % 100 == 0:
      print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
             .format(epochs+1, num_of_epochs, i+1, total_step, loss.item()))

    
    
    

Epoch [1/50], Step [100/157], Loss: 0.9078
Epoch [2/50], Step [100/157], Loss: 0.5899
Epoch [3/50], Step [100/157], Loss: 0.5003
Epoch [4/50], Step [100/157], Loss: 0.3832
Epoch [5/50], Step [100/157], Loss: 0.2903
Epoch [6/50], Step [100/157], Loss: 0.2276
Epoch [7/50], Step [100/157], Loss: 0.1806
Epoch [8/50], Step [100/157], Loss: 0.0388
Epoch [9/50], Step [100/157], Loss: 0.1306
Epoch [10/50], Step [100/157], Loss: 0.0444
Epoch [11/50], Step [100/157], Loss: 0.0170
Epoch [12/50], Step [100/157], Loss: 0.1545
Epoch [13/50], Step [100/157], Loss: 0.0051
Epoch [14/50], Step [100/157], Loss: 0.0427
Epoch [15/50], Step [100/157], Loss: 0.0208
Epoch [16/50], Step [100/157], Loss: 0.0044
Epoch [17/50], Step [100/157], Loss: 0.0674
Epoch [18/50], Step [100/157], Loss: 0.0236
Epoch [19/50], Step [100/157], Loss: 0.1079
Epoch [20/50], Step [100/157], Loss: 0.1335
Epoch [21/50], Step [100/157], Loss: 0.0186
Epoch [22/50], Step [100/157], Loss: 0.0182
Epoch [23/50], Step [100/157], Loss: 0.04

In [0]:
#For Testing
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
  correct=0
  total=0
  for images, labels in valloader:
    images=images.to(device)
    images= images.float()
    labels=labels.to(device)
    outputs= model(images)
    #outputs.data is the output of softmax layer
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
    
  print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))

  

Accuracy of the network on the 10000 test images: 69.9 %
