In [17]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Explicitly switch autograd gradient tracking on for the session.
# Tracking is already PyTorch's default; this just makes the state explicit.
torch.set_grad_enabled(True)

<torch.autograd.grad_mode.set_grad_enabled at 0x123216630>

In [3]:
class Network(nn.Module):
    """Small CNN: two conv/ReLU/max-pool stages followed by three linear layers.

    ``fc1`` expects ``12 * 4 * 4`` features per sample. With two unpadded 5x5
    convolutions, each followed by a 2x2 max-pool, that corresponds to
    single-channel 28x28 inputs (28 -> 24 -> 12 -> 8 -> 4).

    Args:
        num_classes: Number of output logits. Defaults to 15, the number of
            class labels this notebook was written for.
    """

    def __init__(self, num_classes=15):
        super().__init__()

        # in_channels = 1 because the inputs are greyscale images.
        # out_channels = 6 means six 5x5 filters/kernels, i.e. six feature maps.
        # The output channels of one layer are the input channels of the next.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        # Moving from convolutional to fully connected layers requires the
        # feature maps to be flattened to 12 * 4 * 4 values per sample.
        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        # One output logit per class label.
        self.out = nn.Linear(in_features=60, out_features=num_classes)

    def forward(self, t):
        """Return raw class logits of shape ``(batch, num_classes)``.

        Args:
            t: Image tensor of shape ``(batch, 1, H, W)``. An unbatched
                ``(1, H, W)`` tensor is treated as a batch of one.

        Raises:
            RuntimeError: If the spatial size does not reduce to 4x4 feature
                maps, because ``fc1`` then receives the wrong feature count.
        """
        # Treat an unbatched image as a batch of one so the flatten below
        # always has a batch dimension to preserve.
        if t.dim() == 3:
            t = t.unsqueeze(0)

        # (1) first hidden conv layer
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (2) second hidden conv layer
        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (3) first hidden linear layer
        # Flatten everything except the batch dimension. Unlike
        # reshape(-1, 12 * 4 * 4), this never regroups values across samples:
        # a wrongly sized input fails in fc1 instead of changing the batch size.
        t = t.flatten(start_dim=1)
        t = self.fc1(t)
        t = F.relu(t)

        # (4) second hidden linear layer
        t = self.fc2(t)
        t = F.relu(t)

        # (5) output layer
        # No softmax here: F.cross_entropy, used during training, expects raw
        # logits. Apply F.softmax(t, dim=1) to the result if probabilities
        # are needed.
        t = self.out(t)

        return t

In [4]:
network = Network()

# NOTE: `train_set` must already exist in the kernel; it is not created in this cell.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=1)
# For a single batch, pull it straight from the loader's iterator;
# for a full pass over the data use a for loop instead.
batch = next(iter(train_loader))

In [5]:
# Print the module summary: every registered layer with its configuration.
print(network)

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)


In [7]:
# Each layer exposes its learnable weights as an nn.Parameter.
# Here: the filters of the first convolutional layer.
network.conv1.weight

Parameter containing:
tensor([[[[-0.0598, -0.1265,  0.1818,  0.1847,  0.0762],
          [ 0.1003,  0.0602,  0.0628,  0.0830,  0.1645],
          [ 0.0725, -0.1323, -0.0140,  0.0700,  0.1965],
          [-0.1832,  0.1262, -0.1030,  0.1608,  0.0534],
          [ 0.0628,  0.0274,  0.0783, -0.0811, -0.1549]]],


        [[[ 0.0769,  0.1114,  0.1558, -0.0901, -0.0383],
          [ 0.1951, -0.1368, -0.0052, -0.1501, -0.1664],
          [-0.1326, -0.1543,  0.0170, -0.1380,  0.0826],
          [-0.0976, -0.0695,  0.0243, -0.0447,  0.1989],
          [ 0.0046, -0.0609,  0.0042,  0.0111,  0.1765]]],


        [[[-0.0508,  0.0094,  0.1131,  0.0586, -0.1609],
          [ 0.1771, -0.0782, -0.1880,  0.0978, -0.0349],
          [ 0.1403, -0.0576, -0.1824,  0.0245, -0.0202],
          [-0.0708,  0.0675, -0.0774,  0.1668, -0.0570],
          [ 0.0740,  0.0649, -0.1821,  0.1563, -0.1224]]],


        [[[-0.0503,  0.1237,  0.1543, -0.0422, -0.0348],
          [ 0.0908, -0.0557,  0.1802, -0.1077,  0.0588

In [10]:
# Fully connected layers expose their weight matrix the same way.
network.fc1.weight

Parameter containing:
tensor([[-0.0368,  0.0192, -0.0093,  ...,  0.0636, -0.0106, -0.0501],
        [-0.0602, -0.0193, -0.0103,  ...,  0.0406,  0.0144,  0.0573],
        [ 0.0554,  0.0357,  0.0171,  ..., -0.0609, -0.0555, -0.0388],
        ...,
        [ 0.0294, -0.0610,  0.0533,  ..., -0.0540,  0.0311, -0.0063],
        [-0.0162,  0.0026,  0.0236,  ..., -0.0409,  0.0549, -0.0037],
        [ 0.0651, -0.0014,  0.0487,  ...,  0.0060,  0.0349,  0.0360]],
       requires_grad=True)

In [12]:
# network.parameters() would also iterate over the parameters, but it does not
# give their names; named_parameters() yields (name, parameter) pairs.
for name,param in network.named_parameters():
    print(name ,"\t\t" , param.shape)

conv1.weight 		 torch.Size([6, 1, 5, 5])
conv1.bias 		 torch.Size([6])
conv2.weight 		 torch.Size([12, 6, 5, 5])
conv2.bias 		 torch.Size([12])
fc1.weight 		 torch.Size([120, 192])
fc1.bias 		 torch.Size([120])
fc2.weight 		 torch.Size([60, 120])
fc2.bias 		 torch.Size([60])
out.weight 		 torch.Size([10, 60])
out.bias 		 torch.Size([10])


In [None]:
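# TODO: need to create the train_set / test_set datasets (used by the DataLoader cells but not defined anywhere in this notebook)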

In [19]:
network = Network()


def train_model(b_size, train_set, epochs=10, lr=0.01):
    """Train the module-level ``network`` on ``train_set`` using Adam.

    After every epoch the summed batch loss and the number of correctly
    classified samples are printed.

    Args:
        b_size: Batch size for the DataLoader.
        train_set: Dataset whose items are (image, label) pairs; each batch
            is unpacked as ``images, labels``.
        epochs: Number of passes over ``train_set`` (default 10).
        lr: Learning rate passed to Adam (default 0.01).
    """
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=b_size)
    optimizer = optim.Adam(network.parameters(), lr=lr)

    print("*****Batch size is :", b_size, "******")

    for epoch in range(epochs):

        total_loss = 0
        total_correct = 0

        for images, labels in train_loader:
            # Forward pass. cross_entropy works on raw logits, which is why
            # the network's forward() does not apply softmax.
            preds = network(images)
            loss = F.cross_entropy(preds, labels)

            # Gradients accumulate across backward() calls, so clear them
            # before computing the gradients for this batch.
            optimizer.zero_grad()
            loss.backward()   # calculate gradients
            optimizer.step()  # update weights

            total_loss += loss.item()
            # The helper defined in this notebook is get_num_of_correct_preds;
            # the previous call to get_num_correct raised a NameError.
            total_correct += get_num_of_correct_preds(preds, labels)

        print("epoch", epoch, "total_correct:", total_correct, "loss:", total_loss)

In [15]:
def get_num_of_correct_preds(preds, labels):
    """Count how many rows of ``preds`` have their arg-max at the labelled class.

    Args:
        preds: 2-D tensor of per-class scores, one row per sample.
        labels: Tensor of class indices, one per sample.

    Returns:
        The number of matching predictions as a Python int.
    """
    predicted_classes = preds.argmax(dim=1)
    hits = predicted_classes.eq(labels)
    return hits.sum().item()

In [18]:
@torch.no_grad()    # inference only, so there is no need to track gradients
def get_all_preds(model, loader):
    """Run ``model`` on every batch of ``loader`` and stack the outputs.

    Args:
        model: Callable mapping a batch of images to a prediction tensor.
        loader: Iterable yielding ``(images, labels)`` pairs; the labels are
            ignored.

    Returns:
        All batch outputs concatenated along dim 0, in loader order, with the
        dtype and device the model produced. If ``loader`` yields no batches,
        an empty 1-D tensor is returned.
    """
    # Collect the per-batch outputs and concatenate once at the end. Growing
    # a tensor with torch.cat inside the loop re-copies everything gathered so
    # far on each step, and seeding it with torch.tensor([]) forces a CPU
    # float32 operand into every concatenation.
    batch_preds = [model(images) for images, _labels in loader]

    if not batch_preds:
        return torch.tensor([])
    return torch.cat(batch_preds, dim=0)

def new_eval(model, loader):
    """Print accuracy, weighted precision and weighted recall of ``model``.

    Predictions and labels are gathered over the whole ``loader`` and the
    metrics are computed once, on all samples.

    Args:
        model: Callable mapping a batch of images to per-class scores.
        loader: Iterable yielding ``(images, labels)`` pairs of tensors.

    NOTE(review): ``accuracy_score``, ``precision_score`` and ``recall_score``
    are not imported anywhere in this notebook. They look like the
    ``sklearn.metrics`` functions and are called with that argument order
    (``y_true`` first, then ``y_pred``) -- confirm and add the import.
    """
    predicted_batches = []
    label_batches = []

    with torch.no_grad():
        for images, labels in loader:
            outputs = model(images)
            # Index of the highest score per sample = predicted class.
            predicted_batches.append(outputs.argmax(dim=1))
            label_batches.append(labels)

    if not predicted_batches:
        print("No batches to evaluate.")
        return

    y_pred = torch.cat(predicted_batches).cpu().numpy()
    y_true = torch.cat(label_batches).cpu().numpy()

    print("Accuracy: ", accuracy_score(y_true, y_pred))
    print("Precision: ", precision_score(y_true, y_pred, average='weighted'))
    print("Recall: ", recall_score(y_true, y_pred, average='weighted'))

In [None]:
# training confusion matrix

# DataLoader lives in torch.utils.data (there is no torch.nn.data).
# The loader must iterate train_set, because the predictions are compared with
# train_set.targets below. get_all_preds concatenates every batch, so the batch
# size only affects speed and memory, not the result. shuffle is left at its
# default (False) so predictions stay aligned with train_set.targets.
data_loader = torch.utils.data.DataLoader(train_set, batch_size=1)

train_preds = get_all_preds(network, data_loader)

# train_set.targets   # target labels of train_set

# predict
train_preds.argmax(dim=1)   # index of the highest score for each sample

train_preds.argmax(dim=1).eq(train_set.targets) # element-wise True/False: prediction matches target


In [23]:
# Toy example: element-wise comparison of two label tensors and the
# resulting fraction of matches.
t1 = torch.tensor([0, 1, 2])  # actual
t2 = torch.tensor([0, 0, 1])  # predicted

y = torch.eq(t1, t2)          # boolean mask of matching positions
print(y)
x = torch.eq(t1, t2).sum().float() / t1.shape[0]   # matches / number of samples
print(x)

tensor([False,  True, False])
tensor(0.3333)


In [29]:
# Two float32 vectors collected in a plain Python list.
t1 = torch.arange(1, 4, dtype=torch.float32)   # tensor([1., 2., 3.])
t2 = torch.arange(4, 7, dtype=torch.float32)   # tensor([4., 5., 6.])
l1 = [t1, t2]
print(l1)

[tensor([1., 2., 3.]), tensor([4., 5., 6.])]


In [32]:
# Accumulate the tensors of two lists side by side:
# t3 collects the elements of l1, t4 collects the elements of l2.
t3 = torch.tensor([], dtype=torch.float32)
t4 = torch.tensor([], dtype=torch.float32)
l2 = [t2, t2]

# zip pairs the i-th tensor of l1 with the i-th tensor of l2.
# `for td, te in l1, l2` instead iterated over the tuple (l1, l2) and unpacked
# each list, which only works when both lists have exactly two elements.
for td, te in zip(l1, l2):
    t3 = torch.cat((t3, td), dim=0)
    t4 = torch.cat((t4, te), dim=0)

print(t3)
print(t4)

tensor([1., 2., 3., 4., 5., 6.])
tensor([4., 5., 6., 4., 5., 6.])
