In [1]:
import torch
from torch import nn
from torch import optim
import numpy as np
import random

In [2]:
#create net

class ConvNet(nn.Module):
    def __init__(self, device, weight_init = 'xavier', num_filters = (16,32)):
        super(ConvNet, self).__init__()
        self.device = device
        self.weight_init = weight_init
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=num_filters[0],
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.Sigmoid(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(
                in_channels=num_filters[0], 
                out_channels=num_filters[1], 
                kernel_size=5, 
                stride=1, 
                padding=2
            ),
            nn.Sigmoid(),
            nn.MaxPool2d(kernel_size=2),
        )
        # fully connected layer, output 10 classes
        self.out = nn.Sequential(nn.Linear(num_filters[1] * 7 * 7, 10),
                                 nn.Softmax())
        #self.apply(self.init_wieghts)

    
    def init_wieghts(self, w):
        if isinstance(w, nn.Conv2d) or isinstance(w, nn.Linear):
            if self.weight_init == 'xavier':
                torch.nn.init.xavier_normal_(w.weight, gain = nn.init.calculate_gain('sigmoid'))
            if self.weight_init == 'he':
                torch.nn.init.kaiming_normal_(w.weight, nonlinearity='sigmoid')


    def forward(self, x):
        if torch.is_tensor(x):    
            x = x.to(self.device)
        else:
            x = torch.from_numpy(x).to(self.device)
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)
        output = self.out(x)
        return output



In [3]:
def get_mnist():
  from mnist_loader import load_data_wrapper
  train, val, test = load_data_wrapper()
  train_x, train_y = zip(*train)
  val_x, val_y = zip(*val)
  test_x, test_y = zip(*test)
  train_x_ , val_x_, test_x_ = [], [] ,[]

  for t in train_x:
    train_x_.append(t.reshape((1,28,28)))
  for v ,ts in zip(val_x, test_x):
    val_x_.append(v.reshape((1,28,28)))
    test_x_.append(ts.reshape((1,28,28)))
  

  train_x_ , val_x_, test_x_ = np.array(train_x_), np.array(val_x_), np.array(test_x_) 
 
  train_y_ =  np.array([np.argmax(np.squeeze(t), axis=0) for t in train_y])
  val_y_ = np.array(val_y)
  test_y_ = np.array(test_y)
  

  return (train_x_, train_y_), (val_x_, val_y_), (test_x_, test_y_)

In [4]:
from torch.utils.data import Dataset
class CustomMnistDataset(Dataset):
  def __init__(self, X,Y):
    super().__init__()
    self.X = np.divide(X, 255.0)
    self.Y = Y
  def __len__(self):
    return len(self.Y)
  def __getitem__(self, index):
    return self.X[index], self.Y[index]



In [17]:
def train_epoch(model, train_loader, optimizer, loss_fn, epoch_index, device):
  loss = 0
  for i, batch in enumerate(train_loader):
    x, y = batch
    y = y.to(device)
    optimizer.zero_grad()
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    loss.backward()
    optimizer.step()

  print(f'Training Loss for last batch of epoch {epoch_index}: {loss}')
  return loss

In [18]:
from torch.utils.data import DataLoader
from  torcheval.metrics.functional import multiclass_accuracy, multiclass_f1_score, multiclass_precision, multiclass_recall, auc
#training code

#hyperparams
hyper_params = {
  "EPOCHS": 25,
  "OPTIMIZER": 'adam',
  "NUM_FILTERS": (32,64),
  "WEIGHT_INIT": 'xavier',
  "BATCH_SIZE": 8
}

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
np.random.seed(42)
torch.manual_seed(42)
random.seed(42)

model = ConvNet(device=device,weight_init=hyper_params["WEIGHT_INIT"],num_filters=hyper_params["NUM_FILTERS"])
model.to(device)

#create loss func and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr= .01)

train, (val_x, val_y), _ = get_mnist()
val_y = torch.from_numpy(val_y).to(device)
train_dataloader = DataLoader(CustomMnistDataset(train[0],train[1]),batch_size=hyper_params["BATCH_SIZE"],shuffle=True)

for EPOCH in range(hyper_params["EPOCHS"]):
  print(f'EPOCH #{EPOCH}')
  model.train()
  train_epoch(model, train_dataloader, optimizer, loss_fn, EPOCH, device)
  model.eval()

  with torch.no_grad():
    val_y_pred = model(val_x)
    v_loss = loss_fn(val_y_pred, val_y)
    v_acc = multiclass_accuracy(val_y_pred, val_y, num_classes=10)
    v_f1 = multiclass_f1_score(val_y_pred, val_y, num_classes=10)
    v_prec = multiclass_precision(val_y_pred, val_y, num_classes=10)
    v_recall = multiclass_recall(val_y_pred, val_y, num_classes=10)
    #v_auc = auc(val_y_pred, val_y)
  if EPOCH == 10:
    print(val_y[50:70])
    print(np.array([np.argmax(np.squeeze(t), axis=0) for t in val_y_pred.cpu()])[50:70])
  print(f'Validation Acc: {v_acc} | Loss: {v_loss}')



EPOCH #0
tensor([0.0928, 0.0818, 0.1163, 0.0998, 0.1112, 0.1028, 0.0946, 0.0925, 0.1003,
        0.1079], device='cuda:0', grad_fn=<SelectBackward0>)
tensor([8, 8, 9, 6, 3, 8, 5, 7], device='cuda:0')


  return self._call_impl(*args, **kwargs)


RuntimeError: size mismatch (got input: [10], target: [8])

In [7]:
train, (val_x, val_y), _ = get_mnist()
count = 0
for i in train[1]:
  if i ==4:
    count+=1

print(count)
print(len(train[1]))


4859
50000
