In [10]:
import torch, torchvision
import torch.nn.functional as F
from torch import nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


In [11]:
# Pick the compute device once: the GPU when PyTorch can see one, else the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [12]:
def set_seed(seed: int=42):
    """Seed the random number generators used in this notebook.

    Seeds Python's ``random`` module, PyTorch's default generator and the
    generators of every CUDA device, so that weight initialisation and
    dataloader shuffling can be repeated from run to run.

    Args:
        seed: integer seed applied to all generators. Defaults to 42.
    """
    # Local import: standard library only, so the notebook's import cell does
    # not need to change for this function to work.
    import random

    random.seed(seed)
    torch.manual_seed(seed)
    # `manual_seed_all` covers every GPU rather than only the current one.
    torch.cuda.manual_seed_all(seed)

In [13]:
class CNN(nn.Module):
    """Small convolutional classifier: two conv + max-pool stages and one linear layer.

    Both convolutions use 3x3 kernels with stride 1 and padding 1, so only the
    two 2x2 max-pools change the spatial size (each halves it).

    Args:
        in_channels: number of channels in the input images.
        hidden_unit: number of feature maps produced by the first convolution.
        num_classes: number of output logits.
        image_size: height/width of the square input images. It is only used
            to size the linear layer; the default of 28 gives the original
            ``16*7*7`` input features.
    """

    def __init__(self,in_channels = 3,hidden_unit=8, num_classes = 10, image_size = 28):
        super().__init__()
        # Use the constructor argument here. The previous code read a
        # module-level `in_channel` variable, which ignored the argument and
        # raised NameError unless that global happened to exist.
        self.conv1 = nn.Conv2d(in_channels,hidden_unit,kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=(2,2), stride=2)
        self.conv2 = nn.Conv2d(hidden_unit,out_channels=16,kernel_size=3, stride=1, padding=1)
        # The pool is applied twice in forward(), each time halving (floor) the size.
        pooled_size = (image_size // 2) // 2
        self.fc1 = nn.Linear(16*pooled_size*pooled_size, num_classes)

    def forward(self,x):
        """Return raw class logits of shape (batch, num_classes) for a batch of images."""
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.reshape(x.shape[0], -1)   ###or use nn.Flatten() in __init__ method
        x = self.fc1(x)
        return x

In [14]:
## hyperparameters

# Model shape
in_channel = 1
hidden_unit = 8
num_classes = 10

# Optimisation
learning_rate = 1e-3
batch_size = 32
epochs = 1

In [15]:
# Build the network from the hyperparameter cell, move it to the selected
# device, and display its layer summary.
model = CNN(in_channels=in_channel, hidden_unit=hidden_unit, num_classes=num_classes)
model = model.to(device)
model

CNN(
  (conv1): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=784, out_features=10, bias=True)
)

In [16]:
# from torchinfo import summary
# summary(model,
#         input_size=(batch_size, in_channel, 28, 28),  # must match the model: 1 channel, 28x28 images
#         col_names=["input_size","output_size"],
#         row_settings=["var_names"])

In [17]:
# The only preprocessing applied to each sample: convert it to a tensor.
simple_transform = transforms.ToTensor()

In [18]:
# MNIST training split stored under data/ (downloaded when not already there),
# with `simple_transform` applied to every image.
train_data = datasets.MNIST(
    root="data/",
    train=True,
    transform=simple_transform,
    download=True,
)

In [19]:
# Sanity check: show which device the model's first parameter lives on.
next(model.parameters()).device

device(type='cuda', index=0)

In [20]:
from torch.utils.tensorboard import SummaryWriter
from tqdm.auto import tqdm
from time import perf_counter

In [21]:
# Values swept by the training cell: one TensorBoard run per
# (learning rate, batch size) pair.
learning_rates = [1e-3]
batch_sizes = [256]

# Text label for each class index, used as embedding metadata.
classes = [str(digit) for digit in range(10)]

In [22]:
# train_dataloader = DataLoader(train_data, batch_size, shuffle=True)
# next(iter(train_dataloader))[0].shape

In [27]:
from collections import defaultdict

# Hyperparameter sweep: for every (learning rate, batch size) pair, train a
# fresh model for `epochs` epochs and log images, the fc1 weight histogram,
# loss/accuracy curves, one embedding snapshot per epoch and the final hparams
# summary to its own TensorBoard run directory under runs/.

# Batch index at which the embedding snapshot is written. It is clamped to the
# last batch of each dataloader below, so configurations with fewer than 231
# batches per epoch still log an embedding.
embedding_batch_index = 230

for learning_rate in learning_rates:
    for batch_size in batch_sizes:
        # Every configuration starts from the same seed and from freshly
        # initialised weights. Re-using one model across configurations would
        # make later runs continue training from earlier ones and invalidate
        # the hparams comparison.
        set_seed()
        model = CNN(in_channel, hidden_unit, num_classes).to(device)
        model.train()

        loss_fn = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), learning_rate)
        train_dataloader = DataLoader(train_data, batch_size, shuffle=True)
        last_batch = len(train_dataloader) - 1
        embedding_batch = min(embedding_batch_index, last_batch)

        # Forward slash: "\M" is an invalid escape sequence, and a backslash is
        # only a path separator on Windows, so elsewhere the run would land
        # outside the `runs` directory that TensorBoard is pointed at.
        writer = SummaryWriter(f"runs/MiniBatchSize {batch_size} lr {learning_rate}")
        step = 0  # per-run global step, so every run's curves start at 0
        losses, accuracies = [], []
        train_start_time = perf_counter()
        try:
            for epoch in tqdm(range(epochs)):
                # Per-epoch metrics; the hparams summary uses the last epoch.
                losses, accuracies = [], []

                for batch, (image, label) in enumerate(train_dataloader):
                    image, label = image.to(device), label.to(device)

                    logits = model(image)
                    # argmax over logits equals argmax over softmax(logits).
                    pred_label = logits.argmax(dim=1)

                    loss = loss_fn(logits, label)
                    losses.append(loss.item())

                    # Store plain floats so no device tensors are kept alive.
                    acc = (pred_label == label).float().mean().item()
                    accuracies.append(acc)

                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                    img_grid = torchvision.utils.make_grid(image)
                    writer.add_image("mnist_images", img_grid, global_step=step)
                    writer.add_histogram("fc1", model.fc1.weight, global_step=step)

                    writer.add_scalar("train_loss", losses[-1], global_step=step)
                    writer.add_scalar("train_acc", acc, global_step=step)

                    if batch == embedding_batch:
                        # Built only when needed: flattened pixels as features,
                        # digit strings as metadata, the images as thumbnails.
                        features = image.reshape(image.shape[0], -1)
                        class_labels = [classes[target] for target in label.tolist()]
                        writer.add_embedding(features, metadata=class_labels,
                                             label_img=image, global_step=step)

                    step += 1

                    if batch == last_batch:
                        print(f"len label: {len(label)},len dataloader:{len(train_dataloader)},image.shape[0]: {image.shape[0]},batch:{batch}, batch size: {batch_size}")

            # Skip the summary when nothing was trained (epochs == 0 or an
            # empty dataloader) instead of failing on an empty average.
            if losses:
                writer.add_hparams({"lr": learning_rate, "bsize": batch_size},
                                   {"accuracy": sum(accuracies) / len(accuracies),
                                    "loss": sum(losses) / len(losses)})
            train_end_time = perf_counter()
        finally:
            # Flush and release the event file even if training raised.
            writer.close()

        print(f"total train time took {train_end_time-train_start_time} for batch size: {batch_size} and learning rate: {learning_rate} with {epochs} epochs")
print("operation finished")
                    
                    

  0%|          | 0/1 [00:00<?, ?it/s]

len label: 96,len dataloader:235,image.shape[0]: 96,batch:234, batch size: 256
total train time took 34.60019860000011 for batch size: 256 and learning rate: 0.001 with 1 epochs
operation finished


In [29]:
# Fractional number of batches per epoch. NOTE: `batch_size` here is whatever
# value the training loop's loop variable left behind, which can differ from
# the one assigned in the hyperparameter cell.
len(train_data)/batch_size

234.375

In [30]:
# Number of batches per epoch yielded by the dataloader the training loop
# created last.
len(train_dataloader)

235

In [32]:
# Load the TensorBoard notebook extension and open it on the event files
# written under the `runs` directory.
%load_ext tensorboard
%tensorboard --logdir runs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 19884), started 0:00:19 ago. (Use '!kill 19884' to kill it.)