In [1]:
# Import
import torch 
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F 
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter # to print to tensorboard

In [2]:
# Simple CNN
class CNN(nn.Module):
    """Small convolutional classifier with two conv/pool stages and one linear layer.

    Layer order: conv(3x3, 8 channels) -> ReLU -> max-pool(2x2) ->
    conv(3x3, 16 channels) -> ReLU -> max-pool(2x2) -> flatten -> linear.

    Args:
        in_channels: Number of channels of the input images.
        num_classes: Number of scores produced per image.
        input_size: Spatial size of the input images, given either as a single
            int (square images) or as a ``(height, width)`` pair. The default
            ``(28, 28)`` gives the linear layer ``16 * 7 * 7`` input features.

    Raises:
        ValueError: If ``input_size`` is so small that nothing is left after
            the two pooling stages.
    """

    def __init__(self, in_channels=3, num_classes=10, input_size=(28, 28)):
        super().__init__()

        # A bare int is shorthand for a square image.
        if isinstance(input_size, int):
            input_size = (input_size, input_size)
        height, width = input_size

        self.conv1 = nn.Conv2d(in_channels=in_channels,
                               out_channels=8, kernel_size=3,
                               stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16,
                               kernel_size=3, stride=1, padding=1)

        # The 3x3 convolutions use stride 1 and padding 1, so they keep the
        # spatial size; each of the two 2x2/stride-2 poolings halves it,
        # rounding down.
        pooled_height = height // 2 // 2
        pooled_width = width // 2 // 2
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                f"input_size {input_size} is too small for two 2x2 pooling stages"
            )
        self.fc1 = nn.Linear(16 * pooled_height * pooled_width, num_classes)

    def forward(self, x):
        """Return the raw class scores for a batch of images.

        Args:
            x: Batch of images whose first dimension is the batch dimension.

        Returns:
            Tensor with one row of ``num_classes`` scores per image; no
            softmax is applied.
        """
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        # Flatten everything except the batch dimension for the linear layer.
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)
        return x

In [3]:
# Set device
# Use the GPU when CUDA is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Hyperparameters
num_epochs = 5          # number of passes over the training loader
batch_size = 64         # samples per batch handed to the DataLoader
learning_rate = 1e-3    # learning rate handed to the optimizer
in_channels = 1         # channel count passed to the CNN constructor
num_classes = 10        # class count passed to the CNN constructor

In [None]:
# Load data
# `train` is a boolean flag, so pass the bool True. A string such as 'True'
# is only accepted because any non-empty string is truthy, which means
# 'False' would silently select the same split.
train_dataset = datasets.MNIST(root='dataset/', train=True,
                               transform=transforms.ToTensor(), download=True)
# Reshuffle the samples on every pass over the loader.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

In [None]:
# Initialize network
# Build the CNN from the hyperparameters and move it to `device`.
# `model.to(device)` is the last expression of the cell, so the notebook also
# echoes its return value as the cell output.
model = CNN(in_channels=in_channels, num_classes=num_classes)
model.to(device)

In [None]:
# Loss function, optimizer and TensorBoard writer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=0.0,
)

# Log directory passed to the TensorBoard writer for this run.
writer = SummaryWriter('runs/MNIST/tryingout_tensorboard')

In [None]:
# Notes on `_, predictions = scores.max(1)`:
# For a torch tensor, max(dim) reduces along the given dimension and returns
# both the maximum "values" and their "indices" along that dimension.
# dim=0 takes the maximum down each column (one result per column);
# dim=1 takes the maximum across each row (one result per row).
x = torch.rand(10, 2)
print(x, x.max(dim=1), sep='\n')

In [None]:
# Train
# Runs `num_epochs` passes over `train_loader`, logging the per-batch loss and
# accuracy to TensorBoard and printing the mean loss of every epoch.
model.train()
step = 0  # global batch counter used as the TensorBoard x-axis
for epoch in range(num_epochs):
    print(f"Epoch [{epoch+1}/{num_epochs}]:", end='')
    losses = []
    accuracies = []

    for batch_idx, (data, targets) in enumerate(train_loader):
        # Get data to cuda if available
        data = data.to(device)
        targets = targets.to(device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)
        # Keep a plain Python float for the statistics and the logger; the
        # tensor itself is still needed for the backward pass below.
        batch_loss = loss.item()
        losses.append(batch_loss)

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Calculate 'running' training accuracy
        _, predictions = scores.max(1)
        num_correct = (predictions == targets).sum()
        running_train_acc = float(num_correct)/float(data.shape[0])
        accuracies.append(running_train_acc)

        # Log the batch metrics to TensorBoard
        writer.add_scalar('Training Loss', batch_loss, global_step=step)
        writer.add_scalar('Training Accuracy', running_train_acc, global_step=step)
        step += 1

    print(f'Mean loss this epoch was {sum(losses)/len(losses)}')

# Make sure all buffered events reach the event file on disk.
writer.flush()

In [None]:
# Train in different batchsize and learning rate
# To find the optimal batchsize and learning rate
# Every (batch size, learning rate) pair trains a fresh model and logs to its
# own TensorBoard run directory.
batch_sizes = [2, 64 , 1024]
learning_rates = [0.1, 0.01, 0.001, 0.0001]
in_channels = 1
num_classes = 10
num_epochs = 1
# `train` is a boolean flag, so pass the bool True rather than a string.
train_dataset = datasets.MNIST(root='dataset/', train=True,
                               transform=transforms.ToTensor(), download=True)

for batch_size in batch_sizes:
    for learning_rate in learning_rates:
        step = 0  # global batch counter of this run (TensorBoard x-axis)
        model = CNN(in_channels=in_channels, num_classes=num_classes)
        model.to(device)
        model.train()
        writer = SummaryWriter(f'runs/MNIST/MiniBatchSize {batch_size} LR {learning_rate}')
        train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.0)
        for epoch in range(num_epochs):
            print(f"Epoch [{epoch+1}/{num_epochs}]:", end='')
            losses = []
            accuracies = []

            for batch_idx, (data, targets) in enumerate(train_loader):
                # Get data to cuda if available
                data = data.to(device)
                targets = targets.to(device)

                # forward
                scores = model(data)
                loss = criterion(scores, targets)
                # Plain Python float for the statistics and the logger.
                batch_loss = loss.item()
                losses.append(batch_loss)

                # backward
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Calculate 'running' training accuracy
                _, predictions = scores.max(1)
                num_correct = (predictions == targets).sum()
                running_train_acc = float(num_correct)/float(data.shape[0])
                accuracies.append(running_train_acc)

                writer.add_scalar('Training Loss', batch_loss, global_step=step)
                writer.add_scalar('Training Accuracy', running_train_acc, global_step=step)
                step += 1

            print(f'Mean loss this epoch was {sum(losses)/len(losses)}')

        # Flush and release this run's event file before the next
        # configuration opens its own writer.
        writer.close()

In [None]:
# Train in different batchsize and learning rate
# To find the optimal batchsize and learning rate
# To make the tensorboard table more clear to find the hyperparameters
# Every (batch size, learning rate) pair trains a fresh model, logs to its own
# TensorBoard run directory and records its hyperparameters with the epoch
# averages.
batch_sizes = [2, 64 , 1024]
learning_rates = [0.1, 0.01, 0.001, 0.0001]
in_channels = 1
num_classes = 10
num_epochs = 1
# `train` is a boolean flag, so pass the bool True rather than a string.
train_dataset = datasets.MNIST(root='dataset/', train=True,
                               transform=transforms.ToTensor(), download=True)

for batch_size in batch_sizes:
    for learning_rate in learning_rates:
        step = 0  # global batch counter of this run (TensorBoard x-axis)
        model = CNN(in_channels=in_channels, num_classes=num_classes)
        model.to(device)
        model.train()
        writer = SummaryWriter(f'runs/MNIST/MiniBatchSize {batch_size} LR {learning_rate}')
        train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.0)
        for epoch in range(num_epochs):
            print(f"Epoch [{epoch+1}/{num_epochs}]:", end='')
            losses = []
            accuracies = []

            for batch_idx, (data, targets) in enumerate(train_loader):
                # Get data to cuda if available
                data = data.to(device)
                targets = targets.to(device)

                # forward
                scores = model(data)
                loss = criterion(scores, targets)
                # Plain Python float for the statistics and the logger.
                batch_loss = loss.item()
                losses.append(batch_loss)

                # backward
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Calculate 'running' training accuracy
                _, predictions = scores.max(1)
                num_correct = (predictions == targets).sum()

                running_train_acc = float(num_correct)/float(data.shape[0])
                accuracies.append(running_train_acc)

                writer.add_scalar('Training Loss', batch_loss, global_step=step)
                writer.add_scalar('Training Accuracy', running_train_acc, global_step=step)
                step += 1

            # Record the hyperparameters together with this epoch's mean
            # accuracy and loss for TensorBoard's hyperparameter view.
            writer.add_hparams({'lr': learning_rate, 'bsize': batch_size},
                    {'accuracy': sum(accuracies)/len(accuracies),
                     'loss': sum(losses)/len(losses)})
            print(f'Mean loss this epoch was {sum(losses)/len(losses)}')

        # Flush and release this run's event file before the next
        # configuration opens its own writer.
        writer.close()

In [4]:
# Train in different batchsize and learning rate
# To find the optimal batchsize and learning rate
# To make the tensorboard table more clear to find the hyperparameters
# To visualize the model predict image in a batch
# To visualize the model's weight change in a batch
batch_sizes = [64]
learning_rates = [0.001]
in_channels = 1
num_classes = 10
num_epochs = 1
# `train` is a boolean flag, so pass the bool True rather than a string.
train_dataset = datasets.MNIST(root='dataset/', train=True,
                               transform=transforms.ToTensor(), download=True)

for batch_size in batch_sizes:
    for learning_rate in learning_rates:
        step = 0  # global batch counter of this run (TensorBoard x-axis)
        model = CNN(in_channels=in_channels, num_classes=num_classes)
        model.to(device)
        model.train()
        writer = SummaryWriter(f'runs/MNIST/MiniBatchSize {batch_size} LR {learning_rate}')
        train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.0)
        for epoch in range(num_epochs):
            print(f"Epoch [{epoch+1}/{num_epochs}]:", end='')
            losses = []
            accuracies = []

            for batch_idx, (data, targets) in enumerate(train_loader):
                # Get data to cuda if available
                data = data.to(device)
                targets = targets.to(device)

                # forward
                scores = model(data)
                loss = criterion(scores, targets)
                # Plain Python float for the statistics and the logger.
                batch_loss = loss.item()
                losses.append(batch_loss)

                # backward
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Calculate 'running' training accuracy
                _, predictions = scores.max(1)
                num_correct = (predictions == targets).sum()
                running_train_acc = float(num_correct)/float(data.shape[0])
                accuracies.append(running_train_acc)

                # Show all images of the current batch in TensorBoard;
                # torchvision.utils.make_grid() tiles them into one image.
                img_grid = torchvision.utils.make_grid(data)
                # Passing `global_step` files each entry under its own step.
                # Without it every batch is written to the same default step,
                # so no progression over training is visible.
                writer.add_image('mnist_images', img_grid, global_step=step)
                # Show how the weights of the fc1 layer change during training.
                writer.add_histogram('fc1', model.fc1.weight, global_step=step)

                writer.add_scalar('Training Loss', batch_loss, global_step=step)
                writer.add_scalar('Training Accuracy', running_train_acc, global_step=step)
                step += 1

            # Record the hyperparameters together with this epoch's mean
            # accuracy and loss for TensorBoard's hyperparameter view.
            writer.add_hparams({'lr': learning_rate, 'bsize': batch_size},
                    {'accuracy': sum(accuracies)/len(accuracies),
                     'loss': sum(losses)/len(losses)})
            print(f'Mean loss this epoch was {sum(losses)/len(losses)}')

        # Flush and release this run's event file before the next
        # configuration opens its own writer.
        writer.close()

Epoch [1/1]:Mean loss this epoch was 0.3509065203908791


In [4]:
# Train in different batchsize and learning rate
# To find the optimal batchsize and learning rate
# To make the tensorboard table more clear to find the hyperparameters
# To visualize the model predict image in a batch
# To visualize the model's weight change in a batch
# To visualize how the model does the prediction
batch_sizes = [256]
learning_rates = [0.001]
in_channels = 1
num_classes = 10
num_epochs = 1
# `train` is a boolean flag, so pass the bool True rather than a string.
train_dataset = datasets.MNIST(root='dataset/', train=True,
                               transform=transforms.ToTensor(), download=True)
classes = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
# Embeddings are logged only for the last few batches of an epoch.
num_embedding_batches = 4


for batch_size in batch_sizes:
    for learning_rate in learning_rates:
        step = 0  # global batch counter of this run (TensorBoard x-axis)
        model = CNN(in_channels=in_channels, num_classes=num_classes)
        model.to(device)
        model.train()
        writer = SummaryWriter(f'runs/MNIST/MiniBatchSize {batch_size} LR {learning_rate}')
        train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
        # Index of the first batch whose embedding is logged. It is derived
        # from the loader length so it stays correct for any batch size; with
        # 60000 samples and batch size 256 there are 235 batches, so this is
        # 231, i.e. the same batches as `batch_idx > 230`.
        first_embedding_batch = len(train_loader) - num_embedding_batches
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.0)
        for epoch in range(num_epochs):
            print(f"Epoch [{epoch+1}/{num_epochs}]:", end='')
            losses = []
            accuracies = []

            for batch_idx, (data, targets) in enumerate(train_loader):
                # Get data to cuda if available
                data = data.to(device)
                targets = targets.to(device)

                # forward
                scores = model(data)
                loss = criterion(scores, targets)
                # Plain Python float for the statistics and the logger.
                batch_loss = loss.item()
                losses.append(batch_loss)

                # backward
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Calculate 'running' training accuracy
                _, predictions = scores.max(1)
                num_correct = (predictions == targets).sum()
                running_train_acc = float(num_correct)/float(data.shape[0])
                accuracies.append(running_train_acc)

                # Plot things to tensorboard
                img_grid = torchvision.utils.make_grid(data)
                # Passing `global_step` files each entry under its own step.
                # Without it every batch is written to the same default step,
                # so no progression over training is visible.
                writer.add_image('mnist_images', img_grid, global_step=step)
                writer.add_histogram('fc1', model.fc1.weight, global_step=step)
                writer.add_scalar('Training Loss', batch_loss, global_step=step)
                writer.add_scalar('Training Accuracy', running_train_acc, global_step=step)
                if batch_idx >= first_embedding_batch:
                    # Only build the embedding inputs for the batches that are
                    # actually logged.
                    features = data.reshape(data.shape[0], -1)  # flatten each image to one row
                    class_labels = [classes[label] for label in predictions.tolist()]
                    writer.add_embedding(features, metadata=class_labels,
                                         label_img=data, global_step=batch_idx)
                step += 1

            # Record the hyperparameters together with this epoch's mean
            # accuracy and loss for TensorBoard's hyperparameter view.
            writer.add_hparams({'lr': learning_rate, 'bsize': batch_size},
                    {'accuracy': sum(accuracies)/len(accuracies),
                     'loss': sum(losses)/len(losses)})
            print(f'Mean loss this epoch was {sum(losses)/len(losses)}')

        # Flush and release this run's event file before the next
        # configuration opens its own writer.
        writer.close()

Epoch [1/1]:Mean loss this epoch was 0.7071013694113873
