# model with bare run loop, using GPU

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms

from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

In [2]:
# Record the torch / torchvision versions this notebook was run with.
print(torch.__version__)
print(torchvision.__version__)

1.4.0
0.5.0


In [3]:
# Ask PyTorch whether CUDA is currently available.
use_cuda = torch.cuda.is_available()

In [4]:
# Select the compute device: "cuda" when available, otherwise fall back to "cpu".
device = torch.device("cuda" if use_cuda else "cpu")

In [5]:
# Display which device was selected.
device

device(type='cuda')

In [6]:
def get_num_correct(preds, labels):
    """Return how many predictions match their labels, as a Python int.

    The predicted class of each row of ``preds`` is the index of its largest
    value along dim 1; it is compared element-wise against ``labels``.
    """
    predicted_classes = preds.argmax(dim=1)
    matches = predicted_classes == labels
    return matches.sum().item()

In [7]:
class Network(nn.Module):
    """Small CNN classifier: two conv/pool stages followed by three linear layers.

    Takes single-channel image batches and returns 10 raw scores per image
    (no softmax is applied here). The flattened feature size 12 * 4 * 4
    corresponds to 28x28 inputs: each 5x5 convolution trims 4 pixels and each
    2x2 pooling halves the size (28 -> 24 -> 12 -> 8 -> 4).
    """

    def __init__(self):
        super().__init__()
        # Convolutional stages: (in_channels, out_channels, kernel_size).
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 12, 5)

        # Linear stages: (in_features, out_features).
        # 12 channels of 4x4 feature maps come out of the second conv/pool stage.
        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def forward(self, t):
        """Run a batch through the network and return the output-layer scores."""
        # Hidden conv layers: convolution -> ReLU -> 2x2 max pooling.
        # ReLU and pooling hold no weights, so they come from the functional API.
        for conv in (self.conv1, self.conv2):
            t = F.max_pool2d(F.relu(conv(t)), kernel_size=2, stride=2)

        # Flatten each sample's feature maps into one vector of fc1's input size.
        t = t.reshape(-1, self.fc1.in_features)

        # Hidden linear layers, each followed by ReLU.
        for hidden in (self.fc1, self.fc2):
            t = F.relu(hidden(t))

        # Output layer: returned without an activation.
        return self.out(t)

In [8]:
# Fashion-MNIST training split, stored under ./data (downloaded if requested
# files are missing); each image is converted to a tensor by the transform.
train_set = torchvision.datasets.FashionMNIST(
    root='./data',
    transform=transforms.Compose([transforms.ToTensor()]),
    train=True,
    download=True,
)

In [9]:
# Hyperparameters for this run.
lr = 0.01
batch_size = 100
shuffle = True

# Create the model and move its parameters to the selected device.
network = Network().to(device)

# Use the DataLoader name imported at the top of the notebook.
# pin_memory: when CUDA is in use, batches are placed in page-locked host
# memory so the subsequent copy to the GPU is cheaper.
train_loader = DataLoader(
    train_set,
    batch_size=batch_size,
    shuffle=shuffle,
    pin_memory=use_cuda,
)
optimizer = optim.Adam(network.parameters(), lr=lr)

In [10]:
%%time
for epoch in range(3):
    
    total_loss = 0
    total_correct = 0
    
    for batch in train_loader: # Get Batch
        images, labels = batch
        images, labels, = images.to(device), labels.to(device)

        preds = network(images) # Pass Batch
        loss = F.cross_entropy(preds, labels) # Calculate Loss

        optimizer.zero_grad()
        loss.backward() # Calculate Gradients
        optimizer.step() # Update Weights

        total_loss += loss.item()
        total_correct += get_num_correct(preds, labels)

    print(
        "epoch", epoch, 
        "total_correct:", total_correct, 
        "loss:", total_loss
    )

epoch 0 total_correct: 46995 loss: 342.13093926012516
epoch 1 total_correct: 51525 loss: 230.0070315748453
epoch 2 total_correct: 52247 loss: 207.93506947159767
Wall time: 34.4 s


In [11]:


# model.train() sets the model to training mode, which is different from evaluation mode (model.eval())
# this affects Dropout layers and BatchNorm layers, see documentation



# Todo

- CPU is at 100% when not using the GPU, but the GPU is hardly being utilized at all?
    - GPU: 30s, 32s, 34s
    - CPU: 47s, 49s, 47s



# Questions

- What about explicitly setting the tensor datatype (e.g. to float)? I remember the GPU taking different datatypes than the CPU...
