In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torchvision

In [2]:
#set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
#hyperparameters
num_classes = 10
learning_rate = 0.1
batch_size = 1024
num_epochs = 100

In [8]:
#load pretrained model
model = torchvision.models.googlenet(pretrained=True)

for param in model.parameters():
    param.requires_grad = False
    
model.fc = nn.Linear(1024, num_classes)
model.to(device)

GoogLeNet(
  (conv1): BasicConv2d(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (conv2): BasicConv2d(
    (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): BasicConv2d(
    (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (inception3a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track

In [10]:
#load data
train_dataset = datasets.CIFAR10(root='dataset/', train=True, 
                                 transform = transforms.ToTensor(), download=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

Files already downloaded and verified


In [13]:
#loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, verbose=True)

In [15]:
#train network
for epoch in range(1, num_epochs):
    losses = []
    
    for batch_idx, (data,targets) in enumerate(train_loader):
        #get data to cuda if possible
        data = data.to(device=device)
        targets = targets.to(device=device)
        
        #forward
        scores = model(data)
        loss = criterion(scores, targets)
        
        losses.append(loss.item())
        
        #backward
        loss.backward()
        
        #gradient descent or adam step
        optimizer.step()
        optimizer.zero_grad()
        
    mean_loss = sum(losses)/len(losses)
    scheduler.step(mean_loss)
    print(f'cost at epoch {epoch} is {mean_loss}')

cost at epoch 1 is 3.561874657261128
cost at epoch 2 is 2.6538483804585984
cost at epoch 3 is 2.667833294187273
cost at epoch 4 is 4.148986792077824
cost at epoch 5 is 2.6450379430031288
cost at epoch 6 is 3.092898398029561
cost at epoch 7 is 3.4942228696784197
cost at epoch 8 is 2.986776035659167
cost at epoch 9 is 2.7125269928757025
cost at epoch 10 is 3.353666130377322
Epoch    11: reducing learning rate of group 0 to 1.0000e-02.
cost at epoch 11 is 3.1581495148794994
cost at epoch 12 is 2.1566303944101137
cost at epoch 13 is 1.9573873013866192
cost at epoch 14 is 1.8693868067799781
cost at epoch 15 is 1.8292952070430832
cost at epoch 16 is 1.8016193375295522
cost at epoch 17 is 1.7816002076985884
cost at epoch 18 is 1.7734185457229614
cost at epoch 19 is 1.76124868830856
cost at epoch 20 is 1.76334391564739
cost at epoch 21 is 1.7572741800425005
cost at epoch 22 is 1.7507766412228953
cost at epoch 23 is 1.755977530868686
cost at epoch 24 is 1.754045167747809
cost at epoch 25 is 1.7

KeyboardInterrupt: 