In [2]:
import torch
import torchvision
import tqdm

#### Configuration

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [4]:
device

device(type='cuda', index=0)

In [5]:
root = "data/"
logs = "lightning_logs/"

In [6]:
batch_size = 64
lr = 1e-3
num_workers = 4
epochs = 20

In [7]:
def accuracy(y, y_hat):
    return 100*((y==y_hat).sum().item())/y.size(0)

#### Data Pipeline

In [8]:
# transform
data_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor()
])

In [13]:
# download the data
train = torchvision.datasets.cifar.CIFAR10(root=root, train=True, transform=data_transform, download=True)
val = torchvision.datasets.cifar.CIFAR10(root=root, train=False, transform=data_transform, download=True)

0it [00:00, ?it/s]

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


100%|█████████▉| 169861120/170498071 [00:54<00:00, 6062847.66it/s]

Extracting data/cifar-10-python.tar.gz to data/
Files already downloaded and verified


170500096it [01:10, 6062847.66it/s]                               

In [16]:
# Data Loader
train_loader = torch.utils.data.DataLoader(dataset=train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
val_loader = torch.utils.data.DataLoader(dataset=train, batch_size=batch_size, shuffle=True, num_workers=num_workers)

### Pure PyTorch code

#### Model

In [78]:
class CNN(torch.nn.Module):
    
    def __init__(self, in_channels=3, n_classes=10):
        super(CNN, self).__init__()
        self.conv1 = torch.nn.Conv2d(in_channels=in_channels, out_channels=16, kernel_size=3, padding=1) # 32x32x3 -> 16x16x16
        self.conv2 = torch.nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1) # 16x16x16 -> 8x8x32
        self.conv3 = torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1) # 4x4x64
        self.linear = torch.nn.Linear(in_features=4*4*64, out_features=124)
        self.output = torch.nn.Linear(in_features=124, out_features=n_classes)
        self.maxpool = torch.nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = torch.nn.Dropout(p=0.25)
    
    def forward(self, x):
        x = self.maxpool(torch.nn.functional.relu(self.conv1(x)))
        x = self.maxpool(torch.nn.functional.relu(self.conv2(x)))
        x = self.maxpool(torch.nn.functional.relu(self.conv3(x)))
        x = x.view(-1, 64 * 4 * 4) # 4x4x64 -> 4*4*64
        x = torch.nn.functional.relu(self.linear(x))
        out = self.output(x)
        return out
    

In [79]:
model = CNN().to(device)

#### Optimizer, Loss and Tensorboard writer

In [80]:
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
criterian = torch.nn.CrossEntropyLoss()
writer = torch.utils.tensorboard.SummaryWriter()

#### The Good, the Bad and the Ugly training loop

In [81]:
%time
steps = 0

epoch_progress = tqdm.tqdm(total=epochs, desc="Epoch", position=0)

for epoch in range(epochs):
    epoch_train_loss = []
    epoch_train_acc = []
    epoch_val_loss = []
    epoch_val_acc = []
    
    batch_progress = tqdm.tqdm(total=len(train_loader), desc="Batch", position=0)
    for i, (images, labels) in enumerate(train_loader):
        
        images = images.to(device)
        labels = labels.to(device)
        
        # forward pass
        outputs = model(images)
        
        # calculate the loss and batch accuracy
        loss = criterian(outputs, labels)
        acc = accuracy(labels, torch.argmax(outputs, 1))
        
        
        # backpropagate the loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        
        if steps%200==0:
            print(f'epoch {epoch} | steps  {steps} | loss {loss.item()} | accuracy {acc}')
        
        epoch_train_loss.append(loss.item())
        epoch_train_acc.append(acc)
        
        writer.add_scalar("step_wise_loss", loss.item(), steps)
        writer.add_scalar("step_wise_acc", acc, steps)
        
        batch_progress.update(1)
        steps += 1
        
        
    
    val_progress = tqdm.tqdm(total=len(val_loader), desc="Validation", position=0)
    for i, (images, labels) in enumerate(val_loader):
        images = images.to(device)
        labels = labels.to(device)
        with torch.no_grad():
            # forward pass with no grads
            outputs = model(images)
            
            # loss and acc calcualtion
            loss = criterian(outputs, labels)
            acc = accuracy(labels, torch.argmax(outputs, 1))
            
            epoch_val_loss.append(loss.item())
            epoch_val_acc.append(acc)
            
            val_progress.update(1)
    
    # calculate the epochs wise train, val loss and acc
    t_loss = sum(epoch_train_loss)/len(epoch_train_loss)
    t_acc = sum(epoch_train_acc)/len(epoch_train_acc)
    v_loss = sum(epoch_val_loss)/len(epoch_val_loss)
    v_acc = sum(epoch_val_acc)/len(epoch_val_acc)
    
    # add to tensorboard
    writer.add_scalar("train_loss", t_loss, epoch)
    writer.add_scalar("train_acc", t_acc, epoch)
    writer.add_scalar("val_loss", v_loss, epoch)
    writer.add_scalar("val_acc", v_acc, epoch)
    
    print(f"Epoch {epoch} | train_loss {t_loss} | train_acc {t_acc} | val_loss {v_loss} | val_acc {v_acc}")
    
    epoch_progress.update(1)
        

Batch:   0%|          | 0/391 [00:00<?, ?it/s]

CPU times: user 6 µs, sys: 0 ns, total: 6 µs
Wall time: 10.5 µs


Batch:   4%|▍         | 15/391 [00:00<01:58,  3.18it/s]

epoch 0 | steps  0 | loss 2.3017399311065674 | accuracy 10.9375


Batch:  59%|█████▉    | 231/391 [00:01<00:01, 88.46it/s]

epoch 0 | steps  200 | loss 1.4653867483139038 | accuracy 42.96875


Batch:   0%|          | 0/391 [00:00<?, ?it/s]76s/it]4.93it/s]

Epoch 0 | train_loss 1.7090767256134307 | train_acc 37.739769820971865 | val_loss 1.4385334853930851 | val_acc 48.21651214833759


Batch:   0%|          | 1/391 [00:00<00:44,  8.76it/s]

epoch 1 | steps  400 | loss 1.4200018644332886 | accuracy 42.1875


Batch:  58%|█████▊    | 228/391 [00:02<00:01, 108.94it/s]

epoch 1 | steps  600 | loss 1.2921485900878906 | accuracy 52.34375


Batch: 100%|██████████| 391/391 [00:04<00:00, 78.54it/s] it/s]

Epoch 1 | train_loss 1.3476582924118432 | train_acc 51.757912404092075 | val_loss 1.2525760712830916 | val_acc 55.40521099744245



Batch:  13%|█▎        | 49/391 [00:00<00:16, 21.10it/s]

epoch 2 | steps  800 | loss 1.2550618648529053 | accuracy 56.25


Batch:  59%|█████▉    | 231/391 [00:01<00:01, 135.76it/s]

epoch 2 | steps  1000 | loss 1.1927508115768433 | accuracy 59.375


Validation: 100%|██████████| 391/391 [00:05<00:00, 78.02it/s] 
Batch:   0%|          | 0/391 [00:00<?, ?it/s]93s/it]0.08it/s]

Epoch 2 | train_loss 1.206453739987005 | train_acc 57.12875639386189 | val_loss 1.1215903101979618 | val_acc 60.169037723785166


Batch:  13%|█▎        | 51/391 [00:00<00:15, 22.21it/s]

epoch 3 | steps  1200 | loss 1.040155291557312 | accuracy 63.28125


Batch:  63%|██████▎   | 247/391 [00:01<00:00, 146.52it/s]

epoch 3 | steps  1400 | loss 1.179186463356018 | accuracy 62.5


Batch: 100%|██████████| 391/391 [00:05<00:00, 66.15it/s] it/s]

Epoch 3 | train_loss 1.0982377253225089 | train_acc 61.1644820971867 | val_loss 1.0418566241288734 | val_acc 62.8360773657289



Batch:  15%|█▌        | 60/391 [00:00<00:13, 24.08it/s]

epoch 4 | steps  1600 | loss 0.9231236577033997 | accuracy 73.4375


Batch:  65%|██████▍   | 254/391 [00:01<00:00, 145.94it/s]

epoch 4 | steps  1800 | loss 0.9883641004562378 | accuracy 60.15625


Batch:   0%|          | 0/391 [00:00<?, ?it/s]27s/it]0.54it/s]

Epoch 4 | train_loss 1.0146730539134092 | train_acc 64.08407928388746 | val_loss 0.9535489679907289 | val_acc 66.167679028133


Batch:  21%|██        | 83/391 [00:00<00:07, 39.37it/s]

epoch 5 | steps  2000 | loss 0.8560163378715515 | accuracy 70.3125


Batch:  68%|██████▊   | 266/391 [00:01<00:00, 137.25it/s]

epoch 5 | steps  2200 | loss 0.9519298076629639 | accuracy 69.53125


Validation: 100%|██████████| 391/391 [00:05<00:00, 70.67it/s] 
Batch: 100%|██████████| 391/391 [00:05<00:00, 70.40it/s] t/s] 

Epoch 5 | train_loss 0.9460416146556435 | train_acc 66.80866368286445 | val_loss 0.8764826836793319 | val_acc 69.55722506393862



Batch:  18%|█▊        | 70/391 [00:00<00:09, 34.94it/s]

epoch 6 | steps  2400 | loss 1.1046710014343262 | accuracy 60.9375


Batch:  72%|███████▏  | 280/391 [00:02<00:00, 137.43it/s]it/s]

epoch 6 | steps  2600 | loss 0.8642760515213013 | accuracy 73.4375


Batch:   0%|          | 0/391 [00:00<?, ?it/s]59s/it]6.32it/s]

Epoch 6 | train_loss 0.8969542775922419 | train_acc 68.5465952685422 | val_loss 0.874965284791444 | val_acc 69.33343989769821


Batch:  20%|█▉        | 78/391 [00:00<00:07, 41.09it/s]

epoch 7 | steps  2800 | loss 0.7764450907707214 | accuracy 71.875


Batch:  73%|███████▎  | 287/391 [00:02<00:00, 108.12it/s]

epoch 7 | steps  3000 | loss 0.7623276114463806 | accuracy 73.4375


Validation: 100%|██████████| 391/391 [00:05<00:00, 67.35it/s] 
Batch:   0%|          | 0/391 [00:00<?, ?it/s]54s/it]3.99it/s]

Epoch 7 | train_loss 0.840111425465635 | train_acc 70.59662723785166 | val_loss 0.7811438735488736 | val_acc 72.71938938618926


Batch:  25%|██▌       | 99/391 [00:00<00:05, 48.86it/s]

epoch 8 | steps  3200 | loss 0.960803210735321 | accuracy 64.84375


Batch:  77%|███████▋  | 301/391 [00:02<00:00, 144.62it/s]

epoch 8 | steps  3400 | loss 0.7504127025604248 | accuracy 74.21875


Batch: 100%|██████████| 391/391 [00:05<00:00, 71.39it/s]8it/s]

Epoch 8 | train_loss 0.8008349454006576 | train_acc 71.91256393861893 | val_loss 0.7462484208519197 | val_acc 73.87547953964194



Batch:  27%|██▋       | 104/391 [00:00<00:05, 55.27it/s]

epoch 9 | steps  3600 | loss 0.7073818445205688 | accuracy 75.78125


Batch:  79%|███████▉  | 308/391 [00:01<00:00, 161.32it/s]

epoch 9 | steps  3800 | loss 0.7518342733383179 | accuracy 72.65625


Validation: 100%|██████████| 391/391 [00:04<00:00, 87.44it/s] 
Batch:   0%|          | 0/391 [00:00<?, ?it/s].45s/it].85it/s]

Epoch 9 | train_loss 0.7605257762210144 | train_acc 73.41272378516624 | val_loss 0.7185067005474549 | val_acc 74.89490089514067


Batch:  28%|██▊       | 110/391 [00:00<00:04, 59.97it/s]

epoch 10 | steps  4000 | loss 0.7514327764511108 | accuracy 75.0


Batch:  81%|████████  | 317/391 [00:02<00:00, 148.85it/s]

epoch 10 | steps  4200 | loss 0.6244916915893555 | accuracy 82.8125


Batch: 100%|██████████| 391/391 [00:06<00:00, 64.38it/s] it/s]

Epoch 10 | train_loss 0.7227150082130871 | train_acc 74.83296035805627 | val_loss 0.6625540265646737 | val_acc 77.10158248081841



Batch:  33%|███▎      | 130/391 [00:01<00:04, 60.28it/s]

epoch 11 | steps  4400 | loss 0.7792284488677979 | accuracy 75.0


Batch:  82%|████████▏ | 322/391 [00:03<00:00, 123.89it/s]it/s]

epoch 11 | steps  4600 | loss 0.646674394607544 | accuracy 75.78125


Batch:   0%|          | 0/391 [00:00<?, ?it/s].63s/it].03it/s]

Epoch 11 | train_loss 0.6947827723325061 | train_acc 75.73489450127877 | val_loss 0.6380498934432369 | val_acc 77.79132033248082


Batch:  33%|███▎      | 128/391 [00:01<00:04, 58.26it/s]

epoch 12 | steps  4800 | loss 0.5179007053375244 | accuracy 81.25


Batch:  87%|████████▋ | 339/391 [00:03<00:00, 103.96it/s]

epoch 12 | steps  5000 | loss 0.6501489877700806 | accuracy 80.46875


Validation: 100%|██████████| 391/391 [00:06<00:00, 62.19it/s] 
Batch: 100%|██████████| 391/391 [00:06<00:00, 60.96it/s] it/s]

Epoch 12 | train_loss 0.6612271098682033 | train_acc 76.8274456521739 | val_loss 0.6099795777626964 | val_acc 78.77157928388746



Batch:  38%|███▊      | 148/391 [00:01<00:02, 87.36it/s]4it/s]

epoch 13 | steps  5200 | loss 0.7454145550727844 | accuracy 75.0


Batch:  87%|████████▋ | 341/391 [00:02<00:00, 151.61it/s]

epoch 13 | steps  5400 | loss 0.5883215665817261 | accuracy 84.375


Batch:   0%|          | 0/391 [00:00<?, ?it/s].79s/it].21it/s]

Epoch 13 | train_loss 0.6364156612959664 | train_acc 77.5675351662404 | val_loss 0.574451186925249 | val_acc 79.97442455242967


Batch:  38%|███▊      | 150/391 [00:01<00:02, 88.55it/s]

epoch 14 | steps  5600 | loss 0.7146062850952148 | accuracy 75.0


Batch:  91%|█████████ | 354/391 [00:03<00:00, 135.99it/s]

epoch 14 | steps  5800 | loss 0.6260130405426025 | accuracy 80.46875


Validation: 100%|██████████| 391/391 [00:07<00:00, 55.68it/s] 
Batch:   0%|          | 0/391 [00:00<?, ?it/s].23s/it].19it/s]

Epoch 14 | train_loss 0.597675963207279 | train_acc 79.144820971867 | val_loss 0.6152856245522609 | val_acc 78.20092710997443


Batch:  43%|████▎     | 169/391 [00:01<00:02, 106.20it/s]

epoch 15 | steps  6000 | loss 0.6517689228057861 | accuracy 75.78125


Batch:  88%|████████▊ | 345/391 [00:02<00:00, 69.37it/s] 

epoch 15 | steps  6200 | loss 0.47682151198387146 | accuracy 84.375


Batch: 100%|██████████| 391/391 [00:05<00:00, 72.24it/s]8it/s]

Epoch 15 | train_loss 0.5799398330014075 | train_acc 79.74504475703324 | val_loss 0.5629044825311207 | val_acc 80.33288043478261



Batch:  43%|████▎     | 170/391 [00:01<00:02, 104.60it/s]

epoch 16 | steps  6400 | loss 0.5441251397132874 | accuracy 82.8125


Batch:  94%|█████████▍| 367/391 [00:02<00:00, 167.94it/s]

epoch 16 | steps  6600 | loss 0.46984192728996277 | accuracy 82.8125


Validation: 100%|██████████| 391/391 [00:04<00:00, 83.15it/s] 
Batch:   0%|          | 0/391 [00:00<?, ?it/s].67s/it].44it/s]

Epoch 16 | train_loss 0.5553489934148081 | train_acc 80.57145140664962 | val_loss 0.489836784503649 | val_acc 83.25567455242967


Batch:  48%|████▊     | 189/391 [00:01<00:01, 123.01it/s]

epoch 17 | steps  6800 | loss 0.48806992173194885 | accuracy 86.71875


Batch:  97%|█████████▋| 379/391 [00:02<00:00, 171.62it/s]

epoch 17 | steps  7000 | loss 0.530581533908844 | accuracy 80.46875


Batch: 100%|██████████| 391/391 [00:05<00:00, 68.03it/s] it/s]

Epoch 17 | train_loss 0.5249741957010821 | train_acc 81.56809462915601 | val_loss 0.45499724477453307 | val_acc 84.53804347826087



Batch:  48%|████▊     | 186/391 [00:01<00:01, 120.84it/s]

epoch 18 | steps  7200 | loss 0.48228850960731506 | accuracy 82.8125


Validation:   0%|          | 0/391 [00:00<?, ?it/s]4it/s]it/s]

epoch 18 | steps  7400 | loss 0.42572319507598877 | accuracy 85.9375


Batch:   0%|          | 0/391 [00:00<?, ?it/s].64s/it].68it/s]

Epoch 18 | train_loss 0.5052313443339999 | train_acc 82.40768861892583 | val_loss 0.47312118642775297 | val_acc 83.42990728900256


Batch:  48%|████▊     | 187/391 [00:01<00:01, 116.83it/s]

epoch 19 | steps  7600 | loss 0.30616268515586853 | accuracy 88.28125


Validation: 100%|██████████| 391/391 [00:05<00:00, 70.27it/s] 

epoch 19 | steps  7800 | loss 0.5120217800140381 | accuracy 80.46875



Epoch: 100%|██████████| 20/20 [01:51<00:00,  5.42s/it].62it/s]

Epoch 19 | train_loss 0.48032914387905384 | train_acc 83.12380115089515 | val_loss 0.43226598389923115 | val_acc 85.15305306905371


Validation: 100%|██████████| 391/391 [00:13<00:00, 208.62it/s]

In [85]:
torch.save(model, "model.pth")

  "type " + obj.__name__ + ". It won't be checked "


In [86]:
net = torch.load("model.pth")

In [87]:
net

CNN(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (linear): Linear(in_features=1024, out_features=124, bias=True)
  (output): Linear(in_features=124, out_features=10, bias=True)
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout): Dropout(p=0.25, inplace=False)
)

### PyTorch-Lightning Code

In [9]:
import pytorch_lightning as pl

In [10]:
PATH = "lightning_logs/version_0/checkpoints/model"

In [34]:
class CNN(pl.LightningModule):
    
    def __init__(self, in_channels=3, n_classes=10):
        super(CNN, self).__init__()
        self.conv1 = torch.nn.Conv2d(in_channels=in_channels, out_channels=16, kernel_size=3, padding=1) # 32x32x3 -> 16x16x16
        self.conv2 = torch.nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1) # 16x16x16 -> 8x8x32
        self.conv3 = torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1) # 8x8x32 -> 4x4x64
        self.linear = torch.nn.Linear(in_features=4*4*64, out_features=128)
        self.output = torch.nn.Linear(in_features=128, out_features=n_classes)
        self.maxpool = torch.nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = torch.nn.Dropout(p=0.25)
        
    def forward(self, x):
        x = self.dropout(torch.nn.functional.relu(self.maxpool(self.conv1(x))))
        x = self.dropout(torch.nn.functional.relu(self.maxpool(self.conv2(x))))
        x = self.dropout(torch.nn.functional.relu(self.maxpool(self.conv3(x))))
        x = x.view(-1, 4*4*64)
        x = self.dropout(self.linear(x))
        out = self.output(x)
        return out
    
    
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=lr)
    
    def train_dataloader(self):
        dataset = torchvision.datasets.cifar.CIFAR10(root=root, train=True, transform=data_transform, download=True)
        loader = torch.utils.data.DataLoader(dataset=dataset,  batch_size=batch_size, shuffle=True, num_workers=num_workers)
        return loader
    
    def training_step(self, batch, batch_idx):
        images, labels = batch
        outputs = self(images)
        
        # loss and accuracy
        loss = torch.nn.functional.cross_entropy(outputs, labels)
        acc = accuracy(labels, torch.argmax(outputs, 1))
        log = {"loss":loss, "acc":torch.tensor(acc)}
        return {"loss":loss, "acc":torch.tensor(acc), "log":log}
    
    def training_epoch_end(self, outputs):
        avg_loss = torch.stack([x["loss"] for x in outputs]).mean()
        avg_acc = torch.stack([x["acc"] for x in outputs]).mean()
        log = {"epoch_loss":avg_loss, "epoch_acc":avg_acc}
        return {"epoch_loss":avg_loss, "epoch_acc":avg_acc, "log":log}
    
    def val_dataloader(self):
        dataset = torchvision.datasets.cifar.CIFAR10(root=root, train=False, transform=data_transform, download=True)
        loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
        return loader
    
    def validation_step(self, batch, batch_idx):
        images, labels = batch
        outputs = self(images)
        
        loss = torch.nn.functional.cross_entropy(outputs, labels)
        acc = accuracy(labels, torch.argmax(outputs, 1))
        
        log = {"val_loss":loss, "val_acc":torch.tensor(acc)}
        return {"val_loss":loss, "val_acc":torch.tensor(acc), "log":log}
    
    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
        avg_acc = torch.stack([x["val_acc"] for x in outputs]).mean()
        
        log = {"val_loss":avg_loss, "val_acc":avg_acc}
        return {"val_loss":avg_loss, "val_acc":avg_acc, "log":log}
    
    def test_dataloader(self):
        dataset = torchvision.datasets.cifar.CIFAR10(root=root, train=False, transform=data_transform, download=True)
        loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
        return loader
    
    def test_step(self, batch, batch_idx):
        images, labels = batch
        outputs = self(images)
        
        loss = torch.nn.functional.cross_entropy(outputs, labels)
        acc = accuracy(labels, torch.argmax(outputs, 1))
        
        log = {"test_loss":loss, "test_acc":torch.tensor(acc)}
        return {"test_loss":loss, "test_acc":torch.tensor(acc), "log":log}
    
    def test_epoch_end(self, outputs):
        avg_loss = torch.stack([x["test_loss"] for x in outputs]).mean()
        avg_acc = torch.stack([x["test_acc"] for x in outputs]).mean()
        
        log = {"test_loss":avg_loss, "test_acc":avg_acc}
        return {"test_loss":avg_loss, "test_acc":avg_acc, "log":log}
    
        

##### Callbacks

In [35]:
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint

In [36]:
early_stopping = EarlyStopping(monitor="val_loss", min_delta=0.01, patience=3)
model_checkpoint = ModelCheckpoint(filepath="lightning_logs/", monitor="val_loss")



##### Create model and trainer

In [37]:
# create pl trainer object 
trainer = pl.Trainer(
    gpus=[0],
    weights_summary=None,
    show_progress_bar=True,
    max_epochs=10,
#     callbacks=[early_stopping, model_checkpoint]
    
)

GPU available: True, used: True
No environment variable for node rank defined. Set as 0.
CUDA_VISIBLE_DEVICES: [0]


In [38]:
model = CNN()

In [39]:
trainer.fit(model)

Files already downloaded and verified
Files already downloaded and verified      
Files already downloaded and verified
Epoch 1:  83%|████████▎ | 782/939 [00:04<00:00, 174.87it/s, loss=1.426, v_num=0]
Epoch 1:  84%|████████▎ | 785/939 [00:04<00:00, 172.66it/s, loss=1.426, v_num=0]
Epoch 1:  88%|████████▊ | 828/939 [00:04<00:00, 178.16it/s, loss=1.426, v_num=0]
Epoch 1:  94%|█████████▎| 878/939 [00:04<00:00, 184.92it/s, loss=1.426, v_num=0]
Epoch 1:  99%|█████████▉| 928/939 [00:04<00:00, 191.00it/s, loss=1.426, v_num=0]
Epoch 1: 100%|██████████| 939/939 [00:04<00:00, 191.35it/s, loss=1.426, v_num=0]
Epoch 2:   0%|          | 0/939 [00:00<?, ?it/s, loss=1.426, v_num=0]           



Epoch 2:  83%|████████▎ | 782/939 [00:04<00:00, 169.64it/s, loss=1.282, v_num=0]
Validating: 0it [00:00, ?it/s][A
Epoch 2:  85%|████████▌ | 800/939 [00:04<00:00, 169.49it/s, loss=1.282, v_num=0]
Epoch 2:  91%|█████████ | 851/939 [00:04<00:00, 176.46it/s, loss=1.282, v_num=0]
Epoch 2:  96%|█████████▌| 902/939 [00:04<00:00, 182.59it/s, loss=1.282, v_num=0]
Epoch 2: 100%|██████████| 939/939 [00:05<00:00, 185.90it/s, loss=1.282, v_num=0]
Epoch 3:  83%|████████▎ | 782/939 [00:04<00:00, 166.02it/s, loss=1.255, v_num=0]
Validating: 0it [00:00, ?it/s][A
Epoch 3:  87%|████████▋ | 816/939 [00:04<00:00, 167.26it/s, loss=1.255, v_num=0]
Epoch 3:  92%|█████████▏| 867/939 [00:05<00:00, 173.35it/s, loss=1.255, v_num=0]
Validating:  57%|█████▋    | 89/157 [00:00<00:00, 160.26it/s][A
Epoch 3: 100%|██████████| 939/939 [00:05<00:00, 180.31it/s, loss=1.255, v_num=0]
Epoch 4:  83%|████████▎ | 782/939 [00:05<00:01, 155.72it/s, loss=1.178, v_num=0]
Validating: 0it [00:00, ?it/s][A
Epoch 4:  87%|████████▋

1

In [40]:
trainer.test()

Files already downloaded and verified
Testing:  94%|█████████▎| 147/157 [00:00<00:00, 200.66it/s]--------------------------------------------------------------------------------
TEST RESULTS
{'test_acc': tensor(69.2277), 'test_loss': tensor(0.8820, device='cuda:0')}
--------------------------------------------------------------------------------
Testing: 100%|██████████| 157/157 [00:01<00:00, 140.24it/s]


In [41]:
!tensorboard --logdir=lightning_logs

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.2.2 at http://localhost:6006/ (Press CTRL+C to quit)
^C
