<a href="https://colab.research.google.com/github/patimus-prime/ML_notebooks/blob/master/MNIST_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install --quiet pytorch-lightning datetime # torchvision pandas # not sure if those latter 2 necessary

[K     |████████████████████████████████| 706 kB 7.4 MB/s 
[K     |████████████████████████████████| 52 kB 717 kB/s 
[K     |████████████████████████████████| 419 kB 47.9 MB/s 
[K     |████████████████████████████████| 5.9 MB 50.9 MB/s 
[K     |████████████████████████████████| 251 kB 67.3 MB/s 
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.8.2+zzzcolab20220719082949 requires tensorboard<2.9,>=2.8, but you have tensorboard 2.10.0 which is incompatible.[0m
[?25h

In [2]:
import os
import pandas as pd
import seaborn as sns
import torch

from torch import optim, nn, utils, Tensor

# from IPython.core.display import display
from IPython import display
import pytorch_lightning as pl
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.loggers import CSVLogger
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy
from torchvision import transforms 
from torchvision.transforms import ToTensor
from torchvision.datasets import MNIST 
import datetime as dt

# funky path stuff
PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")

In [3]:
# by default MNIST is 28x28; let's use some transforms, reduce computation on this puny EC2
newSize = 16
newDims = newSize*newSize

transform_resize_totensor = transforms.Compose([
    # transforms.Resize((newSize, newSize)),
    transforms.ToTensor() # ALL DATA MUST BE TENSORS
])

In [4]:
# now load MNIST data :)
# it is already split, so we can immediately define rather than using a ratio, which is an option below:
dataTrain = MNIST(root='./data', # dir created if don't exist
                  train=True,
                  download=True, # takes like 1.5 secs lol
                  transform=transform_resize_totensor)

dataTest = MNIST(root='./data', # dir created if don't exist
                  train=False,
                  download=True, # takes like 1.5 secs lol
                  transform=transform_resize_totensor)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [17]:
# now define our lightning class, which contains all methods to be employed
# during modeling this dataset of practicing MNIST

# gonna define some args in the __init__ method
# these are then passed in as qualities of the class
# one could also hard code these in and declare as qualities,
# not define in the init thing... but less adaptable that way

# ez way: hardcode some in and then pass upon function call to override

class cnnMNIST(pl.LightningModule):
    def __init__(self,
                 # dataset specific; these are defaults, overriden if passed in:
                 nClasses=10,  # 10 digits
                 dataDir=PATH_DATASETS,  # cray, see above
                 hiddenLayerSize=64,  # arbitrary
                 learningRate=2e-4,
                 ):
        super().__init__()
        
        # set all the shit that gets passed in
        self.save_hyperparameters()
        nClasses = 10, # 10 digits
        nSize = 28,
        nDims = 28*28, # could also split this into channel param, width param etc.
        inChannels = 1, # may be inappropriate for more complex models, require an array etc.
        outChannels = 1,
        kernelSize = 5, # one would determine this, along with dims, beforehand.
        learningRate = 2e-4
                 
        # alternative usage of this hyperparam method:
        
        # self.save_hyperparameters('nClasses') 
        # self.nDims = nDims
        
        # this way can specify what args are hparams (can pass no args to save all)
        # or
        # self.save_hyperparameters(ignore='arg2'); # here 'trainValRatio'
        # from this, ctrl+f 'save_h': https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html
        
        # ----------------------
        # Layer declaration; some implementations do this above outside class definition
        # all options: https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
        # params like kernel and padding sizes taken from IBM Deep Learning course on Coursera
        
        # sequential used; alternative module_list; but this way the order defined = order executed
        # https://pytorch.org/docs/stable/generated/torch.nn.Sequential.html
        
        out1=newSize
        out2=newSize*2 #FIXME: add justification pls
        
        self.CNNmodel = nn.Sequential(
            nn.Conv2d(in_channels=1,
                      out_channels=out1,
                      kernel_size=5,
                      padding=2,
                      ),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(), # why not; also looked up and relu can go before or after maxpool
                        # why relu6: https://paperswithcode.com/method/relu6
                        # paper: https://arxiv.org/abs/1704.04861v1
            nn.Conv2d(in_channels=out1, # output of layer 1 -> into layer2
                      out_channels=out2, 
                      kernel_size=5,
                      stride=1,
                      padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
            # final output layer:
            nn.Linear(out2*4*4,nClasses) # again these numbers need to get justified to the pat
            
        )
    def forward(self, x):
        x = self.CNNmodel(x)  # for ea x passed in, get model output
        # could we not put this at the end of declarations above?
        return F.log_softmax(x, dim=1)
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)
        loss = F.cross_entropy(y_hat, y)
        # 2 syntaxes, same tensorboard:
        self.log('train_loss', loss, prog_bar=True)
        return loss
    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)  # again, implicit self(x)
        loss = F.cross_entropy(y_hat, y)
        self.log('test_loss', loss, prog_bar=True)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(
            self.parameters(), lr=2e-2)
        return optimizer

    def train_dataloader(self):
        return DataLoader(dataTrain)  # , batch_size=BATCH_SIZE)

    def test_dataloader(self):
        return DataLoader(dataTest)



In [18]:
model = cnnMNIST() #therefore using defaults in class defined above
start = dt.datetime.now()
print('Training started at', start)

trainer = Trainer(
                  accelerator="auto",
                  # devices=1 if torch.cuda.is_available() else None,
                  auto_lr_find=True,
                  auto_scale_batch_size=True,
                  max_epochs=3, # default 1000, so; for the sake of development, just 3
                # #   a way to specify refresh rate 
                #   callbacks=[TQDMProgressBar(refresh_rate=20)],
                  )
trainer.fit(model)

print('Training duration:', dt.datetime.now() - start)


TypeError: ignored

In [None]:
# Tensorboard call:
%reload_ext tensorboard
%tensorboard --logdir lightning_logs/