* Let's use the Jupyter magic function to create a `.py` file for creating DataLoaders. 
* We can save a code cell's contents to a file using the Jupyter magic `%%writefile filename` - https://ipython.readthedocs.io/en/stable/interactive/magics.html

In [1]:
%%writefile going_modular/data_setup.py 
"""
Contains functionality for creating PyTorch DataLoaders for image classification data.
"""
import os 
from torchvision import datasets, transforms 
from torch.utils.data import DataLoader 

# Default number of DataLoader worker subprocesses. 0 keeps data loading in the
# main process; os.cpu_count() is the alternative the author left noted here.
NUM_WORKERS = 0  # os.cpu_count()


def create_dataloaders(train_dir, test_dir, transform, batch_size, num_workers=NUM_WORKERS):
    """Create training and testing DataLoaders.

    Takes in a training directory and a testing directory path, turns them
    into torchvision ``ImageFolder`` datasets and then into PyTorch
    DataLoaders.

    Args:
        train_dir: Path to the training data folder.
        test_dir: Path to the testing data folder.
        transform: Transform applied to every image of both datasets.
        batch_size: Number of samples per batch in each DataLoader.
        num_workers: Number of worker subprocesses each DataLoader uses.
            Defaults to the module-level ``NUM_WORKERS``.

    Returns:
        A tuple ``(train_dataloader, test_dataloader, class_names)`` where
        ``class_names`` is the list of target class names taken from the
        training dataset.
    """
    train_data = datasets.ImageFolder(train_dir, transform)
    test_data = datasets.ImageFolder(test_dir, transform)

    # Class names come from the training set.
    class_names = train_data.classes

    train_dataloader = DataLoader(
        train_data,
        batch_size=batch_size,
        shuffle=True,  # reshuffle training samples every epoch
        num_workers=num_workers,
    )

    test_dataloader = DataLoader(
        test_data,
        batch_size=batch_size,
        shuffle=False,  # keep evaluation order fixed
        num_workers=num_workers,
    )

    return train_dataloader, test_dataloader, class_names

Overwriting going_modular/data_setup.py


In [2]:
%%writefile going_modular/model_builder.py 
"""
Contains PyTorch model code to instantiate a TinyVGG model from the CNN Explainer website. 
"""
import torch 
from torch import nn 

class TinyVGG(nn.Module):
    """Creates the TinyVGG architecture.

    Replicates the TinyVGG architecture from the CNN Explainer website in PyTorch.
    See the original architecture here: https://poloclub.github.io/cnn-explainer/

    Args:
        input_shape: Number of input channels.
        hidden_unit: Number of channels used by the intermediate conv layers.
        output_shape: Size of the output vector (= number of target classes).
        image_size: Spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. Used only to size the
            final linear layer. Defaults to 64, which yields the 13x13 feature
            map the classifier was originally hard-coded for.

    Raises:
        ValueError: If ``image_size`` is too small to leave at least one
            pixel after both conv blocks.
    """

    def __init__(self, input_shape, hidden_unit, output_shape, image_size=64):
        super().__init__()

        if isinstance(image_size, int):
            height, width = image_size, image_size
        else:
            height, width = image_size

        # Two conv blocks are applied back to back, so shrink each side twice.
        feature_h = self._block_output_size(self._block_output_size(height))
        feature_w = self._block_output_size(self._block_output_size(width))
        if feature_h < 1 or feature_w < 1:
            raise ValueError(
                f"image_size {image_size!r} is too small for TinyVGG; "
                "each side must be at least 16 pixels"
            )

        self.layer_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_shape, out_channels=hidden_unit, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_unit, out_channels=hidden_unit, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.layer_2 = nn.Sequential(
            nn.Conv2d(in_channels=hidden_unit, out_channels=hidden_unit, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_unit, out_channels=hidden_unit, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=hidden_unit * feature_h * feature_w, out_features=output_shape),
        )

    @staticmethod
    def _block_output_size(size):
        """Return the side length left after one conv block.

        Each block has two unpadded 3x3 convolutions (-2 pixels each) followed
        by a 2x2 max-pool with stride 2 (floor-halving).
        """
        return (size - 4) // 2

    def forward(self, x):
        """Run ``x`` through both conv blocks and the classifier; returns logits."""
        x = self.layer_1(x)
        x = self.layer_2(x)
        x = self.classifier(x)
        return x

Overwriting going_modular/model_builder.py


In [3]:
%%writefile going_modular/engine.py 
"""
Contains functions for training and testing a PyTorch model.
"""
import torch 

def train_step(model, dataloader, loss_fn, optimizer, device):
    """Run one training epoch over ``dataloader``.

    Puts the model in training mode, then for every batch performs the
    forward pass, loss calculation, backward pass and optimizer step.

    Args:
        model: The PyTorch model to train.
        dataloader: Iterable of ``(inputs, targets)`` batches that also supports ``len()``.
        loss_fn: Loss function called as ``loss_fn(predictions, targets)``.
        optimizer: Optimizer that updates the model's parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        A tuple ``(train_loss, train_accuracy)``: the per-batch loss and the
        per-batch accuracy, each averaged over the number of batches.
    """
    model.train()

    running_loss = 0
    running_acc = 0

    for inputs, targets in dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        logits = model(inputs)
        batch_loss = loss_fn(logits, targets)
        running_loss += batch_loss.item()

        # Clear old gradients, backpropagate, then update the parameters.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Accuracy uses the predictions made before this batch's update.
        probabilities = torch.softmax(logits, dim=-1)
        predicted = torch.argmax(probabilities, dim=-1)
        n_correct = (predicted == targets).sum().item()
        running_acc += n_correct / len(inputs)

    n_batches = len(dataloader)
    return running_loss / n_batches, running_acc / n_batches


def test_step(model, dataloader, loss_fn, device): 
    """Tests a PyTorch model for a single epoch. 
    
    Turns a target PyTorch model to "eval" mode and then performs a forward pass on a testing dataset.  
    
    Args: 
        model: A PyTorch model to be tested. 
        dataloader: A DataLoader instance for the model to be tested on. 
        loss_fn: A PyTorch loss function to calculate loss on the test data. 
        device: A target device to compute on 
    
    Returns: 
        A tuple of testing loss and testing accuracy metrics. 
        In the form (test_loss, test_accuracy)    
    """
    model.eval() 
    test_loss, test_acc = 0, 0 
    with torch.inference_mode(): 
        for batch, (X, y) in enumerate(dataloader): 
            X, y = X.to(device), y.to(device) 
            
            test_pred_logits = model(X) 
            
            loss = loss_fn(test_pred_logits, y) 
            test_loss += loss.item() 
            
            test_pred_labels = test_pred_logits.argmax(dim=-1) 
            test_acc += ((test_pred_labels == y).sum().item() / len(test_pred_labels)) 
            
        test_loss = test_loss / len(dataloader) 
        test_acc = test_acc / len(dataloader) 
        
    return test_loss, test_acc 


def train(model, train_dataloader, test_dataloader, optimizer, loss_fn, epochs, device):
    """Train and evaluate a PyTorch model for a number of epochs.

    Each epoch calls ``train_step()`` on the training data and then
    ``test_step()`` on the test data, prints the four resulting metrics and
    records them.

    Args:
        model: The PyTorch model to train and evaluate.
        train_dataloader: DataLoader the model is trained on.
        test_dataloader: DataLoader the model is evaluated on.
        optimizer: Optimizer passed to ``train_step()``.
        loss_fn: Loss function used for both training and evaluation.
        epochs: Number of epochs to run.
        device: Device passed to both step functions.

    Returns:
        A dictionary with the keys ``train_loss``, ``train_acc``,
        ``test_loss`` and ``test_acc``; each value is a list holding one
        entry per epoch.
    """
    metric_names = ("train_loss", "train_acc", "test_loss", "test_acc")
    results = {name: [] for name in metric_names}

    for epoch_index in range(epochs):
        train_loss, train_acc = train_step(
            model=model,
            dataloader=train_dataloader,
            loss_fn=loss_fn,
            optimizer=optimizer,
            device=device,
        )
        test_loss, test_acc = test_step(
            model=model,
            dataloader=test_dataloader,
            loss_fn=loss_fn,
            device=device,
        )

        # One progress line per epoch, numbered from 1.
        summary = " | ".join([
            f"Epoch: {epoch_index + 1}",
            f"train_loss: {train_loss:.4f}",
            f"train_acc: {train_acc:.4f}",
            f"test_loss: {test_loss:.4f}",
            f"test_acc: {test_acc:.4f}",
        ])
        print(summary)

        epoch_metrics = (train_loss, train_acc, test_loss, test_acc)
        for name, value in zip(metric_names, epoch_metrics):
            results[name].append(value)

    return results
    

Overwriting going_modular/engine.py


In [4]:
%%writefile going_modular/utils.py 
""" 
Contains various utility functions for PyTorch model training and saving. 
"""
import torch 
from pathlib import Path 

def save_model(model, target_dir, model_name):
    """Save a PyTorch model's ``state_dict`` to a target directory.

    Args:
        model: The PyTorch model whose ``state_dict()`` is saved.
        target_dir: Directory to save the model in; created (including
            parents) if it does not exist.
        model_name: Filename for the saved model. Must end with ".pth" or ".pt".

    Raises:
        AssertionError: If ``model_name`` does not end with ".pth" or ".pt".
    """
    # Validate before touching the filesystem so a bad name leaves no stray
    # directory behind. AssertionError is raised explicitly (rather than via
    # `assert`) so the check still runs under `python -O` while callers keep
    # seeing the same exception type as before.
    if not model_name.endswith((".pth", ".pt")):
        raise AssertionError("model_name should end with '.pt' or '.pth'")

    target_dir_path = Path(target_dir)
    target_dir_path.mkdir(parents=True, exist_ok=True)
    model_save_path = target_dir_path / model_name

    print(f"[INfO] Saving model to: {model_save_path}")
    # Only the state_dict (learned parameters/buffers) is saved, not the whole module.
    torch.save(obj=model.state_dict(), f=model_save_path)

Overwriting going_modular/utils.py


In [5]:
%%writefile train.py 
""" 
Trains a PyTorch image classification model using device-agnostic code. 
"""
import os 
import torch 
from going_modular import data_setup, engine, model_builder, utils 

from torchvision import transforms 


# Seed the CPU and CUDA random number generators so runs are repeatable.
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Setup hyperparameters
save_model_name = "05_going_modular_script_mode_tinyvgg_model.pth"

NUM_EPOCHS = 5
BATCH_SIZE = 32
HIDDEN_UNITS = 10
LEARNING_RATE = 0.001

# Setup data directories
train_dir = "data/pizza_steak_sushi/train"
test_dir = "data/pizza_steak_sushi/test"

# Device-agnostic: use the GPU when one is available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Resize every image to 64x64 (the input size TinyVGG's classifier expects by
# default) and convert it to a tensor.
data_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor()
])

train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(train_dir, test_dir, data_transform, BATCH_SIZE)

# Build the model from the HIDDEN_UNITS hyperparameter (previously a literal 10
# was passed here, so changing the constant had no effect).
model = model_builder.TinyVGG(input_shape=3, hidden_unit=HIDDEN_UNITS, output_shape=len(class_names)).to(device)

loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Train and evaluate; per-epoch metrics are printed by engine.train.
engine.train(model=model,
             train_dataloader=train_dataloader,
             test_dataloader=test_dataloader,
             loss_fn=loss_fn,
             optimizer=optimizer,
             epochs=NUM_EPOCHS,
             device=device)

# Persist the trained weights under saved_models/.
utils.save_model(model=model, target_dir="saved_models", model_name=save_model_name)

Overwriting train.py


In [6]:
!python train.py

Epoch: 1 | train_loss: 1.1063 | train_acc: 0.3047 | test_loss: 1.0983 | test_acc: 0.3011
Epoch: 2 | train_loss: 1.0998 | train_acc: 0.3281 | test_loss: 1.0697 | test_acc: 0.5417
Epoch: 3 | train_loss: 1.0869 | train_acc: 0.4883 | test_loss: 1.0808 | test_acc: 0.4924
Epoch: 4 | train_loss: 1.0842 | train_acc: 0.3984 | test_loss: 1.0609 | test_acc: 0.5833
Epoch: 5 | train_loss: 1.0662 | train_acc: 0.4141 | test_loss: 1.0655 | test_acc: 0.5644
[INfO] Saving model to: saved_models\05_going_modular_script_mode_tinyvgg_model.pth
