# MNIST Example with Data Logging in DataFed


## Import Libraries


In [1]:
import os  
import sys
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from m3util.util.IO import make_folder 
import random
import numpy as np
import matplotlib.pyplot as plt


# Make the local DataFed_TorchFlow checkout importable before importing TorchLogger.
# NOTE(review): this is a hard-coded absolute path inside one user's home
# directory, so the import below only resolves on a machine with that exact
# layout. Consider installing the package or deriving the path from a
# configurable location — TODO confirm the intended install method.
sys.path.append(os.path.abspath("/home/jg3837/DataFed_TorchFlow/DataFed_TorchFlow/src"))
from datafed_torchflow.pytorch import TorchLogger


## Parameters to Update


## Builds the CNN


In [None]:
class Net(nn.Module):
    """Small convolutional classifier producing log-probabilities over 10 classes.

    Two 3x3 convolutions are followed by 2x2 max pooling, dropout, and two
    fully connected layers. ``fc1`` expects 9216 flattened features, which is
    what a 1x28x28 input yields (64 channels x 12 x 12 after the convolutions
    and pooling).
    """

    def __init__(self):
        super().__init__()
        # Convolutional stage: 1 -> 32 -> 64 channels, 3x3 kernels, stride 1.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        # Dropout applied after pooling and after the hidden dense layer.
        self.dropout1 = nn.Dropout(p=0.25)
        self.dropout2 = nn.Dropout(p=0.5)
        # Dense head: 9216 -> 128 -> 10.
        self.fc1 = nn.Linear(in_features=9216, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return per-class log-probabilities for a batch ``x``."""
        features = F.relu(self.conv1(x))
        features = F.relu(self.conv2(features))
        features = self.dropout1(F.max_pool2d(features, 2))

        # Collapse everything except the batch dimension before the dense head.
        hidden = F.relu(self.fc1(torch.flatten(features, 1)))
        logits = self.fc2(self.dropout2(hidden))
        return F.log_softmax(logits, dim=1)


## Training function

This function calls TorchLogger.save, which does the following:

1. Saves the model checkpoint
1. Identifies the appropriate metadata for the model (including DataFed provenance dependencies)
1. Identifies and navigates to the appropriate DataFed project and collection
1. Creates a DataFed data record with this metadata
1. Saves the model weights file or, if the user specified a local zip file instead, uses that zip file so that multiple files can be uploaded to the same DataFed data record
1. Uploads the zip file to the DataFed data record generated in the previous steps


In [None]:
def train(
    model,
    device,
    train_loader,
    optimizer,
    epoch,
    base_local_file_name,
    local_vars,
):
    """Train ``model`` for one epoch and log the resulting checkpoint.

    Makes one pass over ``train_loader`` using the negative log-likelihood
    loss, prints progress every 100 batches plus a per-epoch summary, and then
    calls ``torchlogger.save`` to record the checkpoint.

    Args:
        model: Network to optimize. Its output is passed to ``F.nll_loss``,
            so it is expected to return per-class log-probabilities.
        device: Device each batch is moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches that supports
            ``len()`` and exposes a ``dataset`` attribute.
        optimizer: Optimizer that updates ``model``'s parameters. The learning
            rate reported to the logger is read from its first parameter group.
        epoch: Epoch number used in the progress messages and checkpoint name.
        base_local_file_name: Directory under which the checkpoint path is built.
        local_vars: Forwarded unchanged to ``torchlogger.save``.

    Raises:
        ValueError: If ``train_loader`` contains no batches.

    Note:
        Relies on a ``torchlogger`` object defined at notebook level.
    """
    n_batches = len(train_loader)
    if n_batches == 0:
        # Without this guard the epoch average divides by zero and the
        # checkpoint name would refer to a loss that was never computed.
        raise ValueError("train_loader is empty; nothing to train on")

    make_folder(base_local_file_name)  # ensure the path exists to save the weights

    model.train()  # enable training-mode behavior (e.g. dropout)

    n_samples = len(train_loader.dataset)
    total_loss = 0.0
    correct = 0
    last_batch_loss = 0.0

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        last_batch_loss = loss.item()
        total_loss += last_batch_loss
        # Same prediction rule as test(): index of the largest log-probability.
        correct += (output.argmax(dim=1) == target).sum().item()

        if batch_idx % 100 == 0:
            print(
                f"Train Epoch: {epoch} [{batch_idx * len(data)}/{n_samples} "
                f"({100. * batch_idx / n_batches:.0f}%)]\tLoss: {last_batch_loss:.6f}"
            )

    avg_loss = total_loss / n_batches
    accuracy = 100.0 * correct / n_samples
    print(f"Train Epoch: {epoch} [ Train Loss: {avg_loss:.4f}, Train Accuracy: {accuracy:.4f}%]")

    # The checkpoint name carries the loss of the final batch of the epoch.
    file_name = f"MNSIT_epoch_{epoch}_loss_{last_batch_loss:.4e}"
    local_file_path = f"{base_local_file_name}/{file_name}.pkl"

    torchlogger.save(
        file_name,
        epoch=epoch,
        local_file_path=local_file_path,
        local_vars=local_vars,
        # Read the rate from the optimizer that was actually used instead of
        # depending on a notebook-level variable defined in a later cell.
        model_hyperparameters={"learning_rate": optimizer.param_groups[0]["lr"]},
    )


## Testing function

In [None]:
def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print a one-line summary.

    The model is switched to evaluation mode and run without gradient
    tracking. The summed negative log-likelihood is divided by the number of
    samples in ``test_loader.dataset`` and printed together with the number of
    correct predictions. Nothing is returned.
    """
    model.eval()
    summed_loss = 0
    n_correct = 0

    with torch.no_grad():
        for batch, labels in test_loader:
            batch = batch.to(device)
            labels = labels.to(device)
            log_probs = model(batch)

            # reduction='sum' so the per-sample mean can be taken once at the end
            summed_loss += F.nll_loss(log_probs, labels, reduction='sum').item()

            # predicted class = position of the largest log-probability
            predicted = log_probs.argmax(dim=1, keepdim=True)
            n_correct += (predicted == labels.view_as(predicted)).sum().item()

    n_samples = len(test_loader.dataset)
    mean_loss = summed_loss / n_samples
    accuracy_pct = 100. * n_correct / n_samples

    summary = '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        mean_loss, n_correct, n_samples, accuracy_pct)
    print(summary)


## Set seed and device

In [None]:
# Seed every random number generator imported by this notebook so that a
# fresh "Restart & Run All" starts from the same state.
torch.manual_seed(42)
random.seed(42)
np.random.seed(42)

# Prefer GPU index 2, but only when that index exists: requesting "cuda:2" on
# a machine with fewer than three GPUs fails as soon as a tensor is moved
# there. Fall back to the first GPU, and to the CPU when CUDA is unavailable.
if not torch.cuda.is_available():
    device = torch.device("cpu")
elif torch.cuda.device_count() > 2:
    device = torch.device("cuda:2")
else:
    device = torch.device("cuda:0")


## Define transformations for data preprocessing


In [None]:
# Preprocessing pipeline applied to every image: convert it to a tensor, then
# normalize with the MNIST mean (0.1307) and standard deviation (0.3081).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

## Load the MNIST dataset

In [None]:
# MNIST train and test splits stored under ./data (download=True requests the
# files if they are not already present); both use the same preprocessing.
train_dataset = datasets.MNIST("./data", train=True, download=True, transform=transform)
test_dataset = datasets.MNIST("./data", train=False, download=True, transform=transform)

# Batched loaders: training batches are shuffled, evaluation batches are not.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=1,
    pin_memory=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=1000,
    shuffle=False,
    num_workers=1,
    pin_memory=True,
)


## Define the model and optimizer

In [None]:
# Step size for the Adadelta optimizer.
learning_rate = 0.1

# Build the network, move it to the selected device, then attach the
# optimizer to the parameters of that same instance.
model = Net()
model = model.to(device)
optimizer = optim.Adadelta(params=model.parameters(), lr=learning_rate)

## Instantiate the DataFed TorchLogger

In [None]:
# Identifier appended to the DataFed collection path and the local model path.
suffix = "111424"
notebook_path = "./PytorchModelLogger.ipynb"

# Register the network that is actually being trained. The previous
# {"model": Net()} handed the logger a brand-new, untrained instance that is
# unrelated to the parameters `optimizer` updates.
model_dict = {"model": model, "optimizer": optimizer}

torchlogger = TorchLogger(
    model_dict=model_dict,
    DataFed_path=f"2024_test_pytorch/delete_me/{suffix}",
    script_path=notebook_path,
    # Shape of a single preprocessed training image.
    input_data_shape=train_dataset[0][0].shape,
    # os.scandir yields entries in arbitrary order; sort so the list of raw
    # dataset files is the same on every run.
    dataset_id_or_path=sorted(file.path for file in os.scandir("./data/MNIST/raw")),
    local_model_path=f"examples/model/{suffix}",
    logging=True,
)

Unable to connect to pypi: <Fault -32500: 'RuntimeError: PyPI no longer supports the XMLRPC package_releases method. Use JSON or Simple API instead. See https://warehouse.pypa.io/api-reference/xml-rpc.html#deprecated-methods for more information.'>


## Train the model

In [None]:
n_epochs = 5

for epoch in range(1, n_epochs + 1):
    # Snapshot of the current namespace; its items are handed to train(),
    # which forwards them to the logger.
    local_vars = locals()

    # NOTE(review): the weights directory hard-codes "111324" while `suffix`
    # in the logger cell is "111424" — confirm whether this path should be
    # derived from `suffix` instead.
    train(
        model,
        device,
        train_loader,
        optimizer,
        epoch,
        base_local_file_name="model/111324/weights",
        local_vars=list(local_vars.items()),
    )

    # Report held-out loss and accuracy after every training epoch.
    test(model=model, device=device, test_loader=test_loader)


Train Epoch: 1 [ Train Loss: 0.3413, Train Accuracy: 89.5717%]

Test set: Average loss: 0.0846, Accuracy: 9732/10000 (97%)

Train Epoch: 2 [ Train Loss: 0.1176, Train Accuracy: 96.4933%]

Test set: Average loss: 0.0532, Accuracy: 9821/10000 (98%)

