# Import libraries


In [16]:
import torch
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from tqdm import tqdm
from pathlib import Path
import os

In [17]:
# Seed PyTorch's global RNG so that weight initialisation and training-batch
# shuffling repeat from one "Restart & Run All" to the next.
SEED = 0
torch.manual_seed(SEED)

# Mini-batch size shared by the training and evaluation loaders.
BATCH_SIZE = 10

# Convert PIL images to tensors, then standardise with a fixed mean / std.
# NOTE(review): 0.1307 / 0.3081 are the statistics usually quoted for MNIST
# digits; Fashion-MNIST is usually quoted as roughly 0.2860 / 0.3530 - confirm.
# They are kept unchanged here because a cached checkpoint may have been
# trained with them; if they are changed, delete the checkpoint and retrain.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
)

# Load the Fashion MNIST training set (downloaded into ./data on first use)
fashion_mnist_train = datasets.FashionMNIST(
    root="./data", train=True, download=True, transform=transform
)
# Training batches are reshuffled every epoch
train_loader = torch.utils.data.DataLoader(
    fashion_mnist_train, batch_size=BATCH_SIZE, shuffle=True
)

# Load the Fashion MNIST test set
fashion_mnist_test = datasets.FashionMNIST(
    root="./data", train=False, download=True, transform=transform
)
# Accuracy does not depend on sample order, so evaluation keeps a fixed order
test_loader = torch.utils.data.DataLoader(
    fashion_mnist_test, batch_size=BATCH_SIZE, shuffle=False
)

# Define the device
device = "cpu"

# Define the model


In [18]:
class FashionMNISTNet(nn.Module):
    """Small convolutional classifier producing 10 class logits.

    Architecture: two 3x3 convolutions (1 -> 32 -> 64 channels, stride 1,
    padding 1), each followed by ReLU and a 2x2 max pool, then two fully
    connected layers (64*7*7 -> 128 -> 10).

    ``fc1`` expects 64*7*7 features per sample, which is what a
    single-channel 28x28 input yields after the two pooling stages.
    """

    def __init__(self):
        super().__init__()
        # First convolutional layer
        self.conv1 = nn.Conv2d(
            in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1
        )
        # Second convolutional layer
        self.conv2 = nn.Conv2d(
            in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1
        )
        # Max pooling layer (halves height and width), reused after each convolution
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Fully connected layers
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)  # Output layer with 10 classes for Fashion MNIST

    def forward(self, x):
        """Return the raw class logits for a batch of images.

        Args:
            x: Tensor of shape ``(batch, 1, H, W)``; ``H`` and ``W`` must
                reduce to 7x7 after the two pooling stages (e.g. 28x28).

        Returns:
            Tensor of shape ``(batch, 10)`` with unnormalised scores.

        Raises:
            RuntimeError: If the flattened feature size does not match ``fc1``.
        """
        # Convolution -> ReLU -> pooling, twice
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # ``view(-1, 64 * 7 * 7)`` this never re-partitions samples across
        # rows, so a wrongly sized input fails loudly at ``fc1`` instead of
        # silently changing the batch size.
        x = torch.flatten(x, start_dim=1)
        # Fully connected head
        x = F.relu(self.fc1(x))
        return self.fc2(x)

In [19]:
# Instantiate the classifier and place its parameters on the configured device.
net = FashionMNISTNet().to(device)

# Train the model


In [21]:
def train(train_loader, net, epochs=5, total_iterations_limit=None):
    """Train ``net`` in place with Adam (lr=0.001) and cross-entropy loss.

    Args:
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        net: Model to optimise. It is moved to CUDA when available, otherwise
            to the CPU, and is left on that device when the function returns.
        epochs: Number of passes over ``train_loader``.
        total_iterations_limit: Optional cap on the number of optimiser steps
            summed over all epochs. ``None`` disables the cap; a value <= 0
            performs no training step at all.

    Returns:
        None. The running mean loss of the current epoch is shown on the
        progress bar.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net.to(device)

    # A non-positive budget means "do not train"; without this guard one step
    # would still run because the limit is only checked after a step.
    if total_iterations_limit is not None and total_iterations_limit <= 0:
        return

    cross_el = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

    # Loaders without a length (plain iterators/generators) raise TypeError.
    try:
        batches_per_epoch = len(train_loader)
    except TypeError:
        batches_per_epoch = None

    total_iterations = 0

    for epoch in range(epochs):
        net.train()

        loss_sum = 0
        num_iterations = 0

        if total_iterations_limit is None:
            # Let tqdm infer the total from the loader itself.
            bar_total = None
        else:
            # The bar covers a single epoch, so its total is the number of
            # steps this epoch can actually run: what is left of the budget,
            # capped by the loader length when that is known.
            remaining = total_iterations_limit - total_iterations
            bar_total = (
                remaining
                if batches_per_epoch is None
                else min(remaining, batches_per_epoch)
            )

        # The context manager closes the bar even on the early return below.
        with tqdm(
            train_loader, desc=f"Epoch {epoch+1}", total=bar_total
        ) as data_iterator:
            for x, y in data_iterator:
                num_iterations += 1
                total_iterations += 1
                x = x.to(device)
                y = y.to(device)

                optimizer.zero_grad()
                output = net(x)
                loss = cross_el(output, y)
                loss.backward()
                optimizer.step()

                # Report the mean loss over the batches seen so far this epoch.
                loss_sum += loss.item()
                data_iterator.set_postfix(loss=loss_sum / num_iterations)

                if (
                    total_iterations_limit is not None
                    and total_iterations >= total_iterations_limit
                ):
                    return


def print_size_of_model(model):
    """Print the on-disk size of ``model``'s ``state_dict`` in kilobytes.

    The state dict is serialised with ``torch.save`` into a private temporary
    directory, so an existing ``temp_delme.p`` in the working directory is
    neither overwritten nor deleted, and the scratch file is removed even if
    saving or measuring fails.

    Args:
        model: Object exposing ``state_dict()`` (e.g. an ``nn.Module``).

    Returns:
        None. Prints ``Size (KB): <bytes / 1000>``.
    """
    import tempfile  # local import: only this helper needs it

    with tempfile.TemporaryDirectory() as scratch_dir:
        # Same file name as before, but inside a throw-away directory.
        scratch_path = os.path.join(scratch_dir, "temp_delme.p")
        torch.save(model.state_dict(), scratch_path)
        print("Size (KB):", os.path.getsize(scratch_path) / 1e3)


# Checkpoint holding the trained float32 weights; reused on later runs so the
# notebook does not retrain every time.
MODEL_FILENAME = "simplenet_ptq.pt"

if Path(MODEL_FILENAME).exists():
    # map_location remaps the stored tensors onto `device`, so a checkpoint
    # written after training on a GPU can still be loaded on a CPU-only machine.
    # NOTE: torch.load unpickles the file - only load checkpoints you created.
    net.load_state_dict(torch.load(MODEL_FILENAME, map_location=device))
    print("Loaded model from disk")
else:
    train(train_loader, net, epochs=1)
    # Save the model to disk
    torch.save(net.state_dict(), MODEL_FILENAME)

Epoch 1: 100%|██████████| 6000/6000 [00:16<00:00, 357.96it/s, loss=0.353]


# Define the testing loop


In [27]:
def test(net):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0

    with torch.no_grad():  # Disable gradient computation during inference
        data_iterator = tqdm(test_loader, desc="Testing")
        for data in data_iterator:
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            accuracy = 100 * correct / total
            data_iterator.set_postfix(accuracy=accuracy)

    accuracy = 100 * correct / total
    print("Accuracy on test set: {:.2f}%".format(accuracy))

# Print weights and size of the model before quantization


In [24]:
# Show the first convolution's weight tensor and its dtype as the
# pre-quantization reference.
print(
    "Weights before quantization",
    net.conv1.weight,
    net.conv1.weight.dtype,
    sep="\n",
)

Weights before quantization
Parameter containing:
tensor([[[[-0.2513, -0.1076, -0.0871],
          [-0.1587, -0.4168, -0.1259],
          [ 0.0252,  0.1552,  0.2302]]],


        [[[ 0.2138,  0.1370, -0.0802],
          [ 0.2687, -0.2193, -0.2469],
          [ 0.0560, -0.2383,  0.1258]]],


        [[[-0.1809, -0.1614,  0.0100],
          [-0.0992, -0.0296, -0.1993],
          [-0.3367, -0.3854, -0.0999]]],


        [[[-0.3667, -0.4436, -0.3355],
          [-0.0392, -0.2447,  0.2916],
          [ 0.0837,  0.1865,  0.1145]]],


        [[[-0.3207,  0.3156, -0.1251],
          [ 0.2831,  0.0210, -0.3048],
          [-0.1081,  0.2689, -0.2202]]],


        [[[-0.3246, -0.0110,  0.3484],
          [-0.3568,  0.3324, -0.0680],
          [-0.2925,  0.3689,  0.0460]]],


        [[[-0.3064,  0.2084,  0.2716],
          [ 0.2391, -0.3100,  0.1197],
          [ 0.3175, -0.0753, -0.2631]]],


        [[[ 0.0092,  0.0445,  0.2723],
          [-0.3734, -0.0521,  0.3880],
          [-0.2184, -0.26

In [33]:
# Report the serialised size of the model's state dict as the pre-quantization baseline.
print("Size of the model before quantization")
print_size_of_model(net)

Size of the model before quantization
Size (KB): 1689.828


In [32]:
# Baseline accuracy of the float model. `test` takes the model as its first
# argument and reads the notebook-level `test_loader` itself.
print("Accuracy of the model before quantization: ")
test(net)

Accuracy of the model before quantization: 


Testing: 100%|██████████| 1000/1000 [00:01<00:00, 590.94it/s, accuracy=88.5]

Accuracy on test set: 88.49%



