In [1]:
import torch.nn.utils.prune as prune
from torch import nn
import torch
import torchvision.models as models

In [2]:
# Load the model architecture
trained_vgg_model = models.vgg16()

num_classes = 10

trained_vgg_model.classifier[-1] = nn.Linear(trained_vgg_model.classifier[-1].in_features, num_classes)


# Load the saved weights into the model
trained_vgg_model.load_state_dict(torch.load("../data/trained_vgg_model.pt"))

trained_vgg_model.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [26]:
def prune_model(model, pruning_rate):
    for module in model.modules():
        if isinstance(module, nn.Conv2d):
            prune.l1_unstructured(module, name='weight', amount=pruning_rate)
            prune.remove(module, 'weight')  # Make the pruning permanent
    return model


In [43]:
pruning_rate = 0.3  # Define the pruning rate (e.g., 0.2 means pruning 20% of the weights)
pruned_vgg_model = prune_model(trained_vgg_model, pruning_rate)


In [44]:
torch.save(pruned_vgg_model.state_dict(), "../data/trained_vgg_model_pruned.pt")


In [45]:
# TODO test accuracy and speed

In [46]:
# Load the model architecture
trained_vgg_model = models.vgg16()

num_classes = 10

trained_vgg_model.classifier[-1] = nn.Linear(trained_vgg_model.classifier[-1].in_features, num_classes)


# Load the saved weights into the model
trained_vgg_model.load_state_dict(torch.load("../data/trained_vgg_model.pt"))

trained_vgg_model.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [47]:
# Define the input data shape (batch size, channels, height, width)
input_shape = (1, 3, 224, 224)

# Generate some random input data
input_data = torch.randn(input_shape)

# Quantize the model
quantized_vgg_model = torch.quantization.quantize_dynamic(
    trained_vgg_model, {torch.nn.Conv2d}, dtype=torch.qint8
)

# Run inference on the original model
with torch.no_grad():
    output = trained_vgg_model(input_data)

# Run inference on the quantized model
with torch.no_grad():
    quantized_output = quantized_vgg_model(input_data)

# Compare the results
print("Original output shape:", output.shape)
print("Quantized output shape:", quantized_output.shape)
print("Max absolute error:", torch.max(torch.abs(output - quantized_output)))


Original output shape: torch.Size([1, 10])
Quantized output shape: torch.Size([1, 10])
Max absolute error: tensor(0.)


In [48]:
torch.save(quantized_vgg_model.state_dict(), "../data/trained_vgg_model_quantized.pt")


In [65]:
input_data = torch.randn((100, 3, 224, 224))


In [66]:
%timeit trained_vgg_model(input_data)

7.58 s ± 172 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [67]:
%timeit pruned_vgg_model(input_data)

7.43 s ± 233 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [68]:
%timeit quantized_vgg_model(input_data)

7.39 s ± 174 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [71]:
import sys

def get_model_memory_usage(model):
    total_memory = 0
    for param in model.parameters():
        param_memory = param.data.numel() * param.data.element_size()
        total_memory += param_memory
    return total_memory/1048576

trained_vgg_memory = get_model_memory_usage(trained_vgg_model)
pruned_vgg_memory = get_model_memory_usage(pruned_vgg_model)
quantized_vgg_memory = get_model_memory_usage(quantized_vgg_model)

print(f"Original VGG model memory usage: {trained_vgg_memory:.2f} MB")
print(f"Pruned VGG model memory usage: {pruned_vgg_memory:.2f} MB")


Original VGG model memory usage: 512.32 MB
Pruned VGG model memory usage: 512.32 MB


In [56]:
def get_quantized_model_memory_usage(model):
    total_memory = 0
    for name, param in model.named_parameters():
        param_memory = 0
        if "weight" in name and hasattr(param, "q_per_channel_scales"):
            # Quantized parameter
            quantized_bits = 8  # Assuming qint8 quantization
            param_memory = param.data.numel() * (quantized_bits // 8)
        else:
            # Non-quantized parameter
            param_memory = param.data.numel() * param.data.element_size()
        total_memory += param_memory
    return total_memory/1048576

quantized_vgg_memory = get_quantized_model_memory_usage(quantized_vgg_model)

print(f"Quantized VGG model memory usage: {quantized_vgg_memory:.2f} MB")


Quantized VGG model memory usage: 128.12 MB


In [57]:
def check_sparsity(model):
    total_elements = 0
    zero_elements = 0
    for param in model.parameters():
        total_elements += param.numel()
        zero_elements += (param == 0).sum().item()
    sparsity = zero_elements / total_elements * 100
    return sparsity

trained_vgg_sparsity = check_sparsity(trained_vgg_model)
pruned_vgg_sparsity = check_sparsity(pruned_vgg_model)

print(f"Original VGG model sparsity: {trained_vgg_sparsity:.2f}%")
print(f"Pruned VGG model sparsity: {pruned_vgg_sparsity:.2f}%")


Original VGG model sparsity: 0.00%
Pruned VGG model sparsity: 3.29%


In [69]:
!pip install psutil




In [83]:
import torch
import os
import psutil

def memory_usage():
    process = psutil.Process(os.getpid())
    return process.memory_info().rss

# Create input tensor
input_data = torch.randn((1, 3, 224, 224))

# Measure memory usage before inference
before_memory_usage = memory_usage()

# Run inference on the original model
with torch.no_grad():
    output = trained_vgg_model(input_data)

# Measure memory usage after inference
after_memory_usage = memory_usage()

print(f"Original VGG model memory usage during inference: {(after_memory_usage - before_memory_usage)/1048576} MB")

# Measure memory usage before inference
before_memory_usage_pruned = memory_usage()

# Run inference on the pruned model
with torch.no_grad():
    pruned_output = pruned_vgg_model(input_data)

# Measure memory usage after inference
after_memory_usage_pruned = memory_usage()

print(f"Pruned VGG model memory usage during inference: {(after_memory_usage_pruned - before_memory_usage_pruned)/1048576} MB")


Original VGG model memory usage during inference: 797.140625 MB
Pruned VGG model memory usage during inference: 529.875 MB


In [14]:
import torch
import torchvision
import onnx


# Set the model to evaluation mode
trained_vgg_model.eval()

# Define an example input tensor
dummy_input = torch.randn(1, 3, 224, 224)

# Convert the model to ONNX format
torch.onnx.export(trained_vgg_model, dummy_input, "../data/vgg.onnx", export_params=True)

verbose: False, log level: Level.ERROR



In [None]:
from flask import Flask, request, jsonify
from PIL import Image
import torchvision.transforms as transforms
import torch

app = Flask(__name__)

transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

@app.route('/predict', methods=['POST'])
def predict():
    if request.files.get('image'):
        image = Image.open(request.files['image'].stream).convert('RGB')
        input_tensor = transform(image).unsqueeze(0)
        with torch.no_grad():
            output = vgg11_model(input_tensor)
        _, predicted = torch.max(output, 1)
        class_id = int(predicted.item())
        return jsonify({'class_id': class_id})

    return jsonify({'error': 'No image provided'})

if __name__ == '__main__':
    app.run()
