In [13]:
import torch
import torch.nn as nn
import torch.nn.utils as utils
import torch.nn.utils.prune as prune

In [2]:
# Small MLP: 10 input features -> 128 -> 64 -> 1 output, with a ReLU after
# each of the two hidden Linear layers.
model = nn.Sequential(*(
    nn.Linear(10, 128), nn.ReLU(),
    nn.Linear(128, 64), nn.ReLU(),
    nn.Linear(64, 1),
))

**Model Pruning**

In [5]:
# Unstructured L1 pruning (amount=0.2) of the `weight` tensor of every Linear
# layer. `l1_unstructured` returns the module it modified, so the result is
# passed straight to `prune.remove`, which makes the pruning permanent by
# dropping the mask re-parametrization.
for module in filter(lambda layer: isinstance(layer, nn.Linear), model.modules()):
    prune.remove(
        prune.l1_unstructured(module, name='weight', amount=0.2),
        'weight',
    )

In [6]:
# Or use structured pruning.
# Select the target layer explicitly rather than relying on the `module`
# variable left behind by the `for` loop of the previous cell (hidden state
# that breaks when cells are run out of order). `model[-1]` is the final
# Linear(64, 1) layer, i.e. the layer that leftover variable pointed at.
# NOTE(review): this layer has a single output channel, so amount=0.5 along
# dim=0 most likely rounds to zero pruned channels -- consider targeting a
# hidden layer such as model[2] instead; confirm the intent.
output_layer = model[-1]
prune.ln_structured(output_layer, name='weight', amount=0.5, n=2, dim=0)
# Make the pruning permanent, as the unstructured-pruning cell does, so the
# layer goes back to holding a plain `weight` parameter. `prune.remove`
# returns the module, so the cell still displays the pruned layer.
prune.remove(output_layer, 'weight')

Linear(in_features=64, out_features=1, bias=True)

**Quantization**

- Post-training dynamic quantization (weights of the `nn.Linear` layers are converted to int8)

In [15]:
model.eval()

# Strip any re-parametrizations before quantizing.
for m in model.modules():
    # A layer that is still pruned carries `weight_orig` / `weight_mask`, and
    # its `weight` is a derived (non-leaf) tensor. `quantize_dynamic` deep-copies
    # the model when not run in place, and non-leaf tensors cannot be deep-copied,
    # so make the pruning permanent first.
    if hasattr(m, 'weight_orig') and hasattr(m, 'weight_mask'):
        prune.remove(m, 'weight')
    try:
        utils.remove_weight_norm(m)
    except ValueError:
        # Raised when the module has no weight-norm hook on `weight`, which is
        # the normal case for plain layers. Any other error is a real problem
        # and is allowed to propagate instead of being silently swallowed.
        continue

# Dynamic post-training quantization of the Linear layers to int8.
# NOTE: recent PyTorch emits a deprecation warning for this eager-mode API and
# points to torchao as the replacement.
model_quantized = torch.quantization.quantize_dynamic(
    model, {nn.Linear}, dtype=torch.qint8
)

For migrations of users: 
1. Eager mode quantization (torch.ao.quantization.quantize, torch.ao.quantization.quantize_dynamic), please migrate to use torchao eager mode quantize_ API instead 
2. FX graph mode quantization (torch.ao.quantization.quantize_fx.prepare_fx,torch.ao.quantization.quantize_fx.convert_fx, please migrate to use torchao pt2e quantization API instead (prepare_pt2e, convert_pt2e) 
3. pt2e quantization has been migrated to torchao (https://github.com/pytorch/ao/tree/main/torchao/quantization/pt2e) 
see https://github.com/pytorch/ao/issues/2259 for more details
  model_quantized = torch.quantization.quantize_dynamic(
