In [2]:
# Core imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from efficientnet_pytorch import EfficientNet
from tqdm import tqdm

# QPyTorch imports
from qtorch.quant import Quantizer, quantizer
from qtorch import FixedPoint, BlockFloatingPoint, FloatingPoint
from qtorch.optim import OptimLP
from qtorch.auto_low import lower

# Probe for the optional Posit number format (expected only in the forked
# QPyTorch build) and record the outcome in HAS_POSIT for later cells.
try:
    from qtorch import Posit
except ImportError:
    HAS_POSIT = False
    print(" No Posit type found — falling back to FP8-like format")
else:
    HAS_POSIT = True
    print(" Posit support found in QPyTorch")

# Config: run-wide constants read by the data, model and training cells.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODEL_NAME = "efficientnet-b0"   # architecture name handed to EfficientNet
BATCH_SIZE = 64                  # used by both DataLoaders
LR = 0.05                        # SGD learning rate
MOMENTUM = 0.9                   # SGD momentum
WEIGHT_DECAY = 5e-4              # SGD weight decay
EPOCHS = 10                      # epoch count, also the cosine schedule length
DATA_PATH = "./data"             # dataset download / cache directory
SEED = 42                        # single source of truth for the RNG seed

# Seed the torch RNGs so repeated runs start from the same state.
torch.manual_seed(SEED)
if DEVICE.type == "cuda":
    torch.cuda.manual_seed_all(SEED)

print(f"Using device: {DEVICE}")


✅ Posit support found in QPyTorch
Using device: cuda


In [25]:
# ImageNet normalization
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Training pipeline: upscale, then random 224x224 crop + horizontal flip.
train_tf = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])
# Validation pipeline: deterministic resize + 224x224 center crop.
val_tf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])

# CIFAR-10 for demo (downloaded into DATA_PATH on first use)
train_set = datasets.CIFAR10(root=DATA_PATH, train=True, download=True, transform=train_tf)
val_set   = datasets.CIFAR10(root=DATA_PATH, train=False, download=True, transform=val_tf)

train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=4, pin_memory=True, persistent_workers=True)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False,
                        num_workers=4, pin_memory=True, persistent_workers=True)

# Take the class count from the dataset itself so it cannot drift from the
# dataset actually loaded above (10 for CIFAR-10).
NUM_CLASSES = len(train_set.classes)
print(f"Train samples: {len(train_set)}, Val samples: {len(val_set)}")


Files already downloaded and verified
Files already downloaded and verified
Train samples: 50000, Val samples: 10000


In [26]:
# Define number formats
def make_quant_numbers():
    """Build the four number formats used for low-precision training.

    Returns:
        A 4-tuple ``(W_NUM, A_NUM, WG_NUM, AG_NUM)``: formats for weights,
        activations, weight gradients and activation gradients, in that order.
        With Posit support every slot is a fresh ``Posit(8, 1)``
        (presumably 8 bits with 1 exponent bit -- confirm against the fork).
        Otherwise the first two slots are ``FloatingPoint(exp=3, man=4)`` and
        the gradient slots are ``FloatingPoint(exp=5, man=10)``.
    """
    if not HAS_POSIT:
        # Fallback: 8-bit-like floats forward, half-precision-like gradients.
        return (
            FloatingPoint(exp=3, man=4),
            FloatingPoint(exp=3, man=4),
            FloatingPoint(exp=5, man=10),
            FloatingPoint(exp=5, man=10),
        )
    # One independent Posit instance per slot, as in the fallback branch.
    return tuple(Posit(8, 1) for _ in range(4))

# Apply quantization to conv and linear layers
def quantize_model(net):
    """Insert low-precision quantization after the conv and linear layers of ``net``.

    The forward format is the weight format and the backward format is the
    weight-gradient format returned by ``make_quant_numbers()``; both passes
    use nearest rounding.

    Args:
        net: the ``torch.nn.Module`` to lower.

    Returns:
        The lowered model. If ``lower`` returns an object, that object is
        returned; if it returns ``None``, ``net`` itself is returned.
    """
    # Only the weight and weight-gradient formats are consumed here.
    forward_format, _, backward_format, _ = make_quant_numbers()

    lowered = lower(
        net,
        layer_types=["conv", "linear"],   # layer kinds that get a quantizer
        forward_number=forward_format,    # number format applied in the forward pass
        backward_number=backward_format,  # number format applied to gradients
        forward_rounding="nearest",       # rounding method for forward pass
        backward_rounding="nearest"       # rounding method for backward pass
    )
    # NOTE(review): upstream QPyTorch `lower` appears to patch the model in
    # place and return nothing -- confirm for the fork in use. Never hand
    # `None` back to the caller in that case.
    return net if lowered is None else lowered



In [13]:
# Scratch check: print the call signature of `lower` to see which keyword
# arguments it accepts. `lower` itself is not imported in this cell.
import inspect
from qtorch.quant import quantizer  # not used by the statement below
print(inspect.signature(lower))

(model, layer_types=[], forward_number=None, backward_number=None, forward_rounding='stochastic', backward_rounding='stochastic')


In [27]:
import torch
import torch.nn as nn
from efficientnet_pytorch import EfficientNet
from qtorch import FloatingPoint
from qtorch.auto_low import lower

class QuantizedEfficientNet(nn.Module):
    """EfficientNet backbone with a replaced classifier head and opt-in lowering.

    Construction only builds the full-precision network and swaps ``_fc`` for
    a fresh ``nn.Linear``. Quantization is a separate, explicit step:
    call ``apply_quantization_later()`` once the model is fully created.
    """

    def __init__(self, model_name, num_classes, pretrained=True):
        """Build the backbone and resize its classifier.

        Args:
            model_name: EfficientNet variant name, e.g. ``"efficientnet-b0"``.
            num_classes: output size of the new classifier layer.
            pretrained: load pretrained weights when True, otherwise build
                the architecture by name only.
        """
        super().__init__()

        # Load base EfficientNet model
        if pretrained:
            base = EfficientNet.from_pretrained(model_name)
            print(f"Loaded pretrained {model_name}")
        else:
            base = EfficientNet.from_name(model_name)
            print(f"Created {model_name} from scratch")

        # Check parameters before modification
        print(f"Parameters before classifier change: {self._count_params(base):,}")

        # Modify classifier for target number of classes
        in_features = base._fc.in_features
        base._fc = nn.Linear(in_features, num_classes)
        print(f"Modified classifier: {in_features} -> {num_classes} classes")
        print(f"Parameters after classifier change: {self._count_params(base):,}")

        # Store the base model WITHOUT quantization first
        self.net = base
        print(f"Parameters after assignment to self.net: {self._count_params(self.net):,}")

    @staticmethod
    def _count_params(module):
        """Return the total number of scalar parameters held by ``module``."""
        return sum(p.numel() for p in module.parameters())

    def apply_quantization_later(self, forward_number=None, backward_number=None):
        """Apply quantization after model is fully created.

        Args:
            forward_number: number format for the forward pass. Defaults to
                ``FloatingPoint(exp=3, man=4)``.
            backward_number: number format for the backward pass. Defaults to
                ``FloatingPoint(exp=5, man=10)``.

        Returns:
            True when lowering completed, False when it raised. The failure
            is printed rather than propagated, so this stays best-effort.
            ``self.net`` is never replaced by ``None``.
        """
        print("Applying quantization...")
        print(f"Parameters before quantization: {self._count_params(self.net):,}")

        try:
            # Defaults are built inside the try so a bad format is reported
            # the same way as any other lowering failure.
            if forward_number is None:
                forward_number = FloatingPoint(exp=3, man=4)
            if backward_number is None:
                backward_number = FloatingPoint(exp=5, man=10)

            lowered = lower(
                self.net,
                layer_types=['conv', 'linear'],
                forward_number=forward_number,
                backward_number=backward_number,
                forward_rounding="nearest",
                backward_rounding="nearest"
            )
            # NOTE(review): upstream QPyTorch `lower` appears to patch the
            # model in place and return nothing -- confirm for the fork in
            # use. Only rebind when an actual model object comes back.
            if lowered is not None:
                self.net = lowered
            print(f"Parameters after quantization: {self._count_params(self.net):,}")
        except Exception as e:
            print(f"Quantization failed: {e}")
            return False
        return True

    def forward(self, x):
        """Run ``x`` through the wrapped network and return its output."""
        return self.net(x)

# Build the full-precision model first; quantization is a separate step.
# NOTE(review): apply_quantization_later() is not called in any cell visible
# here, so the network trained below is not lowered -- confirm this is intended.
MODEL_NAME = 'efficientnet-b0'
NUM_CLASSES = 10
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = QuantizedEfficientNet(MODEL_NAME, NUM_CLASSES, pretrained=True)
param_count = sum(p.numel() for p in model.parameters())
print(f"Model parameters after creation: {param_count:,}")

# Transfer to the target device and re-count as a sanity check.
model = model.to(DEVICE)
param_count = sum(p.numel() for p in model.parameters())
print(f"Model parameters after moving to device: {param_count:,}")


Loaded pretrained weights for efficientnet-b0
Loaded pretrained efficientnet-b0
Parameters before classifier change: 5,288,548
Modified classifier: 1280 -> 10 classes
Parameters after classifier change: 4,020,358
Parameters after assignment to self.net: 4,020,358
Model parameters after creation: 4,020,358
Model parameters after moving to device: 4,020,358


In [28]:
def validate(model, loader, device, criterion):
    """Evaluate ``model`` over ``loader`` with gradients disabled.

    Args:
        model: network to evaluate; it is switched to eval mode.
        loader: iterable yielding ``(inputs, targets)`` batches.
        device: device each batch is moved to before the forward pass.
        criterion: loss callable taking ``(outputs, targets)``.

    Returns:
        ``(mean_loss, accuracy_percent)``, both averaged per sample.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for batch_x, batch_y in tqdm(loader, desc="Val", leave=False):
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            logits = model(batch_x)
            batch_n = batch_y.size(0)
            # Weight the batch loss by batch size so the final figure is a
            # per-sample mean even when the last batch is smaller.
            loss_sum += criterion(logits, batch_y).item() * batch_n
            n_correct += logits.argmax(1).eq(batch_y).sum().item()
            n_seen += batch_n
    return loss_sum / n_seen, 100.0 * n_correct / n_seen

def train_epoch(model, loader, device, criterion, optimizer):
    """Train ``model`` for one pass over ``loader``.

    Each step zeroes the gradients, runs forward and backward, clips the
    global gradient norm to 5.0 and then steps ``optimizer``.

    Args:
        model: network to train; it is switched to train mode.
        loader: iterable yielding ``(inputs, targets)`` batches.
        device: device each batch is moved to before the forward pass.
        criterion: loss callable taking ``(outputs, targets)``.
        optimizer: optimizer whose ``zero_grad``/``step`` drive the update.

    Returns:
        ``(mean_loss, accuracy_percent)``, both averaged per sample and
        measured on the training batches as they were seen.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    for batch_x, batch_y in tqdm(loader, desc="Train", leave=False):
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad(set_to_none=True)
        logits = model(batch_x)
        batch_loss = criterion(logits, batch_y)
        batch_loss.backward()
        # Clip before the update so one bad batch cannot blow up the weights.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
        optimizer.step()

        batch_n = batch_y.size(0)
        loss_sum += batch_loss.item() * batch_n
        n_correct += logits.argmax(1).eq(batch_y).sum().item()
        n_seen += batch_n
    return loss_sum / n_seen, 100.0 * n_correct / n_seen


In [None]:
criterion = nn.CrossEntropyLoss()
# Only the weight-gradient format is used by the optimizer wrappers below.
_, _, WG_NUM, _ = make_quant_numbers()

# Create base optimizer
base_optimizer = optim.SGD(
    model.parameters(),
    lr=LR,
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY
)

# Create quantizers for different components
# NOTE(review): the weight quantizer uses the weight-gradient format (WG_NUM),
# not the weight format. The two are identical in the Posit branch of
# make_quant_numbers() but differ in the FloatingPoint fallback -- confirm
# this is intended.
weight_quantizer = quantizer(forward_number=WG_NUM, forward_rounding="nearest")
grad_quantizer = quantizer(forward_number=WG_NUM, forward_rounding="nearest")
momentum_quantizer = quantizer(forward_number=WG_NUM, forward_rounding="nearest")

# Wrap with OptimLP with full quantization
optimizer = OptimLP(
    optim=base_optimizer,
    weight_quant=weight_quantizer,  # Quantize weights
    grad_quant=grad_quantizer,      # Quantize gradients
    momentum_quant=momentum_quantizer,  # Quantize momentum
    grad_scaling=1.0,
    acc_quant=None  # Can add accumulator quantization if needed
)

# Cosine decay of the learning rate across the whole run, stepped once per epoch.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

best_acc = 0.0
for epoch in range(1, EPOCHS + 1):
    print(f"\nEpoch {epoch}/{EPOCHS}")
    train_loss, train_acc = train_epoch(model, train_loader, DEVICE, criterion, optimizer)
    val_loss, val_acc = validate(model, val_loader, DEVICE, criterion)

    # Read the LR before stepping the scheduler so the log line reports the
    # rate this epoch actually trained with, not the next epoch's.
    epoch_lr = optimizer.param_groups[0]['lr']
    scheduler.step()

    print(f"Train  | Loss: {train_loss:.4f} | Acc: {train_acc:.2f}%")
    print(f"Val    | Loss: {val_loss:.4f} | Acc: {val_acc:.2f}%")
    print(f"LR     | {epoch_lr:.6f}")

    # Keep only the checkpoint with the best validation accuracy so far.
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), "best_posit8_efficientnet.pth")
        print(f"✅ New best model saved with Acc: {best_acc:.2f}%")

print(f"\nTraining completed. Best Val Acc: {best_acc:.2f}%")



Epoch 1/10


Train:   8%|█████▉                                                                     | 62/782 [01:45<19:07,  1.59s/it]