Validation of the quantized ViT model on the validation split from Hugging Face

In [8]:
# Cell 1: Imports and Modal Setup
import modal
import torch
import pandas as pd
import numpy as np
from PIL import Image
import io
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import onnxruntime as ort
import base64

# Modal application handle; the remote validation function is registered on it.
stub = modal.App("vit-fairface-validation")

# Persistent volumes: the input data volume must already exist, the output
# volume is created on first use.
data_volume = modal.Volume.from_name("fairface-data")
output_volume = modal.Volume.from_name(
    "vit-validation-output",
    create_if_missing=True,
)

# Python packages installed into the container image used by the remote function.
_VIT_IMAGE_PACKAGES = (
    "torch",
    "torchvision",
    "pandas",
    "pillow",
    "tqdm",
    "pyarrow",
    "onnxruntime-gpu",
)
image = modal.Image.debian_slim().pip_install(*_VIT_IMAGE_PACKAGES)

In [9]:
# Cell 2: Dataset Class
class FairFaceDataset(Dataset):
    """Map-style dataset backed by a parquet file of face images.

    Every item is an ``(image, label)`` pair. The image is decoded from the
    row's ``image['bytes']`` field and converted to RGB; the label is the
    row's ``race`` value, returned as stored.

    Args:
        parquet_file: Path handed to ``pd.read_parquet``.
        transform: Callable applied to each decoded PIL image. When falsy, a
            default pipeline is used: resize to 224x224, convert to a tensor,
            then normalize with the mean/std constants below.
    """

    def __init__(self, parquet_file, transform=None):
        self.data = pd.read_parquet(parquet_file)
        if not transform:
            transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225],
                ),
            ])
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the loaded table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the (transformed) image and the ``race`` value of row ``idx``."""
        record = self.data.iloc[idx]
        encoded = record['image']['bytes']
        picture = Image.open(io.BytesIO(encoded)).convert('RGB')
        label = record['race']
        if self.transform:
            picture = self.transform(picture)
        return picture, label

In [10]:
# Cell 3: Validation Function
@stub.function(
    image=image,
    gpu="A100",
    volumes={
        "/root/data": data_volume,
        "/root/output": output_volume
    },
    timeout=14400
)
def validate_quantized_model(batch_size=32, model_bytes=None):
    """Evaluate a quantized ONNX classifier on the validation parquet file.

    Runs the model over ``/root/data/validation.parquet``, computes top-1
    accuracy, and writes one row per sample (true label, prediction,
    confidence, top-k predictions and probabilities) to
    ``/root/output/quantized_vit_validation_results.csv``.

    Assumes the ``race`` column holds integer class indices in the same order
    as ``fairface_classes`` below -- TODO confirm against the dataset schema.

    Args:
        batch_size: Number of images per inference batch.
        model_bytes: Serialized ONNX model. Required despite the ``None``
            default, which is kept only for signature compatibility.

    Returns:
        Tuple ``(accuracy, results_df)``: accuracy as a percentage (``0.0``
        when the dataset is empty) and the per-sample results DataFrame.

    Raises:
        ValueError: If ``model_bytes`` is missing or empty.
    """
    import onnxruntime as ort

    # Fail early with a clear message instead of an opaque ONNX Runtime error.
    if not model_bytes:
        raise ValueError("model_bytes must contain the serialized ONNX model")

    # Class mapping for FairFace
    fairface_classes = [
        "White", "Black", "Latino_Hispanic", "East Asian",
        "Southeast Asian", "Indian", "Middle Eastern"
    ]

    # Create ONNX Runtime session from bytes
    session = ort.InferenceSession(
        model_bytes,
        providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
    )

    # Keep feeding 'pixel_values' when the graph exposes it; otherwise fall
    # back to the model's first declared input rather than failing on the name.
    input_names = [model_input.name for model_input in session.get_inputs()]
    input_name = 'pixel_values' if 'pixel_values' in input_names else input_names[0]

    # Pin the 224x224 preprocessing explicitly. FairFaceDataset is defined more
    # than once in this notebook with different default resize sizes, so relying
    # on the class default depends on which definition was executed last.
    vit_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Create dataset and dataloader
    val_dataset = FairFaceDataset("/root/data/validation.parquet", transform=vit_transform)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Initialize metrics
    correct = 0
    total = 0
    all_predictions = []

    # Validation loop
    for images, labels in tqdm(val_loader):
        images_np = images.numpy()
        outputs = session.run(
            None,
            {input_name: images_np}
        )[0]
        probs = torch.softmax(torch.tensor(outputs), dim=1)
        # Never request more entries than the model has output classes.
        top_k = min(5, probs.size(1))
        top5_probs, top5_preds = probs.topk(top_k, dim=1)
        top1_pred = top5_preds[:, 0]
        top1_prob = top5_probs[:, 0]
        _, predicted = probs.max(1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        for i in range(len(labels)):
            all_predictions.append({
                'true_label': fairface_classes[labels[i].item()],
                'predicted_label': fairface_classes[predicted[i].item()],
                'confidence': probs[i][predicted[i]].item(),
                'top1_pred': fairface_classes[top1_pred[i].item()],
                'top1_prob': top1_prob[i].item(),
                'top5_preds': [fairface_classes[idx] for idx in top5_preds[i].cpu().numpy()],
                'top5_probs': top5_probs[i].cpu().numpy().tolist()
            })

    # Calculate accuracy; an empty validation set yields 0.0 instead of a
    # ZeroDivisionError.
    accuracy = 100. * correct / total if total else 0.0

    # Save results
    results_df = pd.DataFrame(all_predictions)
    results_df.to_csv("/root/output/quantized_vit_validation_results.csv", index=False)

    return accuracy, results_df

In [11]:
# Cell 4: Local Model Loading and Validation
def run_validation(model_path="vit_modal-quantized-new.onnx", batch_size=32):
    """Read a local ONNX model and validate it remotely on Modal.

    The per-sample results are written by the remote function to
    ``/root/output/quantized_vit_validation_results.csv`` on the
    ``vit-validation-output`` volume; only the accuracy is returned here.

    Args:
        model_path: Local path of the quantized ONNX model file.
        batch_size: Batch size forwarded to the remote validation function.

    Returns:
        Validation accuracy as a percentage.
    """
    # Load the quantized ONNX model locally
    with open(model_path, "rb") as model_file:
        model_bytes = model_file.read()

    # Run validation on Modal
    with stub.run():
        # The remote call also returns the results DataFrame; it is not needed
        # locally because the same rows are saved to the output volume.
        accuracy, _results = validate_quantized_model.remote(
            batch_size=batch_size,
            model_bytes=model_bytes
        )
        print(f"Validation Accuracy: {accuracy:.2f}%")
        return accuracy

In [12]:
# Cell 5: Run Validation
# run_validation() prints the accuracy itself before returning it, so the value
# is only captured here; printing it again would repeat the same output line.
accuracy = run_validation()


Validation Accuracy: 68.00%
Validation Accuracy: 68.00%


Validation of the quantized INT8 Swin model on the same validation set from Hugging Face

In [13]:
import modal
import torch
import pandas as pd
import numpy as np
from PIL import Image
import io
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import onnxruntime as ort

# Modal application handle for the Swin validation run.
stub = modal.App("swin-onnx-validation")

# Persistent volumes: validation data, stored ONNX models, and prediction
# output (the output volume is created on first use).
data_volume = modal.Volume.from_name("fairface-data")
model_volume = modal.Volume.from_name("vit-quantization-volume")
output_volume = modal.Volume.from_name(
    "swin-validation-output",
    create_if_missing=True,
)

# Python packages installed into the container image used by the remote function.
_SWIN_IMAGE_PACKAGES = (
    "torch",
    "torchvision",
    "pandas",
    "pillow",
    "pyarrow",
    "onnxruntime-gpu",
)
image = modal.Image.debian_slim().pip_install(*_SWIN_IMAGE_PACKAGES)

class FairFaceDataset(Dataset):
    """Parquet-backed image dataset yielding ``(image, label)`` pairs.

    Images are decoded from each row's ``image['bytes']`` field and converted
    to RGB; the label is the row's ``race`` value, returned as stored.

    Args:
        parquet_file: Path handed to ``pd.read_parquet``.
        transform: Callable applied to each decoded PIL image. When falsy, the
            default pipeline resizes to 256x256, converts to a tensor, and
            normalizes with the mean/std constants below.
    """

    def __init__(self, parquet_file, transform=None):
        self.data = pd.read_parquet(parquet_file)
        self.transform = transform if transform else transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ])

    def __len__(self):
        """Return the number of rows in the loaded table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the (transformed) image and the ``race`` value of row ``idx``."""
        sample = self.data.iloc[idx]
        stream = io.BytesIO(sample['image']['bytes'])
        decoded = Image.open(stream).convert('RGB')
        label = sample['race']  # Adjust if your label column is different
        if self.transform:
            decoded = self.transform(decoded)
        return decoded, label

@stub.function(
    image=image,
    gpu="T4",
    timeout=1800,
    volumes={
        "/data": data_volume,
        "/models": model_volume,
        "/output": output_volume
    }
)
def validate_swin_onnx(
    onnx_model_path="/models/swin_fairface_best_int8.onnx",
    parquet_path="/data/validation.parquet",
    output_csv="/output/swin_val_predictions.csv",
    batch_size=32
):
    """Evaluate an ONNX classifier on a validation parquet file.

    Runs the model at ``onnx_model_path`` over every row of ``parquet_path``,
    computes top-1 accuracy, and writes one row per sample (true label,
    prediction, confidence, top-k predictions and probabilities) to
    ``output_csv``.

    Assumes the ``race`` column holds integer class indices in the same order
    as ``fairface_classes`` below -- TODO confirm against the dataset schema.

    Args:
        onnx_model_path: Path of the ONNX model inside the container.
        parquet_path: Path of the validation parquet file.
        output_csv: Destination path for the per-sample predictions CSV.
        batch_size: Number of images per inference batch.

    Returns:
        Validation accuracy as a percentage (``0.0`` for an empty dataset).
    """
    fairface_classes = [
        "White", "Black", "Latino_Hispanic", "East Asian",
        "Southeast Asian", "Indian", "Middle Eastern"
    ]
    session = ort.InferenceSession(
        onnx_model_path,
        providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
    )
    # Use the first declared input/output of the graph.
    input_name = session.get_inputs()[0].name
    output_name = session.get_outputs()[0].name

    # Pin the 256x256 preprocessing explicitly. FairFaceDataset is defined more
    # than once in this notebook with different default resize sizes, so relying
    # on the class default depends on which definition was executed last.
    swin_transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    val_dataset = FairFaceDataset(parquet_path, transform=swin_transform)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    correct = 0
    total = 0
    all_predictions = []
    for images, labels in val_loader:
        images_np = images.numpy()
        outputs = session.run([output_name], {input_name: images_np})[0]
        probs = torch.softmax(torch.tensor(outputs), dim=1)
        # Never request more entries than the model has output classes.
        top_k = min(5, probs.size(1))
        top5_probs, top5_preds = probs.topk(top_k, dim=1)
        top1_pred = top5_preds[:, 0]
        top1_prob = top5_probs[:, 0]
        _, predicted = probs.max(1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        for i in range(len(labels)):
            all_predictions.append({
                'true_label': fairface_classes[labels[i].item()],
                'predicted_label': fairface_classes[predicted[i].item()],
                'confidence': probs[i][predicted[i]].item(),
                'top1_pred': fairface_classes[top1_pred[i].item()],
                'top1_prob': top1_prob[i].item(),
                'top5_preds': [fairface_classes[idx] for idx in top5_preds[i].cpu().numpy()],
                'top5_probs': top5_probs[i].cpu().numpy().tolist()
            })

    # An empty validation set yields 0.0 instead of a ZeroDivisionError.
    accuracy = 100. * correct / total if total else 0.0
    print(f"Validation Accuracy: {accuracy:.2f}%")
    results_df = pd.DataFrame(all_predictions)
    results_df.to_csv(output_csv, index=False)
    print(f"Predictions saved to {output_csv}")
    return accuracy

# Run validation
# Inside the stub.run() context, call validate_swin_onnx remotely with its
# default arguments (model path, parquet path, output CSV, batch size) and
# print the accuracy percentage it returns.
with stub.run():
    acc = validate_swin_onnx.remote()
    print(f"Final Validation Accuracy: {acc:.2f}%")

Final Validation Accuracy: 70.88%
