In [1]:
# %%
# ===================================================================
#                      IMPORTS AND SETUP
# ===================================================================
import torch
import torchvision.models as models
import torch_tensorrt
from pathlib import Path

# --- Environment Setup for Performance ---
torch.backends.cudnn.benchmark = True

print(f"PyTorch Version: {torch.__version__}")
print(f"Torch-TensorRT Version: {torch_tensorrt.__version__}")
print(f"Is CUDA available? {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")
else:
    raise RuntimeError("CUDA is not available. TensorRT compilation requires a GPU.")

PyTorch Version: 2.3.0a0+6ddf5cf85e.nv24.04
Torch-TensorRT Version: 2.3.0a0
Is CUDA available? True
CUDA device: NVIDIA GeForce RTX 3070 Ti


In [2]:
# %%
# ===================================================================
#                      USER CONFIGURATION
# ===================================================================
# Define paths as they exist INSIDE the container.
BASE_DIR = Path('/workspace')

# --- ‼️ IMPORTANT: UPDATE THIS PATH ‼️ ---
# Path to your fine-tuned and KNOWLEDGE-DISTILLED ResNet-18 model file
MODEL_PATH = BASE_DIR / 'saved_models_and_logs' / 'knowledge_distillation' / 'resnet50_to_resnet18pretrained_kd' / 'model_final.pth' 

# Path to save the optimized TensorRT models.
OPTIMIZED_MODEL_DIR = BASE_DIR / 'saved_models_and_logs' / 'kd_tensorrt'
# ===================================================================

# Create the output directory if it doesn't exist
OPTIMIZED_MODEL_DIR.mkdir(parents=True, exist_ok=True)

print(f"✅ Base Directory inside container: {BASE_DIR}")
print(f"✅ Model will be loaded from: {MODEL_PATH}")
print(f"✅ Optimized models will be saved to: {OPTIMIZED_MODEL_DIR}")

# Verify that the model file actually exists at that path
if not MODEL_PATH.exists():
    raise FileNotFoundError(f"Model file not found inside container at: {MODEL_PATH}")

✅ Base Directory inside container: /workspace
✅ Model will be loaded from: /workspace/saved_models_and_logs/knowledge_distillation/resnet50_to_resnet18pretrained_kd/model_final.pth
✅ Optimized models will be saved to: /workspace/saved_models_and_logs/kd_tensorrt


In [3]:
# %%
# ===================================================================
#                LOAD THE DISTILLED RESNET-18 MODEL
# ===================================================================

print("-> Loading the custom distilled ResNet-18 model...")

# 1. Instantiate the ResNet-18 model architecture.
#    Ensure num_classes matches the output of your distilled model.
model = models.resnet18(num_classes=1000)

# 2. Load the state dictionary from your .pth file.
state_dict = torch.load(MODEL_PATH, map_location='cpu')

# 3. Load the weights into the model.
#    This handles various ways a model checkpoint might be saved.
if 'model_state_dict' in state_dict:
    model.load_state_dict(state_dict['model_state_dict'])
elif 'state_dict' in state_dict:
    model.load_state_dict(state_dict['state_dict'])
else:
    model.load_state_dict(state_dict)

# 4. Set the model to evaluation mode and move to GPU.
model.eval()
model = model.to("cuda")

print("✅ Custom distilled ResNet-18 model loaded and moved to GPU successfully.")

-> Loading the custom distilled ResNet-18 model...
✅ Custom distilled ResNet-18 model loaded and moved to GPU successfully.


In [4]:
# %%
# ===================================================================
#             COMPILE AND SAVE TENSORRT FP32 MODEL
# ===================================================================
print("\n" + "="*50)
print("Compiling model with Torch-TensorRT (FP32)...")

try:
    # Use a representative batch size for compilation.
    # This determines the static batch size of the resulting engine.
    trt_model_fp32 = torch_tensorrt.compile(
        model,
        inputs=[torch_tensorrt.Input((32, 3, 224, 224), dtype=torch.float32)],
        enabled_precisions={torch.float32},
        workspace_size=1 << 28,  # 256MB workspace
        ir="torchscript"
    )
    print("✅ Compilation complete.")

    # Save the FP32 optimized model
    fp32_model_path = OPTIMIZED_MODEL_DIR / 'resnet18_distilled_trt_fp32.ts'
    torch.jit.save(trt_model_fp32, fp32_model_path)
    print(f"✅ FP32 optimized model saved to: {fp32_model_path}")

except Exception as e:
    print(f"✗ ERROR during FP32 compilation: {e}")

INFO:torch_tensorrt._compile:Module was provided as a torch.nn.Module, trying to script the module with torch.jit.script. In the event of a failure please preconvert your module to TorchScript



Compiling model with Torch-TensorRT (FP32)...




✅ Compilation complete.
✅ FP32 optimized model saved to: /workspace/saved_models_and_logs/kd_tensorrt/resnet18_distilled_trt_fp32.ts


In [5]:
# %%
# ===================================================================
#             COMPILE AND SAVE TENSORRT FP16 MODEL
# ===================================================================
print("\n" + "="*50)
print("Compiling model with Torch-TensorRT (FP16)...")

try:
    # Compile for FP16 precision
    trt_model_fp16 = torch_tensorrt.compile(
        model,
        inputs=[torch_tensorrt.Input((32, 3, 224, 224), dtype=torch.float16)],
        enabled_precisions={torch.float16},
        workspace_size=1 << 28,  # 256MB workspace
        ir="torchscript"
    )
    print("✅ Compilation complete.")

    # Save the FP16 optimized model
    fp16_model_path = OPTIMIZED_MODEL_DIR / 'resnet18_distilled_trt_fp16.ts'
    torch.jit.save(trt_model_fp16, fp16_model_path)
    print(f"✅ FP16 optimized model saved to: {fp16_model_path}")

except Exception as e:
    print(f"✗ ERROR during FP16 compilation: {e}")

print("\n🎉 All tasks complete.")

INFO:torch_tensorrt._compile:Module was provided as a torch.nn.Module, trying to script the module with torch.jit.script. In the event of a failure please preconvert your module to TorchScript



Compiling model with Torch-TensorRT (FP16)...
✅ Compilation complete.
✅ FP16 optimized model saved to: /workspace/saved_models_and_logs/kd_tensorrt/resnet18_distilled_trt_fp16.ts

🎉 All tasks complete.


The compiler is going to use the user setting Half
This conflict may cause an error at runtime due to partial compilation being enabled and therefore
compatibility with PyTorch's data type convention is required.
If you do indeed see errors at runtime either:
- Remove the dtype spec for x.1
- Disable partial compilation by setting require_full_compilation to True
