In [None]:
!pip install fastapi uvicorn onnxruntime-gpu onnxruntime pyngrok requests numpy




In [None]:
!pip install python-multipart



In [None]:
import torch
import torch.nn.functional as F
from PIL import Image
from fastapi import FastAPI, File, UploadFile, HTTPException
import onnxruntime as ort
import numpy as np
import io
import torchvision.transforms as transforms

# Pick the execution provider: CUDA when this onnxruntime build reports it, CPU otherwise.
if "CUDAExecutionProvider" in ort.get_available_providers():
    providers = ["CUDAExecutionProvider"]
else:
    providers = ["CPUExecutionProvider"]

# One inference session, created at import time and shared by every request.
ort_session = ort.InferenceSession("/content/resnet50_dog_cat.onnx", providers=providers)

# Labels looked up by the predicted class index.
# NOTE(review): assumes the model's output order is [Cat, Dog] - confirm against training.
CLASS_NAMES = ["Cat", "Dog"]

app = FastAPI()

# Preprocessing pipeline: resize to 224x224, convert to a tensor, then normalize each
# channel with the standard ImageNet mean / std.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

def preprocess_image(image: Image.Image) -> np.ndarray:
    """Turn a PIL image into a batched float32 array for the ONNX session.

    Args:
        image: PIL image to run through the module-level ``transform`` pipeline.

    Returns:
        The transformed image as a float32 NumPy array with a new leading
        batch axis of size 1.
    """
    tensor = transform(image)
    array = tensor.numpy().astype(np.float32)
    # Prepend the batch axis.
    return array[np.newaxis, ...]

@app.post("/predict_cuda/")
async def predict_cuda(file: UploadFile = File(...)):
    """Classify an uploaded image with the module-level ONNX session.

    Despite the route name, inference runs on whichever execution provider
    ``ort_session`` was created with.

    Args:
        file: Image sent as a multipart form upload.

    Returns:
        A dict with ``"prediction"`` (an entry of ``CLASS_NAMES``) and
        ``"confidence"`` (softmax probability of that class, rounded to
        4 decimal places).

    Raises:
        HTTPException: 400 when the upload cannot be decoded as an image
            (empty, truncated, unsupported or oversized file); 500 for any
            other failure while reading, preprocessing or running inference.
    """
    try:
        image_bytes = await file.read()

        # A bad upload is the client's fault, so report it as 400 rather than
        # letting it fall through to the generic 500 handler below.
        # PIL's UnidentifiedImageError is an OSError subclass, so OSError covers it.
        try:
            image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
        except (OSError, ValueError, Image.DecompressionBombError) as e:
            raise HTTPException(status_code=400, detail=f"Invalid image file: {str(e)}") from e

        input_tensor = preprocess_image(image)

        # Run inference, feeding the session's first declared input
        ort_inputs = {ort_session.get_inputs()[0].name: input_tensor}
        ort_outs = ort_session.run(None, ort_inputs)

        # Softmax over the first output's first row to get probabilities
        # (presumably the model emits raw logits - TODO confirm).
        probabilities = F.softmax(torch.tensor(ort_outs[0][0]), dim=0).numpy()

        # Plain int so the index is a native Python value, not a NumPy scalar
        predicted_index = int(np.argmax(probabilities))
        predicted_class = CLASS_NAMES[predicted_index]
        confidence_score = round(float(probabilities[predicted_index]), 4)  # Round to 4 decimal places

        return {
            "prediction": predicted_class,
            "confidence": confidence_score  # Confidence score as a float (0 to 1)
        }

    except HTTPException:
        # Keep the status code chosen above instead of rewrapping it as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing image: {str(e)}") from e


In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# SECURITY: never store an ngrok authtoken in the notebook. The token that used to be
# hardcoded in this cell must be treated as leaked and revoked in the ngrok dashboard.
# Read it from the NGROK_AUTHTOKEN environment variable, or prompt for it without echo.
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")

# Saves the token to ngrok's config file without passing it on a shell command line.
ngrok.set_auth_token(ngrok_authtoken)

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
from pyngrok import ngrok
import uvicorn
import threading

# Single source of truth for the port shared by the uvicorn server and the ngrok tunnel.
PORT = 8000


def run():
    """Serve ``app`` with uvicorn on all interfaces at ``PORT``; blocks until the server stops."""
    uvicorn.run(app, host="0.0.0.0", port=PORT)


# Start the server in a background thread so the cell returns and the kernel stays usable.
# daemon=True keeps this never-ending thread from blocking interpreter shutdown.
thread = threading.Thread(target=run, daemon=True)
thread.start()

# Expose FastAPI with ngrok only after the server thread has been started, so the
# public URL is not printed before anything has been asked to listen on the port.
ngrok_tunnel = ngrok.connect(PORT)
print("Public URL:", ngrok_tunnel.public_url)


Public URL: https://4766-34-169-130-168.ngrok-free.app


INFO:     Started server process [24906]
INFO:     Waiting for application startup.
