In [None]:
!pip3 install torch torchvision torchaudio onnx onnxruntime pillow fastapi uvicorn python-multipart

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets
from torch.utils.data import Dataset, DataLoader
import torchvision.utils as TUtils
import torchvision.transforms as T
import torchvision.models as models
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import glob
import pathlib
import onnxruntime as ort
import io
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
import nest_asyncio
import uvicorn

In [None]:
# Shared preprocessing: resize to 32x32, convert to a tensor, then normalise
# with mean 0.5 / std 0.5 (the single value broadcasts over every channel).
transform = T.Compose([
    T.Resize((32, 32)),  # Resize images to fit the network
    T.ToTensor(),
    T.Normalize((0.5,), (0.5,))
])

batch_size = 16

# Custom image-folder dataset (one sub-directory per class under ./data).
# Fixed: the variable was misspelled `ataset`, so the DataLoader below raised
# NameError on `dataset`.
# NOTE(review): MNIST is also downloaded under ./data further down — confirm the
# downloaded files do not end up being picked up as an ImageFolder class.
dataset = datasets.ImageFolder(root='data', transform=transform)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# MNIST training split.
# NOTE(review): the training split is stored under ./data/test while the test
# split is stored under ./data — confirm these roots are intentional.
trainset = datasets.MNIST(root='./data/test', train=True,
                          download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=batch_size,
                         shuffle=True, num_workers=0)

# MNIST test split; not shuffled so evaluation order is stable.
testset = datasets.MNIST(root='./data', train=False,
                         download=True, transform=transform)
testloader = DataLoader(testset, batch_size=batch_size,
                        shuffle=False, num_workers=0)

# Redefine classes as locations instead of objects: class index i is displayed
# as location_classes[i].
location_classes = (
    'New York', 'Paris', 'Tokyo', 'London',
    'Sydney', 'Dubai', 'Rio', 'Cape Town', 'Venice', 'Hong Kong'
)

In [None]:
# Classification loss used for both training and evaluation.
criterion = nn.CrossEntropyLoss()

# SGD with momentum over every parameter of `locationNet`.
# NOTE(review): `locationNet` is not defined in the visible cells; it must be
# created before this cell runs.
optimizer = optim.SGD(
    locationNet.parameters(),
    lr=1e-3,
    momentum=0.9,
)

In [None]:
# Kept so that any cell referencing these names still finds them. train_model
# does not write training metrics into these "test" lists; it returns its own
# per-epoch histories instead.
test_losses = []
test_accuracies = []


def _plot_training_metrics(epoch_losses, epoch_accuracies):
    """Plot per-epoch loss and accuracy side by side, save the figure and show it."""
    epochs = range(1, len(epoch_losses) + 1)
    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

    # Plot loss
    loss_ax.plot(epochs, epoch_losses, 'b-', label='Training Loss')
    loss_ax.set_title('Training Loss per Epoch')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')
    loss_ax.grid(True)
    loss_ax.legend()

    # Plot accuracy
    acc_ax.plot(epochs, epoch_accuracies, 'r-', label='Training Accuracy')
    acc_ax.set_title('Training Accuracy per Epoch')
    acc_ax.set_xlabel('Epoch')
    acc_ax.set_ylabel('Accuracy (%)')
    acc_ax.grid(True)
    acc_ax.legend()

    fig.tight_layout()
    fig.savefig('location_training_metrics.png')
    plt.show()


# Training function
def train_model(model, trainloader, criterion, optimizer, num_epochs=5, plot=True):
    """Train `model` and report per-epoch training loss and accuracy.

    Args:
        model: Module to optimise; it is switched to training mode.
        trainloader: Iterable yielding `(inputs, labels)` batches.
        criterion: Loss callable taking `(outputs, labels)`.
        optimizer: Optimiser already bound to the model's parameters.
        num_epochs: Number of passes over `trainloader`.
        plot: When True (default), plot the metric curves, save them to
            'location_training_metrics.png' and show the figure.

    Returns:
        `(epoch_losses, epoch_accuracies)`: two lists with one entry per epoch.
        The loss is the mean batch loss; the accuracy is a percentage computed
        on the training batches as they are seen during the epoch.

    Raises:
        ValueError: If an epoch yields no samples, since the metrics would
            otherwise divide by zero.
    """
    epoch_losses = []
    epoch_accuracies = []

    # Make sure layers such as dropout/batch-norm behave in training mode.
    model.train()

    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        batch_losses = []

        for i, (inputs, labels) in enumerate(trainloader):
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Calculate batch loss and accuracy
            batch_loss = loss.item()
            running_loss += batch_loss
            batch_losses.append(batch_loss)

            # detach(): predictions are only used for bookkeeping, not gradients.
            predicted = outputs.detach().argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            if i % 2000 == 1999:  # print every 2000 mini-batches
                print(f'[{epoch + 1}, {i + 1}] loss: {running_loss / 2000:.3f}')
                running_loss = 0.0

        if not batch_losses or total == 0:
            raise ValueError('trainloader produced no samples; cannot compute epoch metrics')

        # Calculate epoch metrics
        epoch_loss = sum(batch_losses) / len(batch_losses)
        epoch_accuracy = 100 * correct / total

        epoch_losses.append(epoch_loss)
        epoch_accuracies.append(epoch_accuracy)

        print(f'Epoch {epoch+1} - Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%')

    print('Finished Training')

    if plot:
        _plot_training_metrics(epoch_losses, epoch_accuracies)

    return epoch_losses, epoch_accuracies

In [None]:
# Train the model — TODO: this cell contains no code; the call to train_model(...) appears to be missing.

In [None]:
def test_model(model, testloader, classes):
    # Prepare to count predictions for each class
    correct_pred = {classname: 0 for classname in classes}
    total_pred = {classname: 0 for classname in classes}
    
    all_labels = []
    all_predictions = []
    
        # No gradient calculation needed
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            outputs = model(images)
            _, predictions = torch.max(outputs, 1)
            
            # Store all labels and predictions for confusion matrix
            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predictions.cpu().numpy())
            
            # Collect the correct predictions for each class
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[classes[label]] += 1
                total_pred[classes[label]] += 1
    
    # Print accuracy for each location
    accuracies = []
    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        accuracies.append(accuracy)
        print(f'Accuracy for location {classname}: {accuracy:.1f}%')
    
    # Calculate overall accuracy
    total_correct = sum(correct_pred.values())
    total = sum(total_pred.values())
    overall_accuracy = 100 * total_correct / total
    print(f'Overall location detection accuracy: {overall_accuracy:.1f}%')
    
        # Plot class accuracies
    plt.figure(figsize=(12, 6))
    bars = plt.bar(classes, accuracies, color='skyblue')
    plt.title('Location Detection Accuracy by City')
    plt.xlabel('Location')
    plt.ylabel('Accuracy (%)')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    
    # Add accuracy values on top of bars
    for bar, accuracy in zip(bars, accuracies):
        plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1, 
                f'{accuracy:.1f}%', ha='center')

In [None]:
# Test the model
# NOTE(review): `locationNet` and `imshow` are not defined in the visible cells;
# they must exist before this cell runs.
print("Testing location detection accuracy...")
test_model(locationNet, testloader, location_classes)

# Get some test images
dataiter = iter(testloader)
images, labels = next(dataiter)

# Preview only as many samples as are labelled below, so the image grid and the
# printed names line up (and a batch smaller than 4 cannot raise IndexError).
preview_count = min(4, len(labels))

# Print images with location labels
print('Actual locations: ', ' '.join(f'{location_classes[labels[j]]}' for j in range(preview_count)))
imshow(TUtils.make_grid(images[:preview_count]))

# Print predicted locations (inference only, so no autograd graph is needed)
with torch.no_grad():
    outputs = locationNet(images)
_, predicted = torch.max(outputs, 1)
print('Predicted locations: ', ' '.join(f'{location_classes[predicted[j]]}' for j in range(preview_count)))

# Calculate confusion matrix for location detection:
# rows are true labels, columns are predicted labels.
print("\nDetailed Location Detection Performance:")
num_classes = len(location_classes)
confusion_matrix = torch.zeros(num_classes, num_classes, dtype=torch.int)
with torch.no_grad():
    for images, labels in testloader:
        outputs = locationNet(images)
        _, predictions = torch.max(outputs, 1)
        for label, prediction in zip(labels.tolist(), predictions.tolist()):
            confusion_matrix[label, prediction] += 1

print("Top misclassifications:")
for i in range(num_classes):
    for j in range(num_classes):
        # .item() yields a plain int; formatting the tensor element directly
        # would print "tensor(12, dtype=torch.int32)" instead of "12".
        count = confusion_matrix[i, j].item()
        if i != j and count > 10:
            print(f"{location_classes[i]} mistaken as {location_classes[j]}: {count} times")

In [None]:
# Rebuild the network and restore its trained weights.
# NOTE(review): `MonResNet` is not defined in the visible cells; it must be
# defined or imported before this cell runs.
model = MonResNet(num_classes=5)
# map_location="cpu" lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load("model/location_recognition.pth", map_location="cpu"))
# Export the inference behaviour (dropout disabled, batch-norm using running stats).
model.eval()

# Dummy input with the expected dimensions [batch_size, channels, height, width]
dummy_input = torch.randn(1, 3, 224, 224)

# Export the model to ONNX
torch.onnx.export(
    model,
    dummy_input,
    "location_recognition.onnx",
    input_names=["input"],
    output_names=["output"],
    opset_version=11
)

print("✅ Modèle exporté avec succès en location_recognition.onnx")

In [None]:
# Load the exported ONNX model once, when this cell runs, so that every
# prediction reuses the same inference session.
session = ort.InferenceSession("location_recognition.onnx")
# Name of the model's first input; used as the feed key for session.run.
input_name = session.get_inputs()[0].name

def preprocess(image: "Image.Image") -> np.ndarray:
    """Turn a PIL image into a normalised float32 batch of shape [1, 3, 224, 224].

    Args:
        image: PIL image in any mode; it is converted to RGB so grayscale,
            palette and RGBA inputs all end up with exactly three channels.

    Returns:
        A float32 array laid out as [1, C, H, W], scaled to [0, 1] and then
        normalised per channel with the mean/std constants below.
    """
    image = image.convert("RGB").resize((224, 224))
    pixels = np.asarray(image, dtype=np.float32) / 255.0  # [H, W, C]
    pixels = pixels.transpose(2, 0, 1)  # [C, H, W]

    # Same normalisation as for ResNet. The constants are float32 on purpose:
    # float64 constants would promote the result to float64, which ONNX Runtime
    # rejects for a float32 model input.
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(3, 1, 1)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(3, 1, 1)
    pixels = (pixels - mean) / std

    return np.expand_dims(pixels, axis=0)  # [1, C, H, W]

app = FastAPI()

# Labels indexed by the model's output position. Defined at module level so the
# endpoint and the backend smoke test below share it (it used to be local to
# `predict`, which made the smoke test raise NameError).
# NOTE(review): presumably in the same order as the training labels — verify.
class_names = ["campagne", "foret", "montagne", "plage", "ville"]


def _predict_label(image):
    """Run the ONNX session on one PIL image and return the predicted class name."""
    # Cast defensively: the session is fed float32 regardless of what
    # preprocess returns.
    input_tensor = np.asarray(preprocess(image), dtype=np.float32)
    outputs = session.run(None, {input_name: input_tensor})
    prediction = int(np.argmax(outputs[0]))  # Adapt to the model's output layout if needed
    return class_names[prediction] if prediction < len(class_names) else "Inconnu"


@app.post("/predict")
async def predict(file: UploadFile = File(...)):
    """Classify an uploaded image.

    Returns a JSON body `{"prediction": <label>}`, or HTTP 400 with a `detail`
    message when the upload cannot be decoded as an image.
    """
    content = await file.read()
    try:
        image = Image.open(io.BytesIO(content)).convert("RGB")
    except OSError:
        # Pillow signals unreadable or truncated image data with OSError subclasses.
        return JSONResponse(status_code=400,
                            content={"detail": "Uploaded file is not a valid image."})

    return JSONResponse(content={"prediction": _predict_label(image)})


# Backend-side smoke test of the model. It runs before the server starts because
# uvicorn.run blocks until the server is stopped; it is skipped when the sample
# image is absent so a missing file cannot prevent the server from starting.
sample_path = pathlib.Path("miami.PNG")
if sample_path.exists():
    image = Image.open(sample_path).convert("RGB")
    print("Prédiction :", _predict_label(image))

# nest_asyncio lets uvicorn run inside the notebook's already-running event loop.
nest_asyncio.apply()
uvicorn.run(app, host="0.0.0.0", port=8000)

