In [1]:
!pip install -U opencv-python tensorflow scikit-learn pandas matplotlib tensorflow_datasets requests

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


# IMPORTATION DES LIBRAIRIES

In [2]:
import pandas as pd
import numpy as np
from PIL import Image
import requests
import zipfile
import os
import time
import psutil
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tqdm import tqdm

2025-08-14 09:35:22.202382: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')
if device.type == 'cuda':
    print(f'GPU Name: {torch.cuda.get_device_name(0)}')

Using device: cuda
GPU Name: NVIDIA RTX 6000 Ada Generation


# Chargement du dataset

In [4]:
def telecharger_dezip(url, chemin_sauv="plant_village_dataset.zip", extract_path="."):
    print(" Début du téléchargement")
    try:
        response=requests.get(url, stream=True)
        response.raise_for_status()

        #Taille totale du fichier pour la barre de progression
        total_size=int(response.headers.get('content-length',0))
        block_size=1064
        bar_progression = tqdm(total=total_size, unit='iB', unit_scale=True)

        #Téléchargement
        with open(chemin_sauv, 'wb') as file:
            for data in response.iter_content(block_size):
                bar_progression.update(len(data))
                file.write(data)
        bar_progression.close()

        if total_size != 0 and bar_progression.n != total_size:
            print("ERREUR, quelque chose s'est mal passé pendant le téléchargement.")
            return

        print(f"Téléchargement terminé. Fichier sauvegardé sous : {chemin_sauv}")

        # Créer le dossier d'extraction s'il n'existe pas
        if not os.path.exists(extract_path):
            os.makedirs(extract_path)

        # Décompresser le fichier ZIP
        print(f"Décompression du fichier dans le dossier : {extract_path}")
        with zipfile.ZipFile(chemin_sauv, 'r') as zip_ref:
            zip_ref.extractall(extract_path)

        print("Décompression terminée.")

        # Optionnel : Supprimer le fichier .zip après extraction pour économiser de l'espace
        print(f"Suppression du fichier {chemin_sauv}...")
        os.remove(chemin_sauv)
        print("Opération terminée avec succès !")

    except requests.exceptions.RequestException as e:
        print(f"Une erreur de réseau est survenue: {e}")
    except zipfile.BadZipFile:
        print("Erreur: Le fichier téléchargé n'est pas un fichier ZIP valide.")
    except Exception as e:
        print(f"Une erreur inattendue est survenue: {e}")

In [5]:
URL = "https://data.mendeley.com/datasets/tywbtsjrjv/1/files/b4e3a32f-c0bd-4060-81e9-6144231f2520/file_downloaded"

In [6]:
extract_folder = "plant_village_dataset"

In [7]:
telecharger_dezip(URL, "PlantVillage.zip", extract_folder)

 Début du téléchargement


100%|██████████| 949M/949M [00:41<00:00, 23.0MiB/s]  


Téléchargement terminé. Fichier sauvegardé sous : PlantVillage.zip
Décompression du fichier dans le dossier : plant_village_dataset
Décompression terminée.
Suppression du fichier PlantVillage.zip...
Opération terminée avec succès !


In [8]:
path="/workspace/plant_village_dataset/Plant_leave_diseases_dataset_with_augmentation"

In [9]:
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

In [10]:
data_gen=ImageDataGenerator(rescale=1./255)

In [11]:
data=data_gen.flow_from_directory(
    path,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical"
)

Found 61486 images belonging to 39 classes.


# MODELISATION

## VGG19

In [12]:
# --------- 1. Préparer les données ---------
filepaths = []
labels = []
folds = os.listdir(path)
for fold in folds:
    f_path = os.path.join(path, fold)
    if not os.path.isdir(f_path):
        continue
    for file in os.listdir(f_path):
        filepaths.append(os.path.join(f_path, file))
        labels.append(fold)

df = pd.DataFrame({'filepaths': filepaths, 'labels': labels})
print(f"Total des images trouvées : {len(df)}")

Total des images trouvées : 61486


In [13]:
# Split 80/20 avec stratification
train_df, test_df = train_test_split(
    df,
    test_size=0.1,
    random_state=42,
    stratify=df['labels']
)
train_df, val_df = train_test_split(
    train_df,
    test_size=0.2,
    random_state=42,
    stratify=train_df['labels']
)

In [14]:
# Mapping des classes en indices
class_names = sorted(df['labels'].unique())
class_to_idx = {cls: idx for idx, cls in enumerate(class_names)}
num_classes = len(class_names)

In [15]:
# --------- 2. Dataset personnalisé ---------
class CustomImageDataset(Dataset):
    def __init__(self, df, class_to_idx, transform=None):
        self.df = df.reset_index(drop=True)
        self.class_to_idx = class_to_idx
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_path = self.df.loc[idx, 'filepaths']
        label_name = self.df.loc[idx, 'labels']
        label = self.class_to_idx[label_name]
        
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label

In [16]:
# --------- 3. Data augmentation et loaders ---------
train_transforms = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.RandomRotation(30),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.RandomResizedCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

val_transforms = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [17]:
train_dataset = CustomImageDataset(train_df, class_to_idx, transform=train_transforms)
val_dataset = CustomImageDataset(val_df, class_to_idx, transform=val_transforms)
test_dataset = CustomImageDataset(test_df, class_to_idx, transform=val_transforms)

In [18]:
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

In [19]:
# Charger le modèle MobileNetV2 pré-entraîné
model = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.DEFAULT)

In [20]:
for param in model.parameters():
    param.requires_grad = False

In [21]:
# Modifier le classificateur final pour notre nombre de classes
num_ftrs = model.classifier[1].in_features
model.classifier[1] = nn.Linear(num_ftrs, num_classes)

In [22]:
model = model.to(device)

In [23]:
optimizer = optim.Adam(model.classifier[1].parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
criterion = nn.CrossEntropyLoss()

In [24]:
def train_model(model, criterion, optimizer, scheduler, num_epochs, train_loader, val_loader, device):
    best_val_accuracy = 0.0
    best_model_path = '/workspace/models/best_mobilenetv2_model.pth'
    start_time = time.time()
    
    for epoch in range(num_epochs):
        epoch_start_time = time.time()
        
        # Phase d'entraînement
        model.train()
        train_loss = 0.0
        train_corrects = 0
        
        for inputs, labels in tqdm(train_loader, desc=f"Training Epoch {epoch+1}/{num_epochs}"):
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad()
            
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item() * inputs.size(0)
            train_corrects += torch.sum(preds == labels.data)
            
        scheduler.step()
        
        train_loss = train_loss / len(train_loader.dataset)
        train_accuracy = train_corrects.double() / len(train_loader.dataset)
        
        # Phase de validation
        model.eval()
        val_loss = 0.0
        val_corrects = 0
        
        with torch.no_grad():
            for inputs, labels in tqdm(val_loader, desc=f"Validation Epoch {epoch+1}/{num_epochs}"):
                inputs, labels = inputs.to(device), labels.to(device)
                
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)
                
                val_loss += loss.item() * inputs.size(0)
                val_corrects += torch.sum(preds == labels.data)
        
        val_loss = val_loss / len(val_loader.dataset)
        val_accuracy = val_corrects.double() / len(val_loader.dataset)
        
        # Affichage des métriques de performance
        print(f'Epoch {epoch+1}/{num_epochs} - '
              f'Temps: {time.time()-epoch_start_time:.2f}s | '
              f'Train Loss: {train_loss:.4f} Acc: {train_accuracy:.4f} | '
              f'Val Loss: {val_loss:.4f} Acc: {val_accuracy:.4f}')

        # Sauvegarde du meilleur modèle
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), best_model_path)
            print(f'Meilleur modèle sauvegardé avec une précision de validation de {best_val_accuracy:.4f}')
            
    total_time = time.time() - start_time
    print(f'Formation terminée en {total_time:.2f} secondes.')
    print(f'Meilleure précision de validation : {best_val_accuracy:.4f}')
    
    return model

In [25]:
NUM_EPOCHS = 20
trained_model = train_model(model, criterion, optimizer, scheduler, NUM_EPOCHS, train_loader, val_loader, device)

Training Epoch 1/20: 100%|██████████| 1384/1384 [01:10<00:00, 19.72it/s]
Validation Epoch 1/20: 100%|██████████| 346/346 [00:14<00:00, 23.62it/s]


Epoch 1/20 - Temps: 84.86s | Train Loss: 1.2087 Acc: 0.7027 | Val Loss: 0.6486 Acc: 0.8505
Meilleur modèle sauvegardé avec une précision de validation de 0.8505


Training Epoch 2/20: 100%|██████████| 1384/1384 [01:09<00:00, 20.05it/s]
Validation Epoch 2/20: 100%|██████████| 346/346 [00:13<00:00, 24.97it/s]


Epoch 2/20 - Temps: 82.91s | Train Loss: 0.7090 Acc: 0.8027 | Val Loss: 0.5290 Acc: 0.8641
Meilleur modèle sauvegardé avec une précision de validation de 0.8641


Training Epoch 3/20: 100%|██████████| 1384/1384 [01:11<00:00, 19.24it/s]
Validation Epoch 3/20: 100%|██████████| 346/346 [00:14<00:00, 24.23it/s]


Epoch 3/20 - Temps: 86.22s | Train Loss: 0.6267 Acc: 0.8178 | Val Loss: 0.4558 Acc: 0.8844
Meilleur modèle sauvegardé avec une précision de validation de 0.8844


Training Epoch 4/20: 100%|██████████| 1384/1384 [01:10<00:00, 19.75it/s]
Validation Epoch 4/20: 100%|██████████| 346/346 [00:13<00:00, 24.77it/s]


Epoch 4/20 - Temps: 84.06s | Train Loss: 0.5931 Acc: 0.8241 | Val Loss: 0.4056 Acc: 0.8944
Meilleur modèle sauvegardé avec une précision de validation de 0.8944


Training Epoch 5/20: 100%|██████████| 1384/1384 [01:11<00:00, 19.25it/s]
Validation Epoch 5/20: 100%|██████████| 346/346 [00:15<00:00, 22.82it/s]


Epoch 5/20 - Temps: 87.07s | Train Loss: 0.5817 Acc: 0.8233 | Val Loss: 0.4138 Acc: 0.8904


Training Epoch 6/20: 100%|██████████| 1384/1384 [01:15<00:00, 18.38it/s]
Validation Epoch 6/20: 100%|██████████| 346/346 [00:17<00:00, 20.24it/s]


Epoch 6/20 - Temps: 92.40s | Train Loss: 0.5678 Acc: 0.8309 | Val Loss: 0.3976 Acc: 0.8950
Meilleur modèle sauvegardé avec une précision de validation de 0.8950


Training Epoch 7/20: 100%|██████████| 1384/1384 [01:34<00:00, 14.69it/s]
Validation Epoch 7/20: 100%|██████████| 346/346 [00:21<00:00, 16.45it/s]


Epoch 7/20 - Temps: 115.23s | Train Loss: 0.5587 Acc: 0.8327 | Val Loss: 0.3535 Acc: 0.8999
Meilleur modèle sauvegardé avec une précision de validation de 0.8999


Training Epoch 8/20: 100%|██████████| 1384/1384 [01:18<00:00, 17.72it/s]
Validation Epoch 8/20: 100%|██████████| 346/346 [00:15<00:00, 22.78it/s]


Epoch 8/20 - Temps: 93.30s | Train Loss: 0.5317 Acc: 0.8403 | Val Loss: 0.3337 Acc: 0.9078
Meilleur modèle sauvegardé avec une précision de validation de 0.9078


Training Epoch 9/20: 100%|██████████| 1384/1384 [01:11<00:00, 19.48it/s]
Validation Epoch 9/20: 100%|██████████| 346/346 [00:14<00:00, 24.67it/s]


Epoch 9/20 - Temps: 85.11s | Train Loss: 0.5210 Acc: 0.8417 | Val Loss: 0.3200 Acc: 0.9127
Meilleur modèle sauvegardé avec une précision de validation de 0.9127


Training Epoch 10/20: 100%|██████████| 1384/1384 [01:10<00:00, 19.70it/s]
Validation Epoch 10/20: 100%|██████████| 346/346 [00:14<00:00, 23.58it/s]


Epoch 10/20 - Temps: 84.95s | Train Loss: 0.5255 Acc: 0.8413 | Val Loss: 0.3263 Acc: 0.9126


Training Epoch 11/20: 100%|██████████| 1384/1384 [01:08<00:00, 20.07it/s]
Validation Epoch 11/20: 100%|██████████| 346/346 [00:13<00:00, 24.73it/s]


Epoch 11/20 - Temps: 82.96s | Train Loss: 0.5190 Acc: 0.8420 | Val Loss: 0.3653 Acc: 0.9026


Training Epoch 12/20: 100%|██████████| 1384/1384 [01:06<00:00, 20.92it/s]
Validation Epoch 12/20: 100%|██████████| 346/346 [00:13<00:00, 25.52it/s]


Epoch 12/20 - Temps: 79.74s | Train Loss: 0.5106 Acc: 0.8455 | Val Loss: 0.3461 Acc: 0.9050


Training Epoch 13/20: 100%|██████████| 1384/1384 [01:08<00:00, 20.06it/s]
Validation Epoch 13/20: 100%|██████████| 346/346 [00:14<00:00, 23.93it/s]


Epoch 13/20 - Temps: 83.46s | Train Loss: 0.5196 Acc: 0.8428 | Val Loss: 0.3099 Acc: 0.9164
Meilleur modèle sauvegardé avec une précision de validation de 0.9164


Training Epoch 14/20: 100%|██████████| 1384/1384 [01:12<00:00, 19.15it/s]
Validation Epoch 14/20: 100%|██████████| 346/346 [00:14<00:00, 23.24it/s]


Epoch 14/20 - Temps: 87.17s | Train Loss: 0.5125 Acc: 0.8450 | Val Loss: 0.3388 Acc: 0.9087


Training Epoch 15/20: 100%|██████████| 1384/1384 [01:12<00:00, 19.03it/s]
Validation Epoch 15/20: 100%|██████████| 346/346 [00:14<00:00, 23.72it/s]


Epoch 15/20 - Temps: 87.34s | Train Loss: 0.5196 Acc: 0.8432 | Val Loss: 0.3229 Acc: 0.9119


Training Epoch 16/20: 100%|██████████| 1384/1384 [01:10<00:00, 19.59it/s]
Validation Epoch 16/20: 100%|██████████| 346/346 [00:12<00:00, 27.15it/s]


Epoch 16/20 - Temps: 83.40s | Train Loss: 0.5142 Acc: 0.8475 | Val Loss: 0.3277 Acc: 0.9134


Training Epoch 17/20: 100%|██████████| 1384/1384 [01:08<00:00, 20.18it/s]
Validation Epoch 17/20: 100%|██████████| 346/346 [00:14<00:00, 23.50it/s]


Epoch 17/20 - Temps: 83.31s | Train Loss: 0.5179 Acc: 0.8430 | Val Loss: 0.3406 Acc: 0.9074


Training Epoch 18/20: 100%|██████████| 1384/1384 [01:09<00:00, 19.98it/s]
Validation Epoch 18/20: 100%|██████████| 346/346 [00:13<00:00, 25.25it/s]


Epoch 18/20 - Temps: 82.97s | Train Loss: 0.5207 Acc: 0.8408 | Val Loss: 0.3420 Acc: 0.9096


Training Epoch 19/20: 100%|██████████| 1384/1384 [01:09<00:00, 19.98it/s]
Validation Epoch 19/20: 100%|██████████| 346/346 [00:14<00:00, 23.39it/s]


Epoch 19/20 - Temps: 84.08s | Train Loss: 0.5091 Acc: 0.8446 | Val Loss: 0.3559 Acc: 0.9046


Training Epoch 20/20: 100%|██████████| 1384/1384 [01:07<00:00, 20.49it/s]
Validation Epoch 20/20: 100%|██████████| 346/346 [00:13<00:00, 25.79it/s]

Epoch 20/20 - Temps: 80.97s | Train Loss: 0.5119 Acc: 0.8452 | Val Loss: 0.3036 Acc: 0.9186
Meilleur modèle sauvegardé avec une précision de validation de 0.9186
Formation terminée en 1732.25 secondes.
Meilleure précision de validation : 0.9186





### Evaluation

In [26]:
trained_model.load_state_dict(torch.load('/workspace/models/best_mobilenetv2_model.pth'))
trained_model.eval()

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [27]:
all_preds = []
all_labels = []

In [28]:
start_time = time.time()
with torch.no_grad():
    for i, (inputs, labels) in enumerate(test_loader):
        batch_start = time.time()
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = trained_model(inputs)
        preds = torch.argmax(outputs, dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

        # --- Profiling ---
        cpu_usage = psutil.cpu_percent(interval=None)
        ram = psutil.virtual_memory()
        if torch.cuda.is_available():
            gpu_mem = torch.cuda.memory_allocated() / 1024**2
        else:
            gpu_mem = 0.0
        print(f"[Batch {i+1}] Time: {time.time()-batch_start:.2f}s | CPU: {cpu_usage:.1f}% | RAM: {ram.used/1024**3:.2f}GB | GPU: {gpu_mem:.2f}MB")

end_time = time.time()

[Batch 1] Time: 0.02s | CPU: 8.5% | RAM: 130.66GB | GPU: 44.09MB
[Batch 2] Time: 0.01s | CPU: 37.7% | RAM: 130.68GB | GPU: 44.09MB
[Batch 3] Time: 0.01s | CPU: 11.0% | RAM: 130.70GB | GPU: 44.09MB
[Batch 4] Time: 0.01s | CPU: 7.9% | RAM: 130.69GB | GPU: 44.09MB
[Batch 5] Time: 0.01s | CPU: 19.1% | RAM: 130.69GB | GPU: 44.09MB
[Batch 6] Time: 0.01s | CPU: 15.0% | RAM: 130.70GB | GPU: 44.09MB
[Batch 7] Time: 0.01s | CPU: 7.1% | RAM: 130.70GB | GPU: 44.09MB
[Batch 8] Time: 0.01s | CPU: 6.3% | RAM: 130.70GB | GPU: 44.09MB
[Batch 9] Time: 0.02s | CPU: 17.9% | RAM: 130.70GB | GPU: 44.09MB
[Batch 10] Time: 0.01s | CPU: 7.9% | RAM: 130.71GB | GPU: 44.09MB
[Batch 11] Time: 0.01s | CPU: 8.0% | RAM: 130.69GB | GPU: 44.09MB
[Batch 12] Time: 0.01s | CPU: 20.5% | RAM: 130.70GB | GPU: 44.09MB
[Batch 13] Time: 0.01s | CPU: 14.1% | RAM: 130.71GB | GPU: 44.09MB
[Batch 14] Time: 0.01s | CPU: 19.3% | RAM: 130.71GB | GPU: 44.09MB
[Batch 15] Time: 0.01s | CPU: 24.6% | RAM: 130.72GB | GPU: 44.09MB
[Batch 16]

In [29]:
total_time = end_time - start_time
print(f"\nTemps Test Total: {total_time:.2f} sec")
print(f"Throughput: {len(test_dataset) / total_time:.2f} images/sec")


Temps Test Total: 8.44 sec
Throughput: 728.68 images/sec


In [31]:
# Rapport complet
print("=== Rapport complet d'évaluation sur l'ensemble de test (VGG11) ===")
print(classification_report(all_labels, all_preds, target_names=class_names))

=== Rapport complet d'évaluation sur l'ensemble de test (VGG11) ===
                                               precision    recall  f1-score   support

                           Apple___Apple_scab       0.96      0.81      0.88       100
                            Apple___Black_rot       0.99      0.93      0.96       100
                     Apple___Cedar_apple_rust       0.97      0.96      0.96       100
                              Apple___healthy       0.89      0.91      0.90       164
                    Background_without_leaves       0.96      0.94      0.95       114
                          Blueberry___healthy       0.93      0.98      0.95       150
                      Cherry___Powdery_mildew       0.97      0.90      0.94       105
                             Cherry___healthy       0.96      0.98      0.97       100
   Corn___Cercospora_leaf_spot Gray_leaf_spot       0.89      0.88      0.88       100
                           Corn___Common_rust       0.97     