In [1]:
!pip install wandb
!pip install torch
!pip install --upgrade pip setuptools
!pip install --upgrade typing_extensions pydantic pydantic-core





In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm  # Progress bar
import wandb  # Weights and Biases
import os
from torch.utils.data import random_split
# --- Configuration des hyperparamètres ---
config = {
    "batch_size": 32,
    "learning_rate": 0.001,
    "epochs": 5,
    "model_type": "SimpleNN",
    "dataset": "sample_10000",
    "project": "PyTorch_WandB_Tqdm",
    "entity": "mickaelassaraf",  # Remplacez par votre nom d'utilisateur ou équipe W&B
    "wandb_mode": "online"  # "online", "offline", ou "disabled"
}

# --- Initialisation de W&B ---
wandb.init(
    project=config["project"],
    entity=config["entity"],
    config=config,
    name="SimpleNN_Training",
    mode=config["wandb_mode"]
)

# --- Modèle simple ---
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 256, 128),
            nn.ReLU(),
            nn.Linear(128, 10)
        )
    
    def forward(self, x):
        return self.model(x)

# --- Préparation des données ---


# --- Préparation des données ---
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # Forcer 1 canal
    transforms.Resize((256, 256)),  # Optionnel : redimensionner les images à 256x256
    transforms.ToTensor(),  # Convertir en tenseur
])

# Charger toutes les données depuis votre dossier "sampled_10000"
full_dataset = datasets.ImageFolder(root="data/sampled_10000", transform=transform)

# Calculer la taille de l'ensemble d'entraînement (par exemple, 80% pour l'entraînement, 20% pour les tests)
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size

# Séparer les données en deux parties : entraînement et test
train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])

# Créer les DataLoader pour les ensembles d'entraînement et de test
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=config["batch_size"], shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=config["batch_size"], shuffle=False)


# --- Fonction d'entraînement ---
def train_model():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = SimpleNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"])
    
    for epoch in range(config["epochs"]):
        model.train()
        running_loss = 0.0
        
        # Utilisation de tqdm pour la barre de progression
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{config['epochs']}", leave=False)
        for images, labels in progress_bar:
            images, labels = images.to(device), labels.to(device)
            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            # Backward pass et optimisation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            progress_bar.set_postfix(loss=loss.item())
        
        # Calcul de la perte moyenne pour l'époque
        avg_train_loss = running_loss / len(train_loader)
        print(f"Epoch [{epoch + 1}/{config['epochs']}], Loss: {avg_train_loss:.4f}")
        
        # Enregistrement automatique des métriques dans W&B
        wandb.log({"train_loss": avg_train_loss, "epoch": epoch + 1})
    
    # Évaluation sur les données de test
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        progress_bar = tqdm(test_loader, desc="Testing", leave=False)
        for images, labels in progress_bar:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_accuracy = 100 * correct / total
    print(f"Test Accuracy: {test_accuracy:.2f}%")
    
    # Enregistrement automatique des métriques de test dans W&B
    wandb.log({"test_accuracy": test_accuracy})
    
    # Sauvegarde du modèle localement et dans W&B
    model_path = "models/simple_nn.pth"
    os.makedirs("models", exist_ok=True)
    torch.save(model.state_dict(), model_path)
    wandb.save(model_path)  # Enregistrement du modèle comme artefact dans W&B

    print("Training complete, metrics and model logged in Weights & Biases.")

# --- Lancement de l'entraînement ---
if __name__ == "__main__":
    train_model()


                                                                        

Epoch [1/5], Loss: 3.4767


                                                                        

Epoch [2/5], Loss: 1.3010


                                                                        

Epoch [3/5], Loss: 0.8442


                                                                        

Epoch [4/5], Loss: 0.6710


                                                                        

Epoch [5/5], Loss: 0.6688


                                                        

Test Accuracy: 72.61%
Training complete, metrics and model logged in Weights & Biases.




In [14]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Dossier contenant toutes les images
source_folder = "data/sampled_10000/Negative"  # Changez en fonction de votre dossier

# Dossiers de sortie pour les ensembles train/test
train_dir = "data/sampled_grouped/train_data/Negative"
test_dir = "data/sampled_grouped/test_data/Negative"

# Créer les dossiers si nécessaire
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Dictionnaire pour stocker les images par source
source_dict = {}

# Lire les fichiers dans le dossier source
for filename in os.listdir(source_folder):
    print(filename)
    
    if filename.endswith(".jpeg") or filename.endswith(".png"):  # Changez en fonction de vos extensions d'images
        # Extraire la source du nom du fichier (avant le dernier "_")
        source = filename.rsplit('_', 1)[0]  # Divise et garde tout avant le dernier "_"
        if source not in source_dict:
            source_dict[source] = []
        
        source_dict[source].append(filename)

# Séparer les sources en ensembles d'entraînement et de test
train_sources, test_sources = train_test_split(list(source_dict.keys()), test_size=0.2, random_state=42)

# Copier les fichiers vers les dossiers train/test en fonction de la source
for source in train_sources:
    source_folder_path = os.path.join(source_folder, source)
    train_folder_path = os.path.join(train_dir, source)
    os.makedirs(train_folder_path, exist_ok=True)
    
    for filename in source_dict[source]:
        shutil.copy(os.path.join(source_folder, filename), os.path.join(train_folder_path, filename))

for source in test_sources:
    source_folder_path = os.path.join(source_folder, source)
    test_folder_path = os.path.join(test_dir, source)
    os.makedirs(test_folder_path, exist_ok=True)
    
    for filename in source_dict[source]:
        shutil.copy(os.path.join(source_folder, filename), os.path.join(test_folder_path, filename))

print(f"Train data: {len(train_sources)} sources, Test data: {len(test_sources)} sources.")


Lm_462218_20x_14_03_2019_b0s5c0x22221-2776y96584-2080m44_15361792x0256.jpeg
Experiment-68_b0s0c0x9994-2776y9360-2080m44_512768x10241280.jpeg
Lm_462218_20x_14_03_2019_b0s1c0x25522-2776y24054-2080m91_17922048x512768.jpeg
Experiment-101_b0s1c0x61977-2776y22947-2080m74_0256x17922048.jpeg
Experiment-88_b0s2c0x28229-2776y63383-2080m127_17922048x0256.jpeg
Experiment-102_b0s4c0x74932-2776y23071-2080m7_12801536x23042560.jpeg
Experiment-91_b0s0c0x77980-2776y11232-2080m67_17922048x256512.jpeg
Experiment-97_b0s4c0x105150-2776y9480-2080m20_512768x512768.jpeg
Experiment-68_b0s3c0x79853-2776y52695-2080m80_256512x23042560.jpeg
Experiment-102_b0s0c0x99049-2776y7487-2080m33_12801536x256512.jpeg
Experiment-93_b0s4c0x68264-2776y48645-2080m21_7681024x20482304.jpeg
Snap-151_b0s0c0x78430-2776y0-2080m3_7681024x20482304.jpeg
Experiment-100_b0s1c0x67532-2776y35224-2080m121_10241280x0256.jpeg
Experiment-68_b0s1c0x108869-2776y16949-2080m80_12801536x0256.jpeg
Experiment-91_b0s3c0x58007-2776y65610-2080m179_15361792

In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm  # Progress bar
import wandb  # Weights and Biases
import os
from torch.utils.data import random_split
# --- Configuration des hyperparamètres ---
config = {
    "batch_size": 32,
    "learning_rate": 0.001,
    "epochs": 5,
    "model_type": "SimpleNN",
    "dataset": "sample_10000",
    "project": "PyTorch_WandB_Tqdm",
    "entity": "mickaelassaraf",  # Remplacez par votre nom d'utilisateur ou équipe W&B
    "wandb_mode": "online"  # "online", "offline", ou "disabled"
}

# --- Initialisation de W&B ---
wandb.init(
    project=config["project"],
    entity=config["entity"],
    config=config,
    name="SimpleNN_Training",
    mode=config["wandb_mode"]
)

# --- Modèle simple ---
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 256, 128),
            nn.ReLU(),
            nn.Linear(128, 10)
        )
    
    def forward(self, x):
        return self.model(x)

# --- Préparation des données ---


# --- Préparation des données ---
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # Forcer 1 canal
    transforms.Resize((256, 256)),  # Optionnel : redimensionner les images à 256x256
    transforms.ToTensor(),  # Convertir en tenseur
])

# Charger toutes les données depuis votre dossier "sampled_10000"
train_dataset = datasets.ImageFolder(root="data/sampled_grouped/train_data", transform=transform)
train_dataset = datasets.ImageFolder(root="data/sampled_grouped/test_data", transform=transform)



# Créer les DataLoader pour les ensembles d'entraînement et de test
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=config["batch_size"], shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=config["batch_size"], shuffle=False)


# --- Fonction d'entraînement ---
def train_model():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = SimpleNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"])
    
    for epoch in range(config["epochs"]):
        model.train()
        running_loss = 0.0
        
        # Utilisation de tqdm pour la barre de progression
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{config['epochs']}", leave=False)
        for images, labels in progress_bar:
            images, labels = images.to(device), labels.to(device)
            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            # Backward pass et optimisation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            progress_bar.set_postfix(loss=loss.item())
        
        # Calcul de la perte moyenne pour l'époque
        avg_train_loss = running_loss / len(train_loader)
        print(f"Epoch [{epoch + 1}/{config['epochs']}], Loss: {avg_train_loss:.4f}")
        
        # Enregistrement automatique des métriques dans W&B
        wandb.log({"train_loss": avg_train_loss, "epoch": epoch + 1})
    
    # Évaluation sur les données de test
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        progress_bar = tqdm(test_loader, desc="Testing", leave=False)
        for images, labels in progress_bar:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_accuracy = 100 * correct / total
    print(f"Test Accuracy: {test_accuracy:.2f}%")
    
    # Enregistrement automatique des métriques de test dans W&B
    wandb.log({"test_accuracy": test_accuracy})
    
    # Sauvegarde du modèle localement et dans W&B
    model_path = "models/simple_nn.pth"
    os.makedirs("models", exist_ok=True)
    torch.save(model.state_dict(), model_path)
    wandb.save(model_path)  # Enregistrement du modèle comme artefact dans W&B

    print("Training complete, metrics and model logged in Weights & Biases.")

# --- Lancement de l'entraînement ---
if __name__ == "__main__":
    train_model()


                                                                      

Epoch [1/5], Loss: 10.3118


                                                                      

Epoch [2/5], Loss: 5.9736


                                                                      

Epoch [3/5], Loss: 4.7880


                                                                      

Epoch [4/5], Loss: 1.9846


                                                                      

Epoch [5/5], Loss: 0.9583


                                                        

Test Accuracy: 71.26%
Training complete, metrics and model logged in Weights & Biases.


