2. Load and Split Dataset Automatically

In [9]:
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, random_split

# Define transformations (resize, normalize, convert to tensor)
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # ResNet requires 224x224 images
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Standard normalization
])

# Load dataset from a single folder
dataset = datasets.ImageFolder(root="W:\gasSensor_ws\others\data for resnet\\nav_dataset", transform=transform)

# Split dataset into training (80%) and validation (20%)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)

# Print dataset info
print(f"Total Images: {len(dataset)}")
print(f"Training Images: {len(train_dataset)}")
print(f"Validation Images: {len(val_dataset)}")
print("Classes:", dataset.classes)


Total Images: 1114
Training Images: 891
Validation Images: 223
Classes: ['empty_floor', 'object_on_floor']


3. Load Pretrained ResNet-18 and Modify It for Binary Classification

In [10]:
import torchvision.models as models
import torch.nn as nn

# Select device (GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load ResNet-18 (pretrained on ImageNet)
model = models.resnet18(pretrained=True)

# Modify the last fully connected layer for binary classification
num_classes = 2  # Empty Floor vs Objects on Floor
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Move model to device
model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()  # Binary classification with two classes
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


4. Train the Model

In [11]:
from tqdm import tqdm

num_epochs = 10  # Change based on dataset size

for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0

    loop = tqdm(train_loader, leave=True)
    for images, labels in loop:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        loop.set_description(f"Epoch [{epoch+1}/{num_epochs}]")
        loop.set_postfix(loss=loss.item())

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")


Epoch [1/10]: 100%|██████████| 28/28 [00:58<00:00,  2.09s/it, loss=0.143]  


Epoch [1/10], Loss: 0.1906


Epoch [2/10]: 100%|██████████| 28/28 [00:58<00:00,  2.09s/it, loss=0.0231]


Epoch [2/10], Loss: 0.0883


Epoch [3/10]: 100%|██████████| 28/28 [01:01<00:00,  2.20s/it, loss=0.0123] 


Epoch [3/10], Loss: 0.0499


Epoch [4/10]: 100%|██████████| 28/28 [01:01<00:00,  2.18s/it, loss=0.265]  


Epoch [4/10], Loss: 0.0581


Epoch [5/10]: 100%|██████████| 28/28 [01:03<00:00,  2.28s/it, loss=0.00534]


Epoch [5/10], Loss: 0.0710


Epoch [6/10]: 100%|██████████| 28/28 [01:03<00:00,  2.27s/it, loss=0.0215] 


Epoch [6/10], Loss: 0.0539


Epoch [7/10]: 100%|██████████| 28/28 [01:03<00:00,  2.28s/it, loss=0.156]  


Epoch [7/10], Loss: 0.0245


Epoch [8/10]: 100%|██████████| 28/28 [01:05<00:00,  2.32s/it, loss=0.0734]  


Epoch [8/10], Loss: 0.0110


Epoch [9/10]: 100%|██████████| 28/28 [01:06<00:00,  2.39s/it, loss=0.00671]


Epoch [9/10], Loss: 0.0368


Epoch [10/10]: 100%|██████████| 28/28 [01:05<00:00,  2.34s/it, loss=0.00254]

Epoch [10/10], Loss: 0.0129





5. Validate the Model

In [12]:
def evaluate_model(model, val_loader):
    model.eval()  # Set model to evaluation mode
    correct = 0
    total = 0

    with torch.no_grad():  # No need to calculate gradients
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    accuracy = 100 * correct / total
    print(f"Validation Accuracy: {accuracy:.2f}%")

evaluate_model(model, val_loader)


Validation Accuracy: 99.10%


6. Save and Load the Model

In [13]:
torch.save(model.state_dict(), "resnet18_floor_detection.pth")


In [14]:
model.load_state_dict(torch.load("resnet18_floor_detection.pth"))
model.eval()  # Set to evaluation mode


  model.load_state_dict(torch.load("resnet18_floor_detection.pth"))


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

7. Make Predictions on a New Image

In [27]:
from PIL import Image

def predict_image(image_path, model, transform):
    model.eval()  # Set to evaluation mode
    image = Image.open(image_path)
    image = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(image)
        _, predicted = torch.max(output, 1)

    class_name = dataset.classes[predicted.item()]
    print(f"Predicted class: {class_name}")

predict_image("W:\gasSensor_ws\GasSensor_ws\src\BT\exp_images_3_3_25\\realsense_195229.png", model, transform)


Predicted class: empty_floor
