In [5]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import torchvision.models as models
import torch.optim as optim

In [6]:
# Load MobileNetV2 with ImageNet-pretrained weights.
# The `weights=` enum replaces the deprecated `pretrained=True` flag;
# IMAGENET1K_V1 is the checkpoint that `pretrained=True` used to select.
# NOTE: a later cell loads the model again before swapping its classifier,
# so this instance is only useful for inspecting the stock architecture.
mobilenet = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1)



In [8]:
# Preprocessing pipeline applied to every image before it reaches the network
channel_mean = [0.485, 0.456, 0.406]  # per-channel mean used for normalization
channel_std = [0.229, 0.224, 0.225]   # per-channel standard deviation used for normalization
transform = transforms.Compose([
    # Bring every image to the 224x224 resolution the model is fed with
    transforms.Resize((224, 224)),
    # PIL image -> PyTorch tensor
    transforms.ToTensor(),
    # Standardize each channel with the statistics above
    transforms.Normalize(mean=channel_mean, std=channel_std),
])

In [None]:
# Training split of the room-classification dataset, read from train_data/
# and run through the preprocessing pipeline defined earlier
train_dataset = ImageFolder('train_data/', transform=transform)
# Serve the training images in shuffled mini-batches of 32
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)


In [None]:
# Held-out split read from test_data/, preprocessed exactly like the training images
test_dataset = ImageFolder('test_data/', transform=transform)
# Fixed (unshuffled) order in mini-batches of 32 for evaluation
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)


In [7]:
# Build the transfer-learning model: an ImageNet-pretrained MobileNetV2
# backbone with a new classification head sized for the room dataset.
# The `weights=` enum replaces the deprecated `pretrained=True` flag;
# IMAGENET1K_V1 is the checkpoint that `pretrained=True` used to select.
mobilenet = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1)

# Freeze pretrained weights so that only the new head is updated by training
for param in mobilenet.parameters():
    param.requires_grad = False

# Replace the stock classification head with a custom one.
# classifier[1] is the original Linear layer; its in_features is the width of
# the feature vector produced by the backbone.
num_features = mobilenet.classifier[1].in_features
num_classes = len(train_dataset.classes)  # one output per class found in the training set
mobilenet.classifier = nn.Sequential(
    nn.Dropout(0.2),  # Dropout layer for regularization
    nn.Linear(num_features, 128),  # Custom fully connected layer
    nn.ReLU(),  # ReLU activation function
    nn.Linear(128, num_classes)  # Custom output layer for room classification
)

# The layers created above are trainable by default (they were added after the
# freeze loop); this loop just makes the "train the head only" intent explicit.
for param in mobilenet.classifier.parameters():
    param.requires_grad = True


MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [None]:
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
# Give Adam only the parameters that are still trainable. Frozen parameters
# never receive gradients, so handing them to the optimizer is dead weight and
# hides which part of the network is actually being trained.
trainable_params = [p for p in mobilenet.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=0.001)

In [None]:
# Train the model
# NOTE(review): train() switches every sub-module to training mode, including
# the BatchNorm layers of the frozen backbone, whose running statistics keep
# being updated -- confirm that is intended for a frozen feature extractor.
mobilenet.train()
for epoch in range(5):  # Example: Train for 5 epochs
    running_loss = 0.0  # loss accumulated since the last progress print
    epoch_loss = 0.0    # loss accumulated over the whole epoch
    num_batches = 0
    for i, (inputs, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = mobilenet(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        epoch_loss += batch_loss
        num_batches += 1
        if i % 100 == 99:  # Print average loss every 100 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

    # Always report the epoch average: with fewer than 100 batches per epoch the
    # periodic print above never fires, and the trailing batches after the last
    # multiple of 100 would otherwise go unreported.
    if num_batches:
        print('Epoch %d average loss: %.3f' % (epoch + 1, epoch_loss / num_batches))

print('Finished Training')


In [None]:
# Measure top-1 accuracy of the trained model on the test dataset
mobilenet.eval()
correct, total = 0, 0
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in test_loader:
        logits = mobilenet(images)
        # The index of the largest score in each row is the predicted class
        predicted = logits.max(dim=1).indices
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

print('Accuracy on test images: %d %%' % (100 * correct / total))

In [None]:
# Persist the whole model object (architecture + weights) under a custom file name.
# The loading cell further down reads this file back with torch.load, so the
# file name and the "entire model" format must stay in sync with it.
torch.save(obj=mobilenet, f='robot_eyes.pth')


LOADING AND RUNNING

In [None]:

# Load the trained model
import torch
from torchvision import transforms
from PIL import Image

# 'robot_eyes.pth' holds the entire pickled model object (it was written with
# torch.save(mobilenet, ...)), not just a state_dict. Newer PyTorch releases
# default torch.load to weights_only=True, which refuses to unpickle a full
# module, so the flag is set explicitly.
# SECURITY: full unpickling can execute arbitrary code -- only load checkpoint
# files that you created yourself or otherwise trust.
# map_location='cpu' keeps the model on the CPU, where the input tensor built
# in the preprocessing cell lives.
model = torch.load('robot_eyes.pth', map_location='cpu', weights_only=False)

In [None]:
# Load the image
image_path = "path/to/your/image.jpg"
# Force 3-channel RGB: grayscale or RGBA files (common for PNGs) would otherwise
# yield 1- or 4-channel tensors that break the 3-channel normalization and the
# model's first convolution. ImageFolder applies the same conversion to the
# training images, so this keeps inference consistent with training.
image = Image.open(image_path).convert("RGB")

In [None]:
# Preprocess the image with the same steps that were used for training
preprocess_steps = [
    transforms.Resize((224, 224)),  # match the model's input size
    transforms.ToTensor(),          # PIL image -> tensor
    transforms.Normalize(           # per-channel standardization
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform = transforms.Compose(preprocess_steps)

# The model expects a batch, so add a leading batch dimension of size 1
image_tensor = transform(image)
input_tensor = image_tensor.unsqueeze(0)


In [None]:
# Put the model in evaluation mode before running inference
model.eval()

# Run the image through the network without tracking gradients
with torch.no_grad():
    output = model(input_tensor)

# The position of the largest score is the predicted class index
predicted = output.max(dim=1).indices
predicted_class_index = predicted.item()

# Report the result
print("Predicted class index:", predicted_class_index)