In [1]:
import os
import torch
import torchvision
from torch.utils.data import random_split
import torch.nn as nn
import torch.nn.functional as F

# Seed PyTorch's global random number generator with a fixed value.
random_seed = 123
torch.manual_seed(random_seed)

<torch._C.Generator at 0x1e4239fda90>

In [2]:
data_dir = '../data/Images'

# One sub-directory per class. Plain files (e.g. .DS_Store, Thumbs.db) are
# skipped and the names are sorted, so the list does not depend on the OS
# directory order and lines up with the class list torchvision's ImageFolder
# builds from the same folder.
classes = sorted(entry.name for entry in os.scandir(data_dir) if entry.is_dir())
print(classes)
print(f"length: {len(classes)}")

['artstudio', 'bathroom', 'bedroom', 'children_room', 'closet', 'computerroom', 'dining_room', 'gameroom', 'kitchen', 'livingroom', 'locker_room', 'meeting_room']
length: 12


In [3]:
from matplotlib import pyplot as plt
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
from torch.utils.data.dataloader import DataLoader
import numpy as np
from torch.utils.data.dataloader import DataLoader
from torchvision.models import ResNet152_Weights, EfficientNet_B0_Weights, Inception_V3_Weights


def dataset_setup(model_name='resnet18', augment=True):
    """Build the torchvision preprocessing pipeline for one backbone.

    Args:
        model_name: 'resnet18' (256x256 input), 'resnet152', 'efficientnet_b0'
            or 'vit_b_16' (224x224 input), or 'inception_v3' (299x299 input).
        augment: When True (the default, matching the previous behaviour) the
            pipeline contains the random flip, rotation and colour jitter.
            Pass False for a deterministic resize + ToTensor pipeline, e.g.
            for validation, test or single-image inference.

    Returns:
        A `transforms.Compose` that maps a PIL image to a float tensor.

    Raises:
        ValueError: if `model_name` is not one of the supported names
            (previously this surfaced as an UnboundLocalError at `return`).
    """
    input_sizes = {
        'resnet18': (256, 256),
        'resnet152': (224, 224),
        'efficientnet_b0': (224, 224),
        'vit_b_16': (224, 224),
        'inception_v3': (299, 299),
    }
    if model_name not in input_sizes:
        raise ValueError(
            f"Unknown model_name {model_name!r}; expected one of {sorted(input_sizes)}"
        )

    steps = [transforms.Resize(input_sizes[model_name])]
    if augment:
        steps += [
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(degrees=10),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
        ]
    steps.append(transforms.ToTensor())
    return transforms.Compose(steps)

def accuracy(outputs, labels):
    """Return the fraction of rows of `outputs` whose max along dim 1 sits at the index given in `labels`."""
    predicted = outputs.max(dim=1).indices
    num_correct = (predicted == labels).sum().item()
    return num_correct / len(predicted)

class ConvClassifier(nn.Module):
    """Pretrained torchvision backbone whose final layer is resized to the dataset's classes.

    Args:
        model_name: One of `SUPPORTED_MODELS`.
        dataset: Object exposing a `classes` sequence (e.g. an ImageFolder);
            its length sets the number of output logits.

    Raises:
        ValueError: if `model_name` is unsupported or `dataset` is None.
    """

    SUPPORTED_MODELS = ('resnet18', 'resnet152', 'efficientnet_b0', 'vit_b_16', 'inception_v3')

    def __init__(self, model_name='resnet18', dataset=None):
        super().__init__()
        # Validate up front: previously an unknown name left `self.network`
        # undefined and only failed later, inside forward().
        if model_name not in self.SUPPORTED_MODELS:
            raise ValueError(
                f"Unknown model_name {model_name!r}; expected one of {self.SUPPORTED_MODELS}"
            )
        if dataset is None:
            raise ValueError("dataset is required: len(dataset.classes) sizes the output layer")

        num_classes = len(dataset.classes)
        self.model_name = model_name
        if self.model_name == 'resnet18':
            # `weights=` replaces the deprecated `pretrained=True` (same IMAGENET1K_V1 checkpoint).
            self.network = torchvision.models.resnet18(
                weights=torchvision.models.ResNet18_Weights.IMAGENET1K_V1)
            self.network.fc = nn.Linear(self.network.fc.in_features, num_classes)
        elif self.model_name == 'resnet152':
            self.network = torchvision.models.resnet152(weights=ResNet152_Weights.IMAGENET1K_V1)
            self.network.fc = nn.Linear(self.network.fc.in_features, num_classes)
        elif self.model_name == 'efficientnet_b0':
            self.network = torchvision.models.efficientnet_b0(weights=EfficientNet_B0_Weights.IMAGENET1K_V1)
            # classifier is (Dropout, Linear); only the Linear is swapped.
            self.network.classifier[1] = nn.Linear(self.network.classifier[1].in_features, num_classes)
        elif self.model_name == 'vit_b_16':
            self.network = torchvision.models.vit_b_16(
                weights=torchvision.models.ViT_B_16_Weights.IMAGENET1K_V1)
            # The whole `heads` container is replaced by a single Linear (kept
            # as in the original so state-dict keys stay `network.heads.*`).
            self.network.heads = nn.Linear(self.network.heads.head.in_features, num_classes)
        elif self.model_name == 'inception_v3':
            self.network = torchvision.models.inception_v3(weights=Inception_V3_Weights.IMAGENET1K_V1)
            self.network.fc = nn.Linear(self.network.fc.in_features, num_classes)

    def _param_device(self):
        """Device the model's parameters currently live on."""
        return next(self.parameters()).device

    def forward(self, xb):
        """Return the class logits for a batch of images."""
        out = self.network(xb)
        # inception_v3 in train mode returns a (logits, aux_logits) named tuple,
        # which F.cross_entropy cannot consume; keep only the main logits.
        if isinstance(out, tuple):
            out = out[0]
        return out

    def training_step(self, batch):
        """Compute the cross-entropy loss for one `(images, labels)` batch."""
        # Follow the model's own device instead of a notebook-global `device`.
        target = self._param_device()
        images, labels = batch[0].to(target), batch[1].to(target)
        out = self(images)
        loss = F.cross_entropy(out, labels)
        return loss

    @torch.no_grad()
    def valid_step(self, batch):
        """Return `{'val_loss': float, 'val_acc': float}` for one batch, without tracking gradients."""
        target = self._param_device()
        images, labels = batch[0].to(target), batch[1].to(target)
        out = self(images)                    # Generate predictions
        loss = F.cross_entropy(out, labels)   # Calculate loss
        acc = accuracy(out, labels)           # Calculate accuracy
        return {'val_loss': loss.item(), 'val_acc': acc}


def train_model(model, num_epochs = 10, train_loader=None, val_loader=None, optimizer=None):
    """Train `model` for `num_epochs`, validating after every epoch.

    Args:
        model: Module exposing `training_step(batch) -> loss tensor` and
            `valid_step(batch) -> {'val_loss': ..., 'val_acc': ...}`.
        num_epochs: Number of passes over `train_loader`.
        train_loader: Iterable of training batches (required).
        val_loader: Iterable of validation batches (required).
        optimizer: Optimizer stepping the model's parameters. When omitted,
            the notebook-level variable `optim` is used so existing calls
            keep working.

    Returns:
        `(history, model)`, where `history` holds one dict per epoch with the
        mean 'val_loss', 'val_acc' and 'train_loss'.

    Raises:
        ValueError: if a loader is missing, or no optimizer is given and no
            global `optim` exists.
    """
    if train_loader is None or val_loader is None:
        raise ValueError("train_loader and val_loader are both required")
    if optimizer is None:
        try:
            optimizer = optim  # legacy: optimizer created as a notebook global
        except NameError:
            raise ValueError("pass optimizer=... (no global `optim` is defined)") from None

    history = []
    for epoch in range(num_epochs):  # Training Phase
        model.train()
        train_losses = []
        for batch in train_loader:
            loss = model.training_step(batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_losses.append(loss.item())

        # Validation phase
        model.eval()
        outputs = [model.valid_step(batch) for batch in val_loader]
        result = {
            'val_loss': np.mean([out['val_loss'] for out in outputs]),
            'val_acc': np.mean([out['val_acc'] for out in outputs]),
            'train_loss': np.mean(train_losses)
        }
        print(f"[Epoch {epoch+1}] train_loss: {result['train_loss']:.4f}, val_loss: {result['val_loss']:.4f}, val_acc: {result['val_acc']:.4f}")
        history.append(result)

    return history, model

def save_model(model_name='resnet18', model=None):
    """Write the model's state dict to "<model_name>.pth", then load that file back into the model."""
    checkpoint_path = f"{model_name}.pth"
    torch.save(model.state_dict(), checkpoint_path)
    restored_state = torch.load(checkpoint_path)
    model.load_state_dict(restored_state)

def plot_accuracies(history, model_name='resnet18'):
    """Plot validation accuracy per epoch on a figure of its own.

    Args:
        history: List of per-epoch dicts, each with a 'val_acc' entry.
        model_name: Used as the figure title.
    """
    accuracies = [x['val_acc'] for x in history]
    # Start a new figure: drawing on the current axes would overlay this curve
    # on whatever was plotted last (e.g. a previous model's curves).
    plt.figure()
    plt.plot(accuracies, '-')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.title(model_name)


def plot_losses(history, model_name='resnet18'):
    """Plot training and validation loss per epoch on a figure of its own.

    Args:
        history: List of per-epoch dicts with 'val_loss' and (optionally)
            'train_loss' entries.
        model_name: Used as the figure title.
    """
    train_losses = [x.get('train_loss') for x in history]
    val_losses = [x['val_loss'] for x in history]
    # New figure so the loss curves do not share axes with an earlier plot.
    plt.figure()
    # Label each line explicitly: a positional legend list is matched to the
    # axes' lines in drawing order, so a line already on the axes would take
    # the 'Train loss' label.
    plt.plot(train_losses, '-b', label='Train loss')
    plt.plot(val_losses, '-r', label='Val loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend()
    plt.title(model_name)

import gc

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Hold-out sizes are fixed; the training split takes whatever remains, so the
# split keeps working if images are added (3683 images -> 2800 / 500 / 383).
VAL_SIZE = 500
TEST_SIZE = 383
batch_size = 64
# NOTE(review): the recorded run of this cell died with "CUDA error: out of
# memory" in the DataLoader pin-memory thread. If that recurs, lower
# batch_size or set pin_memory=False below.

# models_to_train = ['resnet152', 'efficientnet_b0', 'inception_v3']
models_to_train = ['resnet152']
for model_name in models_to_train:
    # Drop the previous model/optimizer (from the last iteration or an earlier
    # run of this cell) before building the next one, so both are never held
    # in GPU memory at the same time.
    model = optim = None
    gc.collect()
    torch.cuda.empty_cache()

    print(f"Training Model: {model_name}")
    transformations = dataset_setup(model_name=model_name)
    dataset = ImageFolder(data_dir, transform = transformations)
    print(f"dataset size: {len(dataset)}")

    train_size = len(dataset) - VAL_SIZE - TEST_SIZE
    if train_size <= 0:
        raise ValueError(
            f"dataset has {len(dataset)} images, too few for a {VAL_SIZE}/{TEST_SIZE} val/test hold-out"
        )
    train_ds, val_ds, test_ds = random_split(dataset, [train_size, VAL_SIZE, TEST_SIZE])

    # Pinned host memory only helps host->GPU copies; skip it on CPU-only runs.
    use_pinned = device.type == "cuda"
    train_loader = DataLoader(train_ds, batch_size, shuffle=True, num_workers=4, pin_memory=use_pinned)
    val_loader = DataLoader(val_ds, batch_size*2, num_workers=4, pin_memory=use_pinned)

    model = ConvClassifier(model_name=model_name, dataset=dataset).to(device)
    optim = torch.optim.Adam(model.parameters(), 3e-5)
    history, model = train_model(model=model, train_loader=train_loader, val_loader=val_loader)

    plot_accuracies(history, model_name=model_name)
    plot_losses(history, model_name=model_name)

    save_model(model_name=model_name, model=model)

cuda
Training Model: resnet152
dataset size: 3683


RuntimeError: Caught RuntimeError in pin memory thread for device 0.
Original Traceback (most recent call last):
  File "C:\Users\DELL\AppData\Roaming\Python\Python312\site-packages\torch\utils\data\_utils\pin_memory.py", line 41, in do_one_step
    data = pin_memory(data, device)
           ^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\DELL\AppData\Roaming\Python\Python312\site-packages\torch\utils\data\_utils\pin_memory.py", line 98, in pin_memory
    clone[i] = pin_memory(item, device)
               ^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\DELL\AppData\Roaming\Python\Python312\site-packages\torch\utils\data\_utils\pin_memory.py", line 64, in pin_memory
    return data.pin_memory(device)
           ^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.



# Visualizing Predictions:

In [None]:
def predict_image(img, model, class_names=None):
    """Return the predicted class name for a single image tensor.

    Args:
        img: Image tensor without a batch dimension; a batch axis is added
            before the forward pass.
        model: Module mapping an image batch to per-class scores.
        class_names: Sequence mapping class index to label. Defaults to the
            notebook-level `dataset.classes`.

    Returns:
        The label whose score is highest for `img`.
    """
    if class_names is None:
        class_names = dataset.classes

    # Run on whatever device the model lives on rather than a global `device`.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else img.device

    # Inference must not use dropout / batch-statistics or build a graph;
    # the caller's train/eval mode is restored afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            xb = img.unsqueeze(0).to(target)     # Convert to a batch of 1
            yb = model(xb)                       # Raw class scores (logits)
    finally:
        model.train(was_training)

    _, preds = torch.max(yb, dim=1)              # Index of the highest score
    return class_names[preds[0].item()]          # Retrieve the class label

Let us see the model's predictions on the test dataset:

In [None]:
# Show test sample 17 and compare its true label with the model's prediction.
img, label = test_ds[17]
plt.imshow(img.permute(1, 2, 0))  # move the first axis last for imshow
true_name = dataset.classes[label]
predicted_name = predict_image(img, model)
print(f"Label: {true_name} , Predicted: {predicted_name}")

In [None]:
# Show test sample 23 and compare its true label with the model's prediction.
img, label = test_ds[23]
plt.imshow(img.permute(1, 2, 0))  # move the first axis last for imshow
true_name = dataset.classes[label]
predicted_name = predict_image(img, model)
print(f"Label: {true_name} , Predicted: {predicted_name}")

In [None]:
# Show test sample 51 and compare its true label with the model's prediction.
img, label = test_ds[51]
plt.imshow(img.permute(1, 2, 0))  # move the first axis last for imshow
true_name = dataset.classes[label]
predicted_name = predict_image(img, model)
print(f"Label: {true_name} , Predicted: {predicted_name}")

# Predicting External Images:

Let's now test with external images.

I'll use `urllib` for downloading external images.

In [None]:
import urllib.request

# File name -> source URL for every external image the prediction cells open.
# Previously only gameroom.jpg was fetched, so the other cells failed on a
# fresh checkout.
EXTERNAL_IMAGES = {
    "bar.jpg": "https://images.squarespace-cdn.com/content/v1/5a7497e29f8dcee376b70f7e/1591630503059-FBBWAYXPWYOK9BTIBMZY/ke17ZwdGBToddI8pDm48kA_SSaoz4elkj-HsZd8gX3Z7gQa3H78H3Y0txjaiv_0fDoOvxcdMmMKkDsyUqMSsMWxHk725yiiHCCLfrh8O1z5QPOohDIaIeljMHgDF5CVlOqpeNLcJ80NK65_fV7S1UWPwZyNcweDIvdeL5kotwkIXjs9g0WibSO_cU-Ijy4Pwg6poS-6WGGnXqDacZer4yQ/74586587_10157705983079085_1307946016988725248_o+%281%29.jpg?format=2500w",
    "bedroom.jpg": "https://www.bocadolobo.com/en/inspiration-and-ideas/wp-content/uploads/2018/03/Discover-the-Ultimate-Master-Bedroom-Styles-and-Inspirations-6_1.jpg",
    "elevator.jpg": "https://sika.scene7.com/is/image/sika/glo-elevator-appliances?wid=1280&crop=0%2C80%2C4615%2C3212",
    "gameroom.jpg": "https://i.pinimg.com/originals/2b/15/9d/2b159da035e4e3aaa30c03ec8ba7816c.jpg",
    "inside_bus.jpg": "https://i.pinimg.com/originals/a6/d9/d7/a6d9d743da7017a7bcf4a53e46d22f81.jpg",
    "theatre.jpg": "https://s.wsj.net/public/resources/images/ON-CE927_moviet_B1280_20170714200426.jpg",
}

for file_name, url in EXTERNAL_IMAGES.items():
    if os.path.exists(file_name):
        continue  # already fetched on an earlier run; do not hit the network again
    try:
        urllib.request.urlretrieve(url, file_name)
    except OSError as err:  # URLError / HTTPError are OSError subclasses
        # Report and carry on so one dead link does not block the other images.
        print(f"Could not download {file_name}: {err}")
        if os.path.exists(file_name):
            os.remove(file_name)  # discard a partial download

Let us reuse the model trained above (nothing is read from disk here). You can assign an externally trained model to `loaded_model` instead!

In [None]:
# Alias the in-memory `model`; no checkpoint file is read here.
loaded_model = model

This function takes an image's file name, displays the image, and prints the predicted class:

In [None]:
from PIL import Image
from pathlib import Path

def predict_external_image(image_name):
    """Display an image file from the working directory and print its predicted class.

    Args:
        image_name: File name, resolved relative to './'.

    The image is preprocessed with the notebook-level `transformations` and
    classified by `loaded_model`.
    NOTE(review): the pipelines built by `dataset_setup` include random
    flip/rotation/jitter by default, so if `transformations` is one of those
    the displayed image and the prediction can vary between calls.
    """
    # Force three channels: grayscale, palette or RGBA files would otherwise
    # yield a tensor with 1 or 4 channels, which the 3-channel backbones reject.
    # The context manager closes the file once the pixels are copied.
    with Image.open(Path('./' + image_name)) as image_file:
        image = image_file.convert('RGB')

    example_image = transformations(image)
    plt.imshow(example_image.permute(1, 2, 0))
    print("The image resembles", predict_image(example_image, loaded_model) + ".")

In [None]:
# Expects bar.jpg in the working directory (the function opens './bar.jpg').
predict_external_image('bar.jpg')

In [None]:
# Expects bedroom.jpg in the working directory (the function opens './bedroom.jpg').
predict_external_image('bedroom.jpg')

In [None]:
# Expects elevator.jpg in the working directory (the function opens './elevator.jpg').
predict_external_image('elevator.jpg')

In [None]:
# Expects gameroom.jpg in the working directory (the function opens './gameroom.jpg').
predict_external_image('gameroom.jpg')

In [None]:
# Expects inside_bus.jpg in the working directory (the function opens './inside_bus.jpg').
predict_external_image('inside_bus.jpg')

In [None]:
# Expects theatre.jpg in the working directory (the function opens './theatre.jpg').
predict_external_image('theatre.jpg')

# Conclusion:

Our model is able to classify indoor scenes with **76% accuracy (max)**!

It's great to see the model's predictions on the test set. It works pretty well on external images too!

You can try experimenting with more images and see the results!

### If you liked the kernel, don't forget to show some appreciation :)