In [1]:
import os
import pandas as pd
from PIL import Image
import torch
from torchvision import transforms
from torch.utils.data import Dataset

In [2]:
class NYUDepthDataset(Dataset):
    """Paired RGB image / depth map dataset driven by a CSV index.

    Rows are read positionally: column 0 holds the RGB image path and
    column 1 holds the path of the matching depth map.

    Args:
        csv_file: Path of the CSV index, loaded with ``pd.read_csv``.
        transform: Optional callable that receives the
            ``{"image": ..., "depth": ...}`` sample dict; whatever it returns
            is returned by ``__getitem__``.
    """

    def __init__(self, csv_file, transform=None):
        # NOTE(review): with its default arguments read_csv treats the first
        # row as a header. If the index file has no header line, the first
        # sample is silently dropped -- confirm against the actual CSV.
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load the sample at row ``idx``.

        Args:
            idx: Row position; a tensor index is converted with ``tolist()``.

        Returns:
            A dict with an RGB image under ``"image"`` and an 8-bit grayscale
            depth map under ``"depth"``, or the result of ``transform`` on
            that dict when a transform was supplied.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_path = self.data.iloc[idx, 0]
        depth_path = self.data.iloc[idx, 1]

        # Scope each file with ``with`` so the underlying handle is closed as
        # soon as the pixel data has been copied out by ``convert``; otherwise
        # handles stay open until garbage collection.
        with Image.open(img_path) as rgb_file:
            image = rgb_file.convert('RGB')
        # NOTE(review): 'L' is 8 bits per pixel. If some depth files are
        # stored at a higher bit depth this conversion loses range, which
        # could make the two splits inconsistent -- confirm the file formats.
        with Image.open(depth_path) as depth_file:
            depth = depth_file.convert('L')

        sample = { "image": image, "depth": depth }

        if self.transform:
            sample = self.transform(sample)

        return sample

In [3]:
class NYUDepthTransform:
    """Callable that converts an ``{"image", "depth"}`` sample to tensors.

    The RGB image is resized, passed through ``ToTensor`` and normalized with
    fixed per-channel mean/std values; the depth map is only resized and
    passed through ``ToTensor``.

    Args:
        img_size: Target size handed to ``transforms.Resize`` for both inputs.
    """

    def __init__(self, img_size=(224,224)):
        # NOTE(review): these constants look like the usual ImageNet channel
        # statistics -- confirm that is the intent.
        rgb_steps = [
            transforms.Resize(img_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
        ]
        depth_steps = [
            transforms.Resize(img_size),
            transforms.ToTensor(),
        ]
        self.img_transform = transforms.Compose(rgb_steps)
        self.depth_transform = transforms.Compose(depth_steps)

    def __call__(self, sample):
        """Apply the per-key pipelines and return a new two-key dict."""
        pipelines = (
            ("image", self.img_transform),
            ("depth", self.depth_transform),
        )
        return {key: pipeline(sample[key]) for key, pipeline in pipelines}

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

In [5]:
# CSV index files for the two splits (paths are relative to the working dir).
train_csv_path = "data/nyu2_train.csv"
test_csv_path = "data/nyu2_test.csv"

# Training split: batches of 1024, reshuffled on every pass.
train_dataset = NYUDepthDataset(train_csv_path, NYUDepthTransform())
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=1024)

# Test split: same preprocessing and batch size, iterated in file order.
test_dataset = NYUDepthDataset(test_csv_path, NYUDepthTransform())
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=1024)

In [6]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"using device: {device}")

using device: cuda


In [7]:
class DepthEstimationModel(nn.Module):
    """Small convolutional encoder/decoder mapping RGB to one output channel.

    The encoder stacks three ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2)``
    stages (3 -> 64 -> 128 -> 256 channels); every pooling step halves the
    spatial size. The decoder stacks three
    ``ConvTranspose2d(4x4, stride=2, padding=1) -> ReLU`` stages
    (256 -> 128 -> 64 -> 1 channels); every transposed convolution doubles the
    spatial size, so a 224x224 input yields a 224x224 output. The trailing
    ReLU keeps the output non-negative.
    """

    def __init__(self):
        super().__init__()

        # Layers are created in the same order and land on the same
        # Sequential indices as a hand-written list, so state_dict keys
        # (encoder.0, encoder.3, ... decoder.4) are unchanged.
        down_layers = []
        for in_ch, out_ch in ((3, 64), (64, 128), (128, 256)):
            down_layers.append(nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1))
            down_layers.append(nn.ReLU())
            down_layers.append(nn.MaxPool2d(2))  # halves height and width
        self.encoder = nn.Sequential(*down_layers)

        up_layers = []
        for in_ch, out_ch in ((256, 128), (128, 64), (64, 1)):
            # kernel 4 / stride 2 / padding 1 doubles height and width
            up_layers.append(
                nn.ConvTranspose2d(in_ch, out_ch, kernel_size=4, stride=2, padding=1)
            )
            up_layers.append(nn.ReLU())
        self.decoder = nn.Sequential(*up_layers)

    def forward(self, x):
        """Encode ``x`` and decode it back to the input resolution."""
        features = self.encoder(x)
        return self.decoder(features)

In [8]:
# Training setup: MSE objective, Adam at lr=1e-4, fixed budget of 5 epochs.
# NOTE(review): no RNG seed is set in the visible cells, so weight
# initialisation and shuffling differ between runs -- consider seeding.
criterion = nn.MSELoss()
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"using device: {device}")
model = DepthEstimationModel().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
num_epochs = 5

for epoch in range(num_epochs):
    print(f"epoch {epoch+1}/{num_epochs} - starting")

    # ---- optimisation pass over the training split ----
    model.train()
    train_loss = 0.0
    train_seen = 0
    for batch in train_loader:
        images = batch["image"].to(device)
        depths = batch["depth"].to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, depths)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so a
        # smaller final batch does not count as much as a full one.
        n_batch = images.size(0)
        train_loss += loss.item() * n_batch
        train_seen += n_batch

    # Per-sample mean over the epoch (not a mean of per-batch means).
    train_loss /= train_seen
    print(f"epoch {epoch+1}/{num_epochs}, loss: {train_loss:.4f}")

    # ---- evaluation pass over the test split (no gradients) ----
    model.eval()
    test_loss = 0.0
    test_seen = 0
    with torch.no_grad():
        for batch in test_loader:
            images = batch["image"].to(device)
            depths = batch["depth"].to(device)
            outputs = model(images)
            loss = criterion(outputs, depths)

            n_batch = images.size(0)
            test_loss += loss.item() * n_batch
            test_seen += n_batch

    test_loss /= test_seen
    print(f"test loss: {test_loss:.4f}")

# The weights of the final epoch are saved regardless of the test loss.
torch.save(model.state_dict(), "depth_estimation_model_002.pth")
print("model saved")

using device: cuda
epoch 1/5 - starting
epoch 1/5, loss: 0.0241
test loss: 0.5150
epoch 2/5 - starting
epoch 2/5, loss: 0.0208
test loss: 0.4995
epoch 3/5 - starting
epoch 3/5, loss: 0.0197
test loss: 0.5185
epoch 4/5 - starting
epoch 4/5, loss: 0.0190
test loss: 0.5154
epoch 5/5 - starting
epoch 5/5, loss: 0.0188
test loss: 0.5240
model saved
