In [1]:
!git clone https://github.com/vinvino02/GLPDepth.git

## Then change the working directory to GLPDepth/code so that the `models.mit` import in the model cell below can be resolved.

In [2]:
import os
import cv2
import numpy as np
from collections import OrderedDict
from transformers import GLPNFeatureExtractor
from torchvision import transforms


import torch
from PIL import Image
import matplotlib.pyplot as plt
import glob
from tqdm import tqdm

In [3]:
import torch
import torch.nn as nn

from mmcv.runner import load_checkpoint
from models.mit import mit_b4

class GLPDepth(nn.Module):
    """Depth network made of a ``mit_b4`` encoder, a ``Decoder`` and a conv head.

    Args:
        max_depth: Scale applied to the sigmoid output of the head, so the
            predicted depth lies in ``(0, max_depth)``.
        is_train: When true, the encoder is initialised from the pre-trained
            ``mit_b4.pth`` checkpoint; the file is downloaded first if it
            cannot be loaded from disk.
    """

    # Where the pre-trained encoder weights live, and the Google Drive file id
    # they are fetched from when that file cannot be loaded.
    _ENCODER_CKPT_PATH = './code/models/weights/mit_b4.pth'
    _ENCODER_CKPT_GDRIVE_ID = '1BUtU42moYrOFbsMCE-LTTkUE-mrWnfG2'

    def __init__(self, max_depth=10.0, is_train=False):
        super().__init__()
        self.max_depth = max_depth

        self.encoder = mit_b4()
        if is_train:
            self._load_pretrained_encoder()

        # Channel counts handed to the decoder's three 1x1 projections.
        channels_in = [512, 320, 128]
        channels_out = 64

        self.decoder = Decoder(channels_in, channels_out)

        self.last_layer_depth = nn.Sequential(
            nn.Conv2d(channels_out, channels_out, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=False),
            nn.Conv2d(channels_out, 1, kernel_size=3, stride=1, padding=1))

    def _load_pretrained_encoder(self):
        """Load the pre-trained encoder weights, downloading them if needed."""
        ckpt_path = self._ENCODER_CKPT_PATH
        try:
            load_checkpoint(self.encoder, ckpt_path, logger=None)
        except Exception:
            # `Exception` rather than a bare `except` so Ctrl-C still interrupts.
            import gdown
            print("Download pre-trained encoder weights...")
            url = 'https://drive.google.com/uc?id=' + self._ENCODER_CKPT_GDRIVE_ID
            os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)
            gdown.download(url, ckpt_path, quiet=False)
            # The download alone does not touch the model: load the fetched
            # weights, otherwise the encoder silently stays randomly initialised.
            load_checkpoint(self.encoder, ckpt_path, logger=None)

    def forward(self, x):
        """Predict depth for ``x``.

        Args:
            x: Input forwarded to the encoder (presumably an image batch of
                shape ``(B, 3, H, W)`` -- TODO confirm against ``mit_b4``).

        Returns:
            dict: ``{'pred_d': depth}`` where ``depth`` is the sigmoid of the
            head output multiplied by ``max_depth``.
        """
        conv1, conv2, conv3, conv4 = self.encoder(x)
        out, _ = self.decoder(conv1, conv2, conv3, conv4)
        out_depth = self.last_layer_depth(out)
        out_depth = torch.sigmoid(out_depth) * self.max_depth

        return {'pred_d': out_depth}

    def extract_embeddings(self, x):
        """Return the decoder embedding for ``x`` without applying the depth head.

        Args:
            x: Input forwarded to the encoder, as in :meth:`forward`.

        Returns:
            The second value returned by the decoder (its feature map).
        """
        conv1, conv2, conv3, conv4 = self.encoder(x)
        _, embedding = self.decoder(conv1, conv2, conv3, conv4)

        return embedding
    #####
    
        


class Decoder(nn.Module):
    """Top-down decoder that fuses four encoder feature maps into one map.

    Args:
        in_channels: Indexable with at least three entries; entries 0, 1 and 2
            are the channel counts of ``x_4``, ``x_3`` and ``x_2``.
        out_channels: Channel count produced by every 1x1 projection and used
            by the three fusion modules.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        # 1x1 projections bringing x_4, x_3 and x_2 to a common width.
        # x_1 is fused as-is, without a projection.
        ch_x4, ch_x3, ch_x2 = in_channels[0], in_channels[1], in_channels[2]
        self.bot_conv = nn.Conv2d(ch_x4, out_channels, kernel_size=1)
        self.skip_conv1 = nn.Conv2d(ch_x3, out_channels, kernel_size=1)
        self.skip_conv2 = nn.Conv2d(ch_x2, out_channels, kernel_size=1)

        # Shared bilinear upsampler: doubles height and width on every call.
        self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)

        self.fusion1 = SelectiveFeatureFusion(out_channels)
        self.fusion2 = SelectiveFeatureFusion(out_channels)
        self.fusion3 = SelectiveFeatureFusion(out_channels)

    def forward(self, x_1, x_2, x_3, x_4):
        """Fuse the feature maps starting from ``x_4`` and ending with ``x_1``.

        Returns:
            tuple: ``(out, embedding)``; both entries are the same tensor,
            taken after the final upsampling.
        """
        # Start from x_4, then alternate "fuse with the next skip" and
        # "upsample by 2".
        fused = self.up(self.bot_conv(x_4))
        fused = self.up(self.fusion1(self.skip_conv1(x_3), fused))
        fused = self.up(self.fusion2(self.skip_conv2(x_2), fused))
        fused = self.up(self.fusion3(x_1, fused))

        # One extra doubling after the last fusion; the embedding is read here.
        fused = self.up(fused)

        return fused, fused


class SelectiveFeatureFusion(nn.Module):
    """Blend two feature maps with per-pixel sigmoid gates.

    The two inputs are concatenated along the channel axis and reduced by
    three 3x3 convolutions to two gate channels; channel 0 weights
    ``x_local`` and channel 1 weights ``x_global``.

    Args:
        in_channel: Channel count of each of the two inputs.
    """

    def __init__(self, in_channel=64):
        super().__init__()

        doubled = int(in_channel * 2)
        halved = int(in_channel / 2)

        # 2C -> C
        self.conv1 = nn.Sequential(
            nn.Conv2d(doubled, in_channel, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(in_channel),
            nn.ReLU(),
        )

        # C -> C/2
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channel, halved, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(halved),
            nn.ReLU(),
        )

        # C/2 -> 2 gate channels (one per input)
        self.conv3 = nn.Conv2d(halved, 2, kernel_size=3, stride=1, padding=1)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x_local, x_global):
        """Return ``x_local * gate_0 + x_global * gate_1``."""
        stacked = torch.cat((x_local, x_global), dim=1)
        gates = self.sigmoid(self.conv3(self.conv2(self.conv1(stacked))))

        # Slicing with 0:1 / 1:2 keeps the channel axis so the gates broadcast
        # over the channels of each input.
        local_gate = gates[:, 0:1, :, :]
        global_gate = gates[:, 1:2, :, :]

        return x_local * local_gate + x_global * global_gate




In [4]:
# Configuration for the pre-trained NYU model.
max_depth = 10
# NOTE: despite the name, this is the path of the checkpoint *file*.
ckpt_dir = '/home3/fsml62/LLM_and_SGG_for_MDE/MDE/GLPDepth/code/best_nyu_preds/best_model_nyu.ckpt'

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using GPU:", torch.cuda.get_device_name(0))
else:
    device = torch.device("cpu")
    print("Using CPU")

print("\n1. Define Model")
model = GLPDepth(max_depth=max_depth, is_train=False).to(device)

# map_location remaps the stored tensors onto `device`; without it, loading a
# checkpoint that was saved from a GPU fails in the CPU fallback branch.
model_weight = torch.load(ckpt_dir, map_location=device)

# Checkpoints saved from a DataParallel-wrapped model prefix every key with
# "module."; strip exactly that prefix, and only from keys that carry it.
dp_prefix = 'module.'
if any(key.startswith(dp_prefix) for key in model_weight):
    model_weight = OrderedDict(
        (key[len(dp_prefix):] if key.startswith(dp_prefix) else key, value)
        for key, value in model_weight.items())

model.load_state_dict(model_weight)
# eval() returns the model, so the cell still displays its structure.
model.eval()

Using GPU: NVIDIA A100 80GB PCIe MIG 1g.10gb

1. Define Model


GLPDepth(
  (encoder): mit_b4(
    (patch_embed1): OverlapPatchEmbed(
      (proj): Conv2d(3, 64, kernel_size=(7, 7), stride=(4, 4), padding=(3, 3))
      (norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    )
    (patch_embed2): OverlapPatchEmbed(
      (proj): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    )
    (patch_embed3): OverlapPatchEmbed(
      (proj): Conv2d(128, 320, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (norm): LayerNorm((320,), eps=1e-05, elementwise_affine=True)
    )
    (patch_embed4): OverlapPatchEmbed(
      (proj): Conv2d(320, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    )
    (block1): ModuleList(
      (0): Block(
        (norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (q): Linear(in_features=64, out_features=64

In [5]:
# Function to extract embeddings
# feature_extractor = GLPNFeatureExtractor.from_pretrained("vinvino02/glpn-nyu")

def extract_embeddings(image_path):
    """Compute the decoder embedding of one image file with the global ``model``.

    Args:
        image_path: Path of an image readable by PIL.

    Returns:
        Whatever ``model.extract_embeddings`` returns for the single-image
        batch, computed with gradients disabled.
    """
    # ToTensor is the only preprocessing applied: no resize, no normalisation.
    to_tensor = transforms.ToTensor()

    rgb_image = Image.open(image_path).convert('RGB')
    # unsqueeze(0) adds the batch axis; the batch is moved to the notebook-level device.
    batch = to_tensor(rgb_image).unsqueeze(0).to(device)

    with torch.no_grad():
        return model.extract_embeddings(batch)

In [7]:
# Input dataset root and the two output roots (embeddings and depth maps).
data_path = "/home3/fsml62/LLM_and_SGG_for_MDE/dataset/nyu_depth_v2"
save_path = "/home3/fsml62/LLM_and_SGG_for_MDE/GNN_for_MDE/results/depth_embedding/nyu_depth_v2"
save_map_path = "/home3/fsml62/LLM_and_SGG_for_MDE/GNN_for_MDE/results/depth_map/nyu_depth_v2"

# Make sure both output directories are present and report what was done.
for out_dir in (save_path, save_map_path):
    if os.path.exists(out_dir):
        print(f"Directory already exists: {out_dir}")
    else:
        print(f"Creating directory: {out_dir}")
        os.makedirs(out_dir, exist_ok=True)

Directory already exists: /home3/fsml62/LLM_and_SGG_for_MDE/GNN_for_MDE/results/depth_embedding/nyu_depth_v2
Directory already exists: /home3/fsml62/LLM_and_SGG_for_MDE/GNN_for_MDE/results/depth_map/nyu_depth_v2


In [8]:
# Collect every .jpg below data_path (recursive); sorted so that the
# processing order is the same on every run.
jpg_files = sorted(glob.glob(os.path.join(data_path, '**', '*.jpg'), recursive=True))

# Ensure the save path exists
os.makedirs(save_path, exist_ok=True)

for img_path in tqdm(jpg_files):
    # Mirror the dataset's directory layout below save_path.
    relative_path = os.path.relpath(img_path, data_path)
    # splitext removes only the final extension; splitting on the first '.'
    # would truncate any path that has a dot in a directory or file name.
    stored_path = os.path.join(save_path, os.path.splitext(relative_path)[0] + '.pt')

    os.makedirs(os.path.dirname(stored_path), exist_ok=True)

    # extract_embeddings already disables gradients internally.
    pred = extract_embeddings(img_path)
    # NOTE(review): `pred` is saved on whatever device the model ran on; files
    # written from a GPU need `map_location` to be loaded on a CPU-only machine.
    torch.save(pred, stored_path)


100%|██████████| 1449/1449 [12:41<00:00,  1.90it/s]


In [66]:
# Sanity check: reload one embedding written by the export loop and look at its shape.
# The path is built from save_path so the cell reads a file this notebook actually
# produced, rather than one left behind by a run that used a different output directory.
sample_path = os.path.join(save_path, 'official_splits', 'train', 'printer_room', 'rgb_00448.pt')
p = torch.load(sample_path)
p.shape

torch.Size([1, 64, 480, 640])