In [3]:
import torch
import os
from transformers import AutoImageProcessor, AutoModel
from PIL import Image
import requests
import json
import numpy as np
from torch.utils.data import Dataset
from torchvision import transforms
import albumentations as A
from torch.utils.data import DataLoader

In [21]:
# Create dataloaders

class TreeDataset(Dataset):
    def __init__(self, root_dir, transform, zones=['Z1', 'Z2'], class_map_path='./data/classes.json'):
        with open(class_map_path, 'r') as f:
            self.class_map = {v:int(k) for k, v in json.load(f).items()}
        
        self.root_dir = root_dir
        self.classes = [d for d in os.listdir(root_dir)]
        self.image_files = []
        self.transform = transform
        for c in self.classes:
            for img in os.listdir(os.path.join(root_dir, c)):
                if any(z for z in zones if z in img):
                    self.image_files.append((os.path.join(root_dir, c, img), self.class_map[c]))
        self.toTensor = transforms.ToTensor()

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path, c = self.image_files[idx]
        img_np = np.array(Image.open(img_path))[:,:,:3]
        transformed = self.transform(image=img_np)['image']
        return self.toTensor(transformed), torch.tensor(c)

# These are the mean/std I took from the complete tiff of Z1
ADE_MEAN = np.array([51.61087416176021, 70.54108897685563, 43.65073194868197]) / 255
ADE_STD = np.array([66.21302035582556, 82.09431586857384, 54.93294965405881]) / 255

train_transform = A.Compose([
    A.Resize(width=224, height=224),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.Normalize(mean=ADE_MEAN, std=ADE_STD),
])

val_transform = A.Compose([
    A.Resize(width=224, height=224),
    A.Normalize(mean=ADE_MEAN, std=ADE_STD),
])

train_dataset = TreeDataset('./data/tree_classification_with_background', train_transform)
val_dataset = TreeDataset('./data/tree_classification_with_background', val_transform, zones=['Z3'])

train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=8, shuffle=False)



# Pick this up tomorrow: https://github.com/NielsRogge/Transformers-Tutorials/blob/master/DINOv2/Train_a_linear_classifier_on_top_of_DINOv2_for_semantic_segmentation.ipynb


18288

In [3]:
url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
image = Image.open(requests.get(url, stream=True).raw)

processor = AutoImageProcessor.from_pretrained('facebook/dinov2-base')
model = AutoModel.from_pretrained('facebook/dinov2-base')

inputs = processor(images=image, return_tensors="pt")
outputs = model(**inputs)
last_hidden_states = outputs[0]

# We have to force return_dict=False for tracing
model.config.return_dict = False

with torch.no_grad():
    traced_model = torch.jit.trace(model, [inputs.pixel_values])
    traced_outputs = traced_model(inputs.pixel_values)

print((last_hidden_states - traced_outputs[0]).abs().max())

tensor(0.0001, grad_fn=<MaxBackward1>)
