In [1]:
import os
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image
import cv2
import supervision as sv
import numpy as np
import matplotlib.pyplot as plt

# Import the SAM Module

In [18]:
from segment_anything import SamPredictor, sam_model_registry

# Load in the current model

In [41]:
sam = sam_model_registry['vit_h'](checkpoint='../weights/sam_vit_h_4b8939.pth')
sam.to(device)
#predictor = SamPredictor(sam)

Sam(
  (image_encoder): ImageEncoderViT(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 1280, kernel_size=(16, 16), stride=(16, 16))
    )
    (blocks): ModuleList(
      (0-31): 32 x Block(
        (norm1): LayerNorm((1280,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=1280, out_features=3840, bias=True)
          (proj): Linear(in_features=1280, out_features=1280, bias=True)
        )
        (norm2): LayerNorm((1280,), eps=1e-06, elementwise_affine=True)
        (mlp): MLPBlock(
          (lin1): Linear(in_features=1280, out_features=5120, bias=True)
          (lin2): Linear(in_features=5120, out_features=1280, bias=True)
          (act): GELU(approximate='none')
        )
      )
    )
    (neck): Sequential(
      (0): Conv2d(1280, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): LayerNorm2d()
      (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (3): LayerNorm2d

# Define dataset with masks

In [33]:
class GisTrainingDataset(torch.utils.data.Dataset):
    def __init__(self, root_dir, mask_dir, transform=None):
        self.root_dir = root_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.images = os.listdir(self.root_dir)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

            img_name = os.path.join(self.root_dir, self.images[idx])
            mask_name = os.path.join(self.mask_dir, self.images[idx])

            image = Image.open(img_name)
            mask = Image.open(mask_name)

            if self.transform:
                image = self.transform(image)
                mask = self.transform(mask)

            return image, mask

# Define Transformers (like an image preprocessing pipeline)

In [34]:
transform = transforms.Compose([
    transforms.Resize((286, 286)), # this may be unnecessary, but resizes images to 286x286 pixels
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # Need to replace with mean and STD of all our org photos
])

# Load custom dataset

In [56]:
import IPython
from pathlib import Path
import sys
#NBK_DIR = IPython.extract_module_locals()[1]["_dh"][0]
ORIGINAL_PATH = '/Users/ryantenbarge/code/sstollunderwood/solar_potential_map/data_for_ml/original'
MASK_PATH = '/Users/ryantenbarge/code/sstollunderwood/solar_potential_map/data_for_ml/masked'

In [57]:
dataset = GisTrainingDataset(root_dir=ORIGINAL_PATH,
                             mask_dir=MASK_PATH, transform=transform)

In [53]:
next(iter(dataset))

In [37]:
# Create a DataLoader
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

In [49]:
next(iter(dataloader))

TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'NoneType'>

In [42]:
# Finetuning the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#predictor.to(device)
sam.train()

Sam(
  (image_encoder): ImageEncoderViT(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 1280, kernel_size=(16, 16), stride=(16, 16))
    )
    (blocks): ModuleList(
      (0-31): 32 x Block(
        (norm1): LayerNorm((1280,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=1280, out_features=3840, bias=True)
          (proj): Linear(in_features=1280, out_features=1280, bias=True)
        )
        (norm2): LayerNorm((1280,), eps=1e-06, elementwise_affine=True)
        (mlp): MLPBlock(
          (lin1): Linear(in_features=1280, out_features=5120, bias=True)
          (lin2): Linear(in_features=5120, out_features=1280, bias=True)
          (act): GELU(approximate='none')
        )
      )
    )
    (neck): Sequential(
      (0): Conv2d(1280, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): LayerNorm2d()
      (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (3): LayerNorm2d

Define loss function and optimizer

In [43]:
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(sam.parameters(), lr=0.01, momentum=0.9)

In [48]:
type(dataloader)
print(dataloader)

<torch.utils.data.dataloader.DataLoader object at 0x357af1a50>


In [45]:
for epoch in range(1):
    running_loss = 0.0
    for i, data in enumerate(dataloader, 0):
        inputs, masks = data
        inputs = inputs.to(device)
        masks = masks.to(device)

        optimizer.zero_grad()

        outputs = sam(inputs)
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 2000 == 1999: #print every 2000 mini-batches
            print(f'Loss: {running_loss/2000}')
            running_loss = 0.0
print('Finished Training')

TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'NoneType'>

In [46]:
sam(inputs)

NameError: name 'inputs' is not defined