In [1]:
# !pip install datasets evaluate torch torchvision 
import os
from tqdm import tqdm
from datasets import load_dataset
import torch 
from torch.utils.data import DataLoader
from torchvision import transforms
import numpy as np
from matplotlib import pyplot as plt
from coco_hf_dataset import (
    coco_hf_dataset_hf,
    expand_gray_channel, 
    download_gcs_data, 
    coco_hf_dataset_disk
)

# Alternative data source (currently disabled): load the dataset from the
# Hugging Face Hub instead of the public GCS bucket used below.
#
# SECURITY: the access token must come from the environment. Never paste it into
# the notebook -- a bare string literal is echoed as the cell output and is saved
# with the file. Any token that was previously committed here should be revoked.
#
# ds = load_dataset(
#     "CVdatasets/CocoSegmentationOnlyVal5000",
#     use_auth_token=os.environ["HF_TOKEN"],
# )

In [2]:
# Notebook-wide settings.
IMG_SIZE = 128
NC = 21  # Number of classes

# Pull the data from our public GCS bucket and store it on disk.
dataset_path, img_path, mask_path = download_gcs_data()

# Both pipelines resize to the same square target.
_resize_to = (IMG_SIZE, IMG_SIZE)

# Image pipeline: to tensor -> bicubic resize -> expand_gray_channel -> per-channel normalize.
_img_steps = [
    transforms.ToTensor(),
    transforms.Resize(_resize_to, interpolation=transforms.InterpolationMode.BICUBIC),
    expand_gray_channel(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
img_transforms = transforms.Compose(_img_steps)

# Mask pipeline: PIL -> tensor, then nearest-neighbour resize so no new values
# are interpolated into the mask.
_mask_steps = [
    transforms.PILToTensor(),
    transforms.Resize(_resize_to, interpolation=transforms.InterpolationMode.NEAREST),
]
mask_transforms = transforms.Compose(_mask_steps)

# NOTE(review): the `dataset_path` returned by download_gcs_data() is not what is
# passed here; the literal '../../../' is used instead -- confirm this is intended.
coco_dataset = coco_hf_dataset_disk(
    dataset_path='../../../',
    relative_img_path=img_path,
    relative_mask_path=mask_path,
    mask_transform=mask_transforms,
    img_transform=img_transforms,
    size=IMG_SIZE,
)

Found dataset in /Users/derek/Desktop
There are 4031 images and 4031 masks
Skipping download...
Found dataset, there are 4030 images and 4030 masks


In [3]:

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Pretrained DeepLabV3 / ResNet-50 fetched through torch.hub, moved to `device`.
model = torch.hub.load(
    'pytorch/vision:v0.10.0',
    'deeplabv3_resnet50',
    pretrained=True,
)
model = model.to(device)

# Loss and optimizer used by the training cell.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

# Disabled alternative that wraps the Hugging Face split instead of the on-disk data:
# coco_hf = coco_hf_dataset(ds['train'], mask_transform=mask_transforms, img_transform=img_transforms, size=IMG_SIZE)
train_loader = DataLoader(
    coco_dataset,
    batch_size=2,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

Using cache found in /Users/derek/.cache/torch/hub/pytorch_vision_v0.10.0


In [4]:
# Make the local dataquality checkout importable when the package is not installed.
# Only a failed import triggers the fallback; any other error (including
# KeyboardInterrupt) propagates instead of being swallowed by a bare `except:`.
try:
    import dataquality as dq
except ImportError:
    import sys
    sys.path.append("../../../dataquality/")

from getpass import getpass

os.environ['GALILEO_CONSOLE_URL']="http://localhost:8088"
os.environ["GALILEO_USERNAME"]="user@example.com"
# SECURITY: never hard-code the password in the notebook. Take it from the
# environment, or prompt for it without echoing when it is not already set.
if not os.environ.get("GALILEO_PASSWORD"):
    os.environ["GALILEO_PASSWORD"] = getpass("Galileo password: ")

import dataquality as dq
dq.configure()

dq.init("semantic_segmentation", "Derek-Elliott-Proj", "Pls-Work")
# NOTE(review): NC is defined as 21 and the training cell reshapes the logits to
# 21 channels, yet range(NC + 1) registers 22 labels -- confirm which is intended.
dq.set_labels_for_run([str(i) for i in range(NC + 1)]) # 0 background, plus each class



📡 http://localhost:8088
🔭 Logging you into Galileo

🚀 You're logged in to Galileo as user@example.com!
✨ Initializing existing public project 'Derek-Elliott-Proj'
🏃‍♂️ Fetching existing run 'Pls-Work'
🛰 Connected to existing project 'Derek-Elliott-Proj', and existing run 'Pls-Work'.




In [5]:
from dataquality.integrations.cv.torch.semantic_segmentation import watch

# Attach dataquality to the model and the dataloader(s).
# NOTE(review): the same loader is passed twice -- confirm whether a second
# (e.g. validation) loader was meant here.
watch(model,
      bucket_name='galileo-public-data',
      dataset_path='../../../',
      dataloaders=[train_loader, train_loader])

epochs = 1
# Debug cap: stop each epoch after this many batches. The notebook currently runs
# a single batch as a smoke test; set to None to iterate over the full loader.
max_batches_per_epoch = 1

# Mixed precision only makes sense on CUDA. On CPU both autocast and the
# GradScaler are disabled and behave as transparent no-ops.
use_amp = device.type == 'cuda'
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

# Training loops normally run in train mode; nothing in this notebook calls
# model.eval(), so this only makes that explicit.
model.train()

for epoch in range(epochs):
    dq.set_epoch_and_split(epoch, "training")
    for j, sample in enumerate(tqdm(train_loader)):
        imgs = sample['image'].to(device)
        masks = sample['mask'].long().to(device)

        # Autocast wraps only the forward pass and the loss computation; the
        # backward pass and optimizer step run outside of it.
        with torch.autocast(device_type=device.type, enabled=use_amp):
            out = model(imgs)
            logits = out['out']

            # Reshape to one row of class scores per pixel: (bs * h * w, num_classes).
            # The class count comes from the model output rather than a literal 21.
            pred = logits.permute(0, 2, 3, 1).reshape(-1, logits.shape[1])
            msks_for_loss = masks.reshape(-1)

            loss = criterion(pred, msks_for_loss)

        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        if max_batches_per_epoch is not None and j + 1 >= max_batches_per_epoch:
            break

We assume the dataloaders passed only have transforms that Tensor, Resize, and Normalize the image and mask
‼ Any cropping or shearing transforms passed will lead to unexpected results
See docs at https://dq.readthedocs.io/en/latest/ (placeholder) for more info 
 

Attaching dataquality to model and dataloaders
Found layer classifier in model layers: backbone, classifier


  0%|          | 0/2015 [00:00<?, ?it/s]

Mask column name is mask


  iou = total_area_intersect / total_area_union
  acc = total_area_intersect / total_area_label


> [0;32m/Users/derek/Desktop/dataquality/dataquality/utils/semantic_segmentation/contours.py[0m(86)[0;36mupload_contours[0;34m()[0m
[0;32m     84 [0;31m        [0mint_misclassified_obj[0m [0;34m=[0m [0;34m[[0m[0mint[0m[0;34m([0m[0mobj[0m[0;34m)[0m [0;32mfor[0m [0mobj[0m [0;32min[0m [0mmisclassified_obj[0m[0;34m.[0m[0msplit[0m[0;34m([0m[0;34m','[0m[0;34m)[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     85 [0;31m    [0;32mimport[0m [0mpdb[0m[0;34m;[0m [0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 86 [0;31m    [0;32mfor[0m [0mobj[0m [0;32min[0m [0mint_misclassified_obj[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     87 [0;31m        [0mpred_polygon_map[0m[0;34m[[0m[0mobj[0m[0;34m][0m[0;34m[[0m[0;34m'error_type'[0m[0;34m][0m [0;34m=[0m [0;34m'misclassified'[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     88 [0;31m[0;34m[0m[0m
[0m
[0]
> 

  0%|          | 0/2015 [00:29<?, ?it/s]


In [6]:
# Finish the dataquality run that was started with dq.init above.
# NOTE(review): in the saved output this call raised GalileoException while
# logging model outputs (AttributeError: 'list' object has no attribute 'split');
# per that message, dq.init must be called again before retrying.
dq.finish()

Running dataquality on dataloader:  training


GalileoException: An issue occurred while logging model outputs. Address any issues in your logging and make sure to call dq.init before restarting:
AttributeError("'list' object has no attribute 'split'")

In [None]:
# Detach dataquality from the model.
# NOTE(review): `watch` was imported from
# dataquality.integrations.cv.torch.semantic_segmentation, while `unwatch` comes
# from dataquality.integrations.torch -- confirm this is the matching counterpart.
from dataquality.integrations.torch import unwatch
unwatch(model)

In [None]:
import json

# NOTE(review): machine-specific absolute path -- point this at your own copy of
# the exported JSON file before running the cell.
EXPORT_JSON_PATH = "/Users/derek/Downloads/1 (1).json"

# Read the file as UTF-8 explicitly instead of relying on the platform's
# locale-dependent default encoding.
with open(EXPORT_JSON_PATH, "r", encoding="utf-8") as f:
    data = json.load(f)

# Displays the entire parsed structure; slice it (e.g. data[:1]) if the output
# is too large to be useful.
data

[{'id': 0,
  'label_int': 15,
  'error_type': 'none',
  'polygon': [[[[[125, 1]],
     [[124, 2]],
     [[124, 4]],
     [[123, 5]],
     [[123, 6]],
     [[122, 7]],
     [[122, 10]],
     [[121, 11]],
     [[121, 12]],
     [[120, 13]],
     [[117, 13]],
     [[116, 12]],
     [[114, 12]],
     [[114, 14]],
     [[116, 16]],
     [[117, 16]],
     [[118, 17]],
     [[117, 18]],
     [[116, 18]],
     [[117, 19]],
     [[117, 22]],
     [[119, 22]],
     [[120, 23]],
     [[121, 23]],
     [[122, 22]],
     [[123, 22]],
     [[124, 21]],
     [[124, 20]],
     [[127, 17]],
     [[127, 9]],
     [[127, 10]],
     [[126, 11]],
     [[126, 13]],
     [[125, 14]],
     [[125, 15]],
     [[124, 16]],
     [[124, 17]],
     [[123, 18]],
     [[122, 18]],
     [[121, 17]],
     [[121, 16]],
     [[122, 15]],
     [[122, 14]],
     [[123, 13]],
     [[123, 11]],
     [[124, 10]],
     [[124, 9]],
     [[125, 8]],
     [[125, 6]],
     [[126, 5]],
     [[126, 3]],
     [[127, 2]],
     [[127, 