In [7]:
# !pip install datasets evaluate torch torchvision 
import os
from tqdm import tqdm
from datasets import load_dataset
import torch 
from torch.utils.data import DataLoader
from torchvision import transforms
import numpy as np
from matplotlib import pyplot as plt
from coco_hf_datasets import (
    expand_gray_channel, 
    download_gcs_data, 
    coco_hf_dataset_disk
)

# Alternative data source (disabled): load the dataset through
# `datasets.load_dataset` instead of reading it from disk.
# The access token must be supplied via the environment; never paste it into
# the notebook, where it ends up in version control and in rendered outputs.
# ds = load_dataset(
#     "CVdatasets/CocoSegmentationOnlyVal5000",
#     use_auth_token=os.environ["HF_TOKEN"],
# )

'ds = load_dataset(\n    "CVdatasets/CocoSegmentationOnlyVal5000",\n    use_auth_token="<REDACTED>"\n)'

In [8]:
# Where the dataset lives on disk. According to the original note, the files can
# instead be downloaded from the public GCS bucket and saved to disk with:
#     dataset_path, img_path, mask_path = download_gcs_data()
dataset_path, img_path, mask_path = (
    "../../../CV_datasets/",
    "COCO_seg_val_5000/all_images",
    "COCO_seg_val_5000/all_masks",
)

# Side length used by both Resize transforms, and the number of classes.
IMG_SIZE, NC = 128, 21

# Image pipeline: convert to a tensor, bicubic resize to IMG_SIZE x IMG_SIZE,
# apply expand_gray_channel() (presumably widens grayscale inputs to 3 channels
# -- confirm in coco_hf_datasets), then normalize per channel.
img_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(
        size=(IMG_SIZE, IMG_SIZE),
        interpolation=transforms.InterpolationMode.BICUBIC,
    ),
    expand_gray_channel(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Mask pipeline: convert the PIL mask to a tensor and resize it with
# nearest-neighbour interpolation.
mask_transforms = transforms.Compose([
    transforms.PILToTensor(),
    transforms.Resize(
        size=(IMG_SIZE, IMG_SIZE),
        interpolation=transforms.InterpolationMode.NEAREST,
    ),
])

# Build the on-disk dataset and keep only its first four samples.
coco_dataset = torch.utils.data.Subset(
    coco_hf_dataset_disk(
        dataset_path=dataset_path,
        relative_img_path=img_path,
        relative_mask_path=mask_path,
        img_transform=img_transforms,
        mask_transform=mask_transforms,
        size=IMG_SIZE,
    ),
    range(4),
)

Found dataset, there are 4030 images and 4030 masks


In [9]:

# Run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# DeepLabV3 / ResNet-50 loaded through torch.hub with pretrained weights, then
# moved to the selected device.
model = torch.hub.load(
    'pytorch/vision:v0.10.0',
    'deeplabv3_resnet50',
    pretrained=True,
)
model = model.to(device)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-5)

# Disabled alternative that builds the dataset from the Hugging Face split:
# coco_hf = coco_hf_dataset(ds['train'], mask_transform=mask_transforms, img_transform=img_transforms, size=IMG_SIZE)
train_loader = DataLoader(
    dataset=coco_dataset,
    batch_size=2,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

Using cache found in /Users/derek/.cache/torch/hub/pytorch_vision_v0.10.0


In [10]:
# Import dataquality, preferring the installed package and falling back to a
# source checkout located next to this repository. Only ImportError triggers
# the fallback, so a genuinely broken installation is not silently masked.
try:
    import dataquality as dq
except ImportError:
    import sys
    sys.path.append("../../../dataquality/")
    import dataquality as dq

# Galileo connection settings are read from the environment so that no
# credentials are stored in the notebook. The dev console is only a default:
# to target another console (e.g. a local one at http://localhost:8088), set
# GALILEO_CONSOLE_URL before starting the kernel.
os.environ.setdefault("GALILEO_CONSOLE_URL", "https://console.dev.rungalileo.io/")

# Ask interactively for anything that was not provided via the environment.
if not os.environ.get("GALILEO_USERNAME"):
    os.environ["GALILEO_USERNAME"] = input("Galileo username: ")
if not os.environ.get("GALILEO_PASSWORD"):
    from getpass import getpass  # hides the typed password from the cell output
    os.environ["GALILEO_PASSWORD"] = getpass("Galileo password: ")

dq.configure()

# Task type, project name and run name for this logging session.
dq.init("semantic_segmentation", "Derek-Elliott-Proj", 'polygon_dep')
# Class name -> class index. The position of each name in the tuple is its
# index, starting with 'background' at 0.
class_dict = {
    label: index
    for index, label in enumerate((
        'background',
        'airplane',
        'bicycle',
        'bird',
        'boat',
        'bottle',
        'bus',
        'car',
        'cat',
        'chair',
        'cow',
        'dining table',
        'dog',
        'horse',
        'motorcycle',
        'person',
        'potted plant',
        'sheep',
        'couch',
        'train',
        'tv',
    ))
}
# Inverse lookup: class index -> class name.
reverse_class_dict = dict(zip(class_dict.values(), class_dict.keys()))
# Hand the label names to dataquality ordered by class index: index 0 is the
# background, followed by one entry per object class up to NC - 1.
dq.set_labels_for_run(
    [reverse_class_dict[class_index] for class_index in range(NC)]
)



📡 https://console.dev.rungalileo.io
🔭 Logging you into Galileo

🚀 You're logged in to Galileo as galileo@rungalileo.io!
✨ Initializing existing public project 'Derek-Elliott-Proj'
🏃‍♂️ Fetching existing run 'polygon_dep'




🛰 Connected to existing project 'Derek-Elliott-Proj', and existing run 'polygon_dep'.


In [11]:
from dataquality.integrations.cv.torch.semantic_segmentation import watch

# Register the model and its dataloaders with dataquality before training.
# The same loader is passed for both the training and the validation split.
watch(
    model,
    bucket_name='https://storage.googleapis.com/galileo-public-data',
    dataset_path=dataset_path,
    dataloaders={"training": train_loader, "validation": train_loader}
)
epochs = 1
# Smoke-test cap: each epoch stops after this many batches.
max_batches_per_epoch = 2

# Mixed precision is only switched on when the model actually runs on CUDA.
# On CPU both autocast and the gradient scaler are disabled, which turns the
# scaler calls below into a plain backward() / optimizer.step().
use_amp = device.type == 'cuda'
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

for epoch in range(epochs):
    dq.set_epoch_and_split(epoch, "training")
    for j, sample in enumerate(tqdm(train_loader)):
        imgs, masks = sample['image'], sample['mask']

        # Autocast wraps only the forward pass and the loss computation; the
        # backward pass and optimizer step run outside of it, as recommended
        # by the PyTorch AMP documentation.
        with torch.autocast(device_type=device.type, enabled=use_amp):
            out = model(imgs.to(device))

            # Reshape to one row of class scores per pixel:
            # (bs * h * w, num_classes). The class count is taken from the
            # model output instead of being hardcoded.
            logits = out['out']
            pred = logits.permute(0, 2, 3, 1).contiguous().view(-1, logits.shape[1])
            masks = masks.long()
            msks_for_loss = masks.view(-1).to(device)

            loss = criterion(pred, msks_for_loss)

        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Checked at the end of the iteration so that no extra batch is
        # fetched from the loader once the cap is reached.
        if j + 1 >= max_batches_per_epoch: break

We assume the dataloaders passed only have transforms that Tensor, Resize,         and Normalize the image and mask
‼ Any cropping or shearing transforms passed will lead to unexpected         results
See docs at https://dq.readthedocs.io/en/latest/ (placeholder) for more info         
 

Found layer classifier in model layers: backbone, classifier


 50%|█████     | 1/2 [00:02<00:02,  2.93s/it]


In [12]:
# Close out the dataquality run. Judging by the output recorded for this cell,
# the call uploads the logged samples and waits for the processing job to end.
dq.finish()

Logging 2 samples [########################################] 100.00% elapsed time  :     0.22s =  0.0m =  0.0h
Logging 2 samples [########################################] 100.00% elapsed time  :     0.14s =  0.0m =  0.0h
Logging 2 samples [########################################] 100.00% elapsed time  :     0.19s =  0.0m =  0.0h
Logging 2 samples [########################################] 100.00% elapsed time  :     0.14s =  0.0m =  0.0h    
 ☁️ Uploading Data
CuML libraries not found, running standard process. For faster Galileo processing, consider installing
`pip install 'dataquality[cuda]' --extra-index-url=https://pypi.nvidia.com/`


Uploading data to Galileo:   0%|          | 0.00/25.8k [00:00<?, ?B/s]

Processing data for upload:   0%|          | 0/2 [00:00<?, ?it/s]

Uploading data to Galileo:   0%|          | 0.00/20.1k [00:00<?, ?B/s]

Uploading data to Galileo:   0%|          | 0.00/25.8k [00:00<?, ?B/s]

Processing data for upload:   0%|          | 0/2 [00:00<?, ?it/s]

Uploading data to Galileo:   0%|          | 0.00/20.1k [00:00<?, ?B/s]

Job default successfully submitted. Results will be available soon at https://console.dev.rungalileo.io/insights?projectId=7e78e642-cc40-4f5c-8f45-b2cadd8d674a&runId=4bc175ca-60c3-4cf4-b6ef-91013db1d7fa&split=training&metric=f1&depHigh=1&depLow=0&taskType=6
Waiting for job (you can safely close this window)...
	Uploading processed validation data
Done! Job finished with status completed
Click here to see your run! https://console.dev.rungalileo.io/insights?projectId=7e78e642-cc40-4f5c-8f45-b2cadd8d674a&runId=4bc175ca-60c3-4cf4-b6ef-91013db1d7fa&split=training&metric=f1&depHigh=1&depLow=0&taskType=6
🧹 Cleaning up
🧹 Cleaning up


'https://console.dev.rungalileo.io/insights?projectId=7e78e642-cc40-4f5c-8f45-b2cadd8d674a&runId=4bc175ca-60c3-4cf4-b6ef-91013db1d7fa&split=training&metric=f1&depHigh=1&depLow=0&taskType=6'

In [None]:
# Presumably undoes the earlier watch(model, ...) call so the model no longer
# reports to dataquality -- TODO confirm.
# NOTE(review): `watch` was imported from
# dataquality.integrations.cv.torch.semantic_segmentation, whereas `unwatch`
# comes from dataquality.integrations.torch; verify that this is the matching
# teardown for the semantic-segmentation integration.
from dataquality.integrations.torch import unwatch
unwatch(model)