In [1]:
import os
import subprocess
import sys
import torch
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import cv2
import zipfile
from torchvision import models
import torchvision.transforms as transforms
import torchvision
from torch.utils.data import Dataset, DataLoader
import json


# ----------------------------------------------------------------------------
# Environment check.
# When the COLAB_GPU variable is present the notebook is running on Google
# Colab, where the extra packages must be pip-installed before the imports
# that follow this section can succeed.
# ----------------------------------------------------------------------------
print("check environment ")

if os.environ.get('COLAB_GPU') is not None:
    print("I'm running on Colab")

    def install(package):
        """Pip-install ``package`` using the interpreter that runs this kernel.

        Raises:
            subprocess.CalledProcessError: if pip exits with a non-zero status.
        """
        pip_command = [sys.executable, "-m", "pip", "install", package]
        subprocess.check_call(pip_command)

    print("Cuda available", torch.cuda.is_available())
    print("installing required packages ...")
    # Installed one by one, in this order, exactly as before.
    for required_package in (
        "segmentation-models-pytorch",
        "lightning",
        "wandb",
        "onnx",
        "albumentations",
    ):
        install(required_package)
    del required_package  # do not leave the loop variable in the notebook namespace

import segmentation_models_pytorch as smp
import albumentations as A
import albumentations.augmentations.functional as F
from albumentations.pytorch import ToTensorV2

import warnings
warnings.filterwarnings('ignore')

import pytorch_lightning as pl
from lightning.pytorch.utilities.model_summary import ModelSummary
from pytorch_lightning.loggers import WandbLogger

import wandb




##############################################################################
print("Manage dataset")
##############################################################################

def load_data_to_tmp(folder_name, tmp_dir='/tmp'):
    """Extract the zip archive ``<tmp_dir>/<folder_name>`` into ``tmp_dir``.

    Args:
        folder_name (str): File name of the archive located inside ``tmp_dir``
            (for example ``'Train.zip'``).
        tmp_dir (str): Directory that holds the archive and receives the
            extracted files. Defaults to ``'/tmp'``.

    Raises:
        FileNotFoundError: if the archive does not exist.
        zipfile.BadZipFile: if the file is not a valid zip archive.
    """
    archive_path = os.path.join(tmp_dir, folder_name)
    # The context manager closes the archive even when extraction fails.
    with zipfile.ZipFile(archive_path, 'r') as zip_ref:
        zip_ref.extractall(tmp_dir)


if 'COLAB_GPU' in os.environ:

    from google.colab import drive
    drive.mount('/content/drive')

    # `run` stays a module-level name because later cells may refer to it.
    run = wandb.init(project="Card_detection")
    try:
        print("dowload data from the wandb to temp")
        artifact = run.use_artifact('team-invonto/Card_detection/Dataset_July2023_train213_val31:v0', type='compressed_480x320')
        artifact.download('/tmp/')
        load_data_to_tmp('Train.zip')
        load_data_to_tmp('Validation.zip')
    finally:
        # Always close the wandb run, even if the download or extraction fails.
        wandb.finish()
    data_path = '/tmp/'

else:
    # The local dataset location can be overridden with the CARD_DATASET_DIR
    # environment variable; the previous hard-coded path is the default.
    data_path = os.environ.get(
        'CARD_DATASET_DIR',
        '/home/tatyana/Work_Invonto/Dataset/Card_Dataset/Dataset_ready_July2023',
    )
    print('running local')

check environment 
I'm running on Colab
Cuda available True
installing required packages ...
Manage dataset
Mounted at /content/drive


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


dowload data from the wandb to temp


[34m[1mwandb[0m: Downloading large artifact Dataset_July2023_train213_val31:v0, 73.86MB. 2 files... 
[34m[1mwandb[0m:   2 of 2 files downloaded.  
Done. 0:0:1.5


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

In [21]:
##############################################################################
#### Card Dataset

class CardDataset(Dataset):
    """Dataset of card images and their segmentation masks.

    Each row of the annotation CSV provides a ``filename`` column (the image;
    its extension is replaced by ``.png`` before loading) and a ``mask``
    column (the mask image). Both are resolved relative to ``root_dir``.

    ``dataset[i]`` returns the tuple ``(image, mask)``:
        ``dataset[i][0]`` - image, converted from BGR to RGB
        ``dataset[i][1]`` - corresponding mask as float32, with 255 mapped to 1.0
    """

    def __init__(self, csv_file, root_dir, transform=None, augmentation=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images and masks.
            transform (callable, optional): Called as
                ``transform(image=..., mask=...)`` and expected to return a
                mapping with ``'image'`` and ``'mask'`` entries.
            augmentation (optional): Stored on the instance; not used by this
                class itself.
        """
        self.nail_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.augmentation = augmentation

    def __len__(self):
        """Return the number of annotated samples (rows in the CSV)."""
        return self.nail_frame.shape[0]

    @staticmethod
    def _image_path(root_dir, filename):
        """Return the ``.png`` image path for ``filename`` inside ``root_dir``.

        Only the final extension is replaced, so dots elsewhere in the path
        (``./data``, ``data.v1/``, ``card.v2.jpg``) are left intact.
        """
        stem, _ = os.path.splitext(os.path.join(root_dir, filename))
        return stem + '.png'

    def __getitem__(self, idx):
        """Load and return the ``(image, mask)`` pair at row ``idx``.

        Raises:
            FileNotFoundError: if the image or the mask cannot be read.
        """
        row = self.nail_frame.iloc[idx]

        # read image
        img_name = self._image_path(self.root_dir, row["filename"])
        image = cv2.imread(img_name)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image file: {img_name}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # read mask
        mask_name = os.path.join(self.root_dir, row["mask"])
        mask = cv2.imread(mask_name, cv2.IMREAD_UNCHANGED)
        if mask is None:
            raise FileNotFoundError(f"Could not read mask file: {mask_name}")

        # required for albumentation: float mask with the foreground value 255 mapped to 1
        mask = mask.astype(np.float32)
        mask[mask == 255] = 1.0

        if self.transform:
            sample = self.transform(image=image, mask=mask)
            image = sample['image']
            # add a leading channel axis to the transformed mask
            mask = sample['mask'].unsqueeze(0)

        return image, mask

# Training pipeline: random geometric and colour augmentation, then
# normalisation with the mean/std given below and conversion to a tensor.
train_transform = A.Compose([
    A.ShiftScaleRotate(
        shift_limit=0.2,
        scale_limit=0.2,
        rotate_limit=30,
        p=0.5,
        border_mode=cv2.BORDER_CONSTANT,
    ),
    A.RGBShift(
        r_shift_limit=25,
        g_shift_limit=25,
        b_shift_limit=25,
        p=0.5,
    ),
    A.RandomBrightnessContrast(
        brightness_limit=0.3,
        contrast_limit=0.3,
        p=0.5,
    ),
    A.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    ),
    ToTensorV2(),
])


# Validation pipeline: no random augmentation, only the same normalisation
# and tensor conversion as used for training.
validation_transform = A.Compose([
    A.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    ),
    ToTensorV2(),
])


##############################################################################
#### Card Model

class CardModel(pl.LightningModule):
    """Lightning module for binary card segmentation.

    Wraps a ``segmentation_models_pytorch`` model with a single output class,
    trains it with a Dice loss computed on logits, and logs the per-image IoU
    at the end of every training and validation epoch.
    """

    def __init__(self,arch,encoder_name,encoder_weights,learning_rate,name_optimizer,frozen_encoder=True):
        """
        Args:
            arch: Architecture name passed to ``smp.create_model``.
            encoder_name: Encoder (backbone) name passed to ``smp.create_model``.
            encoder_weights: Pretrained weights identifier for the encoder.
            learning_rate: Learning rate handed to the optimizer.
            name_optimizer: ``'Adam'`` or ``'Sgd'`` (matched case-insensitively).
            frozen_encoder: When truthy, every encoder parameter gets
                ``requires_grad = False``.
        """
        super().__init__()
        self.lr = learning_rate
        self.optimizer_name = name_optimizer
        self.save_hyperparameters()
        # Per-batch confusion-matrix statistics, aggregated and cleared at epoch end.
        self.training_step_outputs = []
        self.validation_step_outputs = []

        self.model = smp.create_model(
            arch,
            encoder_name=encoder_name,
            encoder_weights=encoder_weights,
            classes=1,
        )

        # for image segmentation dice loss could be the best first choice;
        # from_logits=True because the model is created without an activation
        self.loss_fn = smp.losses.DiceLoss(smp.losses.BINARY_MODE, from_logits=True)

        if frozen_encoder:
            # parameters() is recursive and also covers parameters registered
            # directly on the encoder, which iterating over children() would skip.
            for param in self.model.encoder.parameters():
                param.requires_grad = False

    def forward(self, image):
        """Return the raw (logit) mask predicted for ``image``."""
        mask = self.model(image)
        return mask

    def _shared_step(self, batch):
        """Validate a batch, run the model and compute loss and pixel statistics.

        Args:
            batch: Sequence whose first item is the image tensor and whose
                second item is the mask tensor.

        Returns:
            tuple: ``(loss, stats)`` where ``stats`` is a dict holding the
            ``tp``, ``fp``, ``fn`` and ``tn`` tensors from
            ``smp.metrics.get_stats``.
        """
        image = batch[0]
        # assert image (batch_size,num_channels,height,width)
        assert image.ndim == 4
        # assert the image dim is suitable for Unet
        h, w = image.shape[2:]
        assert h % 32 == 0 and w % 32 == 0

        mask = batch[1]
        assert mask.ndim == 4
        assert mask.max() <= 1 and mask.min() >= 0  # check that the mask between [0,1] not in range [0,255]

        # calculate loss on the logits
        output_mask = self.forward(image)
        loss = self.loss_fn(output_mask, mask)

        # threshold the probabilities to get a hard prediction for the metrics
        pred_mask = output_mask.sigmoid() >= 0.5
        tp, fp, fn, tn = smp.metrics.get_stats(pred_mask.long(), mask.long(), mode="binary")

        return loss, {"tp": tp, "fp": fp, "fn": fn, "tn": tn}

    @staticmethod
    def _per_image_iou(outputs):
        """Concatenate the per-batch statistics and return the mean per-image IoU."""
        tp = torch.cat([x["tp"] for x in outputs])
        fp = torch.cat([x["fp"] for x in outputs])
        fn = torch.cat([x["fn"] for x in outputs])
        tn = torch.cat([x["tn"] for x in outputs])
        return smp.metrics.iou_score(tp, fp, fn, tn, reduction="micro-imagewise")  # mean (per image)

    def training_step(self, batch, batch_idx):
        """Run one training batch, log ``train_loss`` and return the loss."""
        loss, stats = self._shared_step(batch)
        self.training_step_outputs.append(stats)
        # logs metrics for each training_step,
        # and the average across the epoch, to the progress bar and logger
        self.log("train_loss", loss, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def on_train_epoch_end(self):
        """Log ``train_per_image_iou`` for the finished epoch and reset the buffer."""
        outputs = self.training_step_outputs
        if not outputs:
            # torch.cat raises on an empty list; nothing to aggregate.
            return

        metrics = {
            "train_per_image_iou": self._per_image_iou(outputs),
        }

        self.log_dict(metrics, prog_bar=True)
        self.training_step_outputs.clear()

    def validation_step(self, batch, batch_idx):
        """Run one validation batch, log ``valid_loss`` and return the loss."""
        loss, stats = self._shared_step(batch)
        self.validation_step_outputs.append({"loss": loss, **stats})
        # logs metrics for each validation_step,
        # and the average across the epoch, to the progress bar and logger
        self.log("valid_loss", loss, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def on_validation_epoch_end(self):
        """Log ``validation_per_image_iou`` for the finished epoch and reset the buffer."""
        outputs = self.validation_step_outputs
        if not outputs:
            # torch.cat raises on an empty list; nothing to aggregate.
            return

        metrics = {
            "validation_per_image_iou": self._per_image_iou(outputs),
        }

        self.log_dict(metrics, prog_bar=True)
        self.validation_step_outputs.clear()

    def configure_optimizers(self):
        """Build the optimizer selected by ``optimizer_name``.

        Returns:
            torch.optim.Optimizer: Adam, or SGD with momentum 0.9.

        Raises:
            ValueError: if no learning rate is set or the optimizer name is
                not supported.
        """
        # Fall back to a `learning_rate` attribute only if one was set on the
        # module; __init__ itself only sets `lr`.
        # NOTE(review): presumably kept for tooling that writes `learning_rate` - confirm.
        lr = self.lr or getattr(self, "learning_rate", None)
        if not lr:
            raise ValueError("A non-zero learning rate is required to configure the optimizer")

        optimizer_key = str(self.optimizer_name).lower()
        if optimizer_key == 'adam':
            return torch.optim.Adam(self.parameters(), lr=lr)
        if optimizer_key == 'sgd':
            return torch.optim.SGD(self.parameters(), lr=lr, momentum=0.9)
        # Fail loudly instead of returning None for an unknown name.
        raise ValueError(
            f"Unsupported optimizer {self.optimizer_name!r}; expected 'Adam' or 'Sgd'"
        )


In [26]:
def train_run(config=None, data_dir='/tmp/', output_dir="/content/drive/MyDrive/Dataset/"):
    """Train one ``CardModel`` for a single wandb (sweep) run and export it to ONNX.

    The function starts its own wandb run, reads ``encoder`` and
    ``learning_rate`` from the run config, trains on the ``Train`` split,
    validates on the ``Validation`` split, and writes
    ``CardModel_<run id>.onnx`` into ``output_dir``.

    Args:
        config: Optional config passed to ``wandb.init``; under ``wandb.agent``
            the function is called without arguments.
        data_dir (str): Directory containing the ``Train`` and ``Validation``
            folders. Defaults to ``'/tmp/'``.
        output_dir (str): Directory used as the Trainer root dir and for the
            exported ONNX file.
    """
    n_epoch = 20
    optimiser = 'Adam'
    batch_size = 32
    frozen_encoder_flag = True

    # Get hyperparameters from the run configs.
    # Keep the run object returned here: its id names the exported model, so
    # each sweep trial writes its own file.
    run = wandb.init(config=config)
    config = wandb.config

    ### Load Data
    # os.cpu_count() may return None; fall back to loading in the main process.
    n_cpu = os.cpu_count() or 0

    # train
    x_train_path = os.path.join(data_dir, 'Train')
    #y_train_dataloader=os.path.join(x_train_path,'Train_DataLoader.csv')
    y_train_dataloader = os.path.join(x_train_path, 'Train_DataLoader_sample96.csv')
    train_dataset = CardDataset(y_train_dataloader, x_train_path, transform=train_transform, augmentation=None)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True, num_workers=n_cpu)

    # validation
    x_valid_path = os.path.join(data_dir, 'Validation')
    y_valid_dataloader = os.path.join(x_valid_path, 'Validation_DataLoader.csv')
    valid_dataset = CardDataset(y_valid_dataloader, x_valid_path, transform=validation_transform, augmentation=None)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=n_cpu)

    ### Define Model
    card_model = CardModel("Unet", config.encoder, "imagenet", config.learning_rate, optimiser, frozen_encoder=frozen_encoder_flag)

    wandb_logger = WandbLogger(project="Card_detection")
    trainer = pl.Trainer(max_epochs=n_epoch,
                         logger=wandb_logger,
                         log_every_n_steps=1,
                         default_root_dir=output_dir)
    trainer.fit(
        model=card_model,
        train_dataloaders=train_loader,
        val_dataloaders=valid_loader)

    ### Export the trained model
    model_name = os.path.join(output_dir, "CardModel_" + run.id + '.onnx')
    # Put the model on the CPU so it sits on the same device as the dummy
    # input, regardless of where the trainer left it.
    card_model.to('cpu')
    torch.onnx.export(card_model,
                      torch.randn(1, 3, 480, 320).to('cpu'),
                      model_name,
                      input_names=['input'],    # the model's input names
                      output_names=['output'])
    wandb.save(model_name)

In [29]:
# wandb sweep definition: random search that maximises the per-image
# validation IoU logged by the model as "validation_per_image_iou".
sweep_config = {
    "method": "random",
    "metric": {"name": "validation_per_image_iou", "goal": "maximize"},
    "parameters": {
        # Candidate encoders: a categorical parameter is a dict with a
        # "values" list.
        "encoder": {
            "values": [
                'resnet34',
                'timm-resnest26d',
                'timm-resnest50d',
                'timm-res2net50_26w_4s',
                'timm-regnetx_032',
                'timm-gernet_s',
                'se_resnet50',
                'timm-skresnet18',
                'densenet169',
                'xception',
                'efficientnet-b3',
                'timm-mobilenetv3_large_100',
                'dpn68',
                'mit_b1',
                'mobileone_s0',
            ]
        },
        # NOTE(review): a uniform draw over [1e-5, 0.1] rarely samples small
        # learning rates; consider a log-uniform distribution - confirm intent.
        "learning_rate": {"distribution": "uniform", "min": 1e-5, "max": 0.1},
    },
}

# Register the sweep in the "Card_detection" project, then let a local agent
# execute train_run for 30 runs before closing any run that is still open.
sweep_id = wandb.sweep(sweep=sweep_config, project="Card_detection")
wandb.agent(sweep_id, function=train_run, count=30)
wandb.finish()

Error in callback <function _WandbInit._resume_backend at 0x7c91d3fcdd80> (for pre_run_cell):


BrokenPipeError: ignored



Create sweep with ID: xmeyeqxe
Sweep URL: https://wandb.ai/team-invonto/Card_detection/sweeps/xmeyeqxe


[34m[1mwandb[0m: Agent Starting Run: 40v9ix71 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.07690522745751438
[34m[1mwandb[0m: 	optimizer: Sgd
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/wandb/sdk/wandb_init.py", line 1143, in init
    wi.setup(kwargs)
  File "/usr/local/lib/python3.10/dist-packages/wandb/sdk/wandb_init.py", line 208, in setup
    with telemetry.context(obj=self._init_telemetry_obj) as tel:
  File "/usr/local/lib/python3.10/dist-packages/wandb/sdk/lib/telemetry.py", line 42, in __exit__
    self._run._telemetry_callback(self._obj)
  File "/usr/local/lib/python3.10/dist-packages/wandb/sdk/wandb_run.py", line 713, in _telemetry_callback
    self._telemetry_flush()
  File "/usr/local/lib/python3.10/dist-packages/wandb/sdk/wandb_run.py", line 724, in _telemetry_flush
    self._backend.interface._publish_telemetry(self._telemetry_obj)
  File "/usr/local/lib/python3.10/dist-packages/wandb/s

Error in callback <function _WandbInit._pause_backend at 0x7c91d42c0e50> (for post_run_cell):


BrokenPipeError: ignored

In [30]:
# Finish the wandb run once more; the sweep cell above already ends with the
# same call, so this cell is only a manual clean-up step.
wandb.finish()

Error in callback <function _WandbInit._resume_backend at 0x7c91d3fcdd80> (for pre_run_cell):


BrokenPipeError: ignored

BrokenPipeError: ignored

Error in callback <function _WandbInit._pause_backend at 0x7c91d42c0e50> (for post_run_cell):


BrokenPipeError: ignored