# R101-FPN PointRend

This notebook prepares and trains the R101-FPN PointRend model on the Fashionpedia dataset.

Open the archives stored on Google Drive, extract them, and check that they were loaded correctly.



In [None]:
import os
import zipfile

from google.colab import drive

drive.mount('/content/drive')

path = r'/content/drive/My Drive/MAI/OR/Fashionpedia/'

# Extract to an absolute location so the archives always land where the
# checks below look for them, regardless of the current working directory
# (a later cell changes it with `%cd`).
extract_root = '/content'

# Base names of the archives on Drive; `<name>.zip` is expected to unpack
# into a `<name>/` folder.
archive_names = ('test', 'val', 'train', 'seg_val', 'seg_train')

for archive_name in archive_names:
    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(path + archive_name + '.zip', 'r') as zip_ref:
        zip_ref.extractall(extract_root)

# Sanity check: image and segmentation-map folders should have matching sizes.
print('Train contains:', len(os.listdir('/content/train/')), 'images')
print('Seg_train contains:', len(os.listdir('/content/seg_train/')), 'images')
print('Validation contains:', len(os.listdir('/content/val/')), 'images')
print('Seg_val contains:', len(os.listdir('/content/seg_val/')), 'images')
print('Test contains:', len(os.listdir('/content/test/')), 'images')

Mounted at /content/drive
Train contains: 45623 images
Seg_train contains: 45623 images
Validation contains: 1158 images
Seg_val contains: 1158 images
Test contains: 2042 images


## Importing and installing all the necessary libraries

In [None]:
# Install PyTorch
!conda install pytorch==1.12.0 torchvision==0.13.0 torchaudio==0.12.0 cudatoolkit=11.3 -c pytorch
# Install mim
!pip install -U openmim
# Install mmengine
!mim install mmengine
# Install MMCV
!mim install 'mmcv >= 2.0.0rc1'
!rm -rf mmsegmentation
!git clone -b main https://github.com/open-mmlab/mmsegmentation.git 
%cd mmsegmentation
!pip install -e .


import torch, torchvision
import mmseg
import mmcv
import mmengine

/bin/bash: conda: command not found
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting openmim
  Downloading openmim-0.3.7-py2.py3-none-any.whl (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.3/51.3 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
Collecting model-index
  Downloading model_index-0.1.11-py3-none-any.whl (34 kB)
Collecting colorama
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Collecting ordered-set
  Downloading ordered_set-4.1.0-py3-none-any.whl (7.6 kB)
Installing collected packages: ordered-set, colorama, model-index, openmim
Successfully installed colorama-0.4.6 model-index-0.1.11 openmim-0.3.7 ordered-set-4.1.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://download.openmmlab.com/mmcv/dist/cu118/torch2.0.0/index.html
Collecting mmengine
  Downloading mmengine-0.7.2-py3-none-any.whl (366 kB)
[2K

In [None]:
# List the GPUs visible to this runtime (one line per device).
!nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-9e2b7377-7d2d-cf05-9929-64eff91eaac7)


### Prepare the model
Download the R101-FPN model and prepare it to receive the Fashionpedia dataset.

The annotations must be provided as semantic segmentation maps, i.e. one image per sample in which each pixel value is the class index.

In [None]:
# Class names and display colours, kept side by side so that the i-th class
# and the i-th colour cannot drift apart. Index 0 is the background.
# NOTE(review): 'sweater' and 'hood' share the colour [48, 150, 194] - confirm
# whether that is intended.
_class_colors = [
    ('Background', [5, 5, 5]),
    ('shirt, blouse', [186, 20, 20]),
    ('top, t-shirt, sweatshirt', [89, 227, 70]),
    ('sweater', [48, 150, 194]),
    ('cardigan', [190, 41, 204]),
    ('jacket', [237, 155, 78]),
    ('vest', [250, 237, 97]),
    ('pants', [24, 76, 199]),
    ('shorts', [28, 91, 117]),
    ('skirt', [40, 158, 77]),
    ('coat', [22, 77, 65]),
    ('dress', [240, 73, 67]),
    ('jumpsuit', [194, 70, 64]),
    ('cape', [108, 181, 212]),
    ('glasses', [233, 238, 240]),
    ('hat', [1, 6, 61]),
    ('headband, hair accessory', [191, 2, 166]),
    ('tie', [2, 191, 59]),
    ('glove', [255, 185, 56]),
    ('watch', [181, 178, 172]),
    ('belt', [173, 113, 2]),
    ('leg warmer', [255, 255, 255]),
    ('tights, stockings', [224, 224, 224]),
    ('sock', [233, 247, 244]),
    ('shoe', [71, 66, 65]),
    ('bag, wallet', [219, 153, 138]),
    ('scarf', [141, 138, 219]),
    ('umbrella', [206, 138, 219]),
    ('hood', [48, 150, 194]),
    ('collar', [233, 250, 45]),
    ('lapel', [242, 224, 172]),
    ('epaulette', [255, 192, 20]),
    ('sleeve', [247, 242, 228]),
    ('pocket', [78, 186, 100]),
    ('neckline', [177, 250, 192]),
    ('buckle', [251, 255, 31]),
    ('zipper', [207, 207, 207]),
    ('applique', [227, 52, 183]),
    ('bead', [255, 139, 71]),
    ('bow', [252, 49, 30]),
    ('flower', [247, 104, 235]),
    ('fringe', [124, 252, 244]),
    ('ribbon', [235, 69, 66]),
    ('rivet', [249, 255, 66]),
    ('ruffle', [252, 255, 163]),
    ('sequin', [217, 217, 212]),
    ('tassel', [210, 212, 152]),
]

# Tuple of class names, in label-index order.
classes = tuple(label for label, _ in _class_colors)
# List of [R, G, B] colours, aligned with `classes`.
palette = [rgb for _, rgb in _class_colors]

from mmseg.registry import DATASETS
from mmseg.datasets import BaseSegDataset

# `force=True` lets this cell be re-run in the same kernel; without it the
# registry rejects the second registration of the same class name.
@DATASETS.register_module(force=True)
class FashionPediaDataset(BaseSegDataset):
  """Fashionpedia dataset for semantic segmentation.

  Images are matched to their segmentation maps by file suffix: an image
  ``<name>.jpg`` is paired with the map ``<name>_seg.png``.

  Args:
    img_suffix: Suffix of the image files. Defaults to ``'.jpg'``.
    seg_map_suffix: Suffix of the segmentation-map files. Defaults to
      ``'_seg.png'``.
    **kwargs: Forwarded unchanged to ``BaseSegDataset``.
  """
  # Class names and colours defined earlier in the notebook.
  METAINFO = dict(classes = classes, palette = palette)

  def __init__(self, img_suffix='.jpg', seg_map_suffix='_seg.png', **kwargs):
    # The suffixes are real parameters (with the previous values as defaults)
    # so that a config supplying them no longer collides with hard-coded
    # keyword arguments.
    super().__init__(img_suffix=img_suffix, seg_map_suffix=seg_map_suffix, **kwargs)

In [None]:
# Download the config and checkpoint files of the PointRend R101 model
# (ADE20K, 512x512) into the current directory (`--dest .`).
!mim download mmsegmentation --config pointrend_r101_4xb4-160k_ade20k-512x512 --dest .

processing pointrend_r101_4xb4-160k_ade20k-512x512...
[2Kdownloading [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.2/183.2 MiB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[?25h[32mSuccessfully downloaded pointrend_r101_512x512_160k_ade20k_20200808_030852-8834902a.pth to /content/mmsegmentation[0m
[32mSuccessfully dumped pointrend_r101_4xb4-160k_ade20k-512x512.py to /content/mmsegmentation[0m


In [None]:
from mmengine import Config

# Base PointRend config from the cloned MMSegmentation repo (path is relative
# to the repo root, which is the current working directory).
base_config_file = 'configs/point_rend/pointrend_r101_4xb4-160k_ade20k-512x512.py'
cfg = Config.fromfile(base_config_file)

# Show the untouched base config before it is adapted to Fashionpedia.
print(f'Config:\n{cfg.pretty_text}')

Config:
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255,
    size=(512, 512))
model = dict(
    type='CascadeEncoderDecoder',
    data_preprocessor=dict(
        type='SegDataPreProcessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_val=0,
        seg_pad_val=255,
        size=(512, 512)),
    num_stages=2,
    pretrained='open-mmlab://resnet101_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 1, 1),
        strides=(1, 2, 2, 2),
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 

In [None]:
# --- Dataset locations (folders extracted under /content) -------------------
data_root = '/content/'
data_prefix_train = dict(img_path='train', seg_map_path='seg_train')
data_prefix_val = dict(img_path='val', seg_map_path='seg_val')
data_prefix_test = dict(img_path='test')

# Set to False to start a fresh run instead of continuing from the checkpoint.
resume_run = True

# --- Model -------------------------------------------------------------------
# Plain BN instead of SyncBN: training here runs on a single GPU.
cfg.norm_cfg = dict(type='BN', requires_grad=True)
cfg.crop_size = (256, 256)
# Keep the top-level preprocessor entry consistent with the one used by the
# model, so the printed config does not show a stale input size.
cfg.data_preprocessor.size = cfg.crop_size
cfg.model.data_preprocessor.size = cfg.crop_size
cfg.model.backbone.norm_cfg = cfg.norm_cfg
# Derive the class count from the class list rather than repeating a literal.
num_classes = len(classes)
cfg.model.decode_head[0].num_classes = num_classes
cfg.model.decode_head[1].num_classes = num_classes

# --- Dataset -----------------------------------------------------------------
# Modify dataset type and path
cfg.dataset_type = 'FashionPediaDataset'
cfg.data_root = data_root

cfg.train_dataloader.batch_size = 32


cfg.train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', scale=cfg.crop_size, keep_ratio=True),
    dict(type='PhotoMetricDistortion'),
    dict(type='RandomCrop', crop_size=cfg.crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PackSegInputs')
]

# Annotations are loaded after `Resize` here, so only the image is resized.
cfg.test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=cfg.crop_size, keep_ratio=True),
    dict(type='LoadAnnotations'),
    dict(type='PackSegInputs')
]


cfg.train_dataloader.dataset.type = cfg.dataset_type
cfg.train_dataloader.dataset.data_root = cfg.data_root
cfg.train_dataloader.dataset.data_prefix = data_prefix_train
cfg.train_dataloader.dataset.pipeline = cfg.train_pipeline

cfg.val_dataloader.dataset.type = cfg.dataset_type
cfg.val_dataloader.dataset.data_root = cfg.data_root
cfg.val_dataloader.dataset.data_prefix = data_prefix_val
cfg.val_dataloader.dataset.pipeline = cfg.test_pipeline

# The test loader reuses the validation split (`data_prefix_test` is not used
# here).
cfg.test_dataloader = cfg.val_dataloader


# --- Run settings ------------------------------------------------------------
# Set up working dir to save files and logs.
cfg.work_dir = './work_dirs/checkpoints'

if resume_run:
  cfg.resume = True
  cfg.load_from = '/content/drive/My Drive/MAI/OR/Fashionpedia/iter_19000.pth' # Resume from the last checkpoint

cfg.train_cfg.max_iters = 30000
cfg.train_cfg.val_interval = 500
cfg.default_hooks.logger.interval = 10
cfg.default_hooks.checkpoint.interval = 500

# The base config is a 160k-iteration recipe. Clamp every iteration-based
# scheduler to the shortened run so the learning rate actually decays by the
# time training stops, instead of following the 160k horizon.
# NOTE(review): when resuming, scheduler state presumably comes from the
# checkpoint, so this mainly affects fresh runs - confirm.
schedulers = cfg.get('param_scheduler', [])
if isinstance(schedulers, dict):
  schedulers = [schedulers]
for scheduler in schedulers:
  if not scheduler.get('by_epoch', True) and scheduler.get('end', 0) > cfg.train_cfg.max_iters:
    scheduler['end'] = cfg.train_cfg.max_iters

# Set seed to facilitate reproducing the result
cfg['randomness'] = dict(seed=0)

# Let's have a look at the final config used for training
print(f'Config:\n{cfg.pretty_text}')

Config:
norm_cfg = dict(type='BN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255,
    size=(512, 512))
model = dict(
    type='CascadeEncoderDecoder',
    data_preprocessor=dict(
        type='SegDataPreProcessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_val=0,
        seg_pad_val=255,
        size=(256, 256)),
    num_stages=2,
    pretrained='open-mmlab://resnet101_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 1, 1),
        strides=(1, 2, 2, 2),
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 20

### Train the model

In [None]:
from mmengine.runner import Runner

# Instantiate the training runner from the customised config `cfg`.
runner = Runner.from_cfg(cfg)

04/21 12:39:55 - mmengine - INFO - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.9.16 (main, Dec  7 2022, 01:11:51) [GCC 9.4.0]
    CUDA available: True
    numpy_random_seed: 0
    GPU 0: Tesla T4
    CUDA_HOME: /usr/local/cuda
    NVCC: Cuda compilation tools, release 11.8, V11.8.89
    GCC: x86_64-linux-gnu-gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0
    PyTorch: 2.0.0+cu118
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201703
  - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.7.3 (Git Hash 6dbeffbae1f23cbbeae17adb7b5b13f1f37c080e)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.8
  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_5



04/21 12:40:03 - mmengine - INFO - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used.
04/21 12:40:03 - mmengine - INFO - Hooks will be executed in the following order:
before_run:
(VERY_HIGH   ) RuntimeInfoHook                    
(BELOW_NORMAL) LoggerHook                         
 -------------------- 
before_train:
(VERY_HIGH   ) RuntimeInfoHook                    
(NORMAL      ) IterTimerHook                      
(VERY_LOW    ) CheckpointHook                     
 -------------------- 
before_train_epoch:
(VERY_HIGH   ) RuntimeInfoHook                    
(NORMAL      ) IterTimerHook                      
(NORMAL      ) DistSamplerSeedHook                
 -------------------- 
before_train_iter:
(VERY_HIGH   ) RuntimeInfoHook                    
(NORMAL      ) IterTimerHook                      
 -------------------- 
after_train_iter:
(VERY_HIGH   ) RuntimeInfoHook                



In [None]:
# start training
# The trailing `;` stops the notebook from echoing the value returned by
# `train()` (the full model repr) underneath the training log.
runner.train();



04/21 12:40:11 - mmengine - INFO - load model from: open-mmlab://resnet101_v1c
04/21 12:40:11 - mmengine - INFO - Loads checkpoint by openmmlab backend from path: open-mmlab://resnet101_v1c


Downloading: "https://download.openmmlab.com/pretrain/third_party/resnet101_v1c-e67eebb6.pth" to /root/.cache/torch/hub/checkpoints/resnet101_v1c-e67eebb6.pth



unexpected key in source state_dict: fc.weight, fc.bias

Loads checkpoint by local backend from path: /content/drive/My Drive/MAI/OR/Fashionpedia/iter_19000.pth
04/21 12:40:26 - mmengine - INFO - Load checkpoint from /content/drive/My Drive/MAI/OR/Fashionpedia/iter_19000.pth
04/21 12:40:26 - mmengine - INFO - resumed epoch: 0, iter: 19000
04/21 12:40:26 - mmengine - INFO - Checkpoints will be saved to /content/mmsegmentation/work_dirs/checkpoints.
04/21 12:41:00 - mmengine - INFO - Iter(train) [19010/30000]  lr: 8.9448e-03  eta: 10:19:25  time: 3.3818  data_time: 0.0914  memory: 11832  loss: 0.3486  decode_0.loss_ce: 0.1289  decode_0.acc_seg: 95.4791  decode_1.pointloss_ce: 0.2196  decode_1.acc_point: 82.8330
04/21 12:41:11 - mmengine - INFO - Iter(train) [19020/30000]  lr: 8.9442e-03  eta: 6:44:31  time: 1.0393  data_time: 0.0542  memory: 7844  loss: 0.3399  decode_0.loss_ce: 0.1278  decode_0.acc_seg: 94.2614  decode_1.pointloss_ce: 0.2122  decode_1.acc_point: 76.2864
04/21 12:41:22 



04/21 12:50:17 - mmengine - INFO - Iter(val) [  10/1158]    eta: 0:04:33  time: 0.2384  data_time: 0.0609  memory: 8740  
04/21 12:50:20 - mmengine - INFO - Iter(val) [  20/1158]    eta: 0:05:22  time: 0.3288  data_time: 0.0092  memory: 8745  
04/21 12:50:22 - mmengine - INFO - Iter(val) [  30/1158]    eta: 0:04:40  time: 0.1790  data_time: 0.0113  memory: 8743  
04/21 12:50:24 - mmengine - INFO - Iter(val) [  40/1158]    eta: 0:04:29  time: 0.2196  data_time: 0.0042  memory: 8741  
04/21 12:50:24 - mmengine - INFO - Iter(val) [  50/1158]    eta: 0:03:43  time: 0.0432  data_time: 0.0041  memory: 1426  
04/21 12:50:26 - mmengine - INFO - Iter(val) [  60/1158]    eta: 0:03:38  time: 0.1875  data_time: 0.0051  memory: 8745  
04/21 12:50:28 - mmengine - INFO - Iter(val) [  70/1158]    eta: 0:03:36  time: 0.1996  data_time: 0.0049  memory: 8741  
04/21 12:50:29 - mmengine - INFO - Iter(val) [  80/1158]    eta: 0:03:20  time: 0.0939  data_time: 0.0044  memory: 8741  
04/21 12:50:30 - mmengin

CascadeEncoderDecoder(
  (data_preprocessor): SegDataPreProcessor()
  (backbone): ResNetV1c(
    (stem): Sequential(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
      (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (7): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (8): ReLU(inplace=True)
    )
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): ResLayer(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, e