In [3]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
import matplotlib.pyplot as plt
import time
import yaml

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.utils.visualizer import ColorMode

In [25]:
# if your dataset is in COCO format, this cell can be replaced by the following three lines:
# from detectron2.data.datasets import register_coco_instances
# register_coco_instances("my_dataset_train", {}, "json_annotation_train.json", "path/to/image/dir")
# register_coco_instances("my_dataset_val", {}, "json_annotation_val.json", "path/to/image/dir")

from detectron2.structures import BoxMode

def get_shoe_dicts(img_dir):
    json_file = os.path.join(img_dir, "via_region_data.json")
    print(json_file)
    with open(json_file) as f:
        imgs_anns = json.load(f)

    dataset_dicts = []
    for idx, v in enumerate(imgs_anns["_via_img_metadata"].values()):
        record = {}
        
        filename = os.path.join(img_dir, v["filename"])
        height, width = cv2.imread(filename).shape[:2]
        
        record["file_name"] = filename
        record["image_id"] = idx
        record["height"] = height
        record["width"] = width
      
        annos = v["regions"]
        objs = []
        for anno in annos:
#             assert not anno["region_attributes"]
            anno = anno["shape_attributes"]
            px = anno["all_points_x"]
            py = anno["all_points_y"]
            poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
            poly = [p for x in poly for p in x]

            obj = {
                "bbox": [np.min(px), np.min(py), np.max(px), np.max(py)],
                "bbox_mode": BoxMode.XYXY_ABS,
                "segmentation": [poly],
                "category_id": 0,
            }
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)
    return dataset_dicts

for d in ["train", "val"]:
    DatasetCatalog.register("shoe_" + d, lambda d=d: get_shoe_dicts("/home/max/Desktop/dissertation/Mask_RCNN/barry_data/" + d))
    MetadataCatalog.get("shoe_" + d).set(thing_classes=["shoe"])
    
shoe_metadata = MetadataCatalog.get("shoe_train")

def create_output_dir(lr, batch_n):
    return f"lr={float(lr)}--batch_n={batch_n}_{int(time.time())}"


def export_cfg(cfg,name,path):
    if "yaml" in name:
        with open(os.path.join(path,name),"w") as file:
            yaml.dump(cfg,file)
    else:
        with open(os.path.join(path,name+".yaml"),"w") as file:
            yaml.dump(cfg,file)
            
            
def get_internet_imgs():
    internet_names = os.listdir("/home/max/Desktop/dissertation/Mask_RCNN/barry_data/internet/")
    return ["/home/max/Desktop/dissertation/Mask_RCNN/barry_data/internet/"+file for file in internet_names]
            

# Extracting the LR and batch_size from the trained models filename
def get_batch_num(path):
    return float(path.split("=")[-1].split("_")[0])
def get_lr(path):
    return np.format_float_positional(float(path.split("=")[1].split("--")[0]))

In [2]:
"""

Training Data Visualisation

""";


# dataset_dicts = get_shoe_dicts("/home/max/Desktop/dissertation/Mask_RCNN/barry_data/train")
# for d in random.sample(dataset_dicts, 3):
#     img = cv2.imread(d["file_name"])
#     visualizer = Visualizer(img[:, :, ::-1], metadata=shoe_metadata)
#     out = visualizer.draw_dataset_dict(d)
#     plt.imshow(cv2.cvtColor(out.get_image()[:, :, ::-1],cv2.COLOR_BGR2RGB))
#     plt.show()

In [10]:
"""

Parameter Setting

""";
import numpy as np


batch_sizes_combs = np.array([2, 4, 8, 16, 32, 64, 128, 256, 512, 1024])
learning_rates = np.logspace(start=-7, stop=-2, num=8)

In [12]:
batch_sizes_combs

array([  2,   4,   8,  16,  32,  64, 128, 256, 512])

In [4]:
"""

Create all possible combinations of the above defined parameters

"""

learning_rates = np.logspace(start=-7, stop=-1, num=8)

configs = np.stack(np.meshgrid(batch_sizes_combs, learning_rates), -1).reshape(-1, 2)

In [19]:
"""

Instantiate a model from a config file.

Can be passed a custom config in yaml format

"""


cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("shoe_train",)
cfg.DATASETS.TEST = ("shoe_val",)
cfg.TEST.EVAL_PERIOD = 1
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
# cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.BASE_LR  = 1
cfg.SOLVER.MAX_ITER = 1000    # 300 iterations seems good enough for this toy dataset; you may need to train longer for a practical dataset
# cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only has one class (shoe)

# """
# change the path if neccessary

# """
# cfg.OUTPUT_DIR = os.path.join("./run_logs_test/", create_output_dir(lr=temp_learning_rate, batch_n=temp_batchn))
# os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
# trainer = DefaultTrainer(cfg).test()
# trainer.resume_or_load(resume=False)
# trainer.train()

In [6]:
"""

Training of all the possible parameter combinations

"""



for pair in configs:
    temp_learning_rate = float(pair[1])
    temp_batchn = int(pair[0])
    from detectron2.engine import DefaultTrainer
    
    
    """
    Model Set-Up
    """
    
    
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = ("shoe_train",)
    cfg.DATASETS.TEST = ("shoe_val",)
    cfg.TEST.EVAL_PERIOD = 1
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml")  # Let training initialize from model zoo
    cfg.SOLVER.IMS_PER_BATCH = 2
    # cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
    cfg.SOLVER.BASE_LR  = temp_learning_rate
    cfg.SOLVER.MAX_ITER = 1000    # 300 iterations seems good enough for this toy dataset; you may need to train longer for a practical dataset
    # cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # faster, and good enough for this toy dataset (default: 512)
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = temp_batchn
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only has one class (shoe)

    """
    change the path if neccessary
    
    """
    cfg.OUTPUT_DIR = os.path.join("./run_logs_test/", create_output_dir(lr=temp_learning_rate, batch_n=temp_batchn))
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()
    

[32m[07/11 19:19:04 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

/home/max/Desktop/dissertation/Mask_RCNN/barry_data/train/via_region_data.json
[32m[07/11 19:19:07 d2.data.build]: [0mRemoved 0 images with no usable annotations. 277 images left.
[32m[07/11 19:19:07 d2.data.common]: [0mSerializing 277 elements to byte tensors and concatenating them all ...
[32m[07/11 19:19:07 d2.data.common]: [0mSerialized dataset takes 0.44 MiB
[32m[07/11 19:19:07 d2.data.dataset_mapper]: [0mAugmentations used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()]
[32m[07/11 19:19:07 d2.data.build]: [0mUsing training sampler TrainingSampler


Unable to load 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (2, 1024) in the model!
Unable to load 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (2,) in the model!
Unable to load 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (4, 1024) in the model!
Unable to load 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (4,) in the model!
Unable to load 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (1, 256, 1, 1) in the model!
Unable to load 'roi_heads.mask_head.predictor.bias' to the model due to incompatible shapes: (80,) in the checkpoint but (1,) in the model!


[32m[07/11 19:19:07 d2.engine.train_loop]: [0mStarting training from iteration 0
/home/max/Desktop/dissertation/Mask_RCNN/barry_data/val/via_region_data.json
[32m[07/11 19:19:08 d2.data.build]: [0mDistribution of instances among all 1 categories:
[36m|  category  | #instances   |
|:----------:|:-------------|
|    shoe    | 167          |
|            |              |[0m
[32m[07/11 19:19:08 d2.data.common]: [0mSerializing 70 elements to byte tensors and concatenating them all ...
[32m[07/11 19:19:08 d2.data.common]: [0mSerialized dataset takes 0.11 MiB
[32m[07/11 19:19:08 d2.data.dataset_mapper]: [0mAugmentations used in training: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
/home/max/Desktop/dissertation/Mask_RCNN/barry_data/val/via_region_data.json
[32m[07/11 19:19:09 d2.data.common]: [0mSerializing 70 elements to byte tensors and concatenating them all ...
[32m[07/11 19:19:09 d2.data.common]: [0mSerialized dataset takes 0.1

/home/max/Desktop/dissertation/Mask_RCNN/barry_data/val/via_region_data.json
[32m[07/11 19:19:18 d2.data.common]: [0mSerializing 70 elements to byte tensors and concatenating them all ...
[32m[07/11 19:19:18 d2.data.common]: [0mSerialized dataset takes 0.11 MiB
[32m[07/11 19:19:18 d2.data.dataset_mapper]: [0mAugmentations used in training: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
/home/max/Desktop/dissertation/Mask_RCNN/barry_data/val/via_region_data.json
[32m[07/11 19:19:18 d2.data.common]: [0mSerializing 70 elements to byte tensors and concatenating them all ...
[32m[07/11 19:19:18 d2.data.common]: [0mSerialized dataset takes 0.11 MiB
[32m[07/11 19:19:18 d2.data.dataset_mapper]: [0mAugmentations used in training: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
/home/max/Desktop/dissertation/Mask_RCNN/barry_data/val/via_region_data.json
[32m[07/11 19:19:19 d2.data.common]: [0mSeriali

[32m[07/11 19:19:27 d2.data.common]: [0mSerializing 70 elements to byte tensors and concatenating them all ...
[32m[07/11 19:19:27 d2.data.common]: [0mSerialized dataset takes 0.11 MiB
[32m[07/11 19:19:27 d2.data.dataset_mapper]: [0mAugmentations used in training: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
/home/max/Desktop/dissertation/Mask_RCNN/barry_data/val/via_region_data.json
[32m[07/11 19:19:28 d2.data.common]: [0mSerializing 70 elements to byte tensors and concatenating them all ...
[32m[07/11 19:19:28 d2.data.common]: [0mSerialized dataset takes 0.11 MiB
[32m[07/11 19:19:28 d2.data.dataset_mapper]: [0mAugmentations used in training: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
/home/max/Desktop/dissertation/Mask_RCNN/barry_data/val/via_region_data.json
[32m[07/11 19:19:29 d2.data.common]: [0mSerializing 70 elements to byte tensors and concatenating them all ...
[32m[07/11 1

KeyboardInterrupt: 

In [None]:
"""

TensorBoard Visualisation


DONT RUN - VERY LAGGY
RUN FROM COMMAND LINE:

   $tensorboard --logdir LOG_DIR_PATH

"""


# # Look at training curves in tensorboard:
# %load_ext tensorboard
# %tensorboard --logdir output

In [12]:
model_name_list = os.listdir("./run_logs/")

In [90]:
model_dict = {}

i = 0
for path in os.listdir("./run_logs/"):
        adict = {}

        adict["lr"] = get_lr(path)
        adict["batch_size"] = get_batch_num(path)
        model_dict[f"model_{i}"] = adict
        i+=1

In [46]:
import pandas as pd

In [91]:
df = pd.DataFrame(model_dict).T

In [92]:
df_by_lr = df.sort_values(["lr"],)
df_by_batch = df.sort_values(["batch_size"],ascending=True)

In [93]:
df_by_lr

Unnamed: 0,lr,batch_size
model_0,1e-07,256
model_20,1e-07,8
model_31,1e-07,512
model_40,1e-07,2
model_12,1e-07,128
model_6,1e-07,4
model_41,1e-07,64
model_4,1e-07,16
model_52,1e-07,32
model_47,7.19685673e-07,16


In [94]:
df_by_batch

Unnamed: 0,lr,batch_size
model_55,3.72759372031e-05,2
model_46,7.19685673e-07,2
model_11,0.0002682695795279,2
model_9,0.0019306977288832,2
model_33,5.1794746792e-06,2
model_40,1e-07,2
model_24,0.0138949549437313,2
model_5,0.0019306977288832,4
model_6,1e-07,4
model_34,3.72759372031e-05,4


In [6]:
"""

prediction making

""";

In [6]:
all_logs = os.listdir("./run_logs/")

In [7]:
all_logs

['lr=1e-07--batch_n=256_1594432079',
 'lr=3.727593720314938e-05--batch_n=512_1594440462',
 'lr=3.727593720314938e-05--batch_n=128_1594439839',
 'lr=7.196856730011514e-07--batch_n=512_1594435088',
 'lr=1e-07--batch_n=2_1594491500',
 'lr=1e-07--batch_n=16_1594430885',
 'lr=0.0019306977288832496--batch_n=4_1594443774',
 'lr=1e-07--batch_n=4_1594430306',
 'lr=5.179474679231212e-06--batch_n=128_1594437153',
 'lr=3.727593720314938e-05--batch_n=256_1594440147',
 'lr=0.0019306977288832496--batch_n=2_1594443488',
 'lr=7.196856730011514e-07--batch_n=4_1594433008',
 'lr=0.00026826957952797245--batch_n=2_1594440789',
 'lr=1e-07--batch_n=128_1594431771',
 'lr=0.00026826957952797245--batch_n=8_1594441363',
 'lr=0.0019306977288832496--batch_n=16_1594444353',
 'lr=5.179474679231212e-06--batch_n=32_1594436565',
 'lr=0.00026826957952797245--batch_n=64_1594442237',
 'lr=0.00026826957952797245--batch_n=128_1594442535',
 'lr=7.196856730011514e-07--batch_n=256_1594434774',
 'lr=7.196856730011514e-07--batch_

In [None]:
internet_names = get_internet_imgs()

In [12]:
for model in all_logs:
    print(f"*** Weights from {model} ***")
    cfg.MODEL.WEIGHTS = os.path.join("./run_logs/", f"{model}/model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7   # set a custom testing threshold for this model
    cfg.DATASETS.TEST = ("shoe_val", )
    predictor = DefaultPredictor(cfg)
    
    """
    If inference on validation, change the for loop below
    
    1) import shoe dataset with ds_dicts = get_shoe_dataset(*PATH to VAL*)
    2) Iterate overlist of dicts:
           im = load(dict[file_name])
    """
    for d in internet_names:
        im = cv2.imread(d)
        try:
            outputs = predictor(im)
        except Exception as e:
            print(e)
            print(d)
            print(type(im))
            print(im)

        try:
            v = Visualizer(im[:, :, ::-1],
                           metadata=shoe_metadata, 
                           scale=0.5, 
                           instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels. This option is only available for segmentation models
            )
        except Exception as e:
            print(e)
            print(d)
            print(type(im))
            print(im)
        out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
        plt.figure(figsize=(10,10))
        plt.imshow(cv2.cvtColor(out.get_image()[:,:,::-1], cv2.COLOR_BGR2RGB))
    #     cv2_imshow(out.get_image()[:, :, ::-1])
        plt.show()
    time.sleep(5)
    plt.cla()
    plt.close()

*** Weights from lr=1e-07--batch_n=256_1594432079 ***


Unable to load 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (2, 1024) in the checkpoint but (81, 1024) in the model!
Unable to load 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (2,) in the checkpoint but (81,) in the model!
Unable to load 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (4, 1024) in the checkpoint but (320, 1024) in the model!
Unable to load 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (4,) in the checkpoint but (320,) in the model!
Unable to load 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (1, 256, 1, 1) in the checkpoint but (80, 256, 1, 1) in the model!
Unable to load 'roi_heads.mask_head.predictor.bias' to the model due to incompatible shapes: (1,) in the checkpoint but (80,) in the model!


name 'shoe_metadata' is not defined
/home/max/Desktop/dissertation/Mask_RCNN/barry_data/internet/download.jpeg
<class 'numpy.ndarray'>
[[[ 95  85  78]
  [ 95  85  78]
  [ 95  85  78]
  ...
  [ 37  35  34]
  [ 37  35  34]
  [ 37  35  34]]

 [[ 94  84  77]
  [ 95  85  78]
  [ 95  85  78]
  ...
  [ 37  35  34]
  [ 37  35  34]
  [ 38  36  35]]

 [[ 94  85  76]
  [ 94  85  76]
  [ 95  86  77]
  ...
  [ 37  35  34]
  [ 38  36  35]
  [ 38  36  35]]

 ...

 [[155 132 117]
  [155 132 117]
  [155 132 117]
  ...
  [242 235 232]
  [243 236 233]
  [244 237 234]]

 [[157 134 119]
  [156 133 118]
  [156 133 118]
  ...
  [241 234 231]
  [242 235 232]
  [243 236 233]]

 [[159 136 120]
  [158 135 119]
  [157 134 118]
  ...
  [240 233 230]
  [241 234 231]
  [242 235 232]]]


IndexError: boolean index did not match indexed array along dimension 0; dimension is 1024 but corresponding boolean dimension is 615

In [22]:
from detectron2.utils.visualizer import ColorMode
dataset_dicts = get_shoe_dicts("/home/max/Desktop/dissertation/Mask_RCNN/barry_data/val")
for d in random.sample(dataset_dicts, 50):
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)
    v = Visualizer(im[:, :, ::-1],
                   metadata=shoe_metadata, 
                   scale=0.5, 
                   instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels. This option is only available for segmentation models
    )
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    plt.figure(figsize=(10,10))
    plt.imshow(cv2.cvtColor(out.get_image()[:,:,::-1], cv2.COLOR_BGR2RGB))
#     cv2_imshow(out.get_image()[:, :, ::-1])
    plt.show()

/home/max/Desktop/dissertation/Mask_RCNN/barry_data/internet/via_region_data.json


FileNotFoundError: [Errno 2] No such file or directory: '/home/max/Desktop/dissertation/Mask_RCNN/barry_data/internet/via_region_data.json'