In [None]:
"""
Note: 
This is the notebook where we performed experiments on the object detection model.
All of the outputs were cleared in order to cut down on the length of the notebook.
The model loading and data preparation code is credited to Daniel Bourke's amenity detection project:
https://towardsdatascience.com/replicating-airbnbs-amenity-detection-with-detectron2-28f33704d6ff
"""

In [None]:
import torch, torchvision
import detectron2
from detectron2.utils.logger import setup_logger
import os
setup_logger() 
import numpy as np
import pandas as pd
from tqdm import tqdm
import cv2
import random
import matplotlib.pyplot as plt
from detectron2 import model_zoo 
from detectron2.engine import DefaultPredictor 
from detectron2.config import get_cfg 
from detectron2.utils.visualizer import Visualizer 
from detectron2.data import MetadataCatalog 

In [None]:
# Class names used to filter the annotation tables and to label predictions
# (passed to MetadataCatalog as `thing_classes`). The list is in alphabetical order.
target_classes = [
    "Bathtub",
    "Bed",
    "Billiard table",
    "Ceiling fan",
    "Coffeemaker",
    "Couch",
    "Countertop",
    "Dishwasher",
    "Fireplace",
    "Fountain",
    "Gas stove",
    "Jacuzzi",
    "Kitchen & dining room table",
    "Microwave oven",
    "Mirror",
    "Oven",
    "Pillow",
    "Porch",
    "Refrigerator",
    "Shower",
    "Sink",
    "Sofa bed",
    "Stairs",
    "Swimming pool",
    "Television",
    "Toilet",
    "Towel",
    "Tree house",
    "Washing machine",
    "Wine rack",
]

In [None]:
# Start from detectron2's default config, overlay the saved RetinaNet config file,
# point the weights at the saved checkpoint, then build a predictor from it.
# ("pt" presumably stands for "pre-trained" -- confirm.)
pt_cfg = get_cfg() 
pt_cfg.merge_from_file("./retinanet_model_final_config.yaml") 
pt_cfg.MODEL.WEIGHTS = "./retinanet_model_final.pth" 
pt_predictor = DefaultPredictor(pt_cfg)

In [None]:
# Image folders, relative to the notebook's working directory. The trailing
# slash matters: get_image_dicts builds file names by string concatenation.
train_path = "./train/"
valid_path = "./validation/"

In [None]:
def get_image_ids(image_folder=None):
    """Return the file-name stems of the ``.jpg`` entries found in ``image_folder``.

    Entries come back in ``os.listdir`` order, and only names ending in the
    lowercase suffix ``.jpg`` are kept.
    """
    stems = []
    for entry in os.listdir(image_folder):
        if not entry.endswith(".jpg"):
            continue
        stem, _extension = os.path.splitext(entry)
        stems.append(stem)
    return stems
# IDs (file-name stems) of the validation images on disk, and the raw
# validation bounding-box annotation table.
val_img_ids = get_image_ids(valid_path)
val_annots = pd.read_csv("validation-annotations-bbox.csv")

In [None]:
def format_annotations(image_folder, annotation_file, target_classes=None):
    """Load a bounding-box annotation CSV and keep the rows for images in ``image_folder``.

    The function reads ``annotation_file``, attaches a human-readable
    ``ClassName`` column by looking each ``LabelName`` up in
    ``class-descriptions-boxable.csv`` (read from the working directory), keeps
    only rows whose ``ImageID`` belongs to a ``.jpg`` in ``image_folder`` (and,
    if given, whose class is in ``target_classes``), and adds an integer
    ``ClassID`` column taken from the categorical codes of ``ClassName``.

    Args:
        image_folder: folder whose ``.jpg`` files define the image IDs to keep.
        annotation_file: path of the annotation CSV; it must provide the
            ``ImageID`` and ``LabelName`` columns.
        target_classes: optional collection of class names to keep; when empty
            or ``None`` every class is kept.

    Returns:
        A new DataFrame, sorted by ``ClassName``, with the extra ``ClassName``
        (categorical) and ``ClassID`` columns.

    Raises:
        AssertionError: if the number of distinct image IDs left after
            filtering differs from the number of images in ``image_folder``.
    """
    image_ids = get_image_ids(image_folder)
    annot_file = pd.read_csv(annotation_file)
    classes = pd.read_csv("class-descriptions-boxable.csv",
                          names=["LabelName", "ClassName"])
    label_to_name = classes.set_index("LabelName")["ClassName"]
    annot_file["ClassName"] = annot_file["LabelName"].map(label_to_name)
    annot_file = annot_file.sort_values(by=["ClassName"])

    keep = annot_file["ImageID"].isin(image_ids)
    if target_classes:
        keep = keep & annot_file["ClassName"].isin(target_classes)
    # Copy the filtered rows so the column assignments below write to an
    # independent frame rather than to a slice of the full table.
    annot_file = annot_file[keep].copy()

    assert len(annot_file.ImageID.unique()) == len(image_ids), "Label unique ImageIDs doesn't match target folder."
    # Categorical codes are assigned from the classes that survived filtering.
    annot_file["ClassName"] = pd.Categorical(annot_file["ClassName"])
    annot_file["ClassID"] = annot_file["ClassName"].cat.codes

    return annot_file

In [None]:
# Filtered validation annotations, restricted to the names in target_classes.
# NOTE(review): the trailing comment used to read "(fireplace & coffeemaker)",
# which looks stale now that target_classes lists 30 classes -- confirm.
val_annots_formatted = format_annotations(image_folder=valid_path,
                                          annotation_file="validation-annotations-bbox.csv",
                                          target_classes=target_classes)

In [None]:
def rel_to_absolute(bbox, height, width):
    """Scale a relative ``[x0, y0, x1, y1]`` box to rounded pixel coordinates.

    Args:
        bbox: sequence whose first four entries are x0, y0, x1, y1 expressed as
            fractions of the image size.
        height: image height used to scale the y coordinates.
        width: image width used to scale the x coordinates.

    Returns:
        A new list of four plain Python floats ``[x0, y0, x1, y1]``, which can
        be JSON-serialised. ``bbox`` itself is left untouched.
    """
    # x coordinates scale with the width, y coordinates with the height.
    scales = (width, height, width, height)
    return [float(np.round(np.multiply(coord, scale)))
            for coord, scale in zip(bbox[:4], scales)]

In [None]:
from detectron2.structures import BoxMode
import json
import pprint

def get_image_dicts(image_folder, annotation_file, target_classes=None):
    """Build one dataset record per ``.jpg`` in ``image_folder`` and save them as JSON.

    Each record holds ``file_name``, ``image_id`` (position of the image in the
    folder listing), ``height``, ``width`` and ``annotations``. Every annotation
    carries an absolute-pixel ``bbox``, ``bbox_mode`` set to
    ``BoxMode.XYXY_ABS`` and an integer ``category_id`` taken from the
    ``ClassID`` column produced by ``format_annotations``.

    The records are also written to ``<image_folder>/<prefix>_labels.json``,
    where ``prefix`` is ``"valid"`` if that word occurs in ``image_folder`` and
    ``"train"`` otherwise.

    Args:
        image_folder: folder containing the images.
        annotation_file: bounding-box annotation CSV for those images.
        target_classes: optional collection of class names to keep.

    Returns:
        The list of record dictionaries.

    Raises:
        OSError: if an image listed in the folder cannot be read by OpenCV.
    """
    dataset_name = str(image_folder)
    annotations = format_annotations(image_folder=image_folder,
                                     annotation_file=annotation_file,
                                     target_classes=target_classes)
    img_ids = get_image_ids(image_folder)
    print(f"\nUsing {annotation_file} for annotations...")
    print(f"On dataset: {dataset_name}")
    print("Classes we're using:\n{}".format(annotations["ClassName"].value_counts()))
    print(f"Total number of images: {len(img_ids)}")

    # Split the table by image once instead of re-filtering it for every image.
    box_columns = ["XMin", "YMin", "XMax", "YMax"]
    rows_by_image = {image_id: rows for image_id, rows in annotations.groupby("ImageID")}

    img_dicts = []
    for idx, img in tqdm(enumerate(img_ids), total=len(img_ids)):
        file_name = os.path.join(image_folder, img + ".jpg")
        image = cv2.imread(file_name)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise OSError(f"Could not read image: {file_name}")
        height, width = image.shape[:2]

        img_annotations = []
        img_data = rows_by_image.get(img)
        if img_data is not None:
            for class_id, rel_box in zip(img_data["ClassID"], img_data[box_columns].to_numpy()):
                bbox = rel_to_absolute(bbox=np.float32(rel_box), height=height, width=width)
                img_annotations.append({
                    "bbox": bbox,
                    "bbox_mode": BoxMode.XYXY_ABS,
                    # Plain int so the record can be written with json.dump.
                    "category_id": int(class_id),
                })

        img_dicts.append({
            "file_name": file_name,
            "image_id": idx,
            "height": height,
            "width": width,
            "annotations": img_annotations,
        })

    prefix = "valid" if "valid" in image_folder else "train"
    json_file = os.path.join(image_folder, prefix + "_labels.json")
    print(f"\nSaving labels to: {json_file}...")
    with open(json_file, "w") as f:
        json.dump(img_dicts, f)
    if img_dicts:
        # random.sample raises on an empty list, so only show an example if one exists.
        print("Showing an example:")
        pprint.pprint(random.sample(img_dicts, 1))
    return img_dicts

In [None]:
# Build the dataset records for both splits. Each call also writes a
# "*_labels.json" file into the corresponding image folder.
val_img_dicts = get_image_dicts(image_folder=valid_path,
                                annotation_file="validation-annotations-bbox.csv",
                                target_classes=target_classes)
train_img_dicts = get_image_dicts(image_folder=train_path,
                                  annotation_file="train-annotations-bbox.csv",
                                  target_classes=target_classes)

In [None]:
def load_json_labels(image_folder):
    """Load the dataset records stored in the ``.json`` file inside ``image_folder``.

    If the folder holds several ``.json`` files, the last one returned by
    ``os.listdir`` is used. Every annotation's ``bbox_mode`` is reset to
    ``BoxMode.XYXY_ABS``, because the enum is stored as a plain value in JSON.

    Args:
        image_folder: folder to search for a ``.json`` label file.

    Returns:
        The list of record dictionaries read from the file.

    Raises:
        FileNotFoundError: if ``image_folder`` contains no ``.json`` file.
    """
    json_file = None
    for file in os.listdir(image_folder):
        if file.endswith(".json"):
            json_file = os.path.join(image_folder, file)
    if json_file is None:
        raise FileNotFoundError(
            "No .json label file found, please make one with get_image_dicts()")
    with open(json_file, "r") as f:
        img_dicts = json.load(f)
    for img_dict in img_dicts:
        for annot in img_dict["annotations"]:
            annot["bbox_mode"] = BoxMode.XYXY_ABS
    return img_dicts
# Reload the records from the JSON label files found in each image folder.
loaded_val_img_dicts = load_json_labels(valid_path)
loaded_train_img_dicts = load_json_labels(train_path)

In [None]:
# Folder holding the augmented images.
# NOTE(review): this expects a .json label file inside ./aug/, but the only
# visible writer saves to ./output_aug/train_aug.json in a later cell --
# confirm how the file gets here.
aug_path = './aug/'
loaded_aug_img_dicts = load_json_labels(aug_path)

In [None]:
from detectron2.data import DatasetCatalog, MetadataCatalog
# Map each dataset name to the folder that holds its images and label JSON,
# rather than relying on the dataset name doubling as a relative folder path.
dataset_folders = {"train": train_path, "validation": valid_path}
for dataset_name, dataset_folder in dataset_folders.items():
    if dataset_name in DatasetCatalog.list():
        # Registering the same name twice fails, which would break re-running this cell.
        print(f"{dataset_name} is already registered, skipping")
        continue
    print(f"Registering {dataset_name}")
    # Bind the folder as a default argument so each lambda keeps its own value.
    DatasetCatalog.register(dataset_name, lambda dataset_folder=dataset_folder: load_json_labels(dataset_folder))
    MetadataCatalog.get(dataset_name).set(thing_classes=target_classes)
metadata = MetadataCatalog.get("train")

In [None]:
# These names are otherwise only imported in later cells; import them here so
# this cell also runs on a fresh kernel executed top to bottom.
from detectron2.data import build_detection_test_loader
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

# NOTE(review): the two ROI_HEADS settings below presumably have no effect on a
# RetinaNet model, which has its own MODEL.RETINANET.* keys -- confirm before
# relying on the 0.2 score threshold.
pt_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.2
pt_cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
pt_cfg.MODEL.RETINANET.NUM_CLASSES = len(target_classes)
pt_cfg.DATASETS.TEST = ("validation",)
pt_predictor = DefaultPredictor(pt_cfg)

# The trainer is only used as a convenient way to build the model and load the
# checkpoint named in pt_cfg.MODEL.WEIGHTS; no training is run in this cell.
pt_trainer = DefaultTrainer(pt_cfg)
pt_trainer.resume_or_load(resume=False)

# Evaluate the loaded model on the registered "validation" dataset.
pt_evaluator = COCOEvaluator(dataset_name="validation",
                             cfg=pt_cfg,
                             distributed=False,
                             output_dir="./output_pt/")
pt_val_loader = build_detection_test_loader(pt_cfg,
                                            dataset_name="validation")
inference_on_dataset(model=pt_trainer.model,
                     data_loader=pt_val_loader,
                     evaluator=pt_evaluator)

In [None]:
# Save the config used above. `cfg` is not defined at notebook scope (it only
# exists inside make_inference); pt_cfg is the only config built so far.
os.makedirs("output", exist_ok=True)
with open("output/config.yaml", "w") as f:
    f.write(pt_cfg.dump())

In [None]:
def make_inference(image, model_config, model_weights, threshold=0.5, n=5):
    """Run a detectron2 model on one image file and plot the first ``n`` predictions.

    Args:
        image: path of the image file, read with OpenCV.
        model_config: path of the YAML config merged over detectron2's defaults.
        model_weights: path of the model checkpoint.
        threshold: minimum score a detection needs to be kept at test time.
        n: number of leading instances from the prediction output to draw.

    Raises:
        OSError: if ``image`` cannot be read.
    """
    cfg = get_cfg()
    cfg.merge_from_file(model_config)
    cfg.MODEL.WEIGHTS = model_weights
    # detectron2 has no MODEL.SCORE_THRESH_TEST key; the test-time threshold
    # lives under the head-specific nodes, so set both of them.
    cfg.MODEL.RETINANET.SCORE_THRESH_TEST = threshold
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = threshold
    predictor = DefaultPredictor(cfg)

    img = cv2.imread(image)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise OSError(f"Could not read image: {image}")

    # OpenCV loads BGR; the Visualizer wants RGB, hence the channel reversal.
    visualizer = Visualizer(img_rgb=img[:, :, ::-1],
                            metadata=MetadataCatalog.get(cfg.DATASETS.TEST[0]),
                            scale=0.7)
    outputs = predictor(img)
    drawn = visualizer.draw_instance_predictions(outputs["instances"][:n].to("cpu"))

    # The drawn image is already RGB, which is what matplotlib expects, so it
    # is shown as-is (reversing the channels again would swap red and blue).
    pred_image = drawn.get_image()
    plt.figure(figsize=(20, 20))
    plt.imshow(pred_image)
    plt.show()

In [None]:
from collections import Counter

# Count how many validation boxes carry each category id.
num_occ = Counter(
    annot['category_id']
    for record in val_img_dicts
    for annot in record['annotations']
)
print(num_occ)
print(val_img_dicts[0].keys())

In [None]:
# For every validation box labelled with category id 19 ("Shower" in the
# str_to_id table defined in a later cell), print the image size and its
# annotations, then plot the top prediction for that image.
for record in val_img_dicts:
    for annot in record['annotations']:
        if annot['category_id'] != 19:
            continue
        print(record['width'])
        print(record['height'])
        print(record['annotations'])
        make_inference(record['file_name'],
                       model_config="retinanet_model_final_config.yaml",
                       model_weights="retinanet_model_final.pth",
                       n=1)

In [None]:
# Swimming pool                  161
# Bed                            125
# Pillow                          76
# Kitchen & dining room table     76
# Countertop                      68
# Sofa bed                        61
# Couch                           61
# Sink                            57
# Porch                           52
# Stairs                          45
# Television                      44
# Fireplace                       41
# Washing machine                 40
# Toilet                          37
# Oven                            36
# Mirror                          33
# Billiard table                  32
# Microwave oven                  30
# Refrigerator                    26
# Fountain                        24
# Gas stove                       23
# Coffeemaker                     21
# Bathtub                         18
# Wine rack                       17
# Jacuzzi                         16
# Tree house                      11
# Ceiling fan                     11
# Shower                           9
# Towel                            9
# Dishwasher                       3


# Class name -> category id, listed in the same order as the commented
# per-class counts (most to fewest boxes).
str_to_id = {
    "Swimming pool": 23,
    "Bed": 1,
    "Pillow": 16,
    "Kitchen & dining room table": 12,
    "Countertop": 6,
    "Sofa bed": 21,
    "Couch": 5,
    "Sink": 20,
    "Porch": 17,
    "Stairs": 22,
    "Television": 24,
    "Fireplace": 8,
    "Washing machine": 28,
    "Toilet": 25,
    "Oven": 15,
    "Mirror": 14,
    "Billiard table": 2,
    "Microwave oven": 13,
    "Refrigerator": 18,
    "Fountain": 9,
    "Gas stove": 10,
    "Coffeemaker": 4,
    "Bathtub": 0,
    "Wine rack": 29,
    "Jacuzzi": 11,
    "Tree house": 27,
    "Ceiling fan": 3,
    "Shower": 19,
    "Towel": 26,
    "Dishwasher": 7,
}

# Reverse lookup: category id -> class name.
id_to_str = {class_id: name for name, class_id in str_to_id.items()}
print(id_to_str)

# Number of boxes per class, matching the commented counts at the top of this cell.
occurences = {
    "Swimming pool": 161,
    "Bed": 125,
    "Pillow": 76,
    "Kitchen & dining room table": 76,
    "Countertop": 68,
    "Sofa bed": 61,
    "Couch": 61,
    "Sink": 57,
    "Porch": 52,
    "Stairs": 45,
    "Television": 44,
    "Fireplace": 41,
    "Washing machine": 40,
    "Toilet": 37,
    "Oven": 36,
    "Mirror": 33,
    "Billiard table": 32,
    "Microwave oven": 30,
    "Refrigerator": 26,
    "Fountain": 24,
    "Gas stove": 23,
    "Coffeemaker": 21,
    "Bathtub": 18,
    "Wine rack": 17,
    "Jacuzzi": 16,
    "Tree house": 11,
    "Ceiling fan": 11,
    "Shower": 9,
    "Towel": 9,
    "Dishwasher": 3,
}

# Per-class AP values, keyed by class name in alphabetical order.
AP = {
    "Bathtub": 34.25027057569784,
    "Bed": 59.03879550756028,
    "Billiard table": 80.6170658008729,
    "Ceiling fan": 74.75247524752476,
    "Coffeemaker": 67.21489992445622,
    "Couch": 36.476634558768154,
    "Countertop": 12.059790879024206,
    "Dishwasher": 31.291378938099314,
    "Fireplace": 36.35683183029906,
    "Fountain": 41.94557512767471,
    "Gas stove": 18.65965580119998,
    "Jacuzzi": 28.651577645715214,
    "Kitchen & dining room table": 13.631045153528959,
    "Microwave oven": 53.651455417228014,
    "Mirror": 52.89604356333979,
    "Oven": 34.228370636894674,
    "Pillow": 18.82013126836111,
    "Porch": 15.723925102088343,
    "Refrigerator": 75.5005712805338,
    "Shower": 2.574831875856775,
    "Sink": 39.64110013165663,
    "Sofa bed": 63.42160711928795,
    "Stairs": 32.32828231466491,
    "Swimming pool": 74.7463747113812,
    "Television": 76.73849607919813,
    "Toilet": 47.50870541642443,
    "Towel": 32.80105970819099,
    "Tree house": 39.709301753070505,
    "Washing machine": 50.87204656214445,
    "Wine rack": 37.51794152757786,
}
# Mean of the per-class AP values.
print(sum(AP.values()) / len(AP))

# Pair each class's box count with its AP, following the key order of AP.
x = [occurences[key] for key in AP]
y = [AP[key] for key in AP]

from sklearn.metrics import r2_score  # imported here as in the original cell; not used below
print("Eval")
# Pearson correlation between box count and AP; its square is shown on the plot.
coef = np.corrcoef(x, y)[0, 1]
print(coef)
props = {'boxstyle': 'round', 'facecolor': 'wheat', 'alpha': 0.5}
plt.title('AP Score by Size of Validation Set')
plt.xlabel('# Examples')
plt.ylabel('AP')
plt.text(115, 5, f'R^2 = {round(coef**2, 3)}', bbox=props, fontsize=14)
plt.scatter(x, y)

In [None]:
"""
Stairs                         5981
Couch                          4259
Swimming pool                  3881
Porch                          3854
Television                     3789
Fountain                       3691
Bed                            3563
Pillow                         3508
Countertop                     3113
Kitchen & dining room table    2127
Sink                           1648
Mirror                         1572
Sofa bed                       1501
Toilet                         1099
Billiard table                  912
Fireplace                       711
Washing machine                 655
Oven                            637
Refrigerator                    592
Bathtub                         545
Gas stove                       526
Microwave oven                  485
Ceiling fan                     478
Towel                           338
Coffeemaker                     323
Wine rack                       254
Shower                          235
Tree house                      110
Jacuzzi                         103
Dishwasher                       92"""

# Number of boxes per class for the larger split whose counts are listed in
# the string above. This rebinds the `occurences` name used by the earlier plot.
occurences = {
    "Swimming pool": 3881,
    "Bed": 3563,
    "Pillow": 3508,
    "Kitchen & dining room table": 2127,
    "Countertop": 3113,
    "Sofa bed": 1501,
    "Couch": 4259,
    "Sink": 1648,
    "Porch": 3854,
    "Stairs": 5981,
    "Television": 3789,
    "Fireplace": 711,
    "Washing machine": 655,
    "Toilet": 1099,
    "Oven": 637,
    "Mirror": 1572,
    "Billiard table": 912,
    "Microwave oven": 485,
    "Refrigerator": 592,
    "Fountain": 3691,
    "Gas stove": 526,
    "Coffeemaker": 323,
    "Bathtub": 545,
    "Wine rack": 254,
    "Jacuzzi": 103,
    "Tree house": 110,
    "Ceiling fan": 478,
    "Shower": 235,
    "Towel": 338,
    "Dishwasher": 92,
}

# Pair each class's box count with its AP, following the key order of AP.
# NOTE(review): the counts above look like they come from the training
# annotations, yet the labels below say "Test" -- confirm which split this is.
x = [occurences[key] for key in AP]
y = [AP[key] for key in AP]

print("Test")
# Pearson correlation between box count and AP; its square is shown on the plot.
coef = np.corrcoef(x, y)[0, 1]
print(coef)
props = {'boxstyle': 'round', 'facecolor': 'wheat', 'alpha': 0.5}
plt.title('AP Score by Size of Test Set')
plt.xlabel('# Examples')
plt.ylabel('AP')
plt.text(4200, 5, f'R^2 = {round(coef**2, 3)}', bbox=props, fontsize=14)
plt.scatter(x, y)

In [None]:
# Classes whose AP falls below this cut-off are treated as poorly detected.
min_thresh = 25.0
# Category ids of those classes.
poor_classes = {str_to_id[name] for name, score in AP.items() if score < min_thresh}
poor_classes
len(train_img_dicts)
# Only this last expression is displayed as the cell output.
train_img_dicts[0]

In [None]:
def contains_poor(img, poor_classes):
    """Return True if any annotation of ``img`` has a category id in ``poor_classes``."""
    return any(box['category_id'] in poor_classes for box in img['annotations'])

# Training records that contain at least one box from a poorly detected class.
to_augment = list(filter(lambda img: contains_poor(img, poor_classes), train_img_dicts))
len(to_augment)

In [None]:
from PIL import Image, ImageFilter

aug_path = './aug/'

def flipImages(images, output_dir=None, blur_radius=3):
    """Write a mirrored, blurred copy of every image and return matching records.

    For each record the source image is flipped left-to-right, blurred with a
    Gaussian filter and saved under ``output_dir`` with the same base name.
    The returned records are deep copies of ``images`` whose ``file_name``
    points at the saved copy and whose boxes are mirrored to match it. The
    input records are not modified.

    Args:
        images: list of record dictionaries with ``file_name``, ``width`` and
            ``annotations`` (each annotation holding an ``[x0, y0, x1, y1]`` bbox).
        output_dir: folder for the augmented images; defaults to ``aug_path``.
        blur_radius: radius passed to ``ImageFilter.GaussianBlur``.

    Returns:
        A new list of records describing the augmented images.
    """
    import copy  # local import: only this function needs it

    if output_dir is None:
        output_dir = aug_path
    os.makedirs(output_dir, exist_ok=True)

    # Work on copies so the caller's records keep describing the unflipped images.
    augmented_annotations = copy.deepcopy(images)
    for record in augmented_annotations:
        out_file = os.path.join(output_dir, os.path.basename(record['file_name']))
        with Image.open(record['file_name']) as im:
            flipped = im.transpose(Image.FLIP_LEFT_RIGHT)
            flipped.filter(ImageFilter.GaussianBlur(blur_radius)).save(out_file)
        # The record must reference the flipped image, not the original one.
        record['file_name'] = out_file

        w = record['width']
        for annot in record['annotations']:
            # Mirroring swaps the roles of the left and right edges: the new
            # left edge comes from the old right edge, keeping x0 <= x1.
            old_x0, old_x1 = annot['bbox'][0], annot['bbox'][2]
            annot['bbox'][0] = w - old_x1
            annot['bbox'][2] = w - old_x0
    return augmented_annotations
            
augmented_annotations = flipImages(to_augment)
# The output folder is only created in a later cell, so make sure it exists here.
os.makedirs('./output_aug/', exist_ok=True)
# NOTE(review): the "aug" dataset is loaded from a .json file inside ./aug/,
# while this writes to ./output_aug/ -- confirm where the file should live.
with open('./output_aug/train_aug.json', "w") as f:
    json.dump(augmented_annotations, f)

In [None]:
dataset_name = 'aug'
# Registering the same name twice fails, so skip it when the cell is re-run.
if dataset_name not in DatasetCatalog.list():
    # Load from aug_path explicitly instead of treating the dataset name as a folder.
    DatasetCatalog.register(dataset_name, lambda dataset_folder=aug_path: load_json_labels(dataset_folder))
    MetadataCatalog.get(dataset_name).set(thing_classes=target_classes)

In [None]:
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
# Fine-tuning run: start from the saved RetinaNet config and checkpoint, train
# on the "aug" dataset and evaluate on "validation".
aug_cfg=get_cfg()
aug_cfg.merge_from_file("./retinanet_model_final_config.yaml")
aug_cfg.MODEL.WEIGHTS = "./retinanet_model_final.pth"
aug_cfg.DATASETS.TRAIN = ("aug",)
aug_cfg.DATASETS.TEST = ("validation",)
aug_cfg.OUTPUT_DIR = './output_aug/'
aug_cfg.DATALOADER.NUM_WORKERS = 2
aug_cfg.SOLVER.IMS_PER_BATCH = 2
aug_cfg.SOLVER.BASE_LR = 0.00125
aug_cfg.SOLVER.MAX_ITER = 100
# NOTE(review): ROI_HEADS settings presumably do not affect a RetinaNet model -- confirm.
aug_cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
aug_cfg.MODEL.RETINANET.NUM_CLASSES = len(target_classes)
# The output folder has to exist before the trainer is created.
os.makedirs(aug_cfg.OUTPUT_DIR, exist_ok=True)
aug_trainer = DefaultTrainer(aug_cfg)
# resume=False: load aug_cfg.MODEL.WEIGHTS rather than resuming from a checkpoint in OUTPUT_DIR.
aug_trainer.resume_or_load(resume=False) 
aug_trainer.train()

In [None]:
# Point the config at the checkpoint expected in the fine-tuning output folder
# and build a predictor from it.
aug_cfg.MODEL.WEIGHTS = os.path.join(aug_cfg.OUTPUT_DIR, "model_final.pth") 
# NOTE(review): a RetinaNet model presumably reads MODEL.RETINANET.SCORE_THRESH_TEST
# rather than this ROI_HEADS key -- confirm the 0.2 threshold is applied.
aug_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.2 
aug_cfg.DATASETS.TEST = ("validation",) 
aug_predictor = DefaultPredictor(aug_cfg)

In [None]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
# Evaluate the fine-tuned model on the registered "validation" dataset;
# evaluator output goes to ./output_aug/.
aug_evaluator = COCOEvaluator(dataset_name="validation", 
                          cfg=aug_cfg, 
                          distributed=False, 
                          output_dir="./output_aug/")

aug_val_loader = build_detection_test_loader(aug_cfg, 
                                         dataset_name="validation")
# Uses the trainer's in-memory model rather than aug_predictor.
inference_on_dataset(model=aug_trainer.model, 
                     data_loader=aug_val_loader, 
                     evaluator=aug_evaluator)

In [None]:
# Plot the top three predictions for each sample photo.
# The first call used to go through make_inference2, which is only defined in
# a later cell and therefore fails when the notebook is run top to bottom;
# all three photos now use make_inference with the same settings.
for image_file in ("bathroom.jpeg", "kitchen.jpeg", "bedroom.jpeg"):
    make_inference(image_file,
                   model_config="./retinanet_model_final_config.yaml",
                   model_weights="./retinanet_model_final.pth",
                   n=3)

In [None]:
# str_to_id
# Amenity classes grouped by the kind of room they are associated with here.
# A class may appear in more than one group.
kitchen = {
    'Coffeemaker',
    'Countertop',
    'Dishwasher',
    'Gas stove',
    'Kitchen & dining room table',
    'Microwave oven',
    'Oven',
    'Refrigerator',
    'Sink',
    'Wine rack',
}
bedroom = {
    'Bed',
    'Ceiling fan',
    'Mirror',
    'Pillow',
    'Sofa bed',
    'Television',
}
living_room = {
    'Ceiling fan',
    'Couch',
    'Fireplace',
    'Kitchen & dining room table',
    'Pillow',
    'Sofa bed',
    'Television',
}
bathroom = {
    'Bathtub',
    'Countertop',
    'Mirror',
    'Toilet',
    'Towel',
}
outdoor = {
    'Fountain',
    'Porch',
    'Swimming pool',
    'Tree house',
}

In [None]:
def calcAP(room):
    """Return the mean of the global ``AP`` values over the class names in ``room``."""
    total = 0
    for amen in room:
        total += AP[amen]
    return total / len(room)
print(f"The AP of the kitchen is {calcAP(kitchen)}")
print(f"The AP of the bedroom is {calcAP(bedroom)}")
print(f"The AP of the living room is {calcAP(living_room)}")
print(f"The AP of the bathroom is {calcAP(bathroom)}")
print(f"The AP of outdoor amenities is {calcAP(outdoor)}")

In [None]:
def make_inference2(image, model_config, model_weights, threshold=0.5, n=5, class_id=12):
    """Run a detectron2 model on one image and plot only the predictions of one class.

    All predicted instances are printed first; the plot then shows just the
    instances whose predicted class equals ``class_id``.

    Args:
        image: path of the image file, read with OpenCV.
        model_config: path of the YAML config merged over detectron2's defaults.
        model_weights: path of the model checkpoint.
        threshold: minimum score a detection needs to be kept at test time.
        n: accepted so the signature matches ``make_inference``; it is not
            used to limit the number of drawn instances.
        class_id: category id to keep; the default 12 is
            "Kitchen & dining room table" in ``str_to_id``.

    Raises:
        OSError: if ``image`` cannot be read.
    """
    cfg = get_cfg()
    cfg.merge_from_file(model_config)
    cfg.MODEL.WEIGHTS = model_weights
    # detectron2 has no MODEL.SCORE_THRESH_TEST key; the test-time threshold
    # lives under the head-specific nodes, so set both of them.
    cfg.MODEL.RETINANET.SCORE_THRESH_TEST = threshold
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = threshold
    predictor = DefaultPredictor(cfg)

    img = cv2.imread(image)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise OSError(f"Could not read image: {image}")

    # OpenCV loads BGR; the Visualizer wants RGB, hence the channel reversal.
    visualizer = Visualizer(img_rgb=img[:, :, ::-1],
                            metadata=MetadataCatalog.get(cfg.DATASETS.TEST[0]),
                            scale=0.7)

    outputs = predictor(img)
    print(outputs['instances'])
    instances = outputs['instances']
    selected = instances[instances.pred_classes == class_id]

    drawn = visualizer.draw_instance_predictions(selected.to("cpu"))
    # The drawn image is already RGB, which is what matplotlib expects, so it
    # is shown as-is (reversing the channels again would swap red and blue).
    pred_image = drawn.get_image()
    plt.figure(figsize=(20, 20))
    plt.imshow(pred_image)
    plt.show()
# Same photo through both helpers: make_inference draws the first three
# predictions, make_inference2 draws only the class-12 predictions.
make_inference("kitchen.jpeg",
               model_config="./retinanet_model_final_config.yaml",
               model_weights="./retinanet_model_final.pth",
               n=3)
make_inference2("kitchen.jpeg",
               model_config="./retinanet_model_final_config.yaml",
               model_weights="./retinanet_model_final.pth",
               n=3)
# Display the id -> class-name table for reference.
id_to_str