# This notebook is used to generate a CSV file.
## The CSV stores the name of each test image together with information about its predictions (BBOX, LABEL, SCORE).
### The CSV is meant to be consumed by ensemble.py (for multiple predictors - see 2nd to last cell) or simply by labels_to_text.py (single predictor - see last cell).

In [1]:
# Import Packages
import os
import pandas as pd
import random
import cv2
import json

In [2]:
import torch, torchvision
torch.__version__
!gcc --version

gcc (Ubuntu 5.4.0-6ubuntu1~16.04.12) 5.4.0 20160609
Copyright (C) 2015 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.



In [3]:
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import cv2
import random
import os
import pandas as pd

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

In [4]:
# Folder names of the two dataset splits; both are passed to register_datasets() further down.
train_path = "train"
valid_path = "validation"

In [5]:
# Amenity classes Airbnb mostly cares about, stored in sorted order
subset = sorted([
    "Toilet",
    "Swimming_pool",
    "Bed",
    "Billiard_table",
    "Sink",
    "Fountain",
    "Oven",
    "Ceiling_fan",
    "Television",
    "Microwave_oven",
    "Gas_stove",
    "Refrigerator",
    "Kitchen_&_dining_room_table",
    "Washing_machine",
    "Bathtub",
    "Stairs",
    "Fireplace",
    "Pillow",
    "Mirror",
    "Shower",
    "Couch",
    "Countertop",
    "Coffeemaker",
    "Dishwasher",
    "Sofa_bed",
    "Tree_house",
    "Towel",
    "Porch",
    "Wine_rack",
    "Jacuzzi",
])

In [6]:
# Swap underscores for spaces (in place) so the Airbnb class names match those from the website
subset[:] = [class_name.replace("_", " ") for class_name in subset]
print(subset)

['Bathtub', 'Bed', 'Billiard table', 'Ceiling fan', 'Coffeemaker', 'Couch', 'Countertop', 'Dishwasher', 'Fireplace', 'Fountain', 'Gas stove', 'Jacuzzi', 'Kitchen & dining room table', 'Microwave oven', 'Mirror', 'Oven', 'Pillow', 'Porch', 'Refrigerator', 'Shower', 'Sink', 'Sofa bed', 'Stairs', 'Swimming pool', 'Television', 'Toilet', 'Towel', 'Tree house', 'Washing machine', 'Wine rack']


In [7]:
# Import CV2 for getting height & width of image
import cv2

# Import Detectron2 BoxMode for bounding boxes style
from detectron2.structures import BoxMode

In [8]:
def load_json_labels(image_folder):
    """
    Returns Detectron2 style labels of images in image_folder based on JSON label file in image_folder.

    The folder is scanned for file names ending in ".json". If more than one is
    present, the last one yielded by os.listdir() is used.

    TODO -- Maybe create some verbosity here? AKA, what are the outputs?
    TODO -- what if annotations = None? Can we create a call to create an annotations CSV in 1 hit?

    Params
    ------
    image_folder (str): target folder containing images and the JSON label file

    Returns
    -------
    The parsed content of the JSON label file, with "bbox_mode" of every entry in
    each image's "annotations" set to BoxMode.XYXY_ABS.

    Raises
    ------
    FileNotFoundError: if image_folder contains no ".json" file
    """
    # Start from None so a folder without any label file is detected explicitly
    # instead of failing later with an UnboundLocalError.
    json_file = None
    for file in os.listdir(image_folder):
        if file.endswith(".json"):
            json_file = os.path.join(image_folder, file)

    if json_file is None:
        raise FileNotFoundError(
            "No .json label file found in '{}', please make one with annots_to_json()".format(image_folder)
        )

    with open(json_file, "r") as f:
        img_dicts = json.load(f)

    # Convert bbox_mode to Enum of BoxMode.XYXY_ABS (doesn't work loading normal from JSON)
    for img_dict in img_dicts:
        for annot in img_dict["annotations"]:
            annot["bbox_mode"] = BoxMode.XYXY_ABS

    return img_dicts

In [None]:
# Load the label dicts of both splits, reusing the folder names stored in
# valid_path / train_path so they cannot drift from the registered datasets.
valid_img_dicts = load_json_labels(valid_path)
train_img_dicts = load_json_labels(train_path)

In [None]:
from detectron2.data import DatasetCatalog, MetadataCatalog

def register_datasets(train_path, valid_path=None, target_classes=None):
    """
    Registers a Detectron2 style dataset from training paths.

    Each given folder is registered in DatasetCatalog under the name of its last
    path component, with load_json_labels(folder) as the loader, and
    target_classes is stored as "thing_classes" in the matching MetadataCatalog entry.

    Params
    ------
    train_path (str) : pathname to training data containing training images
    valid_path (str) : pathname to validation data containing validation images (skipped when None)
    target_classes (list) : class names stored as thing_classes of every registered dataset

    Returns
    -------
    The MetadataCatalog entry of the dataset registered last (validation when
    valid_path is given, training otherwise).
    """
    # TODO - update to accept any kind of path, e.g. not only coffeemaker, maybe could take a dict as input?
    # E.g. {"training": "path/to/training",
    #          "valid": "path/to/valid"}
    for d in [train_path, valid_path]:
        # valid_path is optional; without this guard the default None crashes on the path handling below
        if d is None:
            continue
        # normpath drops a trailing separator so "data/train/" still yields "train"
        dataset_name = os.path.basename(os.path.normpath(d))
        print("Registering: {}".format(dataset_name))
        # d=d binds the current folder now; a plain closure would see only the last loop value
        DatasetCatalog.register(dataset_name, lambda d=d: load_json_labels(d))
        MetadataCatalog.get(dataset_name).set(thing_classes=target_classes)
    return MetadataCatalog.get(dataset_name)

In [None]:
metadata = register_datasets(train_path=train_path,
                             valid_path=valid_path,
                             target_classes=subset)

In [None]:
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg

# Create Different Predictors and Config Files
# RN0  .000125

In [None]:
# Config for predictor RN0: start from get_cfg() and overlay the settings
# stored in configs/RN_000125.yaml.
cfgRN0 = get_cfg()
cfgRN0.merge_from_file(("configs/RN_000125.yaml"))


# NOTE(review): a full DefaultTrainer is built here only to call resume_or_load();
# DefaultPredictor presumably loads the weights named in the config on its own -
# confirm whether the trainer step is needed for inference.
trainer = DefaultTrainer(cfgRN0)
trainer.resume_or_load(resume=True)
predictorRN0 = DefaultPredictor(cfgRN0)

# RN1  .002

In [None]:
# Config for predictor RN1: start from get_cfg() and overlay the settings
# stored in configs/RN_002.yaml.
cfgRN1 = get_cfg()
cfgRN1.merge_from_file(("configs/RN_002.yaml"))


# NOTE(review): `trainer` is rebound here, replacing the trainer created for RN0.
# As above, confirm whether the trainer step is needed before building the predictor.
trainer = DefaultTrainer(cfgRN1)
trainer.resume_or_load(resume=True)
predictorRN1 = DefaultPredictor(cfgRN1)

# Continue to create as many configurations and predictors as you want

In [10]:
#converts coordinates to relative

import detectron2.structures.instances as Instances
import cv2


def absolute_to_rel(bbox, height, width):
    """
    Rescales a [x0, y0, x1, y1] box in place from absolute to relative coordinates.

    Params
    ------
    bbox (list): box coordinates [x0, y0, x1, y1]; modified in place
    height (number): image height, divisor for the y coordinates
    width (number): image width, divisor for the x coordinates

    Returns
    -------
    The same bbox object that was passed in, now holding the relative values.
    """
    # Divisors in coordinate order: x0 -> width, y0 -> height, x1 -> width, y1 -> height
    for position, extent in enumerate((width, height, width, height)):
        bbox[position] = bbox[position] / extent
    return bbox


In [11]:
# Generates a dictionary whose keys are id, boxes, scores and classes.
# The boxes value is a list of lists of lists: for each model's prediction,
# the list of box coordinates.

# The scores value is a list of lists: for each model, the list of score values.
# The classes value is a list of lists: for each model, the list of classes.

def preDict(imgPath, predList):
    """
    Runs every predictor in predList on one image and gathers the results in a dictionary.

    Params
    ------
    imgPath (str): path of the image, read with cv2.imread
    predList (list): predictors; each one is called with the image and must return a
                     dict whose "instances" entry provides pred_boxes.tensor, scores
                     and pred_classes

    Returns
    -------
    dict with the keys
      "id"      : last "/"-separated component of imgPath
      "boxes"   : one list per predictor holding [x0, y0, x1, y1] boxes, with x divided
                  by the image width and y divided by the image height
      "scores"  : one list per predictor holding the scores
      "classes" : one list per predictor holding the class values

    Raises
    ------
    OSError: if cv2.imread could not load imgPath
    """
    img = cv2.imread(imgPath)
    # cv2.imread signals failure by returning None instead of raising
    if img is None:
        raise OSError("Could not read image: {}".format(imgPath))

    # img.shape starts with (height, width)
    height, width = img.shape[:2]

    d = {"id": imgPath.split('/')[-1], "boxes": [], "scores": [], "classes": []}

    for predictor in predList:
        instances = predictor(img)['instances']

        # Move results to the CPU and convert them to plain Python lists
        boxes = instances.pred_boxes.tensor.cpu().numpy().tolist()   # list of lists
        scores = instances.scores.cpu().numpy().tolist()
        classes = instances.pred_classes.cpu().numpy().tolist()

        # Collect the rescaled boxes explicitly rather than relying on in-place mutation
        d["boxes"].append([absolute_to_rel(box, height, width) for box in boxes])
        d["scores"].append(scores)
        d["classes"].append(classes)

    return d

In [None]:
# Quick check of preDict: print its result for every validation image
predictorList = [predictorRN0, predictorRN1, predictorRN2, predictorRN3]
for d in valid_img_dicts:
    # Three newlines: the normal line end plus the separate "\n" that used to be printed afterwards
    print(preDict(d["file_name"], predictorList), end="\n\n\n")
    

In [12]:
import csv
def writeCSV(csvFile, predictors, img_dicts=None):
    """
    Writes the predictions of all given predictors for a set of images to a CSV file.

    One row is written per image, built by preDict(image_dict["file_name"], predictors),
    with the columns id, boxes, scores and classes.

    Params
    ------
    csvFile (str): path of the CSV file to write (overwritten if it exists)
    predictors (list): predictors handed to preDict for every image
    img_dicts (list): image dicts providing a "file_name" key; defaults to the
                      notebook-level valid_img_dicts when None

    I/O errors while writing the file are reported with print and not re-raised.
    """
    if img_dicts is None:
        img_dicts = valid_img_dicts

    rows = [preDict(d["file_name"], predictors) for d in img_dicts]

    csv_columns = ["id", "boxes", "scores", "classes"]

    try:
        # newline='' lets the csv module control line endings (avoids blank rows on Windows)
        with open(csvFile, 'w', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
            writer.writeheader()
            writer.writerows(rows)
    except IOError as err:
        # Best-effort export: report the problem, including its cause, and carry on
        print("I/o error: {}".format(err))

# Writes to output CSV multiple predictions for each image

In [None]:
# Ensemble export: every predictor in the list adds one entry per image to the boxes, scores and classes columns.
# NOTE: predictorRN2 and predictorRN3 are not created in the cells above; build them like predictorRN0/predictorRN1 first.
predictorList = [predictorRN0, predictorRN1, predictorRN2, predictorRN3]
writeCSV("predictions_output.csv", predictorList)

# Writes to output CSV for one prediction for each image

In [None]:
# Single-predictor export: only predictorRN0 is used, so boxes, scores and classes each hold one per-model entry per image.
# NOTE(review): the folder ../customPrecision/mAP presumably has to exist already - confirm; writeCSV prints I/O errors instead of raising them.
predictorList = [predictorRN0]
writeCSV("../customPrecision/mAP/singlePredictionBDM.csv", predictorList)
