# Solar Panel Detection using Faster RCNN

In [None]:
# !pip install pyyaml==5.1
# !pip install torch==1.9.0+cu102 torchvision==0.10.0+cu102 -f https://download.pytorch.org/whl/torch_stable.html

# Install detectron2 that matches the above pytorch version
# !pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html
# exit(0)  # After installation, you need to "restart runtime" in Colab. This line can also restart runtime

In [None]:
# Check the PyTorch installation: print the installed torch version and
# whether a CUDA device is usable from this kernel.
import torch, torchvision
print(torch.__version__, torch.cuda.is_available())
# assert torch.__version__.startswith("1.9")   # please manually install torch 1.9 if Colab changes its default version

In [None]:
# Assumption: matplotlib, numpy, opencv, Pillow and tqdm are installed

In [None]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import glob
import matplotlib.pyplot as plt
import numpy as np
import os, json, cv2, random
from PIL import Image
import tqdm

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

In [None]:
import random

# One seed for every RNG library this notebook imports. Seeding only the
# stdlib `random` module leaves NumPy and PyTorch drawing from unseeded
# generators, so runs would still differ from each other.
SEED = 1364
torch.manual_seed(SEED)  # PyTorch RNG
np.random.seed(SEED)     # NumPy global RNG
random.seed(SEED)        # stdlib RNG: drives random.shuffle / random.sample below

# Train on custom datasets

### Download the datasets

The solar_array dataset for 4 different cities can be found in the following link/article:
https://www.nature.com/articles/sdata2016106#ref-CR40

Four different aerial datasets:
https://figshare.com/articles/dataset/Fresno_Aerial_USGS_Imagery_from_the_Distributed_Solar_Photovoltaic_Array_Location_and_Extent_Data_Set/3385828 (**Fresno**, [28.7GB](https://ndownloader.figshare.com/articles/3385828/versions/1))
https://figshare.com/articles/dataset/Stockton_Aerial_USGS_Imagery_from_the_Distributed_Solar_Photovoltaic_Array_Location_and_Extent_Data_Set/3385804 (**Stockton**, [6.5GB](https://ndownloader.figshare.com/articles/3385804/versions/1))
https://figshare.com/articles/dataset/Oxnard_Aerial_USGS_Imagery_from_the_Distributed_Solar_Photovoltaic_Array_Location_and_Extent_Data_Set/3385807 (**Oxnard**, [5GB](https://ndownloader.figshare.com/articles/3385807/versions/1))
https://figshare.com/articles/dataset/Modesto_Aerial_USGS_Imagery_from_the_Distributed_Solar_Photovoltaic_Array_Location_and_Extent_Data_Set/3385789 (**Modesto**, [1.4GB](https://ndownloader.figshare.com/articles/3385789/versions/1))

In this notebook, we use the **Stockton** dataset (figshare article 3385804, the one fetched by the download cell below).

The mask annotations (polygons) for all four datasets are provided by the article:

[Data Descriptor: Distributed solar photovoltaic array location and extent dataset for remote sensing object identification](https://www.nature.com/articles/sdata2016106.pdf)

and can be downloaded from this link:
https://figshare.com/articles/dataset/Distributed_Solar_Photovoltaic_Array_Location_and_Extent_Data_Set_for_Remote_Sensing_Object_Identification/3385780?backTo=/collections/Full_Collection_Distributed_Solar_Photovoltaic_Array_Location_and_Extent_Data_Set_for_Remote_Sensing_Object_Identification/3255643

It contains annotation data in various forms and formats (lat-long coordinates, pixelwise coordinates, csv, json, ...).

In fact we only require the polygon-json dataset (<u>*SolarArrayPolygons.json*</u>) which can be individually downloaded from:

https://ndownloader.figshare.com/files/24115694

#### Or you can just download the required images and annotation files from these two links:

In [None]:
# !wget https://ndownloader.figshare.com/articles/3385804/versions/1
# !wget https://ndownloader.figshare.com/articles/3385780/versions/4

#### Unzip the downloaded files

In [None]:
# !unzip 1 -d solar_array_dataset
# !unzip 4 -d solar_array_dataset_annotations

#### An example ground truth bounding box

In [None]:
# Load one image of the dataset as a NumPy array and display it in full.
img = np.array(Image.open('solar_array_dataset/10sfg465970.tif'))
fig, ax = plt.subplots()
ax.imshow(img)
plt.show()

In [None]:
# Zoom in on rows 4050-4074 / columns 4055-4084 of the image: an example
# bounding box taken from the annotations for this image.
example_box = img[4050:4075, 4055:4085, :]
plt.imshow(example_box)
plt.show()

#### Get the list of image filenames for different splits

(train, val, test): (70%, 15%, 15%) of the images

In [None]:
# glob returns paths in arbitrary, filesystem-dependent order. Sort them so the
# seeded shuffle that follows yields the same train/val/test split everywhere.
filenames = sorted(glob.glob("solar_array_dataset/*.tif"))

In [None]:
# Split sizes: 70% train and 15% val (both rounded down); test takes the rest.
total_size = len(filenames)
train_size, val_size = (int(fraction * total_size) for fraction in (0.7, 0.15))
test_size = total_size - (train_size + val_size)

In [None]:
# Shuffle the file list in place so the splits do not follow listing order.
# Note: the list is mutated, so re-running this cell shuffles the
# already-shuffled list again and changes the resulting splits.
random.shuffle(filenames) 

In [None]:
# Cut the shuffled list into consecutive train / val / test segments.
val_end = train_size + val_size
filenames_split = {
    'train': filenames[:train_size],
    'val': filenames[train_size:val_end],
    'test': filenames[-test_size:],
}

### Register train/val/test datasets (converting arbitrary dataset formats to COCO format)

In [None]:
# if your dataset is in COCO format, this cell can be replaced by the following three lines:
# from detectron2.data.datasets import register_coco_instances
# register_coco_instances("my_dataset_train", {}, "json_annotation_train.json", "path/to/image/dir")
# register_coco_instances("my_dataset_val", {}, "json_annotation_val.json", "path/to/image/dir")

from detectron2.structures import BoxMode

def get_solar_dicts(split):
    """Build the detectron2 dataset records for one split of the solar data.

    Args:
        split (str): key into the module-level ``filenames_split`` dict
            ("train", "val" or "test").

    Returns:
        list[dict]: one record per image file of the split, holding
        ``file_name``, ``image_id``, ``height``, ``width`` and
        ``annotations``. Each annotation carries an absolute XYXY ``bbox``,
        the polygon as a flat ``[x0, y0, x1, y1, ...]`` ``segmentation`` list
        and ``category_id`` 0 (the single "solar" class). Images with no
        matching polygon get an empty ``annotations`` list.
    """
    annotation_path = os.path.join("solar_array_dataset_annotations", "SolarArrayPolygons.json")
    with open(annotation_path) as f:
        imgs_anns = json.load(f)

    # Index the polygons by image name once, instead of rescanning the whole
    # annotation list for every image of the split.
    polygons_by_image = {}
    for item in imgs_anns['polygons']:
        polygons_by_image.setdefault(item['image_name'], []).append(item)

    split_filenames = filenames_split[split]
    dataset_dicts = []

    progress = tqdm.tqdm(split_filenames, total=len(split_filenames), desc=f'{split}_data loading')
    for image_id, filename in enumerate(progress):
        # Only the dimensions are needed here, so take them from the opened
        # image rather than converting all pixels to a NumPy array. The
        # context manager also closes the file handle.
        with Image.open(filename) as img:
            width, height = img.size

        record = {
            "file_name": filename,
            # Must be unique per image: a constant id makes every image of
            # the split indistinguishable to COCO-style evaluation.
            "image_id": image_id,
            "height": height,
            "width": width,
        }

        # The annotation file refers to images by base name without extension.
        filename_id = os.path.splitext(os.path.basename(filename))[0]

        objs = []
        for item in polygons_by_image.get(filename_id, []):
            vertices = item["polygon_vertices_pixels"]
            px = [vertex[0] for vertex in vertices]
            py = [vertex[1] for vertex in vertices]
            # Flatten to [x0, y0, x1, y1, ...] with a half-pixel shift on
            # every coordinate.
            poly = [coord for x, y in zip(px, py) for coord in (x + 0.5, y + 0.5)]

            objs.append({
                "bbox": [np.min(px), np.min(py), np.max(px), np.max(py)],
                "bbox_mode": BoxMode.XYXY_ABS,
                "segmentation": [poly],
                "category_id": 0,  # only one single object class (solar)
            })
        record["annotations"] = objs
        dataset_dicts.append(record)
    return dataset_dicts

# Register every split with detectron2. A name can only be registered once,
# so drop any previous registration first; otherwise re-running this cell
# in the same kernel fails.
for split in ["train", "val", "test"]:
    dataset_name = "solar_" + split
    if dataset_name in DatasetCatalog.list():
        DatasetCatalog.remove(dataset_name)
    # `split=split` binds the current value; a plain closure would see only
    # the last value of the loop variable when the loader is finally called.
    DatasetCatalog.register(dataset_name, lambda split=split: get_solar_dicts(split))
    MetadataCatalog.get(dataset_name).set(thing_classes=["solar"])
solar_train_metadata = MetadataCatalog.get("solar_train")
solar_val_metadata = MetadataCatalog.get("solar_val")
solar_test_metadata = MetadataCatalog.get("solar_test")

In [None]:
# Build the training records and select the matching metadata; both are used
# by the ground-truth visualisation in the next cell.
dataset_dicts = get_solar_dicts('train')
solar_metadata = solar_train_metadata

In [None]:
# Draw the ground-truth annotations on up to three random training images,
# save each rendering as a .tif in the working directory and show it inline.
# min(...) keeps random.sample from raising when fewer than 3 records exist.
for d in random.sample(dataset_dicts, min(3, len(dataset_dicts))):
    with Image.open(d['file_name']) as pil_img:
        img = np.array(pil_img)
    visualizer = Visualizer(img, metadata=solar_metadata, scale=1)
    # Render once and reuse the array for printing, saving and plotting.
    annotated = visualizer.draw_dataset_dict(d).get_image()
    print(annotated.shape)
    image_stem = os.path.splitext(os.path.basename(d['file_name']))[0]
    Image.fromarray(annotated).save(f"sample_gt_{image_stem}.tif")
    fig, ax = plt.subplots(1, 1, figsize=(20, 20))
    ax.imshow(annotated)
    plt.show()
    plt.close(fig)  # release the large figure before the next iteration

## Training

In [None]:
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator
from detectron2.data import build_detection_test_loader, build_detection_train_loader

class CocoTrainer(DefaultTrainer):
    """DefaultTrainer variant that evaluates with a COCOEvaluator."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Return a COCOEvaluator for ``dataset_name``.

        When ``output_folder`` is not given, the local ``eval_dir`` directory
        is created if necessary and used instead.
        """
        if output_folder is None:
            output_folder = "eval_dir"
            os.makedirs(output_folder, exist_ok=True)
        return COCOEvaluator(dataset_name, cfg, False, output_folder)

#### Instantiate a Faster-RCNN config object

In [None]:
from detectron2.engine import DefaultTrainer

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("solar_train",)
# detectron2 defines no DATASETS.VAL option: the periodic evaluation during
# training (TEST.EVAL_PERIOD) runs on DATASETS.TEST. Point DATASETS.TEST at
# the validation split so the held-out test split is not consumed while
# training; evaluate on "solar_test" explicitly once training is finished.
cfg.DATASETS.VAL = ("solar_val",)   # informational only; not read by detectron2
cfg.DATASETS.TEST = ("solar_val",)
cfg.DATALOADER.NUM_WORKERS = 2
# Start from the COCO-pretrained weights of the same architecture.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0025  # pick a good LR
cfg.SOLVER.MAX_ITER = 20001
cfg.TEST.EVAL_PERIOD = 1000  # run the evaluator every 1000 iterations
cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only has one class (solar). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrectly use num_classes+1 here.

#### Actual training

In [None]:
%%time
# Make sure the output directory exists, build the trainer from cfg and train.
# resume=False: presumably starts from cfg.MODEL.WEIGHTS rather than from a
# checkpoint left in cfg.OUTPUT_DIR; confirm against the detectron2 docs.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = CocoTrainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()

## Inference/Evaluation

In [None]:
# Inference reuses the training config: cfg already contains everything set
# above, and only the weights path and the score threshold are changed here.
# Note that cfg is mutated in place, so cfg.MODEL.WEIGHTS no longer points at
# the pretrained checkpoint after this cell has run.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3   # set a custom testing threshold
predictor = DefaultPredictor(cfg)

In [None]:
# import the COCO Evaluator to use the COCO Metrics
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

# Score the trained model on the validation split with the COCO metrics;
# the evaluator is given "output/" as its output directory.
evaluator = COCOEvaluator("solar_val", cfg, False, output_dir="output/")
val_loader = build_detection_test_loader(cfg, "solar_val")

# Run the predictor's underlying model over the loader. The returned metrics
# are the last expression of the cell, so they are displayed.
val_results = inference_on_dataset(predictor.model, val_loader, evaluator)
val_results

#### Sample Visualisation

In [None]:
# Rebuild the training records and metadata for the prediction samples.
# Note: this is the *train* split, so the predictions drawn from
# dataset_dicts are on images the model was trained on, not held-out data.
dataset_dicts = get_solar_dicts('train')
solar_metadata = solar_train_metadata

In [None]:
from detectron2.utils.visualizer import ColorMode

# Run the trained model on up to three random images from dataset_dicts, draw
# the predicted boxes, save each rendering as a .tif and show it inline.
# min(...) keeps random.sample from raising when fewer than 3 records exist.
for d in random.sample(dataset_dicts, min(3, len(dataset_dicts))):
    with Image.open(d["file_name"]) as pil_img:
        # Force 3-channel RGB, the channel order the Visualizer expects.
        img = np.array(pil_img.convert("RGB"))
    # DefaultPredictor takes its input in BGR order and converts to the
    # model's configured format itself. Feeding it the RGB array would swap
    # the red and blue channels, so flip the channel axis first.
    img_bgr = np.ascontiguousarray(img[:, :, ::-1])
    outputs = predictor(img_bgr)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
    print(outputs["instances"])
    v = Visualizer(img,
                   metadata=solar_metadata,
                   scale=1,
                   instance_mode=ColorMode.IMAGE
    )
    # Render once and reuse the array for both saving and plotting.
    rendered = v.draw_instance_predictions(outputs["instances"].to("cpu")).get_image()
    image_stem = os.path.splitext(os.path.basename(d['file_name']))[0]
    Image.fromarray(rendered).save(f"sample_pred_fasterrcnn_{image_stem}.tif")
    plt.imshow(rendered)
    plt.show()