## Clone dataset and files from GitHub

In [None]:
# Clone the project repository; later cells expect its contents
# (e.g. requirements.txt, lib/, scripts/) in the working directory.
!git clone "https://github.com/la-zu-li/pca-measuring-c3s-clinker"

In [None]:
!mv pca-measuring-c3s-clinker/* ./
!rmdir pca-measuring-c3s-clinker
!rm -rf *.ipynb maskrcnn

## Required packages and libs

In [None]:
# Install detectron2 directly from its GitHub repository.
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

In [None]:
!pip instal -r requirements.txt
!pip install roboflow

In [None]:
# COMMON LIBRARIES
import os
import sys
import json
import itertools
import cv2 as cv
import numpy as np
import pickle as pkl
from time import time

from datetime import datetime
from google.colab.patches import cv2_imshow

# DATA SET PREPARATION AND LOADING
from detectron2.data.datasets import register_coco_instances
from detectron2.data import DatasetCatalog, MetadataCatalog

# VISUALIZATION
from detectron2.utils.visualizer import Visualizer
from detectron2.utils.visualizer import ColorMode

# NETWORK CONFIGURATION
from detectron2 import model_zoo
from detectron2.config import get_cfg

# EVALUATION
from detectron2.engine import DefaultPredictor

# TRAINING
from detectron2.engine import DefaultTrainer

## Dataset

### Register

In [None]:
# Prefix used when registering the dataset splits, the directory holding the
# splits (named after the dataset), and the annotation file name looked up
# inside every split directory.
DATA_SET_NAME = "c3s-clinker-dataset"
DATA_SET_LOCATION = f"./{DATA_SET_NAME}"
ANN_FILE_NAME = "_annotations.coco.json"

In [None]:
# Register every split as a COCO-format dataset and remember, per split, the
# registered name (DS_NAMES) and the directory it lives in (DS_PATHS).
DS_NAMES = {}
DS_PATHS = {}

for split in ("train", "test", "valid"):
    split_dir = os.path.join(DATA_SET_LOCATION, split)
    registered_name = f"{DATA_SET_NAME}-{split}"

    DS_PATHS[split] = split_dir
    DS_NAMES[split] = registered_name

    register_coco_instances(
        name=registered_name,
        metadata={},
        json_file=os.path.join(split_dir, ANN_FILE_NAME),
        image_root=split_dir,
    )

### Check

In [None]:
# Show the registered dataset names that start with this project's prefix.
[name for name in MetadataCatalog.list() if name.startswith(DATA_SET_NAME)]

In [None]:
# Draw the annotations of the first training sample as a visual sanity check.
metadata = MetadataCatalog.get(DS_NAMES["train"])
dataset_train = DatasetCatalog.get(DS_NAMES["train"])
dataset_entry = dataset_train[0]

# The channel axis is reversed before handing the image to the Visualizer and
# reversed again before display.
image = cv.imread(dataset_entry["file_name"])
visualizer = Visualizer(
    image[..., ::-1],
    metadata=metadata,
    scale=0.8,
    instance_mode=ColorMode.IMAGE_BW,
)
out = visualizer.draw_dataset_dict(dataset_entry)

cv2_imshow(out.get_image()[..., ::-1])

## Train

### Configure

In [None]:
# HYPERPARAMETERS
ARCHITECTURE = "mask_rcnn_R_101_FPN_3x"
CONFIG_FILE_PATH = f"COCO-InstanceSegmentation/{ARCHITECTURE}.yaml"
MAX_ITER = 4000
EVAL_PERIOD = 200
BASE_LR = 0.001
BATCH_SIZE = 2

# detectron2's MODEL.ROI_HEADS.NUM_CLASSES is the number of foreground classes;
# the background class must NOT be counted.
# Ex: 3 classes dataset --> N_CLASSES = 3
# `thing_classes` is only filled in once the COCO annotation file has been
# loaded, so load the training split here instead of depending on an earlier
# cell having done it.
DatasetCatalog.get(DS_NAMES["train"])
CLASSES = MetadataCatalog.get(DS_NAMES["train"]).thing_classes
N_CLASSES = len(CLASSES)

# OUTPUT DIR: <dataset name>/<architecture>, created if it does not exist yet
OUTPUT_DIR_PATH = os.path.join(
    DATA_SET_NAME,
    ARCHITECTURE
)
os.makedirs(OUTPUT_DIR_PATH, exist_ok=True)

In [None]:
# Start from the model-zoo config and checkpoint of the chosen architecture.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(CONFIG_FILE_PATH))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(CONFIG_FILE_PATH)

# Data: which registered splits to train / test on and how to load them.
cfg.DATASETS.TRAIN = (DS_NAMES["train"],)
cfg.DATASETS.TEST = (DS_NAMES["test"],)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.INPUT.MASK_FORMAT = "bitmask"

# Model heads.
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64
cfg.MODEL.ROI_HEADS.NUM_CLASSES = N_CLASSES

# Optimisation schedule and periodic evaluation.
cfg.SOLVER.IMS_PER_BATCH = BATCH_SIZE
cfg.SOLVER.BASE_LR = BASE_LR
cfg.SOLVER.MAX_ITER = MAX_ITER
cfg.TEST.EVAL_PERIOD = EVAL_PERIOD

# Where checkpoints and logs are written.
cfg.OUTPUT_DIR = OUTPUT_DIR_PATH

### Start training

In [None]:
# Train the model and report the wall-clock time of the whole run
# (trainer construction and weight loading included).
start = time()

trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)  # do not resume a previous run
trainer.train()

end = time()
elapsed = end - start
print(f"Training took: {elapsed} seconds")

### Training metrics

In [None]:
# Look at training curves in tensorboard:
# Load the TensorBoard notebook extension and point it at the training
# output directory (OUTPUT_DIR_PATH).
%load_ext tensorboard
%tensorboard --logdir $OUTPUT_DIR_PATH

### Create predictor with trained weights

In [None]:
# Build a predictor that reads its weights from model_final.pth in the
# training output directory.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # confidence threshold
cfg.MODEL.WEIGHTS = os.path.join(OUTPUT_DIR_PATH, "model_final.pth")
predictor = DefaultPredictor(cfg)

## Model Evaluation with COCO metrics

In [None]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

# Run the trained model over the validation split and score it with the
# COCO evaluator; evaluator output goes to the training output directory.
val_loader = build_detection_test_loader(cfg, DS_NAMES["valid"])
evaluator = COCOEvaluator(DS_NAMES["valid"], output_dir=OUTPUT_DIR_PATH)
predictions = inference_on_dataset(predictor.model, val_loader, evaluator)

## Model Inference

In [None]:
# Run the predictor over every test image and measure the inference time.
dataset_test = DatasetCatalog.get(DS_NAMES["test"])
metadata_test = MetadataCatalog.get(DS_NAMES["test"])

# Collect file names and image ids, then read all images up front so that
# disk I/O is excluded from the timed section.
img_filenames = []
img_ids = []
for record in dataset_test:
    img_filenames.append(record["file_name"])
    img_ids.append(record["image_id"])
images = [cv.imread(path) for path in img_filenames]

start = time()
instances = [predictor(img)["instances"] for img in images]
end = time()

elapsed = end - start
print(f"Inference took: {elapsed} seconds")

Visualize

In [None]:
# Show the predicted instances drawn on top of every test image.
for img, ins in zip(images, instances):
    kwargs = {
        "img_rgb": img[:,:,::-1],
        # The images come from the test split, so use the test split's
        # metadata rather than the training split's.
        "metadata": metadata_test,
        "scale": 0.8
    }
    visualizer = Visualizer(**kwargs)
    pred = visualizer.draw_instance_predictions(ins.to("cpu"))
    # Reverse the channel axis again before display.
    pred_img = pred.get_image()[:, :, ::-1]
    cv2_imshow(pred_img)

In [None]:
# Map every test image id to its predicted masks, moved to the CPU and
# converted to a NumPy array.
dt_masks = {
    img_id: ins.pred_masks.to("cpu").numpy()
    for img_id, ins in zip(img_ids, instances)
}

## Crystal Measurement

In [None]:
from lib.measure_methods import longest_diagonal_pca

# For every image, measure each detected mask along its longest outer contour.
dt_measures = {}
for img_id, dt_mask in dt_masks.items():
    measures = []
    for dt_mask_ins in dt_mask:
        contours, _ = cv.findContours(
            dt_mask_ins.astype(np.uint8),
            cv.RETR_EXTERNAL,
            cv.CHAIN_APPROX_NONE,
        )
        # A mask without any foreground pixel yields no contour at all;
        # max() on an empty sequence would raise ValueError, so skip it.
        if not contours:
            continue

        # Keep the contour with the most points.
        contour = max(contours, key=len)
        points = contour.squeeze()

        # A single-point contour squeezes to a 1-D array; only 2-D point
        # arrays are measured.
        if points.ndim != 2:
            continue
        measures.append(longest_diagonal_pca(points))
    dt_measures[img_id] = measures

Visualize measurement lines

In [None]:
# Show every test image with its measurement lines and predicted instances.
for img_id, img, ins in zip(img_ids, images, instances):
    # cv.line draws in place: work on a copy so the cached `images` stay
    # untouched when inference or another visualisation cell is re-run.
    canvas = img.copy()

    # draw measurement lines; each measure unpacks into two end points and a
    # length (the length is not needed for drawing). Distinct loop names keep
    # the `start`/`end` timing variables of earlier cells intact.
    for line_start, line_end, _length in dt_measures[img_id]:
        canvas = cv.line(canvas, line_start, line_end, color=(0,255,0), thickness=2)

    # draw prediction, using the metadata of the split the images come from
    kwargs = {
        "img_rgb": canvas[:,:,::-1],
        "metadata": metadata_test,
        "scale": 0.8
    }
    visualizer = Visualizer(**kwargs)
    pred = visualizer.draw_instance_predictions(ins.to("cpu"))
    pred_img = pred.get_image()[:,:,::-1]
    pred_img = np.array(pred_img)

    cv2_imshow(pred_img)

Export detection data as PKL

In [None]:
# Serialise the detected masks with the highest pickle protocol available.
with open("dt_masks.pkl", "wb") as pkl_file:
    pkl.dump(dt_masks, pkl_file, protocol=pkl.HIGHEST_PROTOCOL)

## Evaluation of measurement methods

In [None]:
import pandas as pd
import plotly.express as px
from sklearn.feature_selection import mutual_info_regression

### Running scripts for generating data in dataframe format

In [None]:
# Create the data directory if it is missing.
os.makedirs("data", exist_ok=True)
# Run the project's scripts in the order listed.
# NOTE(review): presumably they write the CSV files read below under data/ — confirm.
%run scripts/all_measures_to_json.py
%run scripts/merge_anns_and_measures.py
%run scripts/evaluate_network_performance.py
%run scripts/evaluate_measuring_performance.py
%run scripts/compute_geometrical_features.py

In [None]:
# Load the feature table; `df_` is the subset of rows whose detected_mask_id
# is non-negative.
df = pd.read_csv("data/geometric_features.csv")
df_ = df.loc[df["detected_mask_id"].ge(0)]

### Model evaluation on IoU metrics and custom graphs

Distribution of max IoU of ground-truth objects

In [None]:
# Histogram of the "iou" column, with the x axis labelled "IoU".
fig = px.histogram(df, x="iou").update_layout(xaxis_title="IoU")
fig.show()

Total number of false positives and false negatives at an IoU threshold of 0.5

In [None]:
# Load the summary table and display it as the cell output.
df_summary = pd.read_csv("data/data_summary.csv")
df_summary

Mutual information between every pair of features (IoU and each shape feature)

In [None]:
# Column names in the dataframe and the matching labels used in the figure.
features = ["iou", "area", "perimeter", "circularity", "aspect_ratio", "rectangularity", "solidity"]
ft_names = ["IoU", "Area", "Perimeter", "Circularity", "Aspect Ratio", "Rectangularity", "Solidity"]

# Build the pairwise mutual-information matrix one column at a time.
# NOTE: no random_state is passed, so the estimates can vary slightly
# between runs.
all_ft_mi = []
for target, label in zip(features, ft_names):
    # MI of every feature (rows) against the current target (column);
    # all features are treated as continuous.
    result = mutual_info_regression(df[features], df[target], discrete_features=False)
    all_ft_mi.append(pd.DataFrame(result, columns=[label], index=ft_names))

# Keep the matrix numeric (rounded to the two displayed decimals) so the
# heatmap receives numbers rather than strings, and let plotly format the
# cell text instead of using the deprecated DataFrame.applymap.
ft_mi = pd.concat(all_ft_mi, axis=1).round(2)

px.imshow(ft_mi, text_auto=".2f").show()

### Measurement methods evaluation

Compute each method's relative error with respect to the manual (ground-truth) measurements

In [None]:
algorithms = ["pca", "lr", "centroid", "brute-force"]

# Relative error of every method against the ground-truth measure,
# one column per method.
ground_truth = df_["ground-truth_measure"]
errors = pd.DataFrame({
    alg: (ground_truth - df_[f"{alg}_measure"]).abs() / ground_truth
    for alg in algorithms
})

# Per-method timings, scaled by 1000 (reported as milliseconds below).
times = pd.DataFrame({alg: df_[f"{alg}_time"] for alg in algorithms})
times *= 1000

Box-plot with error for each method

In [None]:
# One box per method, with the value axis labelled "Error" and no x-axis title.
fig = px.box(errors, color_discrete_sequence=["coral"], labels={"value": "Error"})
fig.update_layout(xaxis_title=None)
fig.show()

Summary of the error and run time of each method

In [None]:
# Mean and standard deviation of error (in percent) and of time per method.
summary = pd.DataFrame({
    "average error": errors.mean() * 100,
    "error STD": errors.std() * 100,
    "average time (ms)": times.mean(),
    "time STD (ms)": times.std(),
})

# Render every cell as text with one decimal; error columns get a % sign.
for column in summary.columns:
    template = "{:.1f}%" if "error" in column else "{:.1f}"
    summary[column] = summary[column].apply(template.format)

summary