In [155]:
from torchvision import ops
import os
from bs4 import BeautifulSoup as bs
from skimage import io

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torchvision import transforms
from tqdm.notebook import tqdm
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights, fasterrcnn_resnet50_fpn_v2
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import math
from sklearn.ensemble import RandomForestRegressor
import pickle

### Image Splitting Functions

In [156]:
def parse_xml(xml):
  label = xml.find_all("name")
  xmin = xml.find_all("xmin")
  ymin = xml.find_all("ymin")
  xmax = xml.find_all("xmax")
  ymax = xml.find_all("ymax")
  min_size = min(len(label), len(xmin), len(ymin), len(xmax), len(ymax))
  for i in range(min_size):
      label[i] = label[i].text
      xmin[i] = xmin[i].text
      ymin[i] = ymin[i].text
      xmax[i] = xmax[i].text
      ymax[i] = ymax[i].text
  df = pd.DataFrame({"label": label[:min_size], "xmin": xmin[:min_size], "ymin": ymin[:min_size], "xmax": xmax[:min_size], "ymax": ymax[:min_size]})
  return df

def clean_up_data(df):
   df = df[df["label"] != "Label Studio"]
#    cat1 = 'adult male'
#    cat2 = 'male juvenile / female juvenile / adult female'
#    cat3 = 'pup'

#    cat1_diffs = ["Adult Male", "Adult male", "adult male seal", "Seal"]

#    cat2_diffs = ['juvenile male / juvenile female / adult female',
#                  'male juvenile / female juvenile /adult female',
#                  'Adult Female or Young Male',
#                  'male juvenile/female juvenile/adult female',
#                  'Juvenile male / juvenile female / adult female',
#                  'Juvenile male/ juvenile female/ adult female',
#                  'male juvenile / female juvenile / adulf female',
#                  'adult female seal', '\\\\', ']', '\'']
#    cat3_diffs = ['Pup', 'Juvenile', 'baby seal', 'juvenile seal']
#    for cat1_diff in cat1_diffs:
#        df.loc[df["label"] == cat1_diff, ['label']] = cat1
#    for cat2_diff in cat2_diffs:
#        df.loc[df["label"] == cat2_diff, ['label']] = cat2
#    for cat3_diff in cat3_diffs:
#        df.loc[df["label"] == cat3_diff, ['label']] = cat3
   return df

def get_bb(in_path, xml):
   df = pd.DataFrame()
   i = 0
   for x in xml:
      f = open(in_path + x)
      xml_file = bs("".join(f.readlines()), features="xml")
      df_temp = parse_xml(xml_file)
      df_temp.insert(0, "file_num", str(i).zfill(4))
      df = pd.concat([df, df_temp])
      f.close()
      i+=1
   df = clean_up_data(df)
   return df

def find_bounding_boxes(df_img, x_size, y_size, xmin, ymin, xmax, ymax):
  col_names = list(df_img.columns)
  col_names.append("percent_seal")
  df_bb = pd.DataFrame()
  for row in df_img.itertuples():
    bb_xmin = int(row[3])
    bb_ymin = int(row[4])
    bb_xmax = int(row[5])
    bb_ymax = int(row[6])

    bb_xmin_ = max(bb_xmin - xmin, 0)
    bb_ymin_ = max(bb_ymin - ymin, 0)
    bb_xmax_ = min(bb_xmax - xmin, xmax - xmin)
    bb_ymax_ = min(bb_ymax - ymin, ymax - ymin)
    bb_row = [row[1], row[2], bb_xmin_, bb_ymin_, bb_xmax_, bb_ymax_]

    # if a bounding box was found
    if ((bb_xmin_ >= 0 and bb_xmin_ <= x_size) and
       (bb_ymin_ >= 0 and bb_ymin_ <= y_size) and
       (bb_xmax_ >= 0 and bb_xmax_ <= x_size) and
       (bb_ymax_ >= 0 and bb_ymax_ <= y_size)):
      height = bb_ymax_ - bb_ymin_; length = bb_xmax_ - bb_xmin_
      area = height * length # area of bb in image
      bb_area = (bb_xmax - bb_xmin) * (bb_ymax - bb_ymin) # total area of bb

      if (bb_area == 0): # ignore bounding boxes that don't have any area
        break

      percent_seal_present = area/bb_area # % of bb present in subimage
      # print("original bb coordinates:", row)
      # print("subimage coordinates:", [xmin, ymin, xmax, ymax])
      # print("new bb coordinates:", bb_row)
      # print(percent_seal_present, area, bb_area)
      bb_row.append(percent_seal_present)
      df_bb = pd.concat([df_bb,pd.Series(bb_row, index=col_names).to_frame().T])

  if len(df_bb) == 0:
    df_bb = None
  return df_bb

def split_image(img, df_img, x_size, y_size, x_int, y_int, thresh):
    # _size is the length in each direction
    # _int is the interval you shift right or down by
    # ex. (150, 150, 75, 75) - each split creates a 150x150 images and shifts over by 75 pixels each run through.
    x_len = img.shape[1]
    y_len = img.shape[0]

    seal_count = []
    sub_images = []
    # crops like reading a book
    i = 0
    while (i < y_len):
        # updates the new y coordinates
        y1 = i
        if i + y_size > y_len:
            y1 = y_len-y_size
            y2 = y_len
            i = y_len
        else:
            y2 = i + y_size
            i += y_int

        j = 0
        while (j < x_len):
            # updates the new x coordinates
            x1 = j
            if j + x_size > x_len:
                x1 = x_len - x_size
                x2 = x_len
                j = x_len
            else:
                x2 = j + x_size
                j += x_int
            df_bb = find_bounding_boxes(df_img, x_size, y_size, x1, y1, x2, y2)
            if df_bb is not None and df_bb["percent_seal"].max() > thresh:
                seal_count.append(df_bb.shape[0])
                cropped = img[y1:y2,x1:x2]
                sub_images.append(cropped)
    return sub_images, seal_count

### Prediction Functions

In [157]:
def get_object_detection_model(version, num_classes = 2, feature_extraction = True):
    """
    Inputs
        num_classes: int
            Number of classes to predict. Must include the 
            background which is class 0 by definition!
        feature_extraction: bool
            Flag indicating whether to freeze the pre-trained 
            weights. If set to True the pre-trained weights will be  
            frozen and not be updated during.
    Returns
        model: FasterRCNN
    """
    if version == 2:
        model = fasterrcnn_resnet50_fpn_v2(weights='DEFAULT')
    elif version == 1:
        model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.COCO_V1)    
    # If True, the pre-trained weights will be frozen.
    if feature_extraction == True:
        for p in model.parameters():
            p.requires_grad = False    
    # Replace the original 91 class top layer with a new layer
    # tailored for num_classes.
    in_feats = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_feats, num_classes)    
    return model

def predict(model, image, device="cpu", transform = transforms.Compose([transforms.ToTensor()])):
    image = transform(np.array(image)).unsqueeze(0).type(torch.FloatTensor).to(device)
    pred = model(image)
    return pred

def detach_pred(pred):
    pred["boxes"] = pred["boxes"].detach().cpu()
    pred["scores"] = pred["scores"].detach().cpu()
    pred["labels"] = pred["labels"].detach().cpu()
    return pred

def decode_prediction(prediction, 
                      score_threshold = 0.8, 
                      nms_iou_threshold = 0.2):
    """
    Inputs
        prediction: dict
        score_threshold: float
        nms_iou_threshold: float
    Returns
        prediction: tuple
    """
    boxes = prediction["boxes"]
    scores = prediction["scores"]
    labels = prediction["labels"]    
    # Remove any low-score predictions.
    if score_threshold is not None:
        want = scores > score_threshold
        boxes = boxes[want]
        scores = scores[want]
        labels = labels[want]    
    # Remove any overlapping bounding boxes using NMS.
    if nms_iou_threshold is not None:
        want = torchvision.ops.nms(boxes = boxes, scores = scores, iou_threshold = nms_iou_threshold)
        boxes = boxes[want]
        scores = scores[want]
        labels = labels[want]    
        return boxes.detach().cpu().numpy()
    
def nms_boxes(pred, thresh):
    boxes = pred["boxes"]
    scores = pred["scores"]
    labels = pred["labels"]
    want = torchvision.ops.nms(boxes = boxes, scores = scores, iou_threshold = thresh)
    boxes = boxes[want]
    scores = scores[want]
    labels = labels[want]    
    return len(boxes.detach().cpu().numpy())

### Data Frame Generation Functions

In [158]:
def get_preds(file_name, model, path, device="cpu"):
    img_name = file_name + ".JPG"
    xml_name = file_name + ".xml"
    image = io.imread(path + img_name, plugin="matplotlib")
    xml = get_bb(path, [xml_name])
    sub_images, seal_count = split_image(image, xml, 150, 150, 75, 75, .3)
    preds = []
    for s in sub_images:
        pred = detach_pred(predict(model, s, device)[0])
        preds.append(pred)
    return preds, seal_count

def get_file_names(path):
    return set([x.split(".")[0] for x in os.listdir(path)])

def get_preds_counts(path, model, device):
    preds_counts = {}
    file_names = get_file_names(path)
    for file_name in tqdm(file_names):
        preds_counts[file_name] = get_preds(file_name, model, path, device)
    return preds_counts

def gen_pred_data(d, thresh):
    file_names = []
    actual_count = []
    best_scores = []
    pred_count = []
    boxes_total = []
    boxes_iou = []
    potential_scores = np.arange(0, 1, .05)
    for file_name in tqdm(d.keys()):
        preds, counts = d[file_name]
        for i in range(len(preds)):
            pred = preds[i]
            best_score = 0
            best_error = math.inf
            best_count = 0
            for ps in potential_scores:
                boxes = decode_prediction(pred, ps, thresh)
                error = abs(len(boxes) - counts[i])
                if error < best_error:
                    best_error = error
                    best_score = ps
                    best_count = len(boxes)
            file_names.append(file_name)
            actual_count.append(counts[i])
            best_scores.append(best_score)
            pred_count.append(best_count)
            boxes_total.append(len(pred["boxes"]))
            boxes_iou.append(nms_boxes(pred, thresh))
    return pd.DataFrame({"File Name": file_names, "Actual Count": actual_count, "Best Score": best_scores, "Best Count": pred_count, "Total Boxes": boxes_total, "Boxes IOU": boxes_iou})

def gen_pred_data_eval(d, thresh):
    file_names = []
    boxes_total = []
    boxes_iou = []
    index = []
    for file_name in tqdm(d.keys()):
        preds = d[file_name]
        i = 0
        for i in range(len(preds)):
            pred = preds[i]
            file_names.append(file_name)
            boxes_total.append(len(pred["boxes"]))
            boxes_iou.append(nms_boxes(pred, thresh))
            index.append(i)
            i += 1
    return pd.DataFrame({"File Name": file_names,  "Total Boxes": boxes_total, "Boxes IOU": boxes_iou, "Index": index})

In [159]:
training_image_path = r"C:\Users\kaanan\Desktop\Training, Val, and Test Images\Training Images/"
validation_image_path = r"C:\Users\kaanan\Desktop\Training, Val, and Test Images\Validation Images/"
model_path = r"C:\Users\kaanan\Desktop\RCNN\RCNN Evaluation Information\Trial 2\Models\rcnn_trial2_50"

In [160]:
# Connect to the GPU if one exists.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using: ", device)
torch.cuda.empty_cache()

Using:  cuda


In [161]:
model = get_object_detection_model(2)
model.load_state_dict(torch.load(model_path))
model.eval()
model.to(device)
transform=transforms.Compose([transforms.ToTensor()])

In [162]:
thresh =.05

files = os.listdir(r"C:\Users\kaanan\Desktop\RCNN\Data/")
if "individual_best_scores_train.csv" in files:
    print("Found Training Data")
    train_preds = pd.read_csv(r"C:\Users\kaanan\Desktop\RCNN\Data/individual_best_scores_train.csv")
else:
    print("Training Data not found. Generating new.")
    pred_counts_train = get_preds_counts(training_image_path, model, device)
    train_preds = gen_pred_data(pred_counts_train, thresh)
    print("Training Data generated, writing for future use")
    train_preds.to_csv(r"C:\Users\kaanan\Desktop\RCNN\Data/individual_best_scores_train.csv")

if "individual_best_scores_val.csv" in files:
    print("Found Validation Data")
    val_preds =  pd.read_csv(r"C:\Users\kaanan\Desktop\RCNN\Data/individual_best_scores_val.csv")
else:
    print("Validation Data not found, generating new.")
    pred_counts_val = get_preds_counts(validation_image_path, model, device)
    val_preds = gen_pred_data(pred_counts_val, thresh)
    print("Validation Data generated, saving for future use.")
    val_preds.to_csv(r"C:\Users\kaanan\Desktop\RCNN\Data/individual_best_scores_val.csv")

Found Training Data
Found Validation Data


In [163]:
df_training = pd.read_csv("../Data/training_min_0.05_300_V2")
df_validation = pd.read_csv("../Data/validation_min_0.05_300_V2")

In [164]:
train_preds["Box Left Ratio"] =  train_preds["Boxes IOU"] / train_preds["Total Boxes"]

In [165]:
val_preds["Box Left Ratio"] =  val_preds["Boxes IOU"] / val_preds["Total Boxes"]

In [166]:
image_x = df_training.drop(columns = ["File Name", "Score", "Actual Count", "Unnamed: 0"])
image_x_no_rgb = df_training[["Box Num", "Cluster Num", "Biggest Cluster", "Smallest Cluster"]]
image_y = df_training["Score"]
rf_rgb = RandomForestRegressor(max_samples = .45, random_state=0).fit(image_x, image_y)
rf_no_rgb = RandomForestRegressor(max_samples = .7, random_state=0).fit(image_x_no_rgb, image_y)
df_training["Image Score RGB"] = rf_rgb.predict(image_x)
df_training["Image Score"] = rf_no_rgb.predict(image_x_no_rgb)
df_training["Image Count"] = df_training["Actual Count"]
df_training = df_training.drop(columns=["Unnamed: 0", "Actual Count"])

In [167]:
image_x = df_validation.drop(columns = ["File Name", "Score", "Actual Count", "Unnamed: 0"])
image_x_no_rgb = df_validation[["Box Num", "Cluster Num", "Biggest Cluster", "Smallest Cluster"]]
image_y = df_validation["Score"]
df_validation["Image Score RGB"] = rf_rgb.predict(image_x)
df_validation["Image Score"] = rf_no_rgb.predict(image_x_no_rgb)
df_validation["Image Count"] = df_validation["Actual Count"]
df_validation = df_validation.drop(columns=["Unnamed: 0", "Actual Count"])

In [168]:
combined_training = df_training.merge(train_preds, on= "File Name")
combined_validation = df_validation.merge(val_preds, on= "File Name")

#### With RGB

In [169]:
x_train = combined_training[['Box Num', 'Cluster Num', 'Biggest Cluster','Smallest Cluster', 'Biggest R', 'Biggest G', 'Biggest B', 'Smallest R','Smallest B', 'Smallest G', 'Average R', 'Average B', 'Average G','Image Score RGB','Total Boxes', 'Boxes IOU','Box Left Ratio']]
y_train = combined_training["Best Score"]
x_val = combined_validation[['Box Num', 'Cluster Num', 'Biggest Cluster','Smallest Cluster', 'Biggest R', 'Biggest G', 'Biggest B', 'Smallest R','Smallest B', 'Smallest G', 'Average R', 'Average B', 'Average G','Image Score RGB','Total Boxes', 'Boxes IOU','Box Left Ratio']]
y_val = combined_validation["Best Score"]
x_train["Box Left Ratio"] = x_train['Box Left Ratio'].fillna(0)
x_val["Box Left Ratio"] = x_val['Box Left Ratio'].fillna(0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_train["Box Left Ratio"] = x_train['Box Left Ratio'].fillna(0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_val["Box Left Ratio"] = x_val['Box Left Ratio'].fillna(0)


In [170]:
rf_score_rgb = RandomForestRegressor(random_state=0).fit(x_train, y_train)
pred_scores = rf_score_rgb.predict(x_train)
abs(pred_scores-y_train).mean()

0.056853692506134694

In [171]:
pred_scores = rf_score_rgb.predict(x_val)
abs(pred_scores-y_val).mean()

0.13755912826265604

#### Without RGB

In [172]:
x_train = combined_training[['Box Num', 'Cluster Num', 'Biggest Cluster','Smallest Cluster', "Image Score", 'Total Boxes', 'Boxes IOU','Box Left Ratio']]
y_train = combined_training["Best Score"]
x_val = combined_validation[['Box Num', 'Cluster Num', 'Biggest Cluster','Smallest Cluster', "Image Score", 'Total Boxes', 'Boxes IOU','Box Left Ratio']]
y_val = combined_validation["Best Score"]
x_train["Box Left Ratio"] = x_train['Box Left Ratio'].fillna(0)
x_val["Box Left Ratio"] = x_val['Box Left Ratio'].fillna(0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_train["Box Left Ratio"] = x_train['Box Left Ratio'].fillna(0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_val["Box Left Ratio"] = x_val['Box Left Ratio'].fillna(0)


In [173]:
rf_score_no_rgb = RandomForestRegressor(random_state=0).fit(x_train, y_train)
pred_scores = rf_score_no_rgb.predict(x_train)
abs(pred_scores-y_train).mean()

0.05730548465634972

In [174]:
pred_scores = rf_score_no_rgb.predict(x_val)
abs(pred_scores-y_val).mean()

0.13771415026046066

### Getting Count of Each Image

In [175]:
with open(r"C:\Users\kaanan\Desktop\RCNN\MetaData\training_preds_total_V2", "rb") as f:
    actual_training_preds = pickle.load(f)
with open(r"C:\Users\kaanan\Desktop\RCNN\MetaData\validation_preds_total_V2", "rb") as f:
    actual_val_preds = pickle.load(f)

In [176]:
train_eval = gen_pred_data_eval(actual_training_preds, thresh)
val_eval = gen_pred_data_eval(actual_val_preds, thresh)
train_eval["Box Left Ratio"] =  train_eval["Boxes IOU"] / train_eval["Total Boxes"]
val_eval["Box Left Ratio"] =  val_eval["Boxes IOU"] / val_eval["Total Boxes"]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

In [177]:
combined_training_eval = df_training.merge(train_eval, on= "File Name")
combined_validation_eval = df_validation.merge(val_eval, on= "File Name")

In [178]:
x_train = combined_training_eval[['Box Num', 'Cluster Num', 'Biggest Cluster','Smallest Cluster', "Image Score", 'Total Boxes', 'Boxes IOU','Box Left Ratio']]
x_val = combined_validation_eval[['Box Num', 'Cluster Num', 'Biggest Cluster','Smallest Cluster', "Image Score", 'Total Boxes', 'Boxes IOU','Box Left Ratio']]
x_train["Box Left Ratio"] = x_train['Box Left Ratio'].fillna(0)
x_val["Box Left Ratio"] = x_val['Box Left Ratio'].fillna(0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_train["Box Left Ratio"] = x_train['Box Left Ratio'].fillna(0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_val["Box Left Ratio"] = x_val['Box Left Ratio'].fillna(0)


In [179]:
combined_training_eval["Predicted Score"] = rf_score_no_rgb.predict(x_train)
combined_validation_eval["Predicted Score"] = rf_score_no_rgb.predict(x_val)

In [180]:
actual_counts_training = combined_training_eval[["File Name", "Image Count"]].drop_duplicates()
actual_counts_validation = combined_validation_eval[["File Name", "Image Count"]].drop_duplicates()

In [181]:
def get_sub_image_counts(df, preds, thresh):
    counts = []
    for i in range(df.shape[0]):
        row = df.iloc[i, :]
        file_name = row["File Name"]
        index = row["Index"]
        pred_score = row["Predicted Score"]
        counts.append(len(decode_prediction(preds[file_name][index], pred_score, thresh)))
    return counts


In [182]:
combined_training_eval["Pred Counts"] = get_sub_image_counts(combined_training_eval, actual_training_preds, thresh)
combined_validation_eval["Pred Counts"] = get_sub_image_counts(combined_validation_eval, actual_val_preds, thresh)

In [183]:
training_counts = combined_training_eval.groupby("File Name").sum().reset_index()[["File Name", "Pred Counts"]].merge(actual_counts_training, on="File Name")
training_counts["Count Diff"] = abs(training_counts["Pred Counts"] - training_counts["Image Count"])
training_counts["Percent Diff"] = (training_counts["Count Diff"] / training_counts["Image Count"]).fillna(0)
training_counts["Percent Diff"].mean()

0.7085822683527391

In [184]:
val_counts = combined_validation_eval.groupby("File Name").sum().reset_index()[["File Name", "Pred Counts"]].merge(actual_counts_validation, on="File Name")
val_counts["Count Diff"] = abs(val_counts["Pred Counts"] - val_counts["Image Count"])
val_counts["Percent Diff"] = (val_counts["Count Diff"] / val_counts["Image Count"]).fillna(0)
val_counts["Percent Diff"].mean()

0.8786181077296269