This notebook analyses every frame of a film, detecting and classifying the animated faces with the trained detection and classification models. Film frames are sourced from the website Animation Screencaps.

To use Google Colab change the domain from 'github.com' to 'githubtocolab.com'. The notebook will open in Colab.

In [None]:
# Install the CUDA 11.1 builds of PyTorch 1.10.1, torchvision 0.11.2 and torchaudio 0.10.1
# from the PyTorch stable wheel index.
!pip install torch==1.10.1+cu111 torchvision==0.11.2+cu111 torchaudio==0.10.1 -f https://download.pytorch.org/whl/torch_stable.html
# Pin PyYAML to 5.1; the installer output is discarded.
!pip install pyyaml==5.1  > /dev/null
# Install the prebuilt Detectron2 wheel from the index matching CUDA 11.1 / torch 1.10.
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu111/torch1.10/index.html

In [None]:
# Mount Google Drive at /content/drive; the film archives and the trained
# detector/classifier archives are read from there by the following cell.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Films: extract every film archive stored on Drive into the current working
# directory (unzip output is discarded).
!unzip "/content/drive/MyDrive/machine_learning/Films/*.zip" > /dev/null
# Detector: extract the best face-detection model.
# NOTE(review): presumably this creates output/config.yaml and output/model_final.pth,
# which the inference cell loads - confirm against the archive contents.
!unzip /content/drive/MyDrive/machine_learning/Analysis/Detection_Test/Best_Detector.zip > /dev/null
# Classifier: extract the best gender-classification model.
# NOTE(review): presumably this creates output/model_best.pt - confirm against the archive contents.
!unzip /content/drive/MyDrive/machine_learning/Analysis/Classification_Train_Test/Best_Classifier.zip > /dev/null

In [None]:
import glob as gb
import cv2
import pandas as pd
import os
import json
import itertools
import numpy as np
import matplotlib.pyplot as plt
import shutil

import detectron2
from detectron2.utils.logger import setup_logger
from detectron2.structures import BoxMode
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader, DatasetCatalog, MetadataCatalog

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, models, transforms
# from torchsummary import summary

In [None]:
# Collect the extracted film folders: every entry directly under /content whose
# name starts with a digit. The result is sorted because glob returns entries in
# arbitrary order, and a stable order makes multi-film runs reproducible.
film_list = sorted(gb.glob('/content/[0-9]*'))
if film_list:
    print(len(film_list), film_list[0])
else:
    # Previously this case raised IndexError on film_list[0]; report it instead.
    print("0 film folders found under /content - check that the film archives were unzipped")

1 /content/8-ralphbreaks


In [None]:
# Sequels.
# Keys are the film folder names looked up via FILM_NAME; values are the names
# used for the zipped results on Drive and for the renamed local output folder.
mapping = dict([
    ("0-rescuers-down-under", "rescuers-2"),
    ("0-toy-story3", "toystory-3"),
    ("1-cars2", "cars-2"),
    ("3-monsters-university", "monstersinc-2"),
    ("6-finding-dory", "nemo-2"),
    ("7-cars3", "cars-3"),
    ("8-incredibles2", "incredibles-2"),
    ("8-ralphbreaks", "wreckitralph-2"),
    ("9-frozen2", "frozen-2"),
    ("9-toy-story2", "toystory-2"),
    ("9-toystory4", "toystory-4"),
])

In [None]:
def get_board_dicts(dir):
    """Load the dataset records stored in ``<dir>/instances.json``.

    Each record's ``file_name`` is rewritten in place to
    ``<DATASET_PATH>/<file_name>`` (``DATASET_PATH`` is the module-level global,
    not the ``dir`` argument). Annotations are returned exactly as stored in the
    file; no bbox mode, category id or integer conversion is applied.

    Returns the list of record dicts.
    """
    with open("{}/instances.json".format(dir)) as handle:
        records = json.load(handle)
    for record in records:
        record["file_name"] = "{}/{}".format(DATASET_PATH, record["file_name"])
    return records

def register_datasets():
    """Register the current film's frames as a Detectron2 dataset.

    The dataset is registered under the global ``FILM_NAME`` with the single
    class "Face". The loader reads the global ``DATASET_PATH`` when the dataset
    is actually loaded, not at registration time.

    Registering a name that already exists is an error in Detectron2, so an
    existing entry is removed first; this keeps the cell safe to re-run.
    """
    dataset_name = f"{FILM_NAME}"
    if dataset_name in DatasetCatalog.list():
        DatasetCatalog.remove(dataset_name)
    DatasetCatalog.register(dataset_name, lambda: get_board_dicts(f"{DATASET_PATH}"))
    MetadataCatalog.get(dataset_name).set(thing_classes=["Face"])  # just "Face" if detection
    return

In [None]:
def get_matches(id, predictions):
  """Return the predictions whose "image_id" equals `id`, in their original order."""
  same_image = []
  for pred in predictions:
    if pred["image_id"] == id:
      same_image.append(pred)
  return same_image

def lowest_conf_pred(pair):
  """Identify the lower-scoring prediction of a pair.

  Predictions carry no id of their own, so the result is the tuple
  (image_id, bbox top-left x) of the weaker prediction. On a tie the
  second element of the pair is chosen.
  """
  weaker = pair[1] if pair[1]["score"] <= pair[0]["score"] else pair[0]
  return weaker["image_id"], weaker["bbox"][0]

def check_intersection(pairs):
  """Find duplicate detections among pairs of predictions from one image.

  `pairs` is an iterable of 2-element sequences of prediction dicts, each with
  "image_id", "score" and a "bbox" in [top-left-x, top-left-y, width, height]
  format. A pair counts as a duplicate when the intersection covers at least
  50% of either box's area; the lower-scoring prediction of that pair is then
  marked for removal.

  Returns a list of (image_id, bbox top-left x) markers, one per duplicate
  pair (the same marker may appear more than once).

  A box with zero area cannot be covered by any fraction, so its overlap ratio
  is taken as 0 instead of dividing by zero.
  """
  # Store markers of predictions that are duplicate detections in the image
  preds_to_remove = []
  for pair in pairs:
    a_x1, a_y1, a_w, a_h = pair[0]["bbox"][:4]
    b_x1, b_y1, b_w, b_h = pair[1]["bbox"][:4]
    a_area = a_w * a_h
    b_area = b_w * b_h
    # Width/height of the intersection rectangle; negative means no overlap
    dx = min(a_x1 + a_w, b_x1 + b_w) - max(a_x1, b_x1)
    dy = min(a_y1 + a_h, b_y1 + b_h) - max(a_y1, b_y1)
    if dx < 0 or dy < 0:
      continue
    intersection = dx * dy
    # Is the intersection at least 50% of either bbox area?
    a_overlap = intersection / a_area if a_area > 0 else 0.0
    b_overlap = intersection / b_area if b_area > 0 else 0.0
    if a_overlap >= 0.5 or b_overlap >= 0.5:
      image_id, bbox_x1 = lowest_conf_pred(pair)
      preds_to_remove.append((image_id, bbox_x1))
  return preds_to_remove

In [None]:
# Per-film pipeline: build an annotation-free dataset index, detect faces with the
# trained Detectron2 model, drop duplicate detections, crop every face to a portrait,
# classify each portrait as female/male, then archive the results to Drive.
for film in film_list:
  FILM_NAME = film.split("/")[-1]
  DATASET_PATH = f"/content/{FILM_NAME}/full"
  PRED_PATH = "/content/out"
  # Resolve the output name up front so that an unmapped film fails here (KeyError)
  # instead of after the whole inference run has finished.
  ARCHIVE_NAME = mapping[FILM_NAME]

  # ---- Build instances.json: one record per frame, with no annotations ----
  fnames = [os.path.basename(f) for f in gb.glob(f"{DATASET_PATH}/*.jpg")]
  if not fnames:
    raise FileNotFoundError(f"No .jpg frames found in {DATASET_PATH}")
  # Load the first image to get dimensions; every frame is assumed to share them
  dims = cv2.imread(f"{DATASET_PATH}/{fnames[0]}").shape[:2]
  frame_height, frame_width = int(dims[0]), int(dims[1])
  # Build all records first and create the frame once: DataFrame.append was
  # removed in pandas 2.0 and appending row by row is quadratic.
  records = [
      {
          "image_id": image_id,
          "file_name": fname,
          "height": frame_height,
          "width": frame_width,
          "annotations": [],
      }
      for image_id, fname in enumerate(fnames)
  ]
  output = pd.DataFrame(records, columns=["image_id", "file_name", "height", "width", "annotations"])
  output.to_json(f"{DATASET_PATH}/instances.json", orient="records")
  output.to_json("instances.json", orient="records") # copy saved in the working directory for easier debugging
  # Register dataset
  register_datasets()

  # ---- Detect faces ----
  setup_logger()
  cfg = get_cfg()
  cfg.merge_from_file("output/config.yaml")
  cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7 # set threshold for this model
  cfg.MODEL.WEIGHTS = "output/model_final.pth"
  cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1 # "Face"
  predictor = DefaultPredictor(cfg)
  evaluator = COCOEvaluator(f"{FILM_NAME}", cfg, False, output_dir="./out/")
  test_loader = build_detection_test_loader(cfg, f"{FILM_NAME}")
  inference_on_dataset(predictor.model, test_loader, evaluator)

  # ---- Remove duplicate detections ----
  with open(f"{PRED_PATH}/coco_instances_results.json") as f:
    predictions = json.load(f)
  # Group predictions by image in a single pass (instead of rescanning the full
  # prediction list once per image)
  preds_by_image = {}
  for pred in predictions:
    preds_by_image.setdefault(pred["image_id"], []).append(pred)
  complete_list = []
  for matches in preds_by_image.values():
    # Skip if only one prediction in image: there is nothing to compare
    if len(matches) < 2:
      continue
    # Sort the boxes from highest to lowest conf score
    matches.sort(key=lambda x: x["score"], reverse=True)
    # All unique unordered pairs of predictions in this image
    pairs = list(itertools.combinations(matches, 2))
    # Markers of predictions that are duplicate detections
    complete_list.extend(check_intersection(pairs))
  removal_keys = set(complete_list)
  unique_complete_list = list(removal_keys)
  # Keep every prediction that was not marked as a duplicate.
  # NOTE: predictions have no id, so they are matched on (image_id, bbox x1);
  # two boxes in one image sharing the same x1 would both be removed.
  new_predictions = [
      x for x in predictions
      if (x["image_id"], x["bbox"][0]) not in removal_keys
  ]
  print(f"Number of predictions remaining: {len(new_predictions)}/{len(predictions)}")
  # Save file, keeping the unfiltered results alongside
  os.rename(f"{PRED_PATH}/coco_instances_results.json", f"{PRED_PATH}/coco_instances_results_original.json")
  with open(f"{PRED_PATH}/coco_instances_results.json", 'w') as fp:
    json.dump(new_predictions, fp)

  # ---- Crop faces ----
  # image_id comes from this file; a missing image_id means no faces were detected in that image
  predictions = pd.read_json(f"{PRED_PATH}/coco_instances_results.json")
  new = predictions.groupby(['image_id', 'category_id'], as_index = False).agg({'bbox': list, 'score': list})
  dataset_dicts = detectron2.data.datasets.load_coco_json(f"{PRED_PATH}/{FILM_NAME}_coco_format.json", f"{PRED_PATH}")
  # Point every record back at the original frame in the dataset folder
  for i in dataset_dicts:
      path = i["file_name"]
      fname = path.split("/")[-1]
      i["file_name"] = f"{DATASET_PATH}/{fname}"
  img_ids = pd.DataFrame.from_dict(dataset_dicts)
  df = new.merge(img_ids, how="left")
  # Save to target folder
  DST_PATH = "out/portraits"
  os.makedirs(f"{DST_PATH}", exist_ok=True)
  for _, row in df.iterrows():
    path = row["file_name"]
    fname = path.split("/")[-1]
    im_h = row["height"]
    im_w = row["width"]
    img = cv2.imread(path)
    # portrait_idx is the position of the box within this frame's bbox list and
    # becomes the "-<n>" suffix of the portrait file name
    for portrait_idx, bbox in enumerate(row["bbox"]):
      x1, y1, w, h = list(map(int, bbox)) # convert float annots to int
      # Increase the bbox size while ensuring it remains within image coordinates
      new_x1 = max(x1 - int(w*0.25), 0)
      new_y1 = max(y1 - int(h*0.5), 0)
      new_w = min(w + int(w*0.5), im_w)
      new_h = min(h + int(h*0.75), im_h)
      new_fname = fname.split(".jpg")[0] + "-" + str(portrait_idx) + ".jpg"
      dst_path = f"{DST_PATH}/{new_fname}"
      # Crop image (slices past the image edge are clipped by the array slicing)
      crop_img = img[new_y1:new_y1+new_h, new_x1:new_x1+new_w]
      cv2.imwrite(dst_path, crop_img)
  num_imgs = len(os.listdir(f"/content/{DST_PATH}"))
  print(f"Done! Saved {num_imgs}")

  # ---- Classify gender ----
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  # Data transforms for inference
  data_transforms = {
      'inference': transforms.Compose([ transforms.Resize((224, 224)),
                                      transforms.ToTensor(),
                                      # normalization is necessary for evaluation, because otherwise the model sees unexpected pixel colours
                                      transforms.Normalize([0.485, 0.456, 0.406], #ImageNet Mean
                                                            [0.229, 0.224, 0.225]) #ImageNet StdDev
      ])
  }
  # Create dataloader
  film_names = [FILM_NAME]
  image_datasets = {x: datasets.ImageFolder("out", data_transforms['inference']) for x in film_names}
  # batch_size=1 and shuffle=False so that batch i corresponds to dataset.samples[i]
  dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=1, shuffle=False, num_workers=2) for x in film_names}
  dataset_sizes = {x: len(image_datasets[x]) for x in film_names}
  # Class names
  class_names = ['female','male']
  # Load model
  model_name = 'model_best.pt'
  path_to_model = f'/content/output/{model_name}'
  # NOTE: torch.load unpickles a whole model object; only load files you trust.
  # map_location lets a model saved on GPU be loaded on a CPU-only runtime.
  model = torch.load(path_to_model, map_location=device)
  model.to(device)
  # Put dropout / batch-norm layers into inference mode before predicting
  model.eval()
  # Collect one row per portrait, then build the dataframe once
  gender_columns = ["portrait_file_name", "file_name", "portrait_id", "category_id"]
  gender_rows = []
  with torch.no_grad():
    for x in film_names:
      for i, (inputs, labels) in enumerate(dataloaders[x]):
        inputs = inputs.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        path, _ = dataloaders[x].dataset.samples[i]
        fname = path.split('/')[-1]
        # Portrait names look like "<frame>-<n>.jpg": recover n and the frame's file name
        extension = fname.split('-')[-1]
        portrait_id = extension.split('.')[0]
        new_filename = f"{DATASET_PATH}/" + fname.split(f"-{extension}")[0] + ".jpg"
        # Batch size is 1, so the single prediction is at index 0
        category_id = class_names[preds[0].item()]
        gender_rows.append([path, new_filename, portrait_id, category_id])
  gender_preds = pd.DataFrame(gender_rows, columns=gender_columns)

  # ---- Merge the dataframes "gender_preds" and "df" ----
  # Sort portraits by frame and then numerically by portrait id (ascending), so the
  # grouped lists line up with each frame's bbox list. A plain sort on the path
  # would put "-10" before "-2".
  gender_preds = (
      gender_preds
      .assign(_portrait_order=pd.to_numeric(gender_preds["portrait_id"], errors="coerce"))
      .sort_values(by=["file_name", "_portrait_order"])
      .drop(columns="_portrait_order")
  )
  # Group the rows: one row per frame, with per-portrait lists
  grouped_portraits = gender_preds.groupby(['file_name'], as_index = False).agg({'portrait_file_name': list, 'portrait_id': list, 'category_id': list})
  # Drop category_id column - it will be replaced by the one in gender_preds
  df = df.drop(['category_id'], axis=1)
  # Merge
  df_predictions = df.merge(grouped_portraits, how="left")
  df_predictions.to_csv(f'out/{FILM_NAME}_predictions.csv', index=False)

  # Save to drive
  shutil.make_archive(f"/content/drive/MyDrive/machine_learning/Analysis/Films/{ARCHIVE_NAME}", 'zip', "./out")

  # Rename out dir so the next film starts with a fresh "out" folder
  os.rename("out", f"out-{ARCHIVE_NAME}")

[32m[09/22 11:19:46 d2.evaluation.coco_evaluation]: [0mTrying to convert '8-ralphbreaks' to COCO format ...
[32m[09/22 11:19:46 d2.data.datasets.coco]: [0mConverting annotations of dataset '8-ralphbreaks' to COCO format ...)
[32m[09/22 11:19:46 d2.data.datasets.coco]: [0mConverting dataset dicts into COCO format
[32m[09/22 11:19:46 d2.data.datasets.coco]: [0mConversion finished, #images: 11981, #annotations: 0
[32m[09/22 11:19:46 d2.data.datasets.coco]: [0mCaching COCO format annotations at './out/8-ralphbreaks_coco_format.json' ...
[32m[09/22 11:19:47 d2.data.build]: [0mDistribution of instances among all 1 categories:
[36m|  category  | #instances   |
|:----------:|:-------------|
|    Face    | 0            |
|            |              |[0m
[32m[09/22 11:19:47 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[09/22 11:19:47 d2.data.common]: [0mS

  max_size = (max_size + (stride - 1)) // stride * stride
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[09/22 11:19:49 d2.evaluation.evaluator]: [0mInference done 11/11981. Dataloading: 0.0011 s/iter. Inference: 0.1205 s/iter. Eval: 0.0002 s/iter. Total: 0.1218 s/iter. ETA=0:24:17
[32m[09/22 11:19:54 d2.evaluation.evaluator]: [0mInference done 52/11981. Dataloading: 0.0013 s/iter. Inference: 0.1219 s/iter. Eval: 0.0002 s/iter. Total: 0.1235 s/iter. ETA=0:24:33
[32m[09/22 11:19:59 d2.evaluation.evaluator]: [0mInference done 92/11981. Dataloading: 0.0013 s/iter. Inference: 0.1226 s/iter. Eval: 0.0002 s/iter. Total: 0.1242 s/iter. ETA=0:24:36
[32m[09/22 11:20:04 d2.evaluation.evaluator]: [0mInference done 132/11981. Dataloading: 0.0014 s/iter. Inference: 0.1232 s/iter. Eval: 0.0002 s/iter. Total: 0.1249 s/iter. ETA=0:24:39
[32m[09/22 11:20:09 d2.evaluation.evaluator]: [0mInference done 172/11981. Dataloading: 0.0014 s/iter. Inference: 0.1237 s/iter. Eval: 0.0002 s/iter. Total: 0.1254 s/iter. ETA=0:24:40
[32m[09/22 11:20:14 d2.evaluation.evaluator]: [0mInference done 212/119