# This notebook uses the spectrograms in the "data/" directory (and a trained model from the S3 Bucket) to assess model performance and predict the locations of vocalizations in WAV files. These predictions are stored in TXT files (which are formatted as Raven Selection Tables).

#### Change the name of the S3 Bucket (wherever it appears in the code) to reflect the name of the S3 Bucket in your AWS Account.

#### Do not move any import statements to different code chunks. The code seems to break if the import statements are moved around.

## Import Statements

The first set of import statements (others will be introduced as they are needed)

In [None]:
# Package Imports

import math
import pandas as pd
import numpy as np

import warnings
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from scipy.io import wavfile
from collections import OrderedDict
from tqdm import tqdm
import pickle
import json
import glob
import os
from os import path
import boto3
from PIL import Image
import json

import re
import json
import sagemaker
import glob
from sagemaker import get_execution_role, session
from scipy.io import wavfile

## Specifications

Specifies the paths to the directories in the S3 Bucket which contain models and training jobs, respectively.

In [None]:
# Name of the S3 Bucket where model artifacts are located
    # NOTE: Change "bucket_name" to the name of the S3 Bucket for your AWS account.
    # (Note that part of the current name has been redacted for privacy.)
    # This name is interpolated into every S3 path built below and in "get_model_from_output()".
bucket_name = "sagemaker-us-..."

# Files and Directories
# S3 prefix holding training-job output; "deploy_model()" also uses it as the data-capture destination.
MODEL_OUTPUT_URL = f"s3://{bucket_name}/output"
# NOTE(review): presumably the CloudWatch log group for SageMaker training jobs; it is not
# referenced in the portion of the notebook visible here -- confirm where it is used.
BASE_LOG_NAME = "/aws/sagemaker/TrainingJobs"

Specifies the "default" values for the post-processing of predictions

In [None]:
# DEFAULT VALUES FOR POST-PROCESSING (Feel Free to Modify)
# Default IoU threshold for "nms_pred_filter()": a same-class box whose IoU with a
# higher-confidence box is strictly greater than this value is suppressed. None disables NMS.
DEFAULT_OVERLAP_THRESH = 0
# Type of confidence threshold; "filter_predictions()" only acts on the value "Relative".
DEFAULT_CONF_TYPE = "Relative"
# Percentile handed to "rel_conf_subset()": 85 keeps roughly the top 15% of predictions by confidence.
DEFAULT_CONF_THRESH = 85
# Frequency cutoff (in Hz) for "limit_box_frequency()"; None disables the frequency filter.
DEFAULT_FREQ_LIM = None
# NOTE(review): not referenced in the portion of the notebook visible here -- presumably the
# IoU needed to match a prediction to a hand-annotation during evaluation; confirm.
DEFAULT_IoU_THRESH = 0.2
# Class names (keys of the class-ID maps defined below) whose boxes should be kept.
DEFAULT_DESIRED_CLASSES = ["hb"]

Maps class names to their class numbers (and vice versa) for model predictions

In [None]:
# CLASS ID MAPPING for Model's Predictions (Make sure it reflects the data that the model was trained on)
# Forward lookup: class name -> class number emitted by the model.
PRED_CLASS_ID_MAP = {
    "blank": 0,
    "hb": 1,
    "kw": 2,
    "rf": 3,
    "sl": 4,
}
# Reverse lookup: class number -> class name, built from the forward map so the two always agree.
PRED_CLASS_NAME_MAP = {
    class_number: class_name for class_name, class_number in PRED_CLASS_ID_MAP.items()
}

Maps class names to their class numbers (and vice versa) for hand annotations

In [None]:
# CLASS ID MAPPING for hand-annotations (Make sure it reflects the data in the "data/" directory)
# Forward lookup: class name -> class number used in the LST files.
HAND_CLASS_ID_MAP = {
    "blank": 0,
    "hb": 1,
    "kw": 2,
    "rf": 3,
    "sl": 4,
}
# Reverse lookup: class number -> class name, built from the forward map so the two always agree.
HAND_CLASS_NAME_MAP = {
    class_number: class_name for class_name, class_number in HAND_CLASS_ID_MAP.items()
}

## Setup

### Models and Endpoints

Allows this notebook to properly interface with the rest of SageMaker and access the S3 Bucket.

In [None]:
# Establishes the SageMaker session and determines the region in which the session is occurring
# ("get_model_from_output()" reads the region from "sm_session" to pick the container image.)
sm_session = sagemaker.Session()
region = sm_session.boto_region_name

# Specifies the execution role to establish permissions for access purposes
# (This role is passed to the "Model" object built in "get_model_from_output()".)
role = get_execution_role()

# Obtains the necessary information in order to invoke the model endpoint later
# ("get_model_bounding_boxes()" calls "runtime.invoke_endpoint()" on this client.)
runtime = boto3.client(service_name="runtime.sagemaker")

Defines a function that can obtain a trained model from the S3 Bucket (assuming the corresponding training job has finished training the model).

In [None]:
from sagemaker.model import Model
from sagemaker import image_uris

def get_model_from_output(model_output_path):
    """
    Builds a deployable SageMaker model from the artifacts of a finished training job.
    
    PARAMETERS
    ----------
        model_output_path: string
            The directory (inside the bucket's "output" prefix) named after the model's training job.
            The artifacts are read from "s3://<bucket_name>/output/<model_output_path>/output/model.tar.gz".
    ----------
    
    RETURNS
    ----------
        model: "Model" object
            "A SageMaker Model that can be deployed to an Endpoint"
                (https://sagemaker.readthedocs.io/en/stable/api/inference/model.html)
    ----------
    """
    # Location of the trained model's artifacts in the S3 Bucket
    artifact_location = f"s3://{bucket_name}/output/{model_output_path}/output/model.tar.gz"

    # URI of the object detection docker image for the region of the current session
    detector_image = image_uris.retrieve(
        framework="object-detection",
        region=sm_session.boto_region_name,
        version="latest",
    )

    return Model(model_data=artifact_location, image_uri=detector_image, role=role)

Defines a function that can delete a model endpoint.
This function must be called once you are done using the model endpoint to avoid excessive charges to the AWS account.

In [None]:
def delete_model(model):
    """
    Function to delete model endpoint (and the endpoint configuration that shares its name).
    RUN THIS AS SOON AS YOU ARE DONE USING THE ENDPOINT TO AVOID ADDITIONAL CHARGES TO THE AWS ACCOUNT!
    
    PARAMETERS
    ----------
        model: "Model" object
            A SageMaker model currently deployed to a SageMaker endpoint.
            If "model.endpoint_name" is None (the model was never deployed), nothing is deleted.
    ----------
    
    RETURNS
    ----------
        N/A
    ----------
    """
    print(model)
    print(f"endpoint_name - {model.endpoint_name}")
    endpoint_name = model.endpoint_name

    # Nothing to clean up if the model has no endpoint attached to it
    if endpoint_name is None:
        print("Model has no endpoint name; nothing to delete.")
        return

    session = sagemaker.Session()
    # The endpoint configuration is deleted under the same name as the endpoint.
    # "finally" guarantees that deleting the (billable) endpoint is still attempted
    # even if deleting its configuration raises; the error is re-raised afterwards.
    try:
        session.delete_endpoint_config(endpoint_name)
    finally:
        session.delete_endpoint(endpoint_name)

Defines a function that can create a SageMaker endpoint to host and use the model. It also deploys the model to that endpoint.

In [None]:
from sagemaker.model_monitor import DataCaptureConfig

def deploy_model(model, endpoint_name, max_retries=1):
    """
    Deploys the model to a SageMaker endpoint with the name specified by "endpoint_name".
    Takes roughly 10 minutes to fully create the endpoint from a notebook instance using the "ml.t3.medium" instance type.
    
    If deployment fails, the error is printed, "delete_model()" is called to clean up the endpoint, and the 
    deployment is attempted again (at most "max_retries" more times). Once the retries are used up, the 
    original deployment error is re-raised so the real cause is visible.
        NOTE(review): endpoint deletion may still be in progress when the retry starts -- confirm that an
        immediate retry is appropriate for your account.
    
    PARAMETERS
    ----------
        model: "Model" object
            A SageMaker model created by "get_model_from_output()".
        endpoint_name: string
            The name of the endpoint you would like to create.
        max_retries: int (optional)
            The number of additional deployment attempts made after a failure (default: 1).
    ----------
    
    RETURNS
    ----------
        predictor: "callable[string, sagemaker.session.Session] or None"
            (https://sagemaker.readthedocs.io/en/stable/api/inference/model.html)
    ----------
    """
    print("EndpointName={}".format(endpoint_name))

    # Captures every request/response of the endpoint into the model output location
    data_capture_config = DataCaptureConfig(
        enable_capture=True, sampling_percentage=100, destination_s3_uri=MODEL_OUTPUT_URL)

    retries_left = max_retries
    while True:
        try:
            return model.deploy(
                initial_instance_count=1,
                instance_type="ml.m4.xlarge",
                endpoint_name=endpoint_name,
                data_capture_config=data_capture_config,
            )
        except Exception as e:
            print(e)
            # Out of retries: surface the real deployment error instead of looping forever
            if retries_left <= 0:
                raise
            retries_left -= 1
            # Removes the endpoint left behind by the failed attempt before trying again
            delete_model(model)

The second set of import statements

In [None]:
import boto3
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

%matplotlib inline

Defines a function that returns the predictor for an existing endpoint (so that the predictor can be printed and inspected).

In [None]:
from sagemaker.predictor import Predictor
from sagemaker.serializers import CSVSerializer
import time

def get_predictor(endpoint_name):
    """
    Builds a predictor object for an endpoint that has already been created.
        NOTE: This function does not serve a critical purpose. It exists only so we can print and observe the predictor.
    
    PARAMETERS
    ----------
        endpoint_name: string
            The name of an endpoint that currently exists.
    ----------
    
    RETURNS
    ----------
        predictor: "Predictor" object
            A predictor bound to "endpoint_name" that serializes its requests as CSV.
            (https://sagemaker.readthedocs.io/en/stable/api/inference/predictors.html)
    ----------
    """
    csv_serializer = CSVSerializer()
    return Predictor(endpoint_name=endpoint_name, serializer=csv_serializer)

### IoU Calculations

Defines and tests a function that can calculate the IoU (Intersection over Union) between two bounding boxes.

In [None]:
def get_iou(bb1, bb2):
    """
    Computes the Intersection over Union (IoU) of two axis-aligned bounding boxes.

    PARAMETERS
    ----------
    bb1 : dict
        Keys: {'x1', 'x2', 'y1', 'y2'}
        The (x1, y1) position is at the top left corner,
        the (x2, y2) position is at the bottom right corner
    bb2 : dict
        Keys: {'x1', 'x2', 'y1', 'y2'}
        The (x1, y1) position is at the top left corner,
        the (x2, y2) position is at the bottom right corner
    ----------

    RETURNS
    ----------
    float
        in [0, 1] (0 when the boxes do not overlap, or when both boxes have zero area)
    ----------
    """
    # Each box must have its top left corner before its bottom right corner
    for candidate in (bb1, bb2):
        assert candidate['x1'] <= candidate['x2']
        assert candidate['y1'] <= candidate['y2']

    def box_area(box):
        # Width times height of a single box
        return (box['x2'] - box['x1']) * (box['y2'] - box['y1'])

    # Edges of the rectangle shared by both boxes
    overlap_left = max(bb1['x1'], bb2['x1'])
    overlap_top = max(bb1['y1'], bb2['y1'])
    overlap_right = min(bb1['x2'], bb2['x2'])
    overlap_bottom = min(bb1['y2'], bb2['y2'])

    # The boxes are disjoint along at least one axis
    if overlap_right < overlap_left or overlap_bottom < overlap_top:
        return 0.0

    shared_area = (overlap_right - overlap_left) * (overlap_bottom - overlap_top)
    first_area = box_area(bb1)
    second_area = box_area(bb2)

    # Union = sum of both areas minus the part that was counted twice
    try:
        iou = shared_area / float(first_area + second_area - shared_area)
    except ZeroDivisionError:
        print("Two bounding boxes with a nonpositive dimension found. Treating IoU as 0.")
        iou = 0

    assert iou >= 0.0
    assert iou <= 1.0
    return iou

In [None]:
# Tests get_iou() function
# The boxes overlap on x in [0.1, 0.2] and y in [0.2, 0.7]: intersection = 0.05,
# union = 0.06 + 0.10 - 0.05 = 0.11, so the displayed IoU should be about 0.4545.
box1 = {"x1": 0.1, "x2": 0.2, "y1": 0.2, "y2": 0.8}
box2 = {"x1": 0.1, "x2": 0.3, "y1": 0.2, "y2": 0.7}
get_iou(box1, box2)

### Post-Processing Strategies (for Filtering Predictions)

Defines a function that can apply NMS (Non-Maximum Suppression) to a list of model predictions.

In [None]:
def nms_pred_filter(preds, nms_thresh = DEFAULT_OVERLAP_THRESH):
    """
    Filters out all but the highest-confidence predicted bounding box in cases where multiple predicted boxes for the same class overlap 
    (using Non-Maximum Suppression Algorithm) to improve overall quality of predictions.
        *SOURCE: https://towardsdatascience.com/non-maximum-suppression-nms-93ce178e177c
    The list passed in as "preds" is not modified.
        
    PARAMETERS
    ----------
        preds: list of lists
            Contains predicted boxes (sorted by confidence score, highest first).
            Each box has the form [class, score, x0, y0, x1, y1].
            (Note that each predicted box includes information on its confidence score, so we 
                do not need a separate list to hold the confidence scores.)
                    
        nms_thresh: float
            Represents the maximum overlap (IoU) predicted boxes will be "allowed" to have before NMS is applied.
            A box is suppressed when its IoU with a higher-confidence box of the same class is 
            strictly greater than this value. If None, "preds" is returned untouched.
    ----------
        
    RETURNS
    ----------
        filt_preds: list of lists
            Contains predicted boxes (sorted by confidence score) after applying the NMS threshold
    ----------
    """
    # Handles situation where the NMS Threshold is set to None
    if nms_thresh is None:
        return preds
    
    # Works on a copy so that the caller's list is left intact
    remaining = list(preds)
    filt_preds = []
    while remaining:
        # Step 1: The highest-confidence box that is still available is always kept
        max_conf_score_pred = remaining.pop(0)
        filt_preds.append(max_conf_score_pred)
        klass_max, score_max, x0_max, y0_max, x1_max, y1_max = max_conf_score_pred
        box_max = {"x1": x0_max, "x2": x1_max, "y1": y0_max, "y2": y1_max}
        
        # Step 2: Drops every remaining same-class box that overlaps the kept box too much
            # ("Normal" NMS: suppressed boxes are discarded; their scores are not decayed as in "Soft-NMS".)
        survivors = []
        for pred_box in remaining:
            klass, score, x0, y0, x1, y1 = pred_box
            if klass == klass_max:
                box = {"x1": x0, "x2": x1, "y1": y0, "y2": y1}
                if get_iou(box_max, box) > nms_thresh:
                    continue
            survivors.append(pred_box)
        remaining = survivors
                
    return filt_preds

Defines a function that can apply a relative confidence threshold to a list of model predictions.

In [None]:
def rel_conf_subset(preds, pctl):
    """
    Implements relative confidence threshold, based on the percentile (with respect to confidence score) 
    of predicted boxes to keep.
    
    PARAMETERS
    ----------
        preds: list of lists
            Contains predicted boxes (sorted by confidence score, highest first)
            
        pctl: float
            Percentile (based on confidence score) of predicted boxes to keep
            (Example: "pctl = 90" keeps the top 10% of the predicted boxes, i.e., 1 box out of 10.)
            Values at or above 100 keep nothing; values at or below 0 keep everything.
            If None, "preds" is returned untouched.
    ----------
    
    RETURNS
    ----------
        preds: list of lists
            Contains predicted boxes (sorted by confidence score) after applying the relative confidence threshold
    ----------
    """
    # Handles situation where the Relative Confidence Threshold is set to None
    if pctl is None:
        return preds
    
    # Discards all but the top "100-pctl" percent of predictions (with respect to their confidence score)
        # NOTE: Multiplying before dividing keeps the arithmetic exact for whole-number percentiles.
        # (Computing "1 - pctl/100" first loses precision: 10 * (1 - 90/100) evaluates to 0.9999999999999998,
        #   which would be truncated to 0 boxes instead of 1.)
    total = len(preds)
    keep_count = int(total * (100 - pctl) / 100)
    # Clamps the count so that out-of-range percentiles cannot produce a negative slice bound
    keep_count = max(0, min(total, keep_count))
    return preds[:keep_count]

Defines a function that can apply a frequency limit to a model's humpback whale predictions.

In [None]:
def limit_box_frequency(preds, freq_cutoff = DEFAULT_FREQ_LIM, spec_max_freq = 1455.749312):
    """
    Removes predicted bounding boxes (for humpback whales) that exclusively exist above a specific frequency (in Hz).
    (Example: If freq_cutoff is 1000 Hz, every box that exclusively exists above 1000 Hz on its spectrogram is removed 
        from the list of predictions.)
    Predictions for every other class are always kept.
    
    PARAMETERS
    ----------
        preds: list of lists
            Contains predicted boxes (sorted by confidence score).
            Each box has the form [class, score, x0, y0, x1, y1].
            
        freq_cutoff: float
            Upper limit for a box's lower frequency bound (in Hz)
            (Note that, if the box's lower frequency bound is above a specific frequency, the entire box is located above that frequency.)
            If None, "preds" is returned untouched.
            
        spec_max_freq: float (optional)
            The y-axis's maximum frequency (in Hz) on the spectrograms.
            NOTE: This number is related to the value specified for "FREQUENCY_MAX" in "ConvertWavToSpec.ipynb".
                For example, the maximum is 1455.749312 (the default) when "FREQUENCY_MAX" = 1600.
                See the comments within "convert_pred_spec_coords_to_wav_coords()" for more details on how to determine this value.
    ----------
            
    RETURNS
    ----------
        filt_preds: list of lists
            Contains predicted boxes (sorted by confidence score) after applying the frequency limit
    ----------
    """
    # Handles situation where the Frequency Limit is set to None
    if freq_cutoff is None:
        return preds
    
    # Converts the frequency cutoff value into a form that can be compared to the coordinates of a bounding box
        # *NOTE: Subtraction occurs because a y-coordinate of 0 reflects the top of the spectrogram (while a y-coordinate of 1 reflects the bottom)
    cutoff_y = 1 - freq_cutoff / spec_max_freq
    humpback_id = PRED_CLASS_ID_MAP["hb"]
    
    filt_preds = []
    for pred_box in preds:
        klass, score, x0, y0, x1, y1 = pred_box
        # "y1" is the bottom edge of the box (its lowest frequency).
        # "Less than" is used because a y-coordinate of 0 reflects the top of the spectrogram (while a y-coordinate of 1 reflects the bottom)
        if klass == humpback_id and y1 < cutoff_y:
            continue
        # Adds predicted boxes that are not "too high" to the filtered list of predictions
        filt_preds.append(pred_box)
        
    return filt_preds

Defines an experimental function that can combine overlapping model predictions for the same species into one big prediction.

In [None]:
def combine_overlaps(preds, overlap_thresh = None):
    """
    NOTE: Code has not been tested in a while (writing this message on 7/23/22). Be wary of using it.
    
    EXPERIMENTAL STRATEGY: Combines two bounding boxes into one big box (if they exceed the overlap threshold) and keeps the 
    higher of the two confidence scores.
        *NOTE: This does not seem to improve performance, so I do not recommend using this as one of the post-processing steps.
    
    WARNING: This function modifies its input. When "overlap_thresh" is not None, "preds" is emptied, and the 
    coordinates of the boxes that absorb other boxes are overwritten in place.
    
    PARAMETERS
    ----------
        preds: sorted list of predicted boxes (sorted by confidence score)
            Each box has the form [class, score, x0, y0, x1, y1] and must be a mutable list 
            (its coordinates are assigned by index).
            *Note that each predicted box includes information on its confidence score, so we do not need a separate list to 
            hold the confidence scores.
            
        overlap_thresh: a float that represents the maximum overlap predicted boxes will be "allowed" to have
            Two boxes of the same class are merged when their IoU is strictly greater than this value.
            If None (the default), "preds" is returned untouched.
    ----------
    
    RETURNS
    ----------
        filt_preds: list of lists
            Contains predicted boxes (sorted by confidence score) after applying the overlap threshold
    ----------
    """
    # Handles situation where the Overlap Threshold is set to None
    if overlap_thresh is None:
        return preds
    
    filt_preds = []
    while len(preds) > 0:
        bad_preds = []
        # Takes prediction with maximum confidence and removes it from "preds"
        max_conf_score_pred = preds.pop(0)
        # Extracts important information from the maximum-confidence prediction
        klass_max, score_max, x0_max, y0_max, x1_max, y1_max = max_conf_score_pred
        # Reformats the bounding box information to be compatible with get_iou()
        box_max = {"x1": x0_max, "x2": x1_max, "y1": y0_max, "y2": y1_max}
        # Iterates through all remaining predictions in "preds"
            # NOTE: "box_max" grows as boxes are merged, so later boxes in this scan are compared against the 
            # enlarged box. Boxes examined before the growth are not re-checked within this pass.
        for i, predBox in enumerate(preds):
            # Extracts and reformats same information as before, but for a predicted box that remains in "preds"
            klass, score, x0, y0, x1, y1 = predBox
            box = {"x1": x0, "x2": x1, "y1": y0, "y2": y1}
            # Calculates IoU between the max-confidence box and a box currently in "preds"
            iou = get_iou(box_max, box)
            # If the iou score between the two boxes exceeds the overlap threshold, combines the two boxes and keeps the higher confidence score
            if iou > overlap_thresh and klass_max == klass:
                # Adds lower-confidence box to list of "bad" predictions to be deleted from "preds" later
                bad_preds.append(predBox)
                # Boxes are combined into one big box by updating coordinates
                new_x0 = min(box_max['x1'], box['x1'])
                new_x1 = max(box_max['x2'], box['x2'])
                new_y0 = min(box_max['y1'], box['y1'])
                new_y1 = max(box_max['y2'], box['y2'])
                # The higher-confidence prediction gets these new coordinates
                    # (This writes into the caller's box object; the score at index 1 is left unchanged.)
                max_conf_score_pred[2] = new_x0
                max_conf_score_pred[3] = new_y0
                max_conf_score_pred[4] = new_x1
                max_conf_score_pred[5] = new_y1
                # Updates the bounding box information for the higher-confidence prediction so that IoU continues to be properly calculated.
                box_max = {"x1": new_x0, "x2": new_x1, "y1": new_y0, "y2": new_y1}
        # Bad predictions are removed from "preds"
            # (Removal is deferred until after the scan so that "preds" is not modified while it is being iterated.)
        for i, predBox in enumerate(bad_preds):
            preds.remove(predBox)
        # The combined box, now with the highest confidence out of the original overlapping boxes, is added to the filtered list of predictions
        filt_preds.append(max_conf_score_pred)
    return filt_preds

Defines the function that filters model predictions using post-processing strategies and the list of desired classes.

In [None]:
def filter_predictions(preds, overlap_threshold = DEFAULT_OVERLAP_THRESH, conf_type = DEFAULT_CONF_TYPE, 
                       conf_threshold = DEFAULT_CONF_THRESH, FREQ_LIM = DEFAULT_FREQ_LIM, desired_classes = DEFAULT_DESIRED_CLASSES):
    """
    Applies various "post-processing" strategies to remove "bad" predictions from the list of predicted bounding boxes.
    The steps are applied in this order: 
        (1) keep only the desired classes, 
        (2) relative confidence threshold (only when conf_type is "Relative"), 
        (3) Non-Maximum Suppression.
    The list passed in as "preds" is not modified.
    
    PARAMETERS
    ----------
        preds: list of lists
            Contains predicted boxes (sorted by confidence score).
            Each box has the form [class, score, x0, y0, x1, y1].
            
        overlap_threshold: float
            One of the "post-processing" parameters (i.e., parameters that help remove "bad" predictions).
            Passed to "nms_pred_filter()" as the NMS threshold.
            
        conf_type: string
            Specifies which type of "confidence threshold" will be used in post-processing.
            Only "Relative" is currently implemented; any other value skips the confidence threshold.
            
        conf_threshold: float
            One of the "post-processing" parameters.
            Passed to "rel_conf_subset()" as the percentile when conf_type is "Relative".
            
        FREQ_LIM: float
            One of the "post-processing" parameters.
            NOTE: Currently unused. The frequency-limit step ("limit_box_frequency()") is disabled in this function.
            
        desired_classes: list of strings 
            Specifies class names whose predictions should be returned.
            Every name must be a key of PRED_CLASS_ID_MAP (a KeyError is raised otherwise).
    ----------
    
    RETURNS
    ----------
        preds: list of lists
            Contains predicted boxes (sorted by confidence score) for the desired classes after applying all post-processing steps
    ----------
    """
    # Obtains only the desired predictions
        # A set is used so that a class name listed twice cannot add the same prediction twice.
    desired_ids = {PRED_CLASS_ID_MAP[class_name] for class_name in desired_classes}
    preds = [pred_box for pred_box in preds if pred_box[0] in desired_ids]
                
    # Implements relative confidence threshold, based on the percentile (with respect to confidence score) of predicted boxes to keep
        # (An "Absolute" confidence threshold is not implemented, since it was less desirable than the relative one.)
    if conf_type == "Relative":
        preds = rel_conf_subset(preds, conf_threshold)
    
    # Filters out "bad" predictions using Non-Maximum Suppression (NMS)
    preds = nms_pred_filter(preds, overlap_threshold)
    
    # The frequency-limit step ("limit_box_frequency()") and the box-merging step ("combine_overlaps()") 
    # are intentionally not applied here.
    
    return preds

### Using an endpoint to make predictions

Defines the function that uses the model endpoint to make predictions on a single spectrogram.

In [None]:
def get_model_bounding_boxes(spectrogram_name, overlap_threshold = DEFAULT_OVERLAP_THRESH, conf_type = DEFAULT_CONF_TYPE, 
                             conf_threshold = DEFAULT_CONF_THRESH, FREQ_LIM = DEFAULT_FREQ_LIM, desired_classes = DEFAULT_DESIRED_CLASSES):
    """
    Sends a single spectrogram to the model's endpoint, then ranks and filters the returned predictions 
    (by calling "filter_predictions()").
        NOTE: The endpoint is taken from the global variable "predictor" (as returned by "deploy_model()"), 
        and the request is made through the global "runtime" client.
    
    PARAMETERS
    ----------
        spectrogram_name: string
            File path to the current spectrogram (saved as a .png file) that the model should predict on
            
        overlap_threshold: float
            One of the "post-processing" parameters (i.e., parameters that help remove "bad" predictions)
            
        conf_type: string
            Specifies which type of "confidence threshold" will be used in post-processing
            
        conf_threshold: float
            One of the "post-processing" parameters
            
        FREQ_LIM: float
            One of the "post-processing" parameters
            
        desired_classes: list of strings 
            Specifies class names whose predictions you want
    ----------
        
    RETURNS
    ----------
        preds: list of lists
            Contains predicted boxes (sorted by confidence score) for the desired classes after applying all post-processing steps.
            If reading the file, invoking the endpoint, or parsing the response fails, the error is printed 
            and None is returned instead.
    ----------
    """
    
    # Asks the model endpoint where the vocalizations are on the current spectrogram
    try:
        with open(spectrogram_name, "rb") as png_file:
            payload = bytearray(png_file.read())

            response = runtime.invoke_endpoint(EndpointName=predictor.endpoint_name, ContentType="image/png", Body=payload)
            detections = json.loads(response["Body"].read())
    except Exception as err:
        print(err)
        return
    
    # Ranks the predicted bounding boxes from the highest confidence score to the lowest
    ranked_preds = list(detections["prediction"])
    ranked_preds.sort(key=lambda pred_box: pred_box[1], reverse=True)
    
    # Removes "bad" predictions using the post-processing strategies
    return filter_predictions(
        ranked_preds,
        overlap_threshold = overlap_threshold,
        conf_type = conf_type,
        conf_threshold = conf_threshold,
        FREQ_LIM = FREQ_LIM,
        desired_classes = desired_classes,
    )

### Getting desired hand-annotations from a LST File

Defines the function that uses an LST file to obtain the desired hand-annotations for a single spectrogram.

In [None]:
def get_hand_annotated_boxes_spec(spec_name, lst_name, desired_classes = DEFAULT_DESIRED_CLASSES):
    """
    Gets the desired hand-annotations for the relevant spectrogram (from the relevant LST file).
    Only the first line of the LST file whose file name matches "spec_name" is used.
    
    PARAMETERS:
    ----------
        spec_name: string
            The name of the spectrogram that you want hand-annotations for (as it appears in the LST file).
            It is compared to the last path component of the final tab-separated field of each line.
        
        lst_name: string
            The name of the LST file containing the hand-annotations
        
        desired_classes: list of strings
            Contains the class names that you want hand-annotations for.
            Every name must be a key of HAND_CLASS_ID_MAP (a KeyError is raised otherwise).
    ----------
    
    RETURNS
    ----------
        temp: list of lists
            Contains the desired hand-annotations for the spectrogram specified by "spec_name".
            Each annotation has the form [class, xmin, ymin, xmax, ymax].
            None is returned if the LST file has no line for "spec_name".
    ----------
    """
    # A set is used so that a class name listed twice cannot add the same annotation twice
    desired_ids = {HAND_CLASS_ID_MAP[class_name] for class_name in desired_classes}
    
    with open(lst_name, "r") as f:
        for line in f:
            fields = line.split("\t")
            file_name = fields[-1].split("/")[-1].strip()
            if file_name != spec_name:
                continue
            
            # Skips the first three fields and the trailing file path; the rest are the annotation values
            annots = [float(el) for el in fields[3:-1]]
            # Groups the values into boxes of five (keeps the class number in the box information)
            all_boxes = [annots[i:i+5] for i in range(0, len(annots), 5)]
            
            # FILTERS HAND ANNOTATIONS TO ONLY CONTAIN DESIRED ANNOTATIONS
            temp = []
            for hand_annot in all_boxes:
                # Unpacking also verifies that the annotation holds exactly five values
                klass_true, xmin, ymin, xmax, ymax = hand_annot
                if klass_true in desired_ids:
                    temp.append(hand_annot)
            return temp
    
    # The spectrogram does not appear in the LST file
    return None

### Visualizing Predictions and Hand-Annotations

Defines the functions which allow you to visualize the hand-annotations and a model's predictions on the relevant spectrogram.

In [None]:
from matplotlib.pyplot import figure
from time import sleep
import random
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

def plot_hand_annotations(hand_annots, img_height, img_width):
    """
    Plots a spectrogram's hand-annotated boxes on the spectrogram (i.e., on the current matplotlib axes).
    Each box is outlined in red and labeled with its index in "hand_annots".
    
    PARAMETERS
    ----------
        hand_annots: list of lists
            Contains the hand-annotations you want to display on the spectrogram.
            Each annotation has the form [class, x0, y0, x1, y1], where the coordinates are 
            fractions of the image's width/height.
            
        img_height: int
            The height of the image
            
        img_width: int
            The width of the image
    ----------
    
    RETURNS
    ----------
        N/A
    ----------
    """
    # Background color of a box's label, depending on which class the box represents
    label_colors = {0: 'blue', 1: 'pink', 2: 'green', 3: 'yellow', 4: 'black'}
    # Used for class numbers without an assigned color (previously, such a box either caused 
    # an error or silently reused the color of the box plotted before it)
    fallback_color = 'gray'
    
    # Iterates through every hand-annotation in the list
    for index, annot in enumerate(hand_annots):
        # Obtains important box information and uses it to plot the hand-annotated box
        klass, x0, y0, x1, y1 = annot
        
        # Converts the fractional coordinates into pixel coordinates
        xmin = int(x0 * img_width)
        ymin = int(y0 * img_height)
        xmax = int(x1 * img_width)
        ymax = int(y1 * img_height)
        rect1 = plt.Rectangle(
                    (xmin, ymin),
                    xmax - xmin,
                    ymax - ymin,
                    fill=False,
                    edgecolor="red",
                    linewidth=3.5,
                    label = "Hand-Annotated"
                )
        plt.gca().add_patch(rect1)
        
        # Labels the box (just above its top left corner) with its index in the list
        plt.gca().text(
            xmin,
            ymin - 3,
            f"{index}",
            bbox=dict(facecolor=label_colors.get(klass, fallback_color), alpha=0.5),
            fontsize=10,
            color="white",
        )

def plot_model_predicted_boxes(predicted_boxes, img_height, img_width, thresh=0.2):
    """
    Draws the model's predicted boxes on the spectrogram (i.e., on the current matplotlib axes).
    Each box is outlined in yellow and labeled with its index and its rounded confidence score.
    
    PARAMETERS:
    ----------
        predicted_boxes: list of lists 
            Contains the predictions you want to display on the spectrogram.
            Each prediction has the form [class, score, x0, y0, x1, y1], where the coordinates are 
            fractions of the image's width/height.
            
        img_height: int
            The height of the image
            
        img_width: int
            The width of the image
            
        thresh: float
            The minimum confidence score a prediction must have to be plotted.
            (NOTE: "thresh" is currently not applied, since the list of predictions 
                is typically filtered before the call to this function.)
    ----------
    
    RETURNS
    ----------
        num_detections: int
            The total number of predictions plotted on the spectrogram.
    ----------
    """
    plotted = 0
    for position, (klass, score, x0, y0, x1, y1) in enumerate(predicted_boxes):
        # Pixel coordinates of the box's top left and bottom right corners
        left, top = int(x0 * img_width), int(y0 * img_height)
        right, bottom = int(x1 * img_width), int(y1 * img_height)
        
        outline = plt.Rectangle(
            (left, top),
            right - left,
            bottom - top,
            fill=False,
            edgecolor='yellow',
            linewidth=3.5,
            label="Model-Predicted"
        )
        axes = plt.gca()
        axes.add_patch(outline)
        # Label sits just above the top left corner of the box
        axes.text(
            left,
            top - 3,
            f"{position}, {round(score, 2)}",
            bbox=dict(facecolor='orange', alpha=0.5),
            fontsize=10,
            color="white",
        )
        plotted += 1
    
    return plotted
        

def plot_image_with_boxes(filename, hand_annotated_boxes=None, model_predicted_boxes=None, threshold=0.2, pctl=80):
    """
    Shows one spectrogram image and, optionally, overlays hand-annotated and/or predicted boxes.

    PARAMETERS:
    ----------
        filename: string
            Represents the file path to the spectrogram

        hand_annotated_boxes: list of lists (optional)
            The hand-annotations to draw (or None / empty to draw none)

        model_predicted_boxes: list of lists (optional)
            The predictions to draw (or None / empty to draw none)

        threshold: float
            Forwarded to "plot_model_predicted_boxes()" as its "thresh" argument.

        pctl: float
            Represents the percentile of predictions which were kept before this call.
            (NOTE: This value is accepted but not used by the code below.)
    ----------

    RETURNS
    ----------
        N/A
    ----------
    """
    # Starts from a fresh figure and draws the spectrogram image
    plt.close()
    figure(figsize=(20, 15), dpi=150)
    spectrogram = mpimg.imread(filename)
    plt.imshow(spectrogram)

    # The image dimensions are printed and later used to scale the boxes
    img_height = spectrogram.shape[0]
    print(img_height)
    img_width = spectrogram.shape[1]
    print(img_width)

    # Plots the model's predicted bounding boxes (if any were passed in)
    num_detections = 0
    if model_predicted_boxes:
        num_detections = plot_model_predicted_boxes(model_predicted_boxes, img_height, img_width, threshold)

    # Plots the hand-annotated bounding boxes (if any were passed in)
    if hand_annotated_boxes:
        plot_hand_annotations(hand_annotated_boxes, img_height, img_width)

    # Builds a legend with one entry per distinct label (many boxes share the same label)
    handles, labels = plt.gca().get_legend_handles_labels()
    unique_entries = {label: handle for label, handle in zip(labels, handles)}
    plt.legend(unique_entries.values(), unique_entries.keys())
    print("Number of detections: " + str(num_detections))

    # Picks the title according to what was drawn
    if not hand_annotated_boxes:
        title = f"Spectrogram {filename} \n"
    elif model_predicted_boxes:
        title = f"Spectrogram {filename} \n with Hand-Annotations and Predictions"
    else:
        title = f"Spectrogram {filename} \n Hand-Annotated Boxes"

    plt.title(title, fontdict={'fontsize':24})
    plt.show()

### Defines a helpful function that produces a list containing the file path to each spectrogram in the "data/" directory (sorted by beginning time).

In [None]:
def get_all_spec_paths(wav_fname, files=None):
    """
    Given a single WAV file name, produces an ordered list containing the file path for each of its spectrograms.
        (This assumes "ConvertWavToSpec.ipynb" has already placed the WAV file's spectrograms in the "data/" directory.)
        (Furthermore, this also assumes that no audio chunks were "skipped over" during spectrogram creation.)
            (For example, if spectrograms were removed for lacking annotations, the corresponding audio chunks were "skipped over".)

    NOTE: The returned paths are constructed, not sorted. The k-th matching entry of "files" produces
        "data/<prefix>-<k>.png", where <prefix> is the text before the first "-" in that entry's file name.
        This only matches the files on disk when the indices 0..N-1 all exist (see assumptions above).

    PARAMETERS:
    ----------
        wav_fname: string
            Denotes the "numeric" portion of the WAV file's name (e.g., wav_fname = "671658014.181008033412")
            A file is considered a match when its file name contains this string.

        files: list of strings (optional)
            An unsorted list of spectrogram file paths to search.
            If None (the default), the "data/" directory is scanned for PNG files when the function is called.
    ----------

    RETURNS:
    ----------
        all_spec_paths: list of strings
            The spectrogram file paths for the WAV file specified by "wav_fname", ordered by index
                (e.g., all_spec_paths =
                    ["data/671658014.181008033412-0.png", "data/671658014.181008033412-1.png", "data/671658014.181008033412-2.png", ...])
    ----------
    """
    # The directory is listed at call time. A glob placed in the signature would be evaluated only once
    # (when the function is defined) and would miss spectrograms created afterwards.
    if files is None:
        files = glob.glob('data/*.png')

    all_spec_paths = []
    for file in files:
        cur_name = file.split("/")[-1]
        if wav_fname in cur_name:
            # The running count of matches becomes the spectrogram index of the generated path
            cur_name = "data/" + cur_name.split("-")[0] + "-" + str(len(all_spec_paths)) + ".png"
            all_spec_paths.append(cur_name)
    return all_spec_paths

## Getting a Previously-Trained Model and Deploying it to a SageMaker Endpoint

In [None]:
# Name of the previously-trained model to load.
    # Example: model_path = "cpbio-final-best-model"
    # This name is passed to get_model_from_output() and is also printed by get_metrics() as the "Model Name".
model_path = "cpbio-final-best-model"
# Gets model artifacts from S3 Bucket
model = get_model_from_output(model_path)

In [None]:
# Specifies the name of the endpoint you want to create.
    # The same name is passed to deploy_model() and get_predictor() in the following code chunks.
endpoint_name = "modelendpoint"

In [None]:
# Creates the endpoint (named by "endpoint_name") that will be used for model evaluation
    # Note that you may encounter an error upon running this code chunk. If so, simply run this code chunk again.
    # The string below is only a note about the expected output; it has no effect on the deployment.
"""
The endpoint takes roughly 10 minutes to create. When the creation process is complete, the output should look similar to the following:

    EndpointName=modelendpoint
    ---------------!
    
"""
deploy_model(model, endpoint_name)

#### Wait until the endpoint has been created before running any more code.

In [None]:
# Displays technical model information (the attributes stored on the "model" object).
    # As the last expression of the code chunk, its value is shown as the chunk's output.
model.__dict__

In [None]:
# Gets the predictor for the endpoint named by "endpoint_name"
predictor = get_predictor(endpoint_name)
# Displays the predictor (as the last expression of the code chunk, its value is shown as the chunk's output)
predictor

## Specifying Post-Processing Parameters and Location of Hand-Annotations

This code chunk allows you to specify the post-processing parameters (and the LST file containing the hand-annotations) for any performance evaluations or prediction files you produce. Feel free to modify these values and rerun the code chunk if you want to try different values.

In [None]:
# NOTE: Use either Relative Confidence Thresholds or Absolute Confidence Thresholds, but never both.
# NOTE: I recommend keeping all but one parameter to a single value in the lists (in order to avoid excessive runtimes).
#       get_metrics() runs one full evaluation for every combination of NMS threshold and confidence threshold.
#       The other code chunks only use the first value of each list (index 0).

# Non-Maximum Suppression (NMS) Thresholds (list)
overlap_thresholds = [0]
#overlap_thresholds = [None]

# Relative Confidence Thresholds (list)
conf_thresholds = [85]
#conf_thresholds = [None]
# Tells the post-processing how to interpret "conf_thresholds" ("Relative" or "Absolute")
conf_type = "Relative"
# Absolute Confidence Thresholds (list)
#conf_thresholds = [0.225]
#conf_type = "Absolute"

# Frequency Limit, measured in Hz (None is the same value as DEFAULT_FREQ_LIM)
FREQ_LIM = None

# IoU Threshold (an IoU strictly greater than this value counts as "enough overlap" in calculate_precision_recall())
iou_threshold = 0.2

# Desired classes (list of strings)
    # Specify each class name you desire predictions and performance metrics for 
    # Type the name as it is written in "Allowed Classes" from ConvertWavToSpec.ipynb
#desired_classes = ["blank", "hb", "kw", "rf", "sl"]
desired_classes = ["hb"]

# LST file that you want hand-annotations from (include the ".lst" extension)
lst_file_name = "val.lst"

#### Now, different code chunks need to be run depending on whether you want to calculate performance metrics, produce a selection table of predicted annotations for use in Raven, or both.

## Calculating Performance Metrics

Defines the function that determines the number of true positives, false positives, and false negatives between a set of predictions and a set of hand-annotations. These values are necessary to calculate precision and recall later.

In [None]:
def calculate_precision_recall(predicted_boxes, hand_annotated_boxes, iou_threshold = DEFAULT_IoU_THRESH):
    """
    Calculates the number of true positives, false positives, and false negatives for a list of predictions and a list of hand-annotations.

    PARAMETERS:
    ----------
        predicted_boxes: list of lists
            Contains the predictions which will be used in the performance evaluation.
            Each prediction is unpacked as [klass, score, x0, y0, x1, y1].

        hand_annotated_boxes: list of lists (or None)
            Contains the hand-annotations which will be used in the performance evaluation.
            Each hand-annotation is unpacked as [klass_true, xmin, ymin, xmax, ymax].
            If None, every prediction is counted as a false positive.

        iou_threshold: float
            Determines the minimum amount of overlap that a predicted box must have with a hand-annotated box in order to be a
                successful prediction. The IoU must be strictly greater than this value.
    ----------

    RETURNS:
    ----------
        truePositives: int
            The number of predicted boxes in "predicted_boxes" that have enough overlap with a hand-annotated box of the
                same class in "hand_annotated_boxes"
            (Note that two predicted boxes can have enough overlap with the same hand-annotated box, and this counts as two true positives.)

        falsePositives: int
            The number of predicted boxes in "predicted_boxes" that do not have enough overlap with a hand-annotated box in
                "hand_annotated_boxes".

        falseNegatives: int
            The number of hand-annotated boxes in "hand_annotated_boxes" that do not have enough overlap with a predicted box in
                "predicted_boxes".

        predicted_boxes: list of lists
            The list of predicted boxes that was passed in (returned unchanged)

        numberHandAnnotated: int
            The total number of hand-annotations within "hand_annotated_boxes" (0 when it is None)
    ----------
    """
    # Catches the cases when there are no hand-annotations: every prediction is a False Positive
    if hand_annotated_boxes is None:
        return 0, len(predicted_boxes), 0, predicted_boxes, 0

    numberHandAnnotated = len(hand_annotated_boxes)

    # Catches the cases when there are no predictions: every hand-annotation is a False Negative
    if len(predicted_boxes) == 0:
        return 0, 0, numberHandAnnotated, predicted_boxes, numberHandAnnotated

    # NOTE: No confidence threshold is applied in this function; the predictions are expected to be
    #       filtered before they are passed in. (Earlier, hard-coded threshold code that was commented
    #       out here has been removed, along with a first pass that computed unused "max IoU" scores.)

    # Tracks which hand-annotations have not yet been "visited" (i.e., matched) by any prediction
    notVisited = [True] * numberHandAnnotated

    truePositives = 0
    falsePositives = 0

    # Compares every prediction against every hand-annotation
    for predBox in predicted_boxes:
        klass, score, x0, y0, x1, y1 = predBox
        # Becomes True as soon as the prediction matches at least one hand-annotation
        count = False
        for j, handAnnot in enumerate(hand_annotated_boxes):
            klass_true, xmin, ymin, xmax, ymax = handAnnot
            # Properly formats both boxes to be input for "get_iou()"
            predictedBox = {"x1": x0, "x2": x1, "y1": y0, "y2": y1}
            groundTruth = {"x1": xmin, "x2": xmax, "y1": ymin, "y2": ymax}
            iouScore = get_iou(predictedBox, groundTruth)
            # A match requires "enough" overlap and the same class name.
            # (The class maps are only consulted when the overlap test passes.)
            if iouScore > iou_threshold and PRED_CLASS_NAME_MAP[klass] == HAND_CLASS_NAME_MAP[klass_true]:
                count = True
                notVisited[j] = False

        # A prediction that matched any hand-annotation is a True Positive; otherwise it is a False Positive
        if count:
            truePositives += 1
        else:
            falsePositives += 1

    # After checking every combination, any "unvisited" hand-annotations are False Negatives
    falseNegatives = sum(notVisited)
    return truePositives, falsePositives, falseNegatives, predicted_boxes, numberHandAnnotated

Defines and calls the function that evaluates the model's performance on all spectrograms in the "data/" directory. If there are no spectrograms in the "data/" directory, use ConvertWavToSpec.ipynb to create some. You can specify different values for the post-processing parameters to see how they impact precision and recall. The runtime of this function depends on the number of spectrograms and parameter values used in the performance evaluation.

In [None]:
# Run this chunk when you want to test your model on the spectrograms in the "data/" directory. 
# It will output performance metrics, and you can specify post-processing parameters.

# Gets the file path to each PNG file in the "data/" directory
    # (This list is passed to get_metrics() at the bottom of this code chunk.)
files = glob.glob('data/*.png')

# Function to test your model and obtain evaluation metrics.
def get_metrics(files, overlap_thresholds = [DEFAULT_OVERLAP_THRESH],
                conf_type = DEFAULT_CONF_TYPE, conf_thresholds = [DEFAULT_CONF_THRESH],
                FREQ_LIM = DEFAULT_FREQ_LIM, iou_threshold = DEFAULT_IoU_THRESH,
                desired_classes = DEFAULT_DESIRED_CLASSES, lst_file_name = "val.lst"):
    """
    Prints the model's performance metrics across every spectrogram in "files", once for every combination of
    NMS threshold and Confidence threshold.

    PARAMETERS:
    ----------
        files: list of strings
            Denotes the path to each PNG file (i.e., each spectrogram) that should be considered in the performance evaluation.
            Only the file name is used; predictions are requested for "data/<file name>".

        overlap_thresholds: list of floats
            Denotes the NMS thresholds you wish to apply in post-processing (one threshold per evaluation)

        conf_type: string
            Either "Relative" or "Absolute".
            Indicates how the confidence thresholds should be interpreted.
            (NOTE: The original documentation states that support for "absolute" confidence thresholds has been
                commented out due to inconsistent performance depending on where it is applied.)

        conf_thresholds: list of floats
            Denotes the confidence thresholds you wish to apply in post-processing (one threshold per evaluation)

        FREQ_LIM: float
            Denotes the frequency limit (in Hz) that you wish to apply in post-processing

        iou_threshold: float
            Denotes the IoU threshold which determines the criteria for "enough overlap" between a predicted box and
                a hand-annotated box.

        desired_classes: list of strings
            Denotes the classes whose predictions and hand-annotations should be considered in the model evaluation

        lst_file_name: string
            Specifies the name of the LST file which contains the hand-annotation information for the performance evaluation
                (with the ".lst" portion).
    ----------

    RETURNS:
    ----------
        N/A (the results are printed)
    ----------
    """
    # One full evaluation per (NMS threshold, Confidence threshold) pair
    for othresh in overlap_thresholds:
        for cthresh in conf_thresholds:
            # Running totals for the current evaluation
            tp_sum = 0
            fp_sum = 0
            fn_sum = 0
            annotation_sum = 0

            # Prints important specifications for the current test
            # ("model_path" is read from the notebook's global scope)
            print("Model Name: " + model_path)
            print(f"PERFORMANCE METRICS FOR {desired_classes} on {lst_file_name}")
            print(f"NMS Overlap Threshold: {othresh}")
            print(f"{conf_type} Confidence Threshold: {cthresh}")
            print(f"Frequency Limit: {FREQ_LIM}")
            print(f"IoU Threshold: {iou_threshold}")

            for file in sorted(files):
                spec_name = file.split("/")[-1]

                # Desired hand-annotated boxes for the current spectrogram
                annotations = get_hand_annotated_boxes_spec(spec_name, lst_file_name, desired_classes)

                # Post-processed list of desired predicted boxes for the current spectrogram
                predictions = get_model_bounding_boxes(
                    f"data/{spec_name}",
                    overlap_threshold = othresh,
                    conf_type = conf_type,
                    conf_threshold = cthresh,
                    FREQ_LIM = FREQ_LIM,
                    desired_classes = desired_classes,
                )

                # Counts for the current spectrogram (the returned list of predictions is not needed here)
                tp, fp, fn, _, num_annotations = calculate_precision_recall(predictions, annotations, iou_threshold)
                tp_sum += tp
                fp_sum += fp
                fn_sum += fn
                annotation_sum += num_annotations

            # Recall and precision are undefined when their denominators are zero
            prediction_sum = tp_sum + fp_sum
            recall = (annotation_sum - fn_sum) / annotation_sum if annotation_sum else "Not Applicable"
            precision = tp_sum / prediction_sum if prediction_sum else "Not Applicable"

            # F1 is undefined when either input is "Not Applicable" (TypeError) or when both are zero (ZeroDivisionError)
            try:
                f1 = 2*(precision*recall)/(precision+recall)
            except (TypeError, ZeroDivisionError):
                f1 = "Not Applicable"

            # Clearly displays results from the model evaluation
            print(f"   True Positives: {tp_sum}, False Positives: {fp_sum}, False Negatives: {fn_sum}")
            print(f"   Total Number of Predicted Boxes: {prediction_sum}, Total Number of Hand-Annotated Boxes: {annotation_sum}")
            print(f"   Recall: {recall}")
            print(f"   Precision: {precision}")
            print(f"   F1 Score: {f1}")
            print()
            print("Done!")


# Passes in spectrograms that will be evaluated, along with the evaluation parameters, and outputs evaluation metrics
    # (The parameter values come from the "Specifying Post-Processing Parameters" code chunk above.)
get_metrics(files = files, 
            overlap_thresholds = overlap_thresholds, 
            conf_type = conf_type, 
            conf_thresholds = conf_thresholds, 
            FREQ_LIM = FREQ_LIM, 
            iou_threshold = iou_threshold, 
            desired_classes = desired_classes,
            lst_file_name = lst_file_name)

If there is only a very small number of hand-annotations and predictions across the spectrograms, you can run the next three code chunks to determine which spectrograms contain them.

In [None]:
# Gets all .png files from the "data/" directory
files = glob.glob('data/*.png')

# Specifies the name of the WAV file whose spectrograms you want to inspect
    # (Uncomment one of the alternatives below to inspect a different WAV file.)
wav_fname = "671658014.181008033412"
#wav_fname = "671658014.180930183532"
#wav_fname = "671658014.181008003414"
#wav_fname = "671658014.180929003601"

# Gets the path to every spectrogram for the WAV file (ordered by spectrogram index)
all_spec_paths = get_all_spec_paths(wav_fname, files)

In [None]:
# Displays the names of spectrograms which have hand-annotations for the desired classes
for i in all_spec_paths:
    # Every path starts with "data/" (5 characters); the helper is given the bare file name
    truth = get_hand_annotated_boxes_spec(i[5:], lst_file_name, desired_classes)
    # Skips spectrograms without hand-annotations.
    # None is treated like an empty list, since calculate_precision_recall() also accepts None for "no hand-annotations".
    if truth is None or len(truth) == 0:
        continue
    print(i)

In [None]:
# Displays the names of spectrograms which have model predictions (after post-processing)
    # Notice that the first post-processing value in each list is used.
    # Each iteration requests predictions for one spectrogram, so the runtime grows with the number of spectrograms.
for i in all_spec_paths:
    predictions = get_model_bounding_boxes(i, overlap_thresholds[0], conf_type, conf_thresholds[0], FREQ_LIM, desired_classes)
    # Skips spectrograms for which no prediction survived post-processing
    if len(predictions) == 0:
        continue
    print(i)

This code chunk plots the hand-annotations and predictions (from the model's performance evaluation) for a given spectrogram on that spectrogram. This is a highly recommended strategy for gauging how the model makes its decisions. You must specify the file name of the spectrogram.

In [None]:
# Plots the desired, post-processed predicted boxes and desired hand-annotated boxes on the corresponding spectrogram (for a single spectrogram)
    # "filename" is the spectrogram's file name WITHOUT the "data/" prefix (the prefix is added below where needed).
filename = "671658014.181008033412-0.png"
#filename = "671658014.180930183532-0.png"
#filename = "671658014.181008003414-0.png"
#filename = "671658014.180929003601-0.png"

# Notice that the first post-processing value in each list is used.
hand_annotated_boxes = get_hand_annotated_boxes_spec(filename, lst_file_name, desired_classes)
predictions = get_model_bounding_boxes(f"data/{filename}", overlap_thresholds[0], conf_type, conf_thresholds[0], FREQ_LIM, desired_classes)
# Prints the raw list of predictions before plotting it
print(predictions)
# NOTE: "threshold" and "pctl" currently have no effect on the plot (neither plotting function uses them);
#       the predictions were already filtered by get_model_bounding_boxes() above.
plot_image_with_boxes(f"data/{filename}", hand_annotated_boxes=hand_annotated_boxes, model_predicted_boxes=predictions,
                   threshold=0,pctl=conf_thresholds[0])

This code chunk calculates performance metrics for a single spectrogram. You must specify the spectrogram's file path.

In [None]:
# Calculates performance metrics for a single spectrogram
    # glob.glob() turns the path into a list: one element if the file exists, an empty list otherwise.
    # With an empty list, get_metrics() evaluates nothing and reports zero counts.
file_path = "data/671658014.181008033412-0.png"
get_metrics(glob.glob(file_path), overlap_thresholds, conf_type, conf_thresholds, FREQ_LIM, iou_threshold, desired_classes, lst_file_name)

This code chunk displays detailed information regarding a single spectrogram's predictions and hand-annotations. You must specify the file name of the spectrogram.

In [None]:
"""
Displays the number of true positives, number of false positives, number of false negatives, 
list of predictions, and number of hand-annotations for a single spectrogram
    NOTE: Uses the first NMS threshold and first Confidence threshold in the lists
"""
# Filename is the name of one of the spectrograms found in the data directory (WITHOUT the "data/" prefix).
# If there are no spectrograms in there, use ConvertWavToSpec.ipynb to generate some.
    # Example: filename = "671658014.181008033412-145.png"
filename = "671658014.181008033412-0.png"
#filename = "671658014.180930183532-0.png"
#filename = "671658014.181008003414-0.png"
#filename = "671658014.180929003601-0.png"

# Notice that the first post-processing value in each list is used.
hand_annotated_boxes = get_hand_annotated_boxes_spec(filename, lst_file_name, desired_classes)
predictions = get_model_bounding_boxes(f"data/{filename}", overlap_thresholds[0], conf_type, conf_thresholds[0], FREQ_LIM, desired_classes)
# The returned tuple is (truePositives, falsePositives, falseNegatives, predicted_boxes, numberHandAnnotated).
# As the last expression of the code chunk, it is shown as the chunk's output.
calculate_precision_recall(predictions, hand_annotated_boxes, iou_threshold)

## Producing a Selection Table of Predicted Annotations for Use in Raven

Useful Import Statement

In [None]:
from os.path import exists

Use this code chunk to specify and obtain important information.

In [None]:
# Gets all .png files from the "data/" directory
files = glob.glob('data/*.png')

# Specifies the name of the WAV file that you want to produce a selection table for
    # (This name is also used for the coordinate conversion and for the name of the TXT file created below.)
wav_fname = "671658014.181008033412"
#wav_fname = "671658014.180930183532"
#wav_fname = "671658014.181008003414"
#wav_fname = "671658014.180929003601"

# Gets the path to every spectrogram for the WAV file (ordered by spectrogram index)
all_spec_paths = get_all_spec_paths(wav_fname, files)

Defines and calls the function that iterates through multiple spectrograms. It obtains predictions for each spectrogram and stores them in one large list.

In [None]:
def get_all_model_bounding_boxes(all_spectrogram_paths,
                                 overlap_threshold = DEFAULT_OVERLAP_THRESH, conf_type = DEFAULT_CONF_TYPE,
                                 conf_threshold = DEFAULT_CONF_THRESH, FREQ_LIM = DEFAULT_FREQ_LIM,
                                 desired_classes = DEFAULT_DESIRED_CLASSES):
    """
    Collects the model's predictions for every spectrogram in "all_spectrogram_paths" by calling
    "get_model_bounding_boxes()" once per spectrogram with the same post-processing parameters.

    PARAMETERS
    ----------
        all_spectrogram_paths: list of strings
            Contains the file paths to the spectrograms that the model should make predictions for

        overlap_threshold: float
            One of the "post-processing" parameters (i.e., parameters that help remove "bad" predictions)

        conf_type: string
            Specifies which type of "confidence threshold" will be used in post-processing

        conf_threshold: float
            One of the "post-processing" parameters

        FREQ_LIM: float
            One of the "post-processing" parameters

        desired_classes: list of strings
            Specifies class names whose predictions you want
    ----------

    RETURNS
    ----------
        all_preds: three-dimensional list
            One sublist per spectrogram, in the same order as "all_spectrogram_paths".
            Each sublist holds whatever "get_model_bounding_boxes()" returned for that spectrogram
                (documented elsewhere in this notebook as predictions of the form [klass, score, x0, y0, x1, y1]).
    ----------
    """
    return [
        get_model_bounding_boxes(
            path,
            overlap_threshold = overlap_threshold,
            conf_type = conf_type,
            conf_threshold = conf_threshold,
            FREQ_LIM = FREQ_LIM,
            desired_classes = desired_classes,
        )
        for path in all_spectrogram_paths
    ]

In [None]:
# Gets the predictions for every spectrogram of the WAV file (one sublist per entry of "all_spec_paths")
# Notice that the first post-processing value in each list is used.
all_preds = get_all_model_bounding_boxes(all_spec_paths, overlap_thresholds[0], conf_type, conf_thresholds[0], FREQ_LIM, desired_classes)

Defines and calls the function that converts predicted box boundaries from being measured in terms of the spectrogram to being measured in terms of the entire WAV file.

In [None]:
def convert_pred_spec_coords_to_wav_coords(wav_fname, all_preds, time_span = 10800, chunk_size = 30, spec_overlap = 3.0, spec_max_freq = 1455.749312):
    """
    Rescales every predicted box in "all_preds" from spectrogram-relative coordinates (a "0 to 1" scale measured from the
    spectrogram's edges) to WAV-file coordinates (seconds from the start of the WAV file, and frequency in Hz between 0 and
    the maximum frequency visible on the spectrograms).

    PARAMETERS:
    ----------
        wav_fname: string
            Contains the "numeric" portion of the WAV file's name (e.g., wav_fname = "671658014.181008033412")
            (NOTE: This value is accepted but not used in the conversion.)

        all_preds: three-dimensional list
            One sublist of predictions per spectrogram, ordered by spectrogram index.
                Each prediction is in the form of [klass, score, x0, y0, x1, y1].
            The position of a sublist determines its spectrogram's start time: (chunk_size - spec_overlap) * position.

        time_span: float
            The elapsed time (in seconds) that the entire WAV file spans.
            (EQUAL TO THE FINAL "end_s" PRINTED BY "extract_chunk()" WITHIN "ConvertWavToSpec.ipynb" WHEN THE SPECTROGRAMS WERE CREATED)
            Only used for the final spectrogram, which is treated as ending at "time_span".

        chunk_size: float
            The amount of time (in seconds) that a single spectrogram covers.
            (EQUAL TO "CHUNK_SIZE_SEC" WITHIN "ConvertWavToSpec.ipynb" WHEN THE SPECTROGRAMS WERE CREATED)

        spec_overlap: float
            The number of seconds of overlap between one spectrogram and its subsequent spectrogram.
            (EQUAL TO THE FLOAT SPECIFIED IN "int(<FLOAT> * sr)" FOR THE CALCULATION OF "step" WITHIN "ConvertWavToSpec.ipynb" WHEN THE SPECTROGRAMS WERE CREATED)

        spec_max_freq: float
            The y-axis's maximum frequency on the spectrograms (in Hz)
            (RELATED TO THE VALUE SPECIFIED FOR "FREQUENCY_MAX" IN "ConvertWavToSpec.ipynb" WHEN THE SPECTROGRAMS WERE CREATED)
                (Example: spec_max_freq = 1455.749312 when FREQUENCY_MAX = 1600)
            (Currently, the only way to determine the exact value is to cross-reference hand-annotated boxes with the annotation TXT file.)
    ----------

    RETURNS:
    ----------
        conv_preds: list of lists
            A flat list with one entry per prediction in "all_preds", in the same order, with converted location information
                (i.e., the format for a predicted box is [klass, score, beg_time, end_time, min_freq, max_freq] instead of [klass, score, x0, y0, x1, y1])
    ----------
    """
    converted = []
    for spec_index, spec_preds in enumerate(all_preds):
        # Each spectrogram starts (chunk_size - spec_overlap) seconds after the previous one
        offset = (chunk_size - spec_overlap) * spec_index

        # The final spectrogram covers whatever remains of the WAV file.
        # (A lone spectrogram is not treated as "final" and keeps the regular chunk size.)
        is_final = spec_index == len(all_preds) - 1 and spec_index > 0
        duration = time_span - offset if is_final else chunk_size

        # The y-axis is flipped: y = 0 corresponds to "spec_max_freq" and y = 1 corresponds to 0 Hz
        converted.extend(
            [
                klass,
                score,
                offset + x0*duration,
                offset + x1*duration,
                (1-y1)*spec_max_freq,
                (1-y0)*spec_max_freq,
            ]
            for klass, score, x0, y0, x1, y1 in spec_preds
        )
    return converted

In [None]:
# Converts the predictions to WAV-file coordinates.
    # Positional arguments after "all_preds": time_span (s), chunk_size (s), spec_overlap (s), spec_max_freq (Hz)
    # These must match the values used in ConvertWavToSpec.ipynb when the spectrograms were created.
conv_preds = convert_pred_spec_coords_to_wav_coords(wav_fname, all_preds, 10799.5225, 30, 3.0, 1455.749312)

Defines and calls the function that creates a selection table (in the form of a TXT file) containing all desired predictions for a specified WAV file. This selection table can be opened in Raven alongside the WAV file, allowing you to "listen" to the model's predictions.

In [None]:
def make_selection_table(wav_fname, conv_preds):
    """
    Makes a TXT file reflecting the model's predictions for the specified WAV file (called "<wav_fname>-predictions.txt").
    It will be formatted very similarly to the annotators' TXT files for use in Raven as a selection table.
    Any existing file with the same name is replaced.
    
    PARAMETERS:
    ----------
        wav_fname: string
            The "numeric" portion of the WAV file's name (e.g., wav_fname = "671658014.181008033412")
            
        conv_preds: list of lists
            A list of all predictions for the specified WAV file.
                Each prediction is in the form of [klass, score, beg_time, end_time, min_freq, max_freq].
                "klass" must be a key of "PRED_CLASS_NAME_MAP" (otherwise a KeyError is raised).
    ----------
    
    RETURNS:
    ----------
        N/A
    ----------
    """
    column_names = ["Selection", "View", "Channel", "Begin Time (s)", "End Time (s)", "Low Freq (Hz)", "High Freq (Hz)", "Species", "Confidence Score"]
    
    # Removes old TXT file with same name (if one exists)
    # ("os.path.exists" / "os.remove" are used so that this cell neither depends on the
    #  "from os.path import exists" cell further down nor on the IPython-only "!rm" shell magic.)
    file_name = f"{wav_fname}-predictions.txt"
    if os.path.exists(file_name):
        print(f"{file_name} exists, removing now")
        os.remove(file_name)
    
    # Creates TXT file (tab-separated, one header row followed by one row per prediction)
    with open(file_name, "w") as f:
        f.write('\t'.join(column_names) + '\n')
        
        for i, pred in enumerate(conv_preds):
            klass, score, beg_time, end_time, min_freq, max_freq = pred
            row = [
                str(i+1),            # Selection (1-based)
                "Spectrogram 1",     # View
                "1",                 # Channel
                str(beg_time),
                str(end_time),
                str(min_freq),
                str(max_freq),
                PRED_CLASS_NAME_MAP[klass],
                str(score),
            ]
            f.write('\t'.join(row) + '\n')
    print("Done!")

In [None]:
# Writes the converted predictions to "<wav_fname>-predictions.txt" in the current working directory.
make_selection_table(wav_fname, conv_preds)

## CLEANUP (After Evaluating a Model or Producing a Prediction TXT File)

In [None]:
"""RUN THIS CODE CHUNK WHEN YOU ARE DONE USING THE ENDPOINT TO AVOID ADDITIONAL CHARGES TO THE ACCOUNT"""
# NOTE(review): "delete_model" and "model" are defined outside this section of the notebook;
#               presumably this call tears down the deployed endpoint -- confirm before relying on it.
delete_model(model)

## Miscellaneous

Useful (but currently unused) function that can download anything from the S3 Bucket

In [None]:
# Specifies the S3 Bucket
# ("bucket_name" is the module-level name set in the "Specifications" section at the top of the notebook.)
s3 = boto3.resource('s3')
bucket = s3.Bucket(bucket_name)

# Defines the function
def fetch_from_bucket(source_file_path: str, dest_file_name: str) -> None:
    """
    Downloads a single object from the S3 Bucket (the module-level "bucket") to a local file.
    
    PARAMETERS:
    ----------
        source_file_path: string
            Passed to "bucket.download_file()" as its first argument (the object's key within the S3 Bucket).
            
        dest_file_name: string
            Passed to "bucket.download_file()" as its second argument (the local file name to write to).
    ----------
    
    RETURNS:
    ----------
        N/A
    ----------
    """
    bucket.download_file(source_file_path, dest_file_name)

Currently unused code that creates a selection table of hand-annotations for use in Raven. This is redundant since the resulting files should be identical to our annotation files, but this allows us to verify that our code works properly.

In [None]:
from os.path import exists

In [None]:
# Gets all .png files from the "data/" directory
files = glob.glob('data/*.png')
# Specifies the name of the WAV file that you want to produce a selection table for
# (Uncomment exactly one of the alternatives below to switch to a different WAV file.)
wav_fname = "671658014.181008033412"
#wav_fname = "671658014.180930183532"
#wav_fname = "671658014.181008003414"
#wav_fname = "671658014.180929003601"

# Gets the path to every spectrogram for the WAV file (sorted in the proper order)
# ("get_all_spec_paths()" is defined outside this section of the notebook.)
all_spec_paths = get_all_spec_paths(wav_fname, files)

In [None]:
def get_all_hand_annotated_boxes_spec(all_spectrogram_paths, lst_file_name, desired_classes):
    """
    Collects the hand annotations (read from the specified LST file) for each spectrogram in
    "all_spectrogram_paths" by calling "get_hand_annotated_boxes_spec()" once per spectrogram.
    
    PARAMETERS
    ----------
        all_spectrogram_paths: list of strings
            The file paths to the spectrograms whose hand-annotations are wanted.
            Only the file name (the text after the final "/") is passed on for the lookup.

        lst_file_name: string
            The name of the LST file which holds the hand-annotation information
                (with the ".lst" portion).
            
        desired_classes: list of strings 
            The class names whose hand-annotations you want
    ----------
    
    RETURNS
    ----------
        all_hand_annots: three-dimensional list 
            One sublist per spectrogram, in the same order as "all_spectrogram_paths".
                Each sublist holds that spectrogram's hand-annotations.
                Each hand-annotation is in the form of [klass, x0, y0, x1, y1].
    ----------
    """
    return [
        get_hand_annotated_boxes_spec(path_to_spec.rsplit("/", 1)[-1],
                                      lst_file_name,
                                      desired_classes = desired_classes)
        for path_to_spec in all_spectrogram_paths
    ]

In [None]:
# Gathers the hand-annotations for every spectrogram of the chosen WAV file.
# ("lst_file_name" and "desired_classes" must already be defined by earlier cells of the notebook.)
all_hand_annots = get_all_hand_annotated_boxes_spec(all_spec_paths, lst_file_name, desired_classes)

In [None]:
def convert_hand_spec_coords_to_wav_coords(wav_fname, all_hand_annots, time_span = 10800, chunk_size = 30, spec_overlap = 3.0, spec_max_freq = 1455.749312):
    """
    Re-expresses every hand-annotated box in "all_hand_annots" in WAV-file coordinates.
    On input, box edges are fractions (0 to 1) of the spectrogram they belong to; on output, times are seconds
    from the start of the WAV file and frequencies are in Hz (from 0 Hz up to "spec_max_freq").
    
    PARAMETERS:
    ----------
        wav_fname: string
            The "numeric" portion of the WAV file's name (e.g., wav_fname = "671658014.181008033412")
            (Not used by the calculation itself.)
        
        all_hand_annots: three-dimensional list 
            One sublist per spectrogram, ordered from the first spectrogram of the WAV file to the last.
                Each sublist holds that spectrogram's hand-annotations.
                Each hand-annotation is in the form of [klass, x0, y0, x1, y1].
                       
        time_span: float
            The elapsed time (in seconds) that the entire WAV file spans.
            (EQUAL TO THE FINAL "end_s" PRINTED BY "extract_chunk()" WITHIN "ConvertWavToSpec.ipynb" WHEN THE SPECTROGRAMS WERE CREATED)
            
        chunk_size: float
            The amount of time (in seconds) that a single spectrogram covers.
            (EQUAL TO "CHUNK_SIZE_SEC" WITHIN "ConvertWavToSpec.ipynb" WHEN THE SPECTROGRAMS WERE CREATED)
            
        spec_overlap: float
            The number of seconds of overlap between one spectrogram and its subsequent spectrogram.
            (EQUAL TO THE FLOAT SPECIFIED IN "int(<FLOAT> * sr)" FOR THE CALCULATION OF "step" WITHIN "ConvertWavToSpec.ipynb" WHEN THE SPECTROGRAMS WERE CREATED)
            
        spec_max_freq: float
            The y-axis's maximum frequency on the spectrograms (in Hz)
            (RELATED TO THE VALUE SPECIFIED FOR "FREQUENCY_MAX" IN "ConvertWavToSpec.ipynb" WHEN THE SPECTROGRAMS WERE CREATED)
                (Example: spec_max_freq = 1455.749312 when FREQUENCY_MAX = 1600)
            (Currently, the only way to determine the exact value is to cross-reference hand-annotated boxes with the annotation TXT file.)
    ----------
    
    RETURNS:
    ----------
        conv_hand_annots: list of lists
            A single flat list with one entry per hand-annotation, in the order encountered.
                Each entry is in the form of [klass, beg_time, end_time, min_freq, max_freq].
    ----------
    """
    # Each spectrogram starts "chunk_size - spec_overlap" seconds after the previous one.
    hop = chunk_size - spec_overlap
    final_idx = len(all_hand_annots) - 1
    
    converted = []
    for spec_idx, boxes in enumerate(all_hand_annots):
        offset = hop*spec_idx
        
        # The final spectrogram only covers whatever remains of the WAV file.
        # NOTE(review): "convert_pred_spec_coords_to_wav_coords()" additionally requires "i > 0" here,
        #               so the two functions disagree when there is exactly one spectrogram -- confirm which is intended.
        duration = time_span - offset if spec_idx == final_idx else chunk_size
        
        # The y-axis is flipped: y = 0 is the top of the spectrogram (the highest frequency).
        converted.extend(
            [klass,
             offset + x0*duration,
             offset + x1*duration,
             (1-y1)*spec_max_freq,
             (1-y0)*spec_max_freq]
            for klass, x0, y0, x1, y1 in boxes
        )
    return converted

In [None]:
# Converts the hand-annotations to WAV-file coordinates.
# Positional values: time_span = 10799.5225 s, chunk_size = 30 s, spec_overlap = 3.0 s, spec_max_freq = 1455.749312 Hz.
conv_hand_annots = convert_hand_spec_coords_to_wav_coords(wav_fname, all_hand_annots, 10799.5225, 30, 3.0, 1455.749312)

In [None]:
def make_selection_table(wav_fname, conv_preds):
    """
    Makes a TXT file reflecting the hand-annotations for the specified WAV file (called "<wav_fname>-hands.txt").
    It will be formatted very similarly to the annotators' TXT files for use in Raven as a selection table.
    Any existing file with the same name is replaced.
    
    NOTE: This definition has the same name as the function that writes "<wav_fname>-predictions.txt".
          Running this cell replaces that function for the rest of the session.
    
    PARAMETERS:
    ----------
        wav_fname: string
            The "numeric" portion of the WAV file's name (e.g., wav_fname = "671658014.181008033412")
            
        conv_preds: list of lists
            A list of all hand-annotations for the specified WAV file (e.g., "conv_hand_annots").
                Each hand-annotation is in the form of [klass, beg_time, end_time, min_freq, max_freq].
                "klass" must be a key of "HAND_CLASS_NAME_MAP" (otherwise a KeyError is raised).
            (The parameter keeps the name "conv_preds" so that existing calls continue to work.)
    ----------
    
    RETURNS:
    ----------
        N/A
    ----------
    """
    column_names = ["Selection", "View", "Channel", "Begin Time (s)", "End Time (s)", "Low Freq (Hz)", "High Freq (Hz)", "Species"]
    
    # Removes old TXT file with same name (if one exists)
    # ("os.remove" replaces the IPython-only "!rm" shell magic so the function also works outside a notebook.)
    file_name = f"{wav_fname}-hands.txt"
    if os.path.exists(file_name):
        print(f"{file_name} exists, removing now")
        os.remove(file_name)
    
    # Creates TXT file (tab-separated, one header row followed by one row per hand-annotation)
    with open(file_name, "w") as f:
        f.write('\t'.join(column_names) + '\n')
        
        # Iterates over the argument that was passed in (not over a notebook-level global).
        for i, hand_annot in enumerate(conv_preds):
            klass, beg_time, end_time, min_freq, max_freq = hand_annot
            row = [
                str(i+1),            # Selection (1-based)
                "Spectrogram 1",     # View
                "1",                 # Channel
                str(beg_time),
                str(end_time),
                str(min_freq),
                str(max_freq),
                HAND_CLASS_NAME_MAP[klass],
            ]
            f.write('\t'.join(row) + '\n')
    print("Done!")

In [None]:
# Writes the converted hand-annotations to "<wav_fname>-hands.txt" in the current working directory.
make_selection_table(wav_fname, conv_hand_annots)