Evaluation Data:

Evaluate the precision and recall of the algorithm


In [3]:
#import basic libraries

import csv
import sys
import os
import math
import numpy as np
import numpy.linalg as npla
import scipy
from scipy import sparse
from scipy import linalg
import scipy.sparse.linalg as spla
import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.mlab as mlab
from mpl_toolkits.mplot3d import axes3d
%matplotlib tk
import operator
sys.path.append("../Python_code") # go to parent dir
from reddit import *
from analytics_combined import *

In [13]:
def create_ground_truth(input_filename):
    '''
        Parse the tile-placement CSV and build the ground truth pixel assignments.

        The first line of the file is treated as a header and skipped. Every
        remaining row becomes one update whose ID is its 0-based position among
        the data rows.

        Returns a (ground_truth, updates) pair:
          * ground_truth maps each pic_id (kept as a string) to the list of
            update IDs whose smallest_proj column is non-zero
          * updates is a list, indexable by update ID, of
            [time, user, x, y, color, pic_id, final_pixel, final_pixel_color, smallest_proj]
    '''
    ground_truth = {}
    updates = []

    with open(input_filename, 'r') as csv_file:
        # Discard the header row before handing the file to the CSV parser
        next(csv_file, None)

        for update_id, row in enumerate(csv.reader(csv_file)):
            record = [
                int(row[0]),  # time
                row[1],       # user
                int(row[2]),  # x
                int(row[3]),  # y
                int(row[4]),  # color
                row[5],       # pic_id
                int(row[6]),  # final_pixel
                int(row[7]),  # final_pixel_color
                int(row[8]),  # smallest_proj
            ]

            # Only updates flagged as belonging to the smallest project count as ground truth
            if record[8]:
                ground_truth.setdefault(record[5], []).append(update_id)

            updates.append(record)

    return ground_truth, updates
                
    
# Calculate the intersection over union of set1 and set2
def calc_intersection_over_union(set1, set2):
    '''
        Return len(set1 & set2) / len(set1 | set2).

        set1 must be a set (or frozenset); set2 may be any iterable accepted by
        set.intersection / set.union.

        When both inputs are empty the union is empty and the ratio is
        undefined; 0.0 is returned in that case instead of raising
        ZeroDivisionError.
    '''
    union = set1.union(set2)
    if not union:
        # Nothing to compare: treat two empty sets as having no overlap
        return 0.0

    intersection = set1.intersection(set2)
    return len(intersection) / len(union)

def get_region_borders(region, updates):
    '''
        Compute the bounding box of a region.

        region is an iterable of update IDs; each ID is converted with int() and
        used as an index into updates. The x and y coordinates are read from
        positions 2 and 3 of the selected update.

        Returns (min_x, min_y, max_x, max_y). The minimums start from
        sys.maxsize and the maximums start from 0, so an empty region yields
        (sys.maxsize, sys.maxsize, 0, 0) and the maximums are never below 0.
    '''
    # Seed each coordinate list with the starting value of the running min/max
    x_floor, y_floor = [sys.maxsize], [sys.maxsize]
    x_ceil, y_ceil = [0], [0]

    for update_id in region:
        record = updates[int(update_id)]
        col = int(record[2])
        row = int(record[3])

        x_floor.append(col)
        x_ceil.append(col)
        y_floor.append(row)
        y_ceil.append(row)

    return min(x_floor), min(y_floor), max(x_ceil), max(y_ceil)

def get_rectangle_overlap_area(min_x1, max_x1, min_y1, max_y1, min_x2, max_x2, min_y2, max_y2):
    '''
        Measure how much two axis-aligned rectangles overlap.

        Note the argument order: for each rectangle it is min_x, max_x, min_y, max_y.

        Despite the name, the result is not a raw area: it is the area of the
        overlapping region divided by the area of the smallest box that encloses
        both rectangles. Returns 0 when the rectangles do not overlap with
        positive width and height (merely touching edges counts as no overlap).
    '''
    # Edges of the candidate intersection rectangle
    inter_left = max(min_x1, min_x2)
    inter_right = min(max_x1, max_x2)
    inter_bottom = max(min_y1, min_y2)
    inter_top = min(max_y1, max_y2)

    if not (inter_right > inter_left and inter_top > inter_bottom):
        return 0

    # Box enclosing both rectangles; it has positive size whenever the intersection does
    hull_width = max(max_x1, max_x2) - min(min_x1, min_x2)
    hull_height = max(max_y1, max_y2) - min(min_y1, min_y2)

    inter_area = (inter_right - inter_left) * (inter_top - inter_bottom)
    return inter_area / (hull_width * hull_height)

def get_max_iou(locations, region, updates, ground_truth, threshold = 0.50):
    '''
        Given a region, find the ground truth project it matches best.

        locations    -- mapping from pic_id to a project object exposing
                        get_left(), get_right(), get_bottom() and get_top()
        region       -- iterable of update IDs assigned to one detected image
        updates      -- list of updates, indexable by update ID
        ground_truth -- dictionary mapping pic_id to its list of update IDs
        threshold    -- a project is only compared against the region when the
                        bounding-box overlap ratio is strictly greater than this

        Returns (max_iou, max_pic_id): the highest intersection over union found
        and the pic_id that produced it. If no project passes the bounding-box
        filter with a positive IoU, the result is (0, None).
    '''
    max_iou = 0
    max_pic_id = None

    if not ground_truth:
        return max_iou, max_pic_id

    # The region's bounding box and ID set do not depend on the project being
    # examined, so compute them once instead of once per ground truth entry.
    min_x, min_y, max_x, max_y = get_region_borders(region, updates)
    region_ids = set(region)

    for pic_id, project_update_ids in ground_truth.items():
        project = locations[pic_id]

        # only check this region with the pic_id if the bounding boxes overlap by more than the threshold
        overlap_ratio = get_rectangle_overlap_area(
            min_x, max_x, min_y, max_y,
            project.get_left(), project.get_right(), project.get_bottom(), project.get_top())

        if overlap_ratio > threshold:
            iou = calc_intersection_over_union(region_ids, set(project_update_ids))
            if iou > max_iou:
                max_iou = iou
                max_pic_id = pic_id

    return max_iou, max_pic_id
    
    
# Given a list of regions and the ground truth data, evaluate the precision and recall
# ground_truth is a dictionary of the following format:
#     "image_id" : [ list of update IDs belonging to this image ]
#
# Each update is assumed to be a list in the format produced by create_ground_truth,
# indexed by its update ID:
# [time, user, x, y, color, pic_id, final_pixel, final_pixel_color, smallest_proj]
def evaluate(regions, updates, ground_truth, threshold = 0.50):
    '''
        Print the precision and recall of a set of detected regions.

        regions      -- list of regions, each a list of update IDs
        updates      -- list of updates, indexable by update ID
        ground_truth -- dictionary mapping pic_id to its list of update IDs
        threshold    -- used both as the bounding-box pre-filter inside
                        get_max_iou and as the minimum IoU (strictly greater
                        than) for a region to count as a correct assignment

        Precision is correct regions / total regions; recall is correct regions /
        number of ground truth projects. Either is reported as 0.0 when its
        denominator is zero. Nothing is returned.

        NOTE(review): every region above the threshold adds to the recall
        numerator, so several regions matching the same project are each
        counted -- confirm whether recall should count distinct projects.
    '''
    locations = store_locations("../data/atlas_complete.json")
    num_correct_counter = 0

    for region in regions:
        iou, _matched_pic_id = get_max_iou(locations, region, updates, ground_truth, threshold)
        if iou > threshold:
            num_correct_counter += 1

    # Guard against empty inputs so the report is still printed instead of raising ZeroDivisionError
    precision = num_correct_counter / len(regions) if len(regions) else 0.0
    recall = num_correct_counter / len(ground_truth) if len(ground_truth) else 0.0

    print("Total correct assignments:", num_correct_counter)
    print("Total assignments made:", len(regions))
    print("Precision:", precision)
    print("Recall:", recall)
    

In [9]:
# Parse the placements CSV once: ground_truth maps pic_id -> update IDs, updates is indexable by update ID
ground_truth, updates = create_ground_truth("../data/sorted_tile_placements_proj.csv")

In [15]:


# Hand-written placeholder regions used to smoke-test evaluate().
# Each inner list stands for the updates assigned to one detected image.
# For now, each entry is just an update ID (an index into `updates`);
# some IDs are repeated within a list.
# TODO: Assign a unique id to each update
fake_list_of_images = [
    [11,12,21,22,23,32,42,234,234,234,234,234,234],
    [31,41,43,44,51,52,53,61,62,63,71,72],
    [13,14,15,23,24,34,35,45,46,55,64],
    [62,63,73,74,81,82,83,84,85,86,91,92,93,94,95,234,234,234,234,234,243,234,23424,234,234,234,234],
    [56,57,65,66,67,68,69,75,76,77,78,79,87,88,89,96,97,98,99],
    [36,37,38,39,47,48,49,58,59,68,69,234,234,234,234,234,234,234,243],
    [17,18,19,27,28,29,38],
    [15,16,17,25,26,27],
    [54],
    [33]
]

# A threshold of -1 is below every possible overlap ratio and IoU (both are >= 0 and
# compared with a strict `>`), so every region is counted as correct. This run only
# checks that the pipeline executes; the reported precision is not meaningful.
evaluate(fake_list_of_images, updates, ground_truth, -1)

Total correct assignments: 10
Total assignments made: 10
Precision: 1.0
Recall: 0.006402048655569782


In [2]:
%%time
# Scratch cell: times a trivial expression with the %%time cell magic; it uses nothing defined above.
3 + 3

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 21.9 µs


6