Evaluation Data:

Evaluate the precision and recall of the algorithm


In [1]:
#import basic libraries

import csv
import sys
import os
import math
import numpy as np
import numpy.linalg as npla
import scipy
from scipy import sparse
from scipy import linalg
import scipy.sparse.linalg as spla
import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.mlab as mlab
from mpl_toolkits.mplot3d import axes3d
%matplotlib tk
import operator
sys.path.append("../Python_code") # go to parent dir
from reddit import *
from analytics_combined import *

In [None]:
def create_ground_truth(input_filename):
    '''
        Given the input file, create and return a dictionary of the ground truth for the
        pixel assignments.

        The input is a CSV file with a header row. Only two columns are read:
        column 5 (pic_id) and column 8 (smallest_proj, an integer flag).

        Each pixel's ID will be based upon its line number within the file
        (0-based, not counting the header row).

        Returns a dict mapping pic_id -> list of pixel IDs, containing only the rows
        whose smallest_proj flag is non-zero.
    '''
    ground_truth = dict()

    # newline='' lets the csv module handle line endings itself
    with open(input_filename, 'r', newline='') as file_in:
        reader = csv.reader(file_in)

        # Skip first line (header row)
        next(reader, None)

        for line_number, row in enumerate(reader):
            # Blank lines come back as empty rows; they still consume a line number
            # so that IDs stay tied to the position within the file.
            if not row:
                continue

            pic_id = row[5]
            smallest_proj = int(row[8])

            # The ground truth pixel assignments will be based on the pixel assigned to the smallest project
            if smallest_proj:
                ground_truth.setdefault(pic_id, []).append(line_number)

    return ground_truth
                

In [None]:
# calculate the intersection over union of list1 and list2
def calc_intersection_over_union(list1, list2):
    '''
        Return |A & B| / |A | B| where A and B are the sets of distinct values in
        list1 and list2.

        Duplicate values within either input are counted once, so the result is
        always in the range [0, 1]. If both inputs are empty the union is empty and
        0.0 is returned instead of dividing by zero.
    '''
    set1 = set(list1)
    set2 = set(list2)

    union = set1 | set2
    if not union:
        return 0.0

    intersection = set1 & set2
    return len(intersection) / len(union)

In [None]:
def get_max_iou(region, ground_truth):
    '''
        Compare a region against every project in the ground truth and return the
        best intersection-over-union score together with the pic_id that achieved it.

        The result is the pair (best_score, best_pic_id). If no project scores above
        zero the pair is (0, None). On a tie the project encountered first wins.
    '''
    best_score, best_pic_id = 0, None

    for candidate_id in ground_truth:
        candidate_pixels = ground_truth[candidate_id]
        score = calc_intersection_over_union(region, candidate_pixels)
        # Strict comparison: an equal score never displaces the current best
        if score > best_score:
            best_score, best_pic_id = score, candidate_id

    return best_score, best_pic_id
    

In [62]:
# Given a list of regions (each a list of update IDs) and the ground truth data, evaluate the precision and recall
# ground_truth is a dictionary of the following format:
'''
    "image_id" : [ list of update IDS belonging to this image ]
'''

# Each update tuple is assumed to be this format:
# (updateID, time, user, x, y, color, pic_id)
def evaluate(regions, ground_truth, threshold = 0.75):
    '''
        Print the precision and recall of a list of regions against the ground truth.

        regions      : list of regions, each a list of update IDs
        ground_truth : dictionary mapping an image id to its list of update IDs
        threshold    : a region counts as correct when its best iou with any ground
                       truth image is strictly greater than this value

        Precision is (correct regions) / (number of regions) and recall is
        (correct regions) / (number of ground truth images). Either value is reported
        as 0.0 when its denominator is zero.

        NOTE: regions are scored independently, so two regions that both match the
        same ground truth image are both counted as correct.

        Nothing is returned; the results are printed.
    '''
    num_correct_counter = 0

    for region in regions:
        iou, _ = get_max_iou(region, ground_truth)
        if iou > threshold:
            num_correct_counter += 1

    num_regions = len(regions)
    num_images = len(ground_truth)

    # Guard the divisions so empty inputs report 0.0 instead of raising
    precision = num_correct_counter / num_regions if num_regions else 0.0
    recall = num_correct_counter / num_images if num_images else 0.0

    print("Total correct assignments:", num_correct_counter)
    print("Total assignments made:", num_regions)
    print("Precision:", precision)
    print("Recall:", recall)
    

In [63]:
# FOR TESTING

# Synthetic ground truth used to exercise evaluate().
# The key is an image id. The value is a list of updates belonging to that image.
# Each update is represented by a unique update ID.
fake_ground_truth = {
    "1":  [11,12,13,21,22,23,32],
    "2":  [31,41,42,43,51,52,53,61,62,71,72],
    "3":  [14,15,24,25,34,35,45,46,55,64,65],
    "4":  [63,73,74,75,81,82,83,84,85,86,91,92,93,94,95],
    "5":  [56,57,66,67,68,69,76,77,78,79,87,88,89,96,97,98,99],
    "6":  [36,37,38,39,47,48,49,58,59],
    "7":  [18,19,28,29],
    "8":  [16,17,26,27],
    "69": [44],
    "96": [54],
    "420":[33]
}

# Synthetic algorithm output: each inner list is the set of updates assigned to one image.
# For now, each list is simply a list of update IDs.
# Some lists contain repeated IDs such as 234 that appear in no ground-truth list.
# TODO: Assign a unique id to each update
fake_list_of_images = [
    [11,12,21,22,23,32,42,234,234,234,234,234,234],
    [31,41,43,44,51,52,53,61,62,63,71,72],
    [13,14,15,23,24,34,35,45,46,55,64],
    [62,63,73,74,81,82,83,84,85,86,91,92,93,94,95,234,234,234,234,234,243,234,23424,234,234,234,234],
    [56,57,65,66,67,68,69,75,76,77,78,79,87,88,89,96,97,98,99],
    [36,37,38,39,47,48,49,58,59,68,69,234,234,234,234,234,234,234,243],
    [17,18,19,27,28,29,38],
    [15,16,17,25,26,27],
    [54],
    [33]
]

# Run the evaluation on the synthetic data with the default threshold.
evaluate(fake_list_of_images, fake_ground_truth)

Total correct assignments: 4
Total assignments made: 10
Precision: 0.4
Recall: 0.36363636363636365
