Evaluation Data:

Evaluate the precision and recall of the algorithm


In [1]:
#import basic libraries

import csv
import sys
import os
import math
import numpy as np
import numpy.linalg as npla
import scipy
from scipy import sparse
from scipy import linalg
import scipy.sparse.linalg as spla
import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.mlab as mlab
from mpl_toolkits.mplot3d import axes3d
%matplotlib tk
import operator
sys.path.append("../Python_code") # go to parent dir
from reddit import *
from analytics_combined import *

In [38]:
# Turn a list of tuples into a list containing only one element of each tuple
def truncate_list_of_tuples(list_of_tuples):
    """Reduce a sequence of updates to a list of their update IDs.

    Each element may be either a full update record whose first field is the
    update ID (a tuple or list such as
    ``(updateID, time, user, x, y, color, pic_id)``) or a bare update ID.
    Bare IDs are passed through unchanged, so inputs that already consist of
    IDs keep working exactly as before.

    Args:
        list_of_tuples: Iterable of update records and/or bare update IDs.

    Returns:
        A new list with one update ID per input element, in input order.
    """
    new_list = []
    for element in list_of_tuples:
        # A non-empty tuple/list is treated as an update record whose first
        # field is the update ID; anything else is assumed to be the ID itself.
        if isinstance(element, (tuple, list)) and element:
            new_list.append(element[0])
        else:
            new_list.append(element)

    return new_list

In [48]:
# calculate the intersection over union of set1 and set2
def calc_intersection_over_union(set1, set2):
    """Return the intersection over union (IoU) of two collections.

    Both inputs are converted to sets first, so repeated values are counted
    once and the result always lies in the range [0, 1].

    Args:
        set1: Iterable of hashable values.
        set2: Iterable of hashable values.

    Returns:
        ``len(set1 & set2) / len(set1 | set2)`` as a float, or ``0.0`` when
        both inputs are empty (the ratio is undefined for an empty union).
    """
    first = set(set1)
    second = set(set2)

    union_size = len(first | second)
    if union_size == 0:
        # Both inputs are empty; report "no overlap" instead of dividing by zero.
        return 0.0

    return len(first & second) / union_size

In [62]:
# Given list_of_images (the candidate images, each a list of updates) and the ground truth data, evaluate the precision and recall
# ground_truth is a dictionary of the following format:
'''
    "image_id" : [ list of update IDS belonging to this image ]
'''

# Each update tuple is assumed to be this format:
# (updateID, time, user, x, y, color, pic_id)
def evaluate(list_of_images, ground_truth, threshold = 0.75):
    """Print and return the precision and recall of a set of candidate images.

    A candidate image matches a ground-truth image when the intersection over
    union (IoU) of their update-ID sets is at least ``threshold``.

    Precision is the fraction of candidate images that match at least one
    ground-truth image. Recall is the fraction of ground-truth images matched
    by at least one candidate. Counting each side once keeps both metrics in
    [0, 1] even when one candidate matches several ground-truth images or
    several candidates match the same one.

    Args:
        list_of_images: Sequence of candidate images, each a list of updates
            (or bare update IDs) accepted by ``truncate_list_of_tuples``.
        ground_truth: Dict mapping an image ID to the list of update IDs that
            belong to that image.
        threshold: Minimum IoU for a candidate to count as a match.

    Returns:
        A ``(precision, recall)`` tuple. Either value is ``0.0`` when its
        denominator (number of candidates / number of ground-truth images)
        is zero.
    """
    # Ground-truth image IDs that at least one candidate matched.
    matched_ground_truth = set()
    # Number of candidates that matched at least one ground-truth image.
    num_correct_counter = 0

    for list_of_updates in list_of_images:
        # Turn the list into a set so that no update is repeated; this does not
        # depend on the ground-truth image, so do it once per candidate.
        update_ids = set(truncate_list_of_tuples(list_of_updates))

        candidate_matched = False
        for image_id, ground_truth_ids in ground_truth.items():
            iou = calc_intersection_over_union(update_ids, ground_truth_ids)
            if iou >= threshold:
                # This candidate matches this image in the ground truth.
                matched_ground_truth.add(image_id)
                candidate_matched = True

        if candidate_matched:
            num_correct_counter += 1

    total_assignments = len(list_of_images)
    total_ground_truth = len(ground_truth)

    # Guard the denominators so empty inputs report 0.0 instead of raising.
    precision = num_correct_counter / total_assignments if total_assignments else 0.0
    recall = len(matched_ground_truth) / total_ground_truth if total_ground_truth else 0.0

    print("Total correct assignments:", num_correct_counter)
    print("Total assignments made:", total_assignments)
    print("Precision:", precision)
    print("Recall:", recall)

    return precision, recall
    

In [63]:
# FOR TESTING
# Hand-made fixture for a quick check of evaluate().
# Keys are ground-truth image IDs (strings); each value lists the update IDs
# that belong to that image.
fake_ground_truth = {
    "1":  [11,12,13,21,22,23,32],
    "2":  [31,41,42,43,51,52,53,61,62,71,72],
    "3":  [14,15,24,25,34,35,45,46,55,64,65],
    "4":  [63,73,74,75,81,82,83,84,85,86,91,92,93,94,95],
    "5":  [56,57,66,67,68,69,76,77,78,79,87,88,89,96,97,98,99],
    "6":  [36,37,38,39,47,48,49,58,59],
    "7":  [18,19,28,29],
    "8":  [16,17,26,27],
    "69": [44],
    "96": [54],
    "420":[33]
}

# Candidate images, each given directly as a list of update IDs. Some entries
# are padded with IDs that appear in no ground-truth image (234, 243, 23424),
# which enlarges the union and so lowers their intersection over union.
# Repeated IDs count once because evaluate() converts each list to a set.
fake_list_of_images = [
    [11,12,21,22,23,32,42,234,234,234,234,234,234],
    [31,41,43,44,51,52,53,61,62,63,71,72],
    [13,14,15,23,24,34,35,45,46,55,64],
    [62,63,73,74,81,82,83,84,85,86,91,92,93,94,95,234,234,234,234,234,243,234,23424,234,234,234,234],
    [56,57,65,66,67,68,69,75,76,77,78,79,87,88,89,96,97,98,99],
    [36,37,38,39,47,48,49,58,59,68,69,234,234,234,234,234,234,234,243],
    [17,18,19,27,28,29,38],
    [15,16,17,25,26,27],
    [54],
    [33]
]

# Alternative candidates (disabled): the same lists without the padding IDs,
# plus one empty image. Uncomment to replace the list above and compare.
# fake_list_of_images = [
#     [11,12,21,22,23,32,42],
#     [31,41,43,44,51,52,53,61,62,63,71,72],
#     [13,14,15,23,24,34,35,45,46,55,64],
#     [62,63,73,74,81,82,83,84,85,86,91,92,93,94,95],
#     [56,57,65,66,67,68,69,75,76,77,78,79,87,88,89,96,97,98,99],
#     [36,37,38,39,47,48,49,58,59,68,69],
#     [17,18,19,27,28,29,38],
#     [15,16,17,25,26,27],
#     [],
#     [54],
#     [33]
# ]

# Prints the match count, precision and recall at the default threshold (0.75).
evaluate(fake_list_of_images, fake_ground_truth)

Total correct assignments: 4
Total assignments made: 10
Precision: 0.4
Recall: 0.36363636363636365
