In [1]:
from __future__ import division
import argparse
import logging

import numpy as np
import torch.autograd
import torch.cuda
import torch.nn as nn
import torch.optim
from torch.utils.data import DataLoader
from tqdm import tqdm_notebook as tqdm
import random

import copy
from datasets.maps_alt import MAPSDataset

#from cnn_ws.transformations.homography_augmentation import HomographyAugmentation
from cnn_ws.losses.cosine_loss import CosineLoss

from cnn_ws.models.myphocnet import PHOCNet
from cnn_ws.evaluation.retrieval import map_from_feature_matrix, map_from_query_test_feature_matrices
from torch.utils.data.dataloader import _DataLoaderIter as DataLoaderIter
from torch.utils.data.sampler import WeightedRandomSampler

from cnn_ws.utils.save_load import my_torch_save, my_torch_load

#import matplotlib.pyplot as plt

In [2]:
# Length threshold for ground-truth words. The filters in this notebook keep a word
# only when len(word) > word_filter_len, so with the value 1 single-character
# words are dropped.
word_filter_len = 1 # only words above this length are considered valid

In [3]:
# `logging` is imported at the top of the notebook but no logger object was ever
# created, so the CPU fallback branch used to fail with a NameError instead of
# emitting the warning.
logger = logging.getLogger(__name__)

# gpu_id is either None (CPU mode) or a list of CUDA device indices.
if torch.cuda.is_available():
    gpu_id = [0]
else:
    logger.warning('Could not find CUDA environment, using CPU mode')
    gpu_id = None
#torch.cuda.get_device_name(gpu_id[0])

In [4]:
# Load the pickled model object.
# In CPU mode (gpu_id is None) remap every storage to CPU; a checkpoint that
# holds CUDA tensors cannot be deserialised on a machine without CUDA otherwise.
# NOTE(review): torch.load unpickles arbitrary objects -- only load trusted checkpoints.
cpu_remap = None if gpu_id is not None else (lambda storage, loc: storage)
model_ = torch.load('models/PHOCNet_Nov13.pt', map_location=cpu_remap)

# The saved object is unwrapped through `.module`; fall back to the object itself
# if it was saved without such a wrapper.
cnn = getattr(model_, 'module', model_)#list(model_.named_parameters())
if gpu_id is not None:
    if len(gpu_id) > 1:
        cnn = nn.DataParallel(cnn, device_ids=gpu_id)
        cnn.cuda()
    else:
        cnn.cuda(gpu_id[0])

# eval() puts the module AND all of its submodules into inference mode.
# Assigning `cnn.training = False` only changed the flag on the outermost module
# (on the DataParallel wrapper in the multi-GPU case), leaving children untouched.
cnn.eval()

In [5]:
from strlocale import BasicLocale

def clean_words(words):
    """Convert every word label to plain ASCII, in place (best effort).

    Each entry is passed through ``BasicLocale.represent`` and the result is
    ASCII-encoded with un-encodable characters dropped. If the conversion of
    an entry raises, that entry is left as it was.

    :param words: mutable, indexable sequence of word labels; modified in place.
    :return: the same ``words`` object that was passed in.
    """
    lc = BasicLocale()
    for i, w in enumerate(words):
        try:
            words[i] = lc.represent(w).encode('ascii', errors='ignore')
        except Exception:
            # Best effort: keep the raw label. `except Exception` (instead of a
            # bare except) still lets KeyboardInterrupt/SystemExit stop the cell.
            words[i] = w
    return words

# load before, after images and words, transforms and cleans them
# the function also assumes that ground truth words are the same before and after
# returns before_images, after_images, words
def load_and_transform(map_name):
    images_before = np.load('../../../detection_outputs_ready_for_test/ray_regions/'+map_name+'.npy')
    words_before = np.load('../../../detection_outputs_ready_for_test/ray_labels/'+map_name+'.npy')
    words_before = clean_words(words_before)
    images_before, words_before = clean_word_images(images_before, words_before)
    images_before = np.transpose(images_before, (0,3,1,2))
    
    images_after = np.load('../../../detection_outputs_ready_for_test/ray_regions_new/'+map_name+'.npy')
    words_after = np.load('../../../detection_outputs_ready_for_test/ray_labels_new/'+map_name+'.npy')
    words_after = clean_words(words_after)
    images_after, words_after = clean_word_images(images_after, words_after)
    images_after = np.transpose(images_after, (0,3,1,2))
    
    print 'Images Before Shape ', images_before.shape
    print 'Words Before Shape ', words_before.shape
    print 'Images After Shape ', images_after.shape
    print 'Words After Shape ', words_after.shape
    return images_before, images_after, words_after

def clean_word_images(images, words):
    """Drop every (image, word) pair whose word is not longer than ``word_filter_len``.

    :param images: array indexable by a list of integer positions.
    :param words: array of labels, same order as ``images``.
    :return: ``(images, words)`` restricted to the kept positions.
    """
    keep = [pos for pos, word in enumerate(words) if len(word) > word_filter_len]
    return images[keep], words[keep]

In [6]:
# convert images into embeddings using the cnn model
def get_image_embeddings(cnn, images):
    """Run every image through ``cnn`` and return the sigmoid of its output.

    Each image is converted to float32, rescaled as ``1 - pixel / 255.0``,
    given a leading batch axis of size 1 and fed to the network on its own.

    :param cnn: callable model taking a single-image batch.
    :param images: indexable collection of NumPy image arrays.
    :return: list with one flattened 1-D NumPy array per image.
    """
    outputs = []
    # Inference only: no_grad avoids building an autograd graph for every forward pass.
    with torch.no_grad():
        for i in tqdm(range(len(images))):
            word_img = images[i]
            word_img = 1 - word_img.astype(np.float32) / 255.0
            word_img = word_img.reshape((1,) + word_img.shape)
            word_img = torch.from_numpy(word_img).float()
            # gpu_id is None in CPU mode; indexing it unconditionally used to crash here.
            if gpu_id is not None:
                word_img = word_img.cuda(gpu_id[0])
            word_img = torch.autograd.Variable(word_img)
            output = torch.sigmoid(cnn(word_img))
            output = output.data.cpu().numpy().flatten()
            outputs.append(output)
    return outputs

In [7]:
def create_word_variations(words, enable_conf=False):
    """Build the variation tables for a collection of root words.

    For each root word longer than ``word_filter_len`` four case forms are
    considered (as given, lower, upper, capitalized). Each form contributes
    itself plus the form without its first character and the form without its
    last character.

    :param words: iterable of root words.
    :param enable_conf: controls the confusion logic. When enabled, a case form
        of a root word that is also a variation of some word (e.g. root words
        "rand" and "grand") keeps its non-zero direction, i.e. it stays marked
        as to be extended, and is recorded in the confusion set.
    :return: ``(word_var, root_word_var, conf_words)`` where
        ``word_var`` maps every variation to a direction: 0 for a root form,
        +1 for ``form[1:]`` and -1 for ``form[:-1]``;
        ``root_word_var`` maps each original word to the set of all its variations;
        ``conf_words`` is the set of confused forms (empty unless ``enable_conf``).
    """
    def _case_forms(word):
        return [word, word.lower(), word.upper(), word.capitalize()]

    def _long_enough():
        return (word for word in words if len(word) > word_filter_len)

    word_var = {}
    root_word_var = {}
    # Pass 1: register the truncated forms with their direction.
    for root in _long_enough():
        variations = set()
        for form in _case_forms(root):
            without_first, without_last = form[1:], form[:-1]
            word_var[without_first] = 1
            word_var[without_last] = -1
            variations.update((form, without_first, without_last))
        root_word_var[root] = variations

    # Pass 2: root forms get direction 0, unless they already carry a direction
    # and the confusion logic is on.
    conf_words = set()
    for root in _long_enough():
        for form in _case_forms(root):
            if enable_conf and word_var.get(form, 0) != 0:
                conf_words.add(form)
            else:
                word_var[form] = 0
    return word_var, root_word_var, conf_words

In [8]:
# compute the PHOC representation of the word itself
from cnn_ws.string_embeddings.phoc import build_phoc_descriptor
def get_word_phoc_representations(word_strings):
    """Build PHOC descriptors for all variations of the given words.

    The variations come from ``create_word_variations`` with the confusion
    logic enabled; the descriptors are built by ``build_phoc_descriptor`` with
    unigram levels (1, 2, 4, 8) and no bigrams.

    :param word_strings: collection of root words.
    :return: ``(embedding_var, word_var_strings, word_var_dir, root_word_var,
        conf_words)`` -- the descriptor matrix, the variation strings it was
        built from, and the three results of ``create_word_variations``.
    """
    # Unigram alphabet: '&', then A-Z, a-z and 0-9, in that order.
    char_spans = (('&', '&'), ('A', 'Z'), ('a', 'z'), ('0', '9'))
    unigrams = [chr(code)
                for first, last in char_spans
                for code in range(ord(first), ord(last) + 1)]

    word_var_dir, root_word_var, conf_words = create_word_variations(word_strings, enable_conf=True)
    word_var_strings = word_var_dir.keys()

    embedding_var = build_phoc_descriptor(words=word_var_strings,
                                          phoc_unigrams=unigrams,
                                          bigram_levels=None,
                                          phoc_bigrams=None,
                                          unigram_levels=(1, 2, 4, 8))

    print('embedding variations:', embedding_var.shape)
    return (embedding_var, word_var_strings, word_var_dir, root_word_var, conf_words)

In [9]:
from scipy.spatial.distance import cdist, pdist, squareform

def get_all_dist_gt(dist_mat, emb_info, words):
    """Collect, per ground-truth word, the distances to its own variations.

    :param dist_mat: 2-D distances indexed as ``dist_mat[word_index][variation_index]``.
    :param emb_info: 5-tuple as returned by ``get_word_phoc_representations``;
        only the variation strings (item 1) and the root-word table (item 3) are used.
    :param words: ground-truth words, one per row of ``dist_mat``.
    :return: one list per word holding ``(variation, distance)`` tuples, in the
        order of the variation strings.
    """
    _, word_var_strings, _, root_word_var, _ = emb_info
    all_dist = []
    for row, word in enumerate(words):
        own_variations = root_word_var[word]
        all_dist.append([(candidate, dist_mat[row][col])
                         for col, candidate in enumerate(word_var_strings)
                         if candidate in own_variations])
    return all_dist

def report_matches_with_variations(dist_mat, word_strings, emb_info, k):
    """Score the ``k`` nearest word variations of every image against its label.

    :param dist_mat: 2-D array of distances, one row per image and one column
        per variation string.
    :param word_strings: ground-truth word for each row.
    :param emb_info: 5-tuple as returned by ``get_word_phoc_representations``.
    :param k: number of nearest variations kept per image.
    :return: 7-tuple
        ``(count, matched_words, img_dir, words_len, actual_dist, is_correct, gt_words_dist)``:
        ``count`` -- number of images with status 1 or 2;
        ``matched_words`` -- per image, the list of its ``k`` nearest variations;
        ``img_dir`` / ``words_len`` / ``actual_dist`` -- flat lists with one entry
        per (image, match): the variation's direction, its length plus
        ``abs(direction)``, and its distance;
        ``is_correct`` -- per image 1 if a match equals the label ignoring case,
        2 if instead a match is one of the label's variations, else 0;
        ``gt_words_dist`` -- result of ``get_all_dist_gt``.
    """
    _, word_var_strings, word_var_dir, root_word_var, _ = emb_info
    gt_words_dist = get_all_dist_gt(dist_mat, emb_info, word_strings)
    nearest = np.argsort(dist_mat, axis=1)[:, :k]

    # Gather the matched variations and their per-match details.
    matched_words, img_dir, words_len, actual_dist = [], [], [], []
    for row, cols in enumerate(nearest):
        row_matches = []
        for col in cols:
            candidate = word_var_strings[col]
            direction = word_var_dir[candidate]
            actual_dist.append(dist_mat[row][col])
            row_matches.append(candidate)
            words_len.append(len(candidate) + abs(direction))
            img_dir.append(direction)
        matched_words.append(row_matches)

    # Grade every image: exact (case-insensitive) hit, variation hit, or miss.
    is_correct = []
    for row, target in enumerate(word_strings):
        row_matches = matched_words[row]
        if target.lower() in [match.lower() for match in row_matches]:
            status = 1
        elif any(match in root_word_var[target] for match in row_matches):
            status = 2
        else:
            status = 0
        is_correct.append(status)
    count = sum(1 for status in is_correct if status != 0)
    return (count, matched_words, img_dir, words_len, actual_dist, is_correct, gt_words_dist)

def update_dist_matrix(dist_mat_before, dist_mat_after, conf_idx):
    """Merge the "before" distances into the "after" matrix for ambiguous images.

    For some images the originally predicted word is both a root word and a
    variation of another word (the common-word problem), so one cannot be sure
    whether these images should be extended or not. They are handled by
    comparing the distances before and after image extension and keeping the
    element-wise minimum. The feature can be turned off by setting
    ``enable_conf = False`` when the word variations are created.

    :param dist_mat_before: distances computed before extension (not modified).
    :param dist_mat_after: distances computed after extension; the rows listed
        in ``conf_idx`` are overwritten in place.
    :param conf_idx: row indices of the ambiguous images.
    """
    print('conf_idx', conf_idx)
    for row in conf_idx:
        before_row, after_row = dist_mat_before[row], dist_mat_after[row]
        dist_mat_after[row] = np.minimum(before_row, after_row)

In [10]:
def generate_confusion_matrix(match_report_before, match_report_after, words):
    """Cross-tabulate the per-image status before vs. after extension.

    :param match_report_before: report tuple whose item 5 is the per-image
        status list (0 incorrect, 1 correct, 2 almost) before extension.
    :param match_report_after: same, after extension.
    :param words: ground-truth words; only their number is used.
    :return: 4x4 list of lists: a header row followed by one labelled row per
        "before" status, holding the counts for each "after" status.
    """
    def _count_pairs(status_before, status_after):
        return sum(1 for k in range(len(words))
                   if match_report_before[5][k] == status_before
                   and match_report_after[5][k] == status_after)

    conf_matrix = [['before/after','incorrect (0)','correct (1)','almost (2)']]
    row_labels = ['incorrect (0)', 'correct (1)', 'almost (2)']
    for status_before, label in enumerate(row_labels):
        counts = [_count_pairs(status_before, status_after) for status_after in (0, 1, 2)]
        conf_matrix.append([label] + counts)
    return conf_matrix

In [11]:
def save_before_after_preds(map_name, before_report, after_report, ground_truth):
    """Save the top prediction before/after extension next to the ground truth.

    Writes an array with one row per image and the columns
    (prediction before, prediction after, ground truth) to
    ``../../../before_after_ext_pred/ray_detections/<map_name>.npy``.

    :param map_name: file stem of the output file.
    :param before_report: report tuple; item 1 holds the matched words per image.
    :param after_report: same, after extension.
    :param ground_truth: ground-truth word per image.
    """
    def _top_matches(report):
        # First (nearest) matched word of every image.
        return [ranked[0] for ranked in report[1]]

    columns = [_top_matches(before_report), _top_matches(after_report), ground_truth]
    np.save('../../../before_after_ext_pred/ray_detections/'+map_name+'.npy', np.array(columns).T)

def compare_images_before_after_ext(map_name, cnn, global_stats):
    """Evaluate word retrieval for one map before and after image extension.

    Steps: load the data, embed both image sets with ``cnn``, build the PHOC
    descriptors of all word variations, compute cosine distances, report the
    top-1 matches before extension, merge distances for the low-confidence
    images, report the top-1 matches after extension, and save the predictions.

    :param map_name: name of the map to process.
    :param cnn: model used to embed the word images.
    :param global_stats: dict with keys 'correct_before', 'correct_after' and
        'total'; updated in place with this map's counts.
    :return: ``(acc_before, acc_after, conf_matrix, match_report_before,
        match_report_after)``.
    """
    images_before, images_after, words = load_and_transform(map_name)
    image_embs_before = get_image_embeddings(cnn, images_before)
    image_embs_after = get_image_embeddings(cnn, images_after)
    word_emb_info = get_word_phoc_representations(words)
    phoc_vectors, conf_words = word_emb_info[0], word_emb_info[4]

    # distances between images and word variations
    dist_matrix_before = cdist(XA=image_embs_before, XB=phoc_vectors, metric='cosine')
    dist_matrix_after = cdist(XA=image_embs_after, XB=phoc_vectors, metric='cosine')

    # report on the original images
    match_report_before = report_matches_with_variations(dist_matrix_before, words, word_emb_info, 1)

    # low-confidence images: their top match before extension is a confused word;
    # for those rows dist_matrix_after is overwritten in place
    conf_idx = [row for row, ranked in enumerate(match_report_before[1])
                if ranked[0] in conf_words]
    update_dist_matrix(dist_matrix_before, dist_matrix_after, conf_idx)

    # report on the extended images
    match_report_after = report_matches_with_variations(dist_matrix_after, words, word_emb_info, 1)
    save_before_after_preds(map_name, match_report_before, match_report_after, words)

    n_words = len(words)
    correct_before, correct_after = match_report_before[0], match_report_after[0]
    global_stats['correct_before'] += correct_before
    global_stats['correct_after'] += correct_after
    global_stats['total'] += n_words

    conf_matrix = generate_confusion_matrix(match_report_before, match_report_after, words)
    return (correct_before/n_words, correct_after/n_words, conf_matrix,
            match_report_before, match_report_after)

In [12]:
# find the file names
# Earlier variant, kept for reference: read the map names from the test split
# file and remove individual maps from the list.
# f = open('../splits/test_files.txt', 'rb')
# A = f.readlines()
# f.close()
# A = [x.rstrip('\n') for x in A]
# # train maps to remove
# # A.remove('D0042-1070013')
# # test maps to remove
# A.remove('D5005-5028102')
# Hard-coded list of map names to evaluate; the main loop passes each one to
# compare_images_before_after_ext.
A = ['D0042-1070001','D0042-1070002','D0042-1070006','D0042-1070007','D0117-5755018','D0117-5755035','D0117-5755036']

In [13]:
global_stats = {'correct_before':0, 'correct_after':0, 'total':0}
local_stats = []
for i in tqdm(range(len(A)), ascii=True, desc='Main Iteration'):
    print A[i]
    stats = compare_images_before_after_ext(A[i], cnn, global_stats)
    local_stats.append(stats)

HBox(children=(IntProgress(value=0, description=u'Main Iteration', max=7), HTML(value=u'')))

D0042-1070001
Images Before Shape  (1078, 3, 135, 487)
Words Before Shape  (1078,)
Images After Shape  (1078, 3, 135, 487)
Words After Shape  (1078,)


HBox(children=(IntProgress(value=0, max=1078), HTML(value=u'')))

HBox(children=(IntProgress(value=0, max=1078), HTML(value=u'')))

100%|██████████| 1813/1813 [00:00<00:00, 4272.88it/s]


('embedding variations:', (1813, 945))
('conf_idx', [61, 87, 95, 134, 169, 175, 178, 187, 194, 199, 200, 219, 229, 230, 246, 247, 248, 260, 267, 281, 314, 315, 411, 417, 440, 472, 492, 503, 505, 508, 534, 602, 634, 645, 668, 688, 730, 746, 770, 777, 876, 890, 898, 953, 959, 978, 980, 982, 1047, 1055])
D0042-1070002
Images Before Shape  (1139, 3, 135, 487)
Words Before Shape  (1139,)
Images After Shape  (1139, 3, 135, 487)
Words After Shape  (1139,)


HBox(children=(IntProgress(value=0, max=1139), HTML(value=u'')))

HBox(children=(IntProgress(value=0, max=1139), HTML(value=u'')))

100%|██████████| 2120/2120 [00:00<00:00, 4699.90it/s]


('embedding variations:', (2120, 945))
('conf_idx', [79, 91, 110, 142, 162, 210, 230, 246, 252, 256, 257, 260, 267, 268, 283, 311, 319, 331, 336, 348, 358, 368, 385, 409, 475, 519, 526, 533, 554, 564, 571, 597, 644, 657, 728, 790, 809, 871, 875, 905, 919, 933, 941, 942, 953, 1022, 1031, 1034, 1036, 1044, 1051, 1062, 1101])
D0042-1070006
Images Before Shape  (2339, 3, 135, 487)
Words Before Shape  (2339,)
Images After Shape  (2339, 3, 135, 487)
Words After Shape  (2339,)


HBox(children=(IntProgress(value=0, max=2339), HTML(value=u'')))

HBox(children=(IntProgress(value=0, max=2339), HTML(value=u'')))

100%|██████████| 2828/2828 [00:00<00:00, 5653.69it/s]


('embedding variations:', (2828, 945))
('conf_idx', [2, 3, 5, 47, 49, 68, 108, 116, 124, 149, 152, 155, 158, 161, 162, 163, 171, 173, 178, 180, 184, 188, 191, 195, 196, 207, 208, 212, 213, 214, 217, 218, 221, 224, 225, 236, 242, 279, 282, 288, 290, 292, 294, 297, 298, 299, 308, 313, 317, 323, 325, 327, 330, 331, 335, 343, 347, 349, 351, 352, 359, 361, 362, 365, 366, 367, 370, 374, 383, 384, 388, 393, 394, 398, 400, 426, 433, 436, 441, 444, 445, 450, 453, 473, 476, 484, 486, 491, 492, 494, 500, 510, 521, 531, 535, 540, 542, 558, 567, 571, 574, 581, 582, 594, 598, 621, 622, 638, 659, 678, 682, 684, 686, 689, 692, 696, 697, 724, 734, 735, 748, 752, 760, 791, 808, 809, 815, 818, 820, 822, 841, 847, 861, 890, 891, 892, 972, 992, 1007, 1016, 1027, 1028, 1034, 1035, 1049, 1053, 1062, 1063, 1078, 1079, 1080, 1101, 1114, 1116, 1131, 1152, 1153, 1160, 1163, 1169, 1172, 1174, 1175, 1180, 1181, 1194, 1207, 1208, 1209, 1214, 1219, 1224, 1239, 1242, 1243, 1244, 1245, 1247, 1253, 1255, 1260, 1265, 12

  if c in self._diacrit_dict: # Replace simple diacritic
  return [c if (c not in self.__filtered) else u'' for c in chars]


Images Before Shape  (1302, 3, 135, 487)
Words Before Shape  (1302,)
Images After Shape  (1302, 3, 135, 487)
Words After Shape  (1302,)


HBox(children=(IntProgress(value=0, max=1302), HTML(value=u'')))

HBox(children=(IntProgress(value=0, max=1302), HTML(value=u'')))

100%|██████████| 2037/2037 [00:00<00:00, 5397.94it/s]


('embedding variations:', (2037, 945))
('conf_idx', [38, 54, 82, 84, 100, 140, 161, 185, 219, 224, 230, 258, 265, 274, 277, 306, 310, 345, 354, 371, 410, 450, 557, 672, 695, 763, 834, 850, 859, 875, 880, 884, 897, 915, 937, 939, 940, 947, 967, 989, 1009, 1047, 1079, 1087, 1089, 1118, 1152, 1278])
D0117-5755018
Images Before Shape  (3501, 3, 135, 487)
Words Before Shape  (3501,)
Images After Shape  (3501, 3, 135, 487)
Words After Shape  (3501,)


HBox(children=(IntProgress(value=0, max=3501), HTML(value=u'')))

HBox(children=(IntProgress(value=0, max=3501), HTML(value=u'')))

100%|██████████| 5321/5321 [00:00<00:00, 5541.62it/s]


('embedding variations:', (5321, 945))
('conf_idx', [7, 9, 123, 151, 159, 161, 183, 187, 205, 220, 221, 222, 224, 226, 239, 243, 244, 246, 252, 253, 254, 260, 261, 267, 272, 279, 282, 297, 302, 305, 307, 309, 313, 314, 321, 326, 328, 330, 331, 332, 333, 337, 345, 360, 363, 369, 378, 382, 384, 386, 391, 392, 394, 399, 410, 411, 412, 418, 421, 422, 428, 429, 437, 438, 440, 444, 456, 458, 465, 468, 470, 474, 481, 483, 484, 489, 498, 505, 511, 512, 515, 520, 523, 527, 539, 542, 544, 545, 551, 562, 566, 571, 575, 580, 582, 584, 588, 591, 600, 604, 612, 628, 635, 637, 639, 647, 658, 660, 667, 668, 682, 687, 697, 698, 705, 707, 708, 714, 729, 731, 733, 749, 774, 790, 793, 813, 820, 830, 832, 846, 872, 891, 892, 901, 919, 920, 926, 929, 945, 950, 954, 956, 975, 977, 995, 998, 1005, 1006, 1008, 1013, 1021, 1028, 1041, 1045, 1049, 1069, 1084, 1093, 1100, 1103, 1116, 1126, 1142, 1149, 1167, 1193, 1202, 1208, 1214, 1215, 1219, 1236, 1254, 1265, 1288, 1299, 1323, 1327, 1340, 1349, 1361, 1368, 1369,

HBox(children=(IntProgress(value=0, max=2189), HTML(value=u'')))

HBox(children=(IntProgress(value=0, max=2189), HTML(value=u'')))

100%|██████████| 3616/3616 [00:00<00:00, 5860.33it/s]


('embedding variations:', (3616, 945))
('conf_idx', [33, 51, 73, 78, 89, 100, 106, 111, 123, 127, 134, 143, 149, 154, 161, 163, 165, 169, 176, 178, 183, 188, 190, 215, 218, 220, 235, 267, 270, 275, 303, 309, 318, 319, 320, 322, 323, 333, 351, 354, 356, 366, 368, 374, 388, 389, 393, 404, 418, 432, 439, 440, 445, 450, 452, 470, 472, 474, 476, 494, 497, 506, 527, 533, 556, 567, 613, 625, 629, 631, 633, 635, 648, 667, 686, 697, 725, 728, 758, 769, 782, 805, 838, 843, 852, 862, 868, 884, 941, 948, 954, 994, 1014, 1023, 1030, 1036, 1046, 1061, 1071, 1086, 1103, 1105, 1111, 1118, 1119, 1121, 1126, 1132, 1133, 1142, 1146, 1150, 1152, 1156, 1157, 1162, 1170, 1172, 1173, 1185, 1186, 1199, 1217, 1228, 1236, 1237, 1243, 1244, 1246, 1247, 1249, 1271, 1290, 1322, 1332, 1352, 1360, 1371, 1380, 1393, 1405, 1407, 1409, 1410, 1415, 1419, 1425, 1426, 1429, 1430, 1436, 1441, 1457, 1467, 1476, 1477, 1479, 1480, 1482, 1484, 1485, 1486, 1487, 1501, 1505, 1506, 1507, 1509, 1511, 1522, 1528, 1537, 1564, 1570, 

HBox(children=(IntProgress(value=0, max=2576), HTML(value=u'')))

HBox(children=(IntProgress(value=0, max=2576), HTML(value=u'')))

100%|██████████| 4247/4247 [00:00<00:00, 5954.07it/s]


('embedding variations:', (4247, 945))
('conf_idx', [92, 110, 129, 130, 133, 135, 146, 149, 161, 166, 168, 176, 179, 195, 204, 211, 221, 240, 262, 273, 284, 286, 306, 311, 313, 314, 316, 325, 331, 351, 354, 357, 374, 401, 405, 420, 421, 422, 434, 446, 448, 449, 451, 455, 468, 495, 506, 521, 523, 534, 536, 538, 542, 545, 553, 574, 579, 586, 603, 613, 619, 625, 631, 642, 646, 658, 664, 670, 677, 697, 702, 706, 719, 721, 734, 759, 798, 807, 840, 862, 867, 872, 879, 894, 899, 905, 907, 911, 916, 919, 921, 936, 942, 943, 957, 994, 999, 1001, 1005, 1022, 1031, 1109, 1134, 1166, 1187, 1191, 1198, 1237, 1239, 1267, 1270, 1274, 1277, 1291, 1292, 1295, 1299, 1300, 1309, 1312, 1314, 1319, 1321, 1323, 1324, 1327, 1328, 1339, 1340, 1344, 1345, 1354, 1356, 1358, 1365, 1368, 1371, 1375, 1385, 1386, 1387, 1388, 1390, 1394, 1398, 1399, 1405, 1409, 1410, 1411, 1419, 1422, 1425, 1436, 1439, 1445, 1446, 1447, 1455, 1456, 1457, 1464, 1527, 1540, 1590, 1593, 1598, 1600, 1630, 1652, 1653, 1655, 1663, 1664, 1

In [14]:
# print accuracies
for i in range(len(A)):
    print('Accuracy for image '+A[i])
    print "the accuracy before extension: " + str(local_stats[i][0])
    print "the accuracy after extension: "+str(local_stats[i][1])
    print('\n')
    
print 'Overall Accuracy Before ', global_stats['correct_before']/global_stats['total']
print 'Overall Accuracy After', global_stats['correct_after']/global_stats['total']

Accuracy for image D0042-1070001
the accuracy before extension: 0.49814471243
the accuracy after extension: 0.503710575139


Accuracy for image D0042-1070002
the accuracy before extension: 0.40825285338
the accuracy after extension: 0.412642669008


Accuracy for image D0042-1070006
the accuracy before extension: 0.351004702864
the accuracy after extension: 0.354852501069


Accuracy for image D0042-1070007
the accuracy before extension: 0.357910906298
the accuracy after extension: 0.375576036866


Accuracy for image D0117-5755018
the accuracy before extension: 0.469294487289
the accuracy after extension: 0.476435304199


Accuracy for image D0117-5755035
the accuracy before extension: 0.479671082686
the accuracy after extension: 0.480127912289


Accuracy for image D0117-5755036
the accuracy before extension: 0.503105590062
the accuracy after extension: 0.509316770186


Overall Accuracy Before  0.444491645426
Overall Accuracy After 0.450509770603


In [51]:
from IPython.display import HTML, display
def print_conf_matrix(conf_matrix):
    """Display a confusion matrix as an HTML table in the notebook.

    :param conf_matrix: iterable of rows; every cell is rendered with ``str``.
    """
    html_rows = []
    for row in conf_matrix:
        cells = '</td><td>'.join(str(value) for value in row)
        html_rows.append('<td>{}</td>'.format(cells))
    table = '<table><tr>{}</tr></table>'.format('</tr><tr>'.join(html_rows))
    display(HTML(table))

# print the confusion matrix of every map (item 2 of its result tuple)
for i, map_name in enumerate(A):
    conf_matrix = local_stats[i][2]
    print('Confusion Matrix for Image ' + map_name)
    print_conf_matrix(conf_matrix)
    print('\n')

Confusion Matrix for Image D0090-5242001


0,1,2,3
before/after,incorrect (0),correct (1),almost (2)
incorrect (0),262,5,22
correct (1),0,101,0
almost (2),8,25,46




Confusion Matrix for Image D0117-5755018


0,1,2,3
before/after,incorrect (0),correct (1),almost (2)
incorrect (0),615,17,10
correct (1),0,413,0
almost (2),8,131,103




Confusion Matrix for Image D0117-5755024


0,1,2,3
before/after,incorrect (0),correct (1),almost (2)
incorrect (0),806,14,19
correct (1),0,418,0
almost (2),13,148,118




Confusion Matrix for Image D0117-5755025


0,1,2,3
before/after,incorrect (0),correct (1),almost (2)
incorrect (0),409,5,21
correct (1),0,283,0
almost (2),8,84,85




Confusion Matrix for Image D0117-5755033


0,1,2,3
before/after,incorrect (0),correct (1),almost (2)
incorrect (0),592,12,12
correct (1),0,331,0
almost (2),8,89,97






In [None]:
print "Incorrectly classified before and Incorrectly classified after"
import matplotlib.pyplot as plt
ground_truth = match_report_before[1]
count = 0
for i in range(len(words)):
    if match_report_before[5][i] == 0 \
    and match_report_after[5][i] == 0:
        count += 1
#         print "************************************************************************"
#         print "************************************************************************"
#         q = np.transpose(images_before[i],(1,2,0))
#         q1 = np.transpose(images_after[i],(1,2,0))
#         plt.imshow(q)
#         plt.show()
#         plt.imshow(q1)
#         plt.show()
#         print "Matched before: "+"$"+str(match_report_before[1][i][0])+"$"
#         print "Matched after:" + "$"+str(match_report_after[1][i][0])+"$"
#         print "Ground truth:" + str(words[i])
#         print "Image Dir before:" + str(match_report_before[2][i])
#         print "Image Dir after:" + str(match_report_after[2][i])
#         print "Distance before:" + str(match_report_before[4][i])
#         print "Distance after:" + str(match_report_after[4][i])
#         print "------------------------------------------------------------------------"
#         print "------------------------------------------------------------------------"
        
print count
        

In [None]:
print "Incorrectly classified before and Correctly classified after"
import matplotlib.pyplot as plt
count = 0
for i in range(len(words)):
    if match_report_before[5][i] == 0 \
    and match_report_after[5][i] == 1:
        count += 1
#         print "************************************************************************"
#         print "************************************************************************"
#         q = np.transpose(images_before[i],(1,2,0))
#         q1 = np.transpose(images_after[i],(1,2,0))
#         plt.imshow(q)
#         plt.show()
#         plt.imshow(q1)
#         plt.show()
#         print "Matched before: "+"$"+str(match_report_before[1][i][0])+"$"
#         print "Matched after:" + "$"+str(match_report_after[1][i][0])+"$"
#         print "Ground truth:" + str(words[i])
#         print "Image Dir before:" + str(match_report_before[2][i])
#         print "Image Dir after:" + str(match_report_after[2][i])
#         print "Distance before:" + str(match_report_before[4][i])
#         print "Distance after:" + str(match_report_after[4][i])
#         print "------------------------------------------------------------------------"
#         print "------------------------------------------------------------------------"
        
print count   

In [None]:
print "Incorrectly classified before and Almost correctly classified after"
import matplotlib.pyplot as plt
count = 0
for i in range(len(words)):
    if match_report_before[5][i] == 0 \
    and match_report_after[5][i] == 2:
        count += 1
#         print "************************************************************************"
#         print "************************************************************************"
#         q = np.transpose(images_before[i],(1,2,0))
#         q1 = np.transpose(images_after[i],(1,2,0))
#         plt.imshow(q)
#         plt.show()
#         plt.imshow(q1)
#         plt.show()
#         print "Matched before: "+"$"+str(match_report_before[1][i][0])+"$"
#         print "Matched after:" + "$"+str(match_report_after[1][i][0])+"$"
#         print "Ground truth:" + str(words[i])
#         print "Image Dir before:" + str(match_report_before[2][i])
#         print "Image Dir after:" + str(match_report_after[2][i])
#         print "Distance before:" + str(match_report_before[4][i])
#         print "Distance after:" + str(match_report_after[4][i])
#         print "------------------------------------------------------------------------"
#         print "------------------------------------------------------------------------"
        
print count

In [None]:
print "Correctly classified before and Incorrectly classified after"
import matplotlib.pyplot as plt
count = 0
for i in range(len(words)):
    if match_report_before[5][i] == 1 \
    and match_report_after[5][i] == 0:
        count += 1
        print "************************************************************************"
        print "************************************************************************"
        q = np.transpose(images_before[i],(1,2,0))
        q1 = np.transpose(images_after[i],(1,2,0))
        plt.imshow(q)
        plt.show()
        plt.imshow(q1)
        plt.show()
        print "Matched before: "+"$"+str(match_report_before[1][i][0])+"$"
        print "Matched after:" + "$"+str(match_report_after[1][i][0])+"$"
        print "Ground truth:" + str(words[i])
        print "Image Dir before:" + str(match_report_before[2][i])
        print "Image Dir after:" + str(match_report_after[2][i])
        print "Distance before:" + str(match_report_before[4][i])
        print "Distance after:" + str(match_report_after[4][i])
        print "------------------------------------------------------------------------"
        print "------------------------------------------------------------------------"
        
print count

In [None]:
print "Correctly classified before and Correctly classified after"
import matplotlib.pyplot as plt
qualified_ids = match_report_before[2]
ground_truth = match_report_before[6]
count = 0
for i in range(len(qualified_ids)):
    if match_report_before[5][i] == 1 \
    and match_report_after[5][i] == 1:
        count += 1
#         print "************************************************************************"
#         print "************************************************************************"
#         q = np.transpose(images_before[i],(1,2,0))
#         q1 = np.transpose(images_after[i],(1,2,0))
#         plt.imshow(q)
#         plt.show()
#         plt.imshow(q1)
#         plt.show()
#         print "Matched before: "+"$"+str(match_report_before[1][i][0])+"$"
#         print "Matched after:" + "$"+str(match_report_after[1][i][0])+"$"
#         print "Ground truth:" + str(words[i])
#         print "Image Dir before:" + str(match_report_before[2][i])
#         print "Image Dir after:" + str(match_report_after[2][i])
#         print "Distance before:" + str(match_report_before[4][i])
#         print "Distance after:" + str(match_report_after[4][i])
#         print "------------------------------------------------------------------------"
#         print "------------------------------------------------------------------------"
        
print count

In [None]:
print "Correctly classified before and Almost classified after"
import matplotlib.pyplot as plt
count = 0
for i in range(len(words)):
    if match_report_before[5][i] == 1 \
    and match_report_after[5][i] == 2:
        count += 1
#         print "************************************************************************"
#         print "************************************************************************"
#         q = np.transpose(images_before[i],(1,2,0))
#         q1 = np.transpose(images_after[i],(1,2,0))
#         plt.imshow(q)
#         plt.show()
#         plt.imshow(q1)
#         plt.show()
#         print "Matched before: "+"$"+str(match_report_before[1][i][0])+"$"
#         print "Matched after:" + "$"+str(match_report_after[1][i][0])+"$"
#         print "Ground truth:" + str(words[i])
#         print "Image Dir before:" + str(match_report_before[2][i])
#         print "Image Dir after:" + str(match_report_after[2][i])
#         print "Distance before:" + str(match_report_before[4][i])
#         print "Distance after:" + str(match_report_after[4][i])
#         print "------------------------------------------------------------------------"
#         print "------------------------------------------------------------------------"
        
print count

In [None]:
print "Almost classified before and In-correctly classified after"
import matplotlib.pyplot as plt
count = 0
for i in range(len(words)):
    if match_report_before[5][i] == 2 \
    and match_report_after[5][i] == 0:
        count += 1
#         print "************************************************************************"
#         print "************************************************************************"
#         q = np.transpose(images_before[i],(1,2,0))
#         q1 = np.transpose(images_after[i],(1,2,0))
#         plt.imshow(q)
#         plt.show()
#         plt.imshow(q1)
#         plt.show()
#         print "Image Index: " + str(i)
#         print "Matched before: "+"$"+str(match_report_before[1][i][0])+"$"
#         print "Matched after:" + "$"+str(match_report_after[1][i][0])+"$"
#         print "Ground truth:" + str(words[i])
#         print "Image Dir before:" + str(match_report_before[2][i])
#         print "Image Dir after:" + str(match_report_after[2][i])
#         print "Distance before:" + str(match_report_before[4][i])
#         print "Distance after:" + str(match_report_after[4][i])
#         print "\nAll Distances Before:" + str(match_report_before[6][i])
#         print "\nAll Distances After:" + str(match_report_after[6][i])
#         print "------------------------------------------------------------------------"
#         print "------------------------------------------------------------------------"
        
print count

In [None]:
print "Almost classified before and Correctly classified after"
import matplotlib.pyplot as plt
qualified_ids = match_report_before[2]
ground_truth = match_report_before[6]
count = 0
for i in range(len(qualified_ids)):
    if match_report_before[5][i] == 2 \
    and match_report_after[5][i] == 1:
        count += 1
#         print "************************************************************************"
#         print "************************************************************************"
#         q = np.transpose(images_before[qualified_ids[i]],(1,2,0))
#         q1 = np.transpose(images_after[i],(1,2,0))
#         plt.imshow(q)
#         plt.show()
#         plt.imshow(q1)
#         plt.show()
#         print "Matched before: "+"$"+str(match_report_before[1][i][0])+"$"
#         print "Matched after:" + "$"+str(match_report_after[1][i][0])+"$"
#         print "Ground truth:" + str(ground_truth[i])
#         print "Image Dir before:" + str(match_report_before[3][i])
#         print "Image Dir after:" + str(match_report_after[3][i])
#         print "Distance before:" + str(match_report_before[7][i])
#         print "Distance after:" + str(match_report_after[7][i])
#         print "------------------------------------------------------------------------"
#         print "------------------------------------------------------------------------"
        
print count

In [None]:
print "Almost classified before and Almost classified after"
import matplotlib.pyplot as plt
qualified_ids = match_report_before[2]
ground_truth = match_report_before[6]
count = 0
for i in range(len(qualified_ids)):
    if match_report_before[5][i] == 2 \
    and match_report_after[5][i] == 2:
        count += 1
#         print "************************************************************************"
#         print "************************************************************************"
#         q = np.transpose(images_before[qualified_ids[i]],(1,2,0))
#         q1 = np.transpose(images_after[i],(1,2,0))
#         plt.imshow(q)
#         plt.show()
#         plt.imshow(q1)
#         plt.show()
#         print "Matched before: "+"$"+str(match_report_before[1][i][0])+"$"
#         print "Matched after:" + "$"+str(match_report_after[1][i][0])+"$"
#         print "Ground truth:" + str(ground_truth[i])
#         print "Image Dir before:" + str(match_report_before[3][i])
#         print "Image Dir after:" + str(match_report_after[3][i])
#         print "Distance before:" + str(match_report_before[7][i])
#         print "Distance after:" + str(match_report_after[7][i])
#         print "------------------------------------------------------------------------"
#         print "------------------------------------------------------------------------"
        
print count