# Ground Truth

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle

# Default every figure in this notebook to an 8x8 inch canvas.
plt.rc('figure', figsize=(8, 8))

### Pascal VOC

In [None]:
# Bind GTUtility to the Pascal VOC implementation and build the utility
# from the local VOC2007 data directory.
# NOTE: every dataset section re-imports a different `GTUtility` and rebinds
# `gt_util`, so the cells of a section must be run together.
from data_voc import GTUtility
gt_util = GTUtility('data/VOC2007/')

In [None]:
# Fetch one sample as (index, image, ground truth) and show the image;
# plot_gt presumably draws the annotations onto the current axes.
idx, img, gt = gt_util.sample()
plt.imshow(img)
gt_util.plot_gt(gt)
plt.show()

In [None]:
# Print the utility's string representation (presumably a dataset summary).
print(gt_util)

### MS COCO

In [None]:
# Rebind GTUtility to the MS COCO implementation and build the utility.
# validation=True presumably selects the validation split -- confirm in
# data_coco.
from data_coco import GTUtility
gt_util = GTUtility('data/COCO', validation=True)

In [None]:
# Fetch one COCO sample as (index, image, ground truth) and show the image;
# plot_gt presumably draws the annotations onto the current axes.
idx, img, gt = gt_util.sample()
plt.imshow(img)
gt_util.plot_gt(gt)
plt.show()

In [None]:
# Headline numbers: sample count, object count and their ratio.
print('image %i' % gt_util.num_samples)
print('objects %i' % gt_util.num_objects)
print('objects per image %.2f' % (gt_util.num_objects / gt_util.num_samples))

# Horizontal bar chart of gt_util.stats, one bar per class.
plt.figure(figsize=(12, 16))
ax = plt.gca()
y = range(gt_util.num_classes)
plt.barh(y, gt_util.stats, align='center')
plt.yticks(y, gt_util.classes)
# Leave one bar-width of padding below the first and above the last bar.
plt.ylim(y[0] - 1, y[-1] + 1)
plt.grid()
# Write each bar's value just to the right of its end.
for row, count in enumerate(gt_util.stats):
    ax.text(count + 200, row - .25, int(count), color='b')
plt.show()

In [None]:
# Convert the COCO utility via convert_to_voc().
# NOTE: this rebinds `gt`, which until now held the ground truth of the
# sample fetched above; from here on `gt` is the converted utility object.
gt = gt_util.convert_to_voc()

In [None]:
# Split the converted utility in two; 0.6 is presumably the fraction of
# samples assigned to the first part -- confirm against GTUtility.split.
gt1, gt2 = gt.split(0.6)

In [None]:
# Merge the two parts back together; used below as a sanity check on the
# sample count.
gt3 = gt1.merge(gt2)

In [None]:
# Take a subset of the first part; (100, 200) is presumably a start/end
# sample index range -- confirm bounds handling in GTUtility.subset.
gt4 = gt1.subset(100,200)

In [None]:
# Display the sample counts of the full, re-merged and subset utilities
# side by side (shown as the cell's output value).
gt.num_samples, gt3.num_samples, gt4.num_samples

### ICDAR2015 FST

In [None]:
# Rebind GTUtility to the ICDAR2015 FST implementation and build two
# utilities from the same directory: one with default options and one with
# test=True, polygon=False (presumably the test split with non-polygon
# boxes -- confirm in data_icdar2015fst).
from data_icdar2015fst import GTUtility
gt_util = GTUtility('data/ICDAR2015_FST/')
gt_util_test = GTUtility('data/ICDAR2015_FST/', test=True, polygon=False)

In [None]:
# Fetch one sample, show the image with its ground truth (labels hidden),
# then print the text entry stored for that sample index.
idx, img, gt = gt_util.sample() # TODO: sample index 83 appears to have broken ground truth; investigate
plt.imshow(img)
gt_util.plot_gt(gt, show_labels=False)
plt.show()
print(gt_util.text[idx])

In [None]:
# Sample counts of the default and the test=True utilities.
print(gt_util.num_samples, gt_util_test.num_samples)

### ICDAR2015 IST

In [None]:
# Rebind GTUtility to the ICDAR2015 IST implementation and build the
# default utility plus one constructed with test=True (presumably the test
# split -- confirm in data_icdar2015ist).
from data_icdar2015ist import GTUtility
gt_util = GTUtility('data/ICDAR2015_IST/')
gt_util_test = GTUtility('data/ICDAR2015_IST/', test=True)

In [None]:
# Fetch one sample, show the image with its ground truth (labels hidden),
# then print the text entry stored for that sample index.
idx, img, gt = gt_util.sample()
plt.imshow(img)
gt_util.plot_gt(gt, show_labels=False)
plt.show()
print(gt_util.text[idx])

In [None]:
# Sample counts of the default and the test=True utilities.
print(gt_util.num_samples, gt_util_test.num_samples)

### MSRA TD500

In [None]:
# Rebind GTUtility to the MSRA TD500 implementation and build the utility
# from the local data directory.
from data_td500 import GTUtility
gt_util = GTUtility('data/MSRA-TD500/')

In [None]:
# Fetch one sample and show the image with its ground truth, labels hidden.
idx, img, gt = gt_util.sample()
plt.imshow(img)
gt_util.plot_gt(gt, show_labels=False)
plt.show()

### SVT

In [None]:
# Rebind GTUtility to the SVT implementation and build the utility from the
# local data directory.
from data_svt import GTUtility
gt_util = GTUtility('data/SVT/')

In [None]:
# Fetch one sample and show the image with its ground truth, labels hidden.
idx, img, gt = gt_util.sample()
plt.imshow(img)
gt_util.plot_gt(gt, show_labels=False)
plt.show()

### COCO-Text

In [None]:
# Rebind GTUtility to the COCO-Text implementation and build the utility
# with polygon=False (presumably non-polygon boxes -- confirm in
# data_cocotext).
from data_cocotext import GTUtility
gt_util = GTUtility('data/COCO-Text', polygon=False)

In [None]:
# Fetch one sample and show the image with its ground truth, labels hidden.
idx, img, gt = gt_util.sample()
plt.imshow(img)
gt_util.plot_gt(gt, show_labels=False)
plt.show()

### SynthText

In [None]:
# for TextBoxes
# Build the SynthText utility with max_slope=10 (presumably a limit on box
# slope, cf. "horizontal10" in the file name -- confirm in data_synthtext)
# and cache the resulting object on disk.

from data_synthtext import GTUtility
gt_util = GTUtility('data/SynthText/', max_slope=10)

file_name = 'gt_util_synthtext_horizontal10.pkl'
# Write through a context manager so the file is flushed and closed as soon
# as the dump finishes instead of whenever the handle gets garbage-collected.
with open(file_name, 'wb') as f:
    pickle.dump(gt_util, f)
# To reuse the cached object instead of rebuilding it, run this instead of
# the construction and dump above:
#with open(file_name, 'rb') as f:
#    gt_util = pickle.load(f)

print(gt_util)

In [None]:
# for SegLink and TextBoxes++
# Build the SynthText utility with polygon=True (presumably polygon ground
# truth -- confirm in data_synthtext) and cache the resulting object on disk.

from data_synthtext import GTUtility
gt_util = GTUtility('data/SynthText/', polygon=True)

file_name = 'gt_util_synthtext_seglink.pkl'
# Write through a context manager so the file is flushed and closed as soon
# as the dump finishes instead of whenever the handle gets garbage-collected.
with open(file_name, 'wb') as f:
    pickle.dump(gt_util, f)
# To reuse the cached object instead of rebuilding it, run this instead of
# the construction and dump above:
#with open(file_name, 'rb') as f:
#    gt_util = pickle.load(f)

print(gt_util)

In [None]:
# Carve off a very small part of the SynthText utility (0.001 is presumably
# the fraction of samples that goes into the first part -- confirm against
# GTUtility.split) and cache it on disk.
gtu1, gtu2 = gt_util.split(0.001)

file_name = 'gt_util_synthtext_seglink_xxs.pkl'
# Write through a context manager so the file is flushed and closed as soon
# as the dump finishes instead of whenever the handle gets garbage-collected.
with open(file_name, 'wb') as f:
    pickle.dump(gtu1, f)
# To reuse the cached object instead of re-splitting, run this instead of
# the split and dump above:
#with open(file_name, 'rb') as f:
#    gtu1 = pickle.load(f)

In [None]:
# Fetch one sample and show the image with its ground truth, labels hidden.
idx, img, gt = gt_util.sample()
plt.imshow(img)
gt_util.plot_gt(gt, show_labels=False)
plt.show()