-
Notifications
You must be signed in to change notification settings - Fork 1
/
create_Eq_Assembler_Test_Data.m
52 lines (40 loc) · 1.54 KB
/
create_Eq_Assembler_Test_Data.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
% Jim Brewer
% EE368
% Project
% Nov 11, 2015
% Start from a clean workspace. clearvars removes variables only, whereas
% "clear all" also purges functions, MEX files and globals from memory,
% which slows down subsequent calls and is not needed by this script.
clearvars;
%% Create Test Sets with "true" characters for equation/code assembly
% Data Structure:
% Each entry in equations(i) is a structure containing the segmentation
% data from fn_segment in ".characters" and the filename of the original
% equation in ".filename"
% Within each "characters(i)" is (all x,y w.r.t. original image, origin UL):
% .centroid (1x2 vector of x,y position of centroid of character)
% .boundingbox (1x4 vector of x,y coord of upper left and x,y lengths)
% .img (matrix image of extracted character)
% .char (string of actual letter or character)
% To extract single equation for testing (as if received from fn_segment),
% run: eq = equations(i).characters;
% Load every JPEG equation image in eqDir, binarize it with Otsu's
% threshold, and store the segmentation result from fn_segment together
% with the source filename in the struct array "equations".
% Note: the variables are deliberately not called "path" or "eq", since
% both names shadow MATLAB built-in functions.
eqDir = 'LaTeX Equations/';
files = dir(fullfile(eqDir, '*.jpg'));
numFiles = numel(files);
if numFiles == 0
    warning('EqTestData:noImages', 'No .jpg files found in "%s".', eqDir);
end
% Preallocate so "equations" always exists (1x0 when no images are found)
% and is not regrown on every iteration.
equations = repmat(struct('filename', '', 'characters', []), 1, numFiles);
for i = 1:numFiles
    img = imread(fullfile(eqDir, files(i).name));
    % rgb2gray rejects images that are already grayscale (MxN), so only
    % convert true 3-channel input.
    if size(img, 3) == 3
        img = rgb2gray(img);
    end
    eqImg = im2double(img);
    % Threshold image using Otsu's method: pixels above the threshold
    % become 1, all others 0 (kept as double, as before).
    th = graythresh(eqImg);
    eqBin = double(eqImg > th);
    equations(i).filename = files(i).name;
    equations(i).characters = fn_segment(eqBin);
end
%% TODO - Manual Entry of correct Characters for each equation
% Currently use the letter, number, or English spelling of each symbol
% Show every segmented character of one equation in a 2-row grid so the
% correct label for each character can be determined by eye.
figure(1);
j = 10;  % index into "equations" of the equation to inspect
% Fail with a clear message instead of an opaque indexing error when fewer
% equations were loaded than the selected index.
if j > numel(equations)
    error('EqTestData:badIndex', ...
        'Equation index %d exceeds the %d equation(s) loaded.', ...
        j, numel(equations));
end
% numel works whether the character struct array is a row or a column.
numChars = numel(equations(j).characters);
numCols = ceil(numChars/2);
for i = 1:numChars
    subplot(2, numCols, i);
    imshow(equations(j).characters(i).img);
end
% Save out structure of equation data
% save('EquationTestData.mat','equations');