# OCR with CNN

OCR using a hand-written convolution step followed by a Flux neural network.

## Dependencies

In [1]:
using DSP
using ImageView
using Flux
using Images
using MLDatasets
using Statistics
using FFTW

## Convolution

![eq](https://i.ibb.co/BBBTg6y/eq.png)

$$ a = \begin{bmatrix} 
       a_{0}  \\
       a_{1}  \\
        ...   \\
        a_{n-2} \\
        a_{n-1}
     \end{bmatrix} $$
     
 $$ \hat{a} = \begin{bmatrix} 
       a_{0}  \\
       a_{1}  \\
        ...   \\
        a_{n-1} \\
        0 \\
        ... \\
        0 \\
     \end{bmatrix} $$



where *F* is the Fourier matrix and $\hat{a}$ is $a$ padded with trailing zeros.


Before convolution  

![first](https://i.ibb.co/vqMhS38/Selection-002.png)

After applying the Sobel and emboss filters

![second](https://i.ibb.co/WP99t1j/Selection-005.png)

In [2]:
"""
    my_conv2(A::StridedMatrix{T}, B::StridedMatrix{T}) where T

Compute the full 2-D convolution of `A` and `B` with the FFT.

Both inputs are zero-padded to `size(A) .+ size(B) .- 1`, transformed, multiplied
element-wise in the frequency domain and transformed back. When `T <: Real` only the
real part of the result is returned; otherwise the complex result is returned as is.

The implementation is taken from the docs (per the original author's note).
"""
function my_conv2(A::StridedMatrix{T}, B::StridedMatrix{T}) where T
    (rows_a, cols_a) = size(A)
    (rows_b, cols_b) = size(B)
    out_dims = (rows_a + rows_b - 1, cols_a + cols_b - 1)

    # Embed each operand in the top-left corner of a zero matrix of the output size.
    padded_a = zeros(T, out_dims)
    padded_b = zeros(T, out_dims)
    padded_a[1:rows_a, 1:cols_a] = A
    padded_b[1:rows_b, 1:cols_b] = B

    # One plan serves both transforms because the padded operands share a shape.
    forward = plan_fft(padded_a)
    spectrum = (forward * padded_a) .* (forward * padded_b)
    convolved = ifft(spectrum)

    return T <: Real ? real(convolved) : convolved
end

my_conv2 (generic function with 1 method)

In [3]:
"""
    my_meanpool(A::AbstractArray, chunk_size::Int) -> Array{Float64, 2}

Downsample the matrix `A` by averaging non-overlapping `chunk_size`×`chunk_size` windows.

The result has `size(A, 1) ÷ chunk_size` rows and `size(A, 2) ÷ chunk_size` columns.
Trailing rows and columns that do not fill a complete window are dropped.

# Throws
- `ArgumentError` if `chunk_size` is smaller than 1.
"""
function my_meanpool(A::AbstractArray, chunk_size::Int)::Array{Float64, 2}
    chunk_size >= 1 ||
        throw(ArgumentError("chunk_size must be positive, got $chunk_size"))

    dims = size(A)
    nrows = dims[1] ÷ chunk_size
    ncols = dims[2] ÷ chunk_size
    result = zeros(nrows, ncols)

    # Column-major traversal: the inner loop runs over the first index.
    for j in 1:ncols, i in 1:nrows
        rows = ((i - 1) * chunk_size + 1):(i * chunk_size)
        cols = ((j - 1) * chunk_size + 1):(j * chunk_size)
        # A view avoids copying every window before averaging it.
        result[i, j] = mean(view(A, rows, cols))
    end
    return result
end

my_meanpool (generic function with 1 method)

In [4]:
"""
    convolution(mat::AbstractArray, pool_ker::Int = 2)

Filter `mat` with the `Emboss` and `Sobel` kernels, stack the two responses vertically
(emboss on top) and mean-pool the stack with a `pool_ker`×`pool_ker` window.

No activation function (e.g. ReLU) is applied between filtering and pooling.
"""
function convolution(mat::AbstractArray, pool_ker::Int = 2)::AbstractArray
    # Apply both filters and concatenate their responses along the first dimension.
    responses = vcat(conv2(Emboss, mat), conv2(Sobel, mat))

    # Pooling.
    return my_meanpool(responses, pool_ker)
end

convolution (generic function with 2 methods)

### Convolution kernels

In [5]:
# 3×3 convolution kernels, written one matrix row per line.

# Sharpening kernel.
Sharp = [
     0.0  -1.0   0.0
    -1.0   5.0  -1.0
     0.0  -1.0   0.0
]

# Edge-detection kernel.
Edge = [
    -1.0  -1.0  -1.0
    -1.0   8.0  -1.0
    -1.0  -1.0  -1.0
]

# Emboss kernel (used by `convolution`).
Emboss = [
    -2.0  -1.0   0.0
    -1.0   1.0   1.0
     0.0   1.0   2.0
]

# Sobel kernel (used by `convolution`).
Sobel = [
     1.0   2.0   1.0
     0.0   0.0   0.0
    -1.0  -2.0  -1.0
]

3×3 Array{Float64,2}:
  1.0   2.0   1.0
  0.0   0.0   0.0
 -1.0  -2.0  -1.0

## Data

A basic example of why convolution is needed before dimension reduction.

Full image (28x28)

![first](https://i.ibb.co/vqMhS38/Selection-002.png)

After dimension reduction

![second](https://i.ibb.co/HDnZK2q/Selection-003.png)

The circle below is unrecognisable.

Convolution applied with the same pooling

![third](https://i.ibb.co/hZsZ2GS/Selection-004.png)

Size (15x30)

In [7]:
"""
    get_minst(index::Int)

Return image `index` of the MNIST training set as a transposed `Array{Float64,2}`.

The tensor returned by `MNIST.traintensor(index)` is converted to `Float64` and then
transposed (presumably so that the digit is displayed upright — verify).
"""
function get_minst(index::Int)::AbstractArray
    pixels = convert(Array{Float64,2}, MNIST.traintensor(index))
    # `permutedims` materialises the transpose as a plain `Array`.
    return permutedims(pixels)
end

"""
    get_test(index::Int)

Return image `index` of the MNIST test set as a transposed `Array{Float64,2}`.

The tensor returned by `MNIST.testtensor(index)` is converted to `Float64` and then
transposed (presumably so that the digit is displayed upright — verify).
"""
function get_test(index::Int)::AbstractArray
    pixels = convert(Array{Float64,2}, MNIST.testtensor(index))
    # `permutedims` materialises the transpose as a plain `Array`.
    return permutedims(pixels)
end

get_test (generic function with 1 method)

In [159]:
# Display test image 82 unmodified.
get_test(82) |> imshow;

In [139]:
# Display test image 82 after 3×3 mean pooling only (no convolution).
my_meanpool(get_test(82), 3) |> imshow;

In [160]:
# Display test image 82 after the convolution step with a 3×3 pooling window.
example = convolution(get_test(82), 3)
example |> imshow;

In [151]:
# Training set: one (feature vector, one-hot label) pair per training image.
# The element type is spelled out so the container is not a `Vector{Any}`.
my_data = Tuple{Vector{Float64}, Vector{Float64}}[]

for i in 1:1000
    # Features are the flattened output of the convolution + pooling step.
    features = vec(convolution(get_minst(i)))

    # One-hot encode the label: label `d` sets entry `d + 1` of a length-10 vector.
    label = zeros(10)
    label[MNIST.trainlabels(i) + 1] = 1

    push!(my_data, (features, label))
end

## Neural Network

In [152]:
# Model: a single dense layer (450 inputs, 10 outputs) followed by softmax.
# NOTE(review): 450 must equal the length of `vec(convolution(img))` — confirm.
chain = Chain(Dense(450, 10), softmax)

# Mean squared error between the model output and the target vector.
function loss(x, y)
    return Flux.mse(chain(x), y)
end

# Stochastic gradient descent over the model parameters.
opt = SGD(params(chain))

# Training: 1000 passes over `my_data`.
for epoch in 1:1000
    Flux.train!(loss, my_data, opt)
end

In [157]:
# random try
digit = get_test(51) #index starts at 0, e.g.: 51 40 49 101
conv_digit = convolution(digit)
imshow(conv_digit)
chain(vec(conv_digit))

Tracked 10-element Array{Float64,1}:
 0.0003227535409099724 
 2.2447519860957771e-10
 3.532314616955245e-7  
 4.874306093847925e-6  
 7.280765383988794e-8  
 0.006814624845830757  
 0.9928553416917468    
 1.8223463821808493e-14
 1.9573360682013993e-6 
 2.2015741465956285e-8 

In [158]:
# Label of test image 51, for comparison with the model output above.
MNIST.testlabels(51)

6

## On test set

In [156]:
# Number of test images to evaluate; the success rate below is derived from it.
n_test = 1000

# Count the correct predictions. `count` replaces a `guessed += 1` inside a top-level
# loop, which only works under the soft-scope rules of notebooks and the REPL.
guessed = count(1:n_test) do i
    scores = chain(vec(convolution(get_test(i))))
    # Predicted digit: index of the first maximal score, shifted to the 0-based labels.
    argmax(scores) - 1 == MNIST.testlabels(i)
end

println("Success rate is: ", 100 * guessed / n_test, "%")

Success rate is: 82.4%
